superlocalmemory 2.7.6 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +120 -155
- package/README.md +115 -89
- package/api_server.py +2 -12
- package/docs/PATTERN-LEARNING.md +64 -199
- package/docs/example_graph_usage.py +4 -6
- package/install.sh +59 -0
- package/mcp_server.py +83 -7
- package/package.json +1 -8
- package/scripts/generate-thumbnails.py +3 -5
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-show-patterns/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/agent_registry.py +7 -18
- package/src/auth_middleware.py +3 -5
- package/src/auto_backup.py +3 -7
- package/src/behavioral/__init__.py +49 -0
- package/src/behavioral/behavioral_listener.py +203 -0
- package/src/behavioral/behavioral_patterns.py +275 -0
- package/src/behavioral/cross_project_transfer.py +206 -0
- package/src/behavioral/outcome_inference.py +194 -0
- package/src/behavioral/outcome_tracker.py +193 -0
- package/src/behavioral/tests/__init__.py +4 -0
- package/src/behavioral/tests/test_behavioral_integration.py +108 -0
- package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
- package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
- package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
- package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
- package/src/behavioral/tests/test_outcome_inference.py +107 -0
- package/src/behavioral/tests/test_outcome_tracker.py +96 -0
- package/src/cache_manager.py +4 -6
- package/src/compliance/__init__.py +48 -0
- package/src/compliance/abac_engine.py +149 -0
- package/src/compliance/abac_middleware.py +116 -0
- package/src/compliance/audit_db.py +215 -0
- package/src/compliance/audit_logger.py +148 -0
- package/src/compliance/retention_manager.py +289 -0
- package/src/compliance/retention_scheduler.py +186 -0
- package/src/compliance/tests/__init__.py +4 -0
- package/src/compliance/tests/test_abac_enforcement.py +95 -0
- package/src/compliance/tests/test_abac_engine.py +124 -0
- package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
- package/src/compliance/tests/test_audit_db.py +123 -0
- package/src/compliance/tests/test_audit_logger.py +98 -0
- package/src/compliance/tests/test_mcp_audit.py +128 -0
- package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
- package/src/compliance/tests/test_retention_manager.py +131 -0
- package/src/compliance/tests/test_retention_scheduler.py +99 -0
- package/src/db_connection_manager.py +2 -12
- package/src/embedding_engine.py +61 -669
- package/src/embeddings/__init__.py +47 -0
- package/src/embeddings/cache.py +70 -0
- package/src/embeddings/cli.py +113 -0
- package/src/embeddings/constants.py +47 -0
- package/src/embeddings/database.py +91 -0
- package/src/embeddings/engine.py +247 -0
- package/src/embeddings/model_loader.py +145 -0
- package/src/event_bus.py +3 -13
- package/src/graph/__init__.py +36 -0
- package/src/graph/build_helpers.py +74 -0
- package/src/graph/cli.py +87 -0
- package/src/graph/cluster_builder.py +188 -0
- package/src/graph/cluster_summary.py +148 -0
- package/src/graph/constants.py +47 -0
- package/src/graph/edge_builder.py +162 -0
- package/src/graph/entity_extractor.py +95 -0
- package/src/graph/graph_core.py +226 -0
- package/src/graph/graph_search.py +231 -0
- package/src/graph/hierarchical.py +207 -0
- package/src/graph/schema.py +99 -0
- package/src/graph_engine.py +45 -1451
- package/src/hnsw_index.py +3 -7
- package/src/hybrid_search.py +36 -683
- package/src/learning/__init__.py +27 -12
- package/src/learning/adaptive_ranker.py +50 -12
- package/src/learning/cross_project_aggregator.py +2 -12
- package/src/learning/engagement_tracker.py +2 -12
- package/src/learning/feature_extractor.py +175 -43
- package/src/learning/feedback_collector.py +7 -12
- package/src/learning/learning_db.py +180 -12
- package/src/learning/project_context_manager.py +2 -12
- package/src/learning/source_quality_scorer.py +2 -12
- package/src/learning/synthetic_bootstrap.py +2 -12
- package/src/learning/tests/__init__.py +2 -0
- package/src/learning/tests/test_adaptive_ranker.py +2 -6
- package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
- package/src/learning/tests/test_aggregator.py +2 -6
- package/src/learning/tests/test_auto_retrain_v28.py +35 -0
- package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
- package/src/learning/tests/test_feature_extractor_v28.py +93 -0
- package/src/learning/tests/test_feedback_collector.py +2 -6
- package/src/learning/tests/test_learning_db.py +2 -6
- package/src/learning/tests/test_learning_db_v28.py +110 -0
- package/src/learning/tests/test_learning_init_v28.py +48 -0
- package/src/learning/tests/test_outcome_signals.py +48 -0
- package/src/learning/tests/test_project_context.py +2 -6
- package/src/learning/tests/test_schema_migration.py +319 -0
- package/src/learning/tests/test_signal_inference.py +11 -13
- package/src/learning/tests/test_source_quality.py +2 -6
- package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
- package/src/learning/tests/test_workflow_miner.py +2 -6
- package/src/learning/workflow_pattern_miner.py +2 -12
- package/src/lifecycle/__init__.py +54 -0
- package/src/lifecycle/bounded_growth.py +239 -0
- package/src/lifecycle/compaction_engine.py +226 -0
- package/src/lifecycle/lifecycle_engine.py +302 -0
- package/src/lifecycle/lifecycle_evaluator.py +225 -0
- package/src/lifecycle/lifecycle_scheduler.py +130 -0
- package/src/lifecycle/retention_policy.py +285 -0
- package/src/lifecycle/tests/__init__.py +4 -0
- package/src/lifecycle/tests/test_bounded_growth.py +193 -0
- package/src/lifecycle/tests/test_compaction.py +179 -0
- package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
- package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
- package/src/lifecycle/tests/test_mcp_compact.py +149 -0
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
- package/src/lifecycle/tests/test_retention_policy.py +162 -0
- package/src/mcp_tools_v28.py +280 -0
- package/src/memory-profiles.py +2 -12
- package/src/memory-reset.py +2 -12
- package/src/memory_compression.py +2 -12
- package/src/memory_store_v2.py +76 -20
- package/src/migrate_v1_to_v2.py +2 -12
- package/src/pattern_learner.py +29 -975
- package/src/patterns/__init__.py +24 -0
- package/src/patterns/analyzers.py +247 -0
- package/src/patterns/learner.py +267 -0
- package/src/patterns/scoring.py +167 -0
- package/src/patterns/store.py +223 -0
- package/src/patterns/terminology.py +138 -0
- package/src/provenance_tracker.py +4 -14
- package/src/query_optimizer.py +4 -6
- package/src/rate_limiter.py +2 -6
- package/src/search/__init__.py +20 -0
- package/src/search/cli.py +77 -0
- package/src/search/constants.py +26 -0
- package/src/search/engine.py +239 -0
- package/src/search/fusion.py +122 -0
- package/src/search/index_loader.py +112 -0
- package/src/search/methods.py +162 -0
- package/src/search_engine_v2.py +4 -6
- package/src/setup_validator.py +7 -13
- package/src/subscription_manager.py +2 -12
- package/src/tree/__init__.py +59 -0
- package/src/tree/builder.py +183 -0
- package/src/tree/nodes.py +196 -0
- package/src/tree/queries.py +252 -0
- package/src/tree/schema.py +76 -0
- package/src/tree_manager.py +10 -711
- package/src/trust/__init__.py +45 -0
- package/src/trust/constants.py +66 -0
- package/src/trust/queries.py +157 -0
- package/src/trust/schema.py +95 -0
- package/src/trust/scorer.py +299 -0
- package/src/trust/signals.py +95 -0
- package/src/trust_scorer.py +39 -697
- package/src/webhook_dispatcher.py +2 -12
- package/ui/app.js +1 -1
- package/ui/js/agents.js +1 -1
- package/ui_server.py +2 -14
- package/ATTRIBUTION.md +0 -140
- package/docs/ARCHITECTURE-V2.5.md +0 -190
- package/docs/GRAPH-ENGINE.md +0 -503
- package/docs/architecture-diagram.drawio +0 -405
- package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""Model loading and backend encoder methods for EmbeddingEngine.
|
|
5
|
+
"""
|
|
6
|
+
import time
|
|
7
|
+
import logging
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from embeddings.constants import (
|
|
13
|
+
SENTENCE_TRANSFORMERS_AVAILABLE,
|
|
14
|
+
SKLEARN_AVAILABLE,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ModelLoaderMixin:
    """
    Mixin that handles model initialization and raw encoding backends.

    Expects the host class to have:
    - self.use_transformers: bool
    - self.model_cache_path: Path
    - self.model_name: str
    - self.device: str
    - self.model: Optional[SentenceTransformer]
    - self.dimension: int
    - self.tfidf_vectorizer
    - self.tfidf_fitted: bool
    """

    def _load_model(self):
        """Load sentence transformer model or fallback to TF-IDF.

        When ``self.use_transformers`` is false, or when loading the model
        raises for any reason, the TF-IDF fallback is initialized instead and
        ``self.use_transformers`` is left/set to ``False``.
        """
        if not self.use_transformers:
            logger.warning(
                "sentence-transformers unavailable. Install with: "
                "pip install sentence-transformers"
            )
            self._init_fallback()
            return

        try:
            # Imported lazily so the module can be imported without the
            # optional dependency installed.
            from sentence_transformers import SentenceTransformer

            # Create model cache directory
            self.model_cache_path.mkdir(parents=True, exist_ok=True)

            logger.info(f"Loading model: {self.model_name}")
            start_time = time.time()

            # Load model with local cache
            self.model = SentenceTransformer(
                self.model_name,
                device=self.device,
                cache_folder=str(self.model_cache_path)
            )

            # Get actual dimension
            self.dimension = self.model.get_sentence_embedding_dimension()

            elapsed = time.time() - start_time
            logger.info(
                f"Loaded {self.model_name} ({self.dimension}D) in {elapsed:.2f}s"
            )

        except Exception as e:
            # Best-effort: any load failure downgrades to the TF-IDF backend
            # rather than propagating to the caller.
            logger.error(f"Failed to load sentence transformer: {e}")
            logger.info("Falling back to TF-IDF")
            self.use_transformers = False
            self._init_fallback()

    def _init_fallback(self):
        """Initialize TF-IDF fallback.

        Creates ``self.tfidf_vectorizer`` and sets ``self.dimension`` to 384.
        If scikit-learn is not available, an error is logged and neither
        attribute is touched.
        """
        if not SKLEARN_AVAILABLE:
            logger.error(
                "sklearn unavailable - no fallback available. "
                "Install: pip install scikit-learn"
            )
            return

        from sklearn.feature_extraction.text import TfidfVectorizer

        logger.info("Using TF-IDF fallback (dimension will be dynamic)")
        self.tfidf_vectorizer = TfidfVectorizer(
            max_features=384,  # Match sentence transformer dimension
            stop_words='english',
            ngram_range=(1, 2),
            min_df=1
        )
        self.dimension = 384

    def _encode_transformer(
        self,
        texts: List[str],
        batch_size: int,
        show_progress: bool
    ) -> np.ndarray:
        """Generate embeddings using sentence transformer.

        Args:
            texts: Texts to encode.
            batch_size: Batch size forwarded to ``self.model.encode``.
            show_progress: Whether the model should display a progress bar.

        Returns:
            The array returned by ``self.model.encode`` (not normalized here).

        Raises:
            Exception: Any error raised by the model is logged and re-raised.
        """
        try:
            start_time = time.time()

            embeddings = self.model.encode(
                texts,
                batch_size=batch_size,
                show_progress_bar=show_progress,
                convert_to_numpy=True,
                normalize_embeddings=False  # We'll normalize separately
            )

            elapsed = time.time() - start_time
            rate = len(texts) / elapsed if elapsed > 0 else 0
            logger.debug(f"Encoded {len(texts)} texts in {elapsed:.2f}s ({rate:.0f} texts/sec)")

            return embeddings

        except Exception as e:
            logger.error(f"Transformer encoding failed: {e}")
            raise

    def _encode_tfidf(self, texts: List[str]) -> np.ndarray:
        """Generate embeddings using TF-IDF fallback.

        The vectorizer is fitted on the first non-empty batch it sees; later
        batches are only transformed. The result is zero-padded or truncated
        along axis 1 so it always has ``self.dimension`` columns.

        Args:
            texts: Texts to encode.

        Returns:
            Array with one row per text and ``self.dimension`` columns. An
            empty ``texts`` yields an empty ``(0, self.dimension)`` array.

        Raises:
            Exception: Any vectorizer error is logged and re-raised.
        """
        if len(texts) == 0:
            # Fitting on an empty corpus raises in the vectorizer, while the
            # already-fitted path would return an empty matrix. Return the
            # same empty result in both cases and do not mark as fitted.
            return np.zeros((0, self.dimension))

        try:
            if not self.tfidf_fitted:
                # Fit on first use
                logger.info("Fitting TF-IDF vectorizer...")
                self.tfidf_vectorizer.fit(texts)
                self.tfidf_fitted = True

            embeddings = self.tfidf_vectorizer.transform(texts).toarray()

            # Pad or truncate to target dimension
            if embeddings.shape[1] < self.dimension:
                padding = np.zeros((embeddings.shape[0], self.dimension - embeddings.shape[1]))
                embeddings = np.hstack([embeddings, padding])
            elif embeddings.shape[1] > self.dimension:
                embeddings = embeddings[:, :self.dimension]

            return embeddings

        except Exception as e:
            logger.error(f"TF-IDF encoding failed: {e}")
            raise
|
package/src/event_bus.py
CHANGED
|
@@ -1,16 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
-
Licensed under MIT License
|
|
6
|
-
|
|
7
|
-
Repository: https://github.com/varun369/SuperLocalMemoryV2
|
|
8
|
-
Author: Varun Pratap Bhardwaj (Solution Architect)
|
|
9
|
-
|
|
10
|
-
NOTICE: This software is protected by MIT License.
|
|
11
|
-
Attribution must be preserved in all copies or derivatives.
|
|
12
|
-
"""
|
|
13
|
-
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
14
4
|
"""
|
|
15
5
|
EventBus — Real-time event broadcasting for memory operations.
|
|
16
6
|
|
|
@@ -222,7 +212,7 @@ class EventBus:
|
|
|
222
212
|
payload: Event-specific data (dict, serialized to JSON)
|
|
223
213
|
memory_id: Associated memory ID (if applicable)
|
|
224
214
|
source_agent: Agent that triggered the event
|
|
225
|
-
source_protocol: Protocol used (mcp, cli, rest, python
|
|
215
|
+
source_protocol: Protocol used (mcp, cli, rest, python)
|
|
226
216
|
importance: Event importance 1-10 (affects retention)
|
|
227
217
|
|
|
228
218
|
Returns:
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
3
|
+
"""graph package - Knowledge Graph Clustering for SuperLocalMemory V2
|
|
4
|
+
|
|
5
|
+
Re-exports all public classes, constants, and functions so that
|
|
6
|
+
``from graph import GraphEngine`` (or any other symbol) works.
|
|
7
|
+
"""
|
|
8
|
+
from graph.constants import (
|
|
9
|
+
MAX_MEMORIES_FOR_GRAPH,
|
|
10
|
+
SKLEARN_AVAILABLE,
|
|
11
|
+
IGRAPH_AVAILABLE,
|
|
12
|
+
MEMORY_DIR,
|
|
13
|
+
DB_PATH,
|
|
14
|
+
)
|
|
15
|
+
from graph.entity_extractor import EntityExtractor, ClusterNamer
|
|
16
|
+
from graph.edge_builder import EdgeBuilder
|
|
17
|
+
from graph.cluster_builder import ClusterBuilder
|
|
18
|
+
from graph.graph_core import GraphEngine
|
|
19
|
+
from graph.cli import main
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
# Constants
|
|
23
|
+
"MAX_MEMORIES_FOR_GRAPH",
|
|
24
|
+
"SKLEARN_AVAILABLE",
|
|
25
|
+
"IGRAPH_AVAILABLE",
|
|
26
|
+
"MEMORY_DIR",
|
|
27
|
+
"DB_PATH",
|
|
28
|
+
# Classes
|
|
29
|
+
"EntityExtractor",
|
|
30
|
+
"ClusterNamer",
|
|
31
|
+
"EdgeBuilder",
|
|
32
|
+
"ClusterBuilder",
|
|
33
|
+
"GraphEngine",
|
|
34
|
+
# Functions
|
|
35
|
+
"main",
|
|
36
|
+
]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""Helper functions for the graph build process.
|
|
5
|
+
|
|
6
|
+
Provides sampling and cleanup utilities used during full graph builds.
|
|
7
|
+
"""
|
|
8
|
+
from graph.constants import logger, MAX_MEMORIES_FOR_GRAPH
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def apply_sampling(cursor, memories, active_profile):
    """Cap the memory list used for a graph build.

    When ``memories`` is within ``MAX_MEMORIES_FOR_GRAPH`` it is returned
    unchanged (with a warning once it passes 80% of the cap). Otherwise a new
    sample is queried for ``active_profile``: 60% of the cap from the most
    recently created rows and 40% from the highest importance / access count
    rows, de-duplicated by id in that order.

    Returns a (possibly truncated) list of memory tuples.
    """
    total = len(memories)

    if total <= MAX_MEMORIES_FOR_GRAPH:
        # Under the cap: nothing to sample, but warn when getting close.
        if total > MAX_MEMORIES_FOR_GRAPH * 0.8:
            logger.warning(
                "Approaching graph cap: %d/%d memories (%.0f%%). "
                "Consider running memory compression.",
                total, MAX_MEMORIES_FOR_GRAPH,
                total / MAX_MEMORIES_FOR_GRAPH * 100
            )
        return memories

    logger.warning(
        "Memory count (%d) exceeds graph cap (%d). Using intelligent sampling.",
        total, MAX_MEMORIES_FOR_GRAPH
    )
    recent_quota = int(MAX_MEMORIES_FOR_GRAPH * 0.6)
    important_quota = int(MAX_MEMORIES_FOR_GRAPH * 0.4)

    recent_rows = cursor.execute('''
        SELECT id, content, summary FROM memories
        WHERE profile = ? ORDER BY created_at DESC LIMIT ?
    ''', (active_profile, recent_quota)).fetchall()

    important_rows = cursor.execute('''
        SELECT id, content, summary FROM memories
        WHERE profile = ? ORDER BY importance DESC, access_count DESC LIMIT ?
    ''', (active_profile, important_quota)).fetchall()

    # Keep the first occurrence of each id; recent rows take precedence.
    picked_ids = set()
    unique_rows = []
    for row in recent_rows + important_rows:
        row_id = row[0]
        if row_id in picked_ids:
            continue
        picked_ids.add(row_id)
        unique_rows.append(row)

    memories = unique_rows[:MAX_MEMORIES_FOR_GRAPH]
    logger.info("Sampled %d memories for graph build", len(memories))
    return memories
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def clear_profile_graph_data(cursor, conn, memories, active_profile):
    """Clear existing graph data for a profile's memories.

    Deletes the graph edges and nodes that reference the given memories,
    resets ``cluster_id`` on every memory of ``active_profile``, removes
    clusters that no memory references any more, and commits.

    Args:
        cursor: Open SQLite cursor used for all statements.
        conn: Connection owning ``cursor``; committed at the end.
        memories: Sequence of row tuples whose first element is the memory id.
        active_profile: Profile name whose ``cluster_id`` values are reset.
    """
    # The edge DELETE binds every id twice. Batching keeps each statement
    # well below SQLite's bound-variable limit (999 on older builds), which a
    # single IN (...) list over thousands of memories would exceed.
    batch_size = 400

    profile_memory_ids = [m[0] for m in memories]
    for start in range(0, len(profile_memory_ids), batch_size):
        batch = profile_memory_ids[start:start + batch_size]
        placeholders = ','.join('?' * len(batch))
        cursor.execute(f'''
            DELETE FROM graph_edges
            WHERE source_memory_id IN ({placeholders})
               OR target_memory_id IN ({placeholders})
        ''', batch + batch)
        cursor.execute(f'''
            DELETE FROM graph_nodes WHERE memory_id IN ({placeholders})
        ''', batch)

    # Detach the profile's memories first so that its own clusters count as
    # orphans below; doing the cleanup before the reset would leave them
    # behind until the next build.
    cursor.execute('UPDATE memories SET cluster_id = NULL WHERE profile = ?',
                   (active_profile,))
    cursor.execute('''
        DELETE FROM graph_clusters
        WHERE id NOT IN (
            SELECT DISTINCT cluster_id FROM memories WHERE cluster_id IS NOT NULL
        )
    ''')
    conn.commit()
|
package/src/graph/cli.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""CLI interface for manual graph operations.
|
|
5
|
+
|
|
6
|
+
Provides a command-line interface for building graphs, viewing stats,
|
|
7
|
+
finding related memories, inspecting clusters, and generating summaries.
|
|
8
|
+
"""
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main():
    """CLI interface for manual graph operations.

    Parses ``sys.argv`` and dispatches to one of the commands ``build``,
    ``stats``, ``related``, ``cluster``, ``hierarchical`` or ``summaries``.
    Results are printed to stdout; ``related`` and ``cluster`` print an error
    and return early when their required ID option is missing.
    """
    # Imported here so that importing this module stays cheap and does not
    # pull in the graph engine until the CLI is actually run.
    import argparse
    from graph.graph_core import GraphEngine
    from graph.cluster_builder import ClusterBuilder

    parser = argparse.ArgumentParser(description='GraphEngine - Knowledge Graph Management')
    parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster', 'hierarchical', 'summaries'],
                        help='Command to execute')
    parser.add_argument('--memory-id', type=int, help='Memory ID for related command')
    parser.add_argument('--cluster-id', type=int, help='Cluster ID for cluster command')
    parser.add_argument('--min-similarity', type=float, default=0.3,
                        help='Minimum similarity for edges (default: 0.3)')
    parser.add_argument('--hops', type=int, default=2, help='Max hops for related (default: 2)')

    args = parser.parse_args()

    engine = GraphEngine()

    if args.command == 'build':
        print("Building knowledge graph...")
        stats = engine.build_graph(min_similarity=args.min_similarity)
        print(json.dumps(stats, indent=2))

    elif args.command == 'stats':
        print("Graph Statistics:")
        stats = engine.get_stats()
        print(json.dumps(stats, indent=2))

    elif args.command == 'related':
        # Compare against None so that an explicit ID of 0 is not mistaken
        # for a missing option.
        if args.memory_id is None:
            print("Error: --memory-id required for 'related' command")
            return

        print(f"Finding memories related to #{args.memory_id}...")
        related = engine.get_related(args.memory_id, max_hops=args.hops)

        if not related:
            print("No related memories found")
        else:
            for idx, mem in enumerate(related, 1):
                print(f"\n{idx}. Memory #{mem['id']} ({mem['hops']}-hop, weight={mem['weight']:.3f})")
                print(f"   Relationship: {mem['relationship']}")
                summary = mem['summary'] or '[No summary]'
                print(f"   Summary: {summary[:100]}...")
                if mem['shared_entities']:
                    print(f"   Shared: {', '.join(mem['shared_entities'][:5])}")

    elif args.command == 'cluster':
        if args.cluster_id is None:
            print("Error: --cluster-id required for 'cluster' command")
            return

        print(f"Cluster #{args.cluster_id} members:")
        members = engine.get_cluster_members(args.cluster_id)

        for idx, mem in enumerate(members, 1):
            print(f"\n{idx}. Memory #{mem['id']} (importance={mem['importance']})")
            summary = mem['summary'] or '[No summary]'
            print(f"   {summary[:100]}...")

    elif args.command == 'hierarchical':
        print("Running hierarchical sub-clustering...")
        cluster_builder = ClusterBuilder(engine.db_path)
        stats = cluster_builder.hierarchical_cluster()
        print(json.dumps(stats, indent=2))

    elif args.command == 'summaries':
        print("Generating cluster summaries...")
        cluster_builder = ClusterBuilder(engine.db_path)
        count = cluster_builder.generate_cluster_summaries()
        print(f"Generated summaries for {count} clusters")


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""Community detection and cluster management for the graph engine.
|
|
5
|
+
|
|
6
|
+
Implements Leiden algorithm based community detection. Hierarchical
|
|
7
|
+
sub-clustering is delegated to the ``hierarchical`` module.
|
|
8
|
+
"""
|
|
9
|
+
import json
import sqlite3
from collections import Counter
from typing import Any, Dict, List

from graph.constants import logger, IGRAPH_AVAILABLE, MEMORY_DIR
from graph.cluster_summary import generate_cluster_summaries as _generate_summaries
from graph.hierarchical import hierarchical_cluster as _hierarchical_cluster
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ClusterBuilder:
    """Detect memory communities using Leiden algorithm.

    Works against the SQLite database at ``db_path``; reads ``graph_edges``,
    ``graph_nodes`` and ``memories`` and writes ``graph_clusters`` plus the
    ``memories.cluster_id`` column.
    """

    def __init__(self, db_path):
        """Initialize cluster builder.

        Args:
            db_path: Database path handed to ``sqlite3.connect``.
        """
        self.db_path = db_path

    def _get_active_profile(self) -> str:
        """Get the currently active profile name from config.

        Reads ``active_profile`` from ``profiles.json`` in ``MEMORY_DIR``.
        Falls back to ``'default'`` when the file is missing, unreadable or
        not valid JSON.
        """
        config_file = MEMORY_DIR / "profiles.json"
        if config_file.exists():
            try:
                with open(config_file, 'r') as f:
                    config = json.load(f)
                return config.get('active_profile', 'default')
            except (json.JSONDecodeError, IOError):
                # Best-effort lookup: an unreadable config means "default".
                pass
        return 'default'

    def detect_communities(self) -> int:
        """
        Run Leiden algorithm to find memory clusters (active profile only).

        Only edges whose two endpoints both belong to the active profile are
        considered. Communities with fewer than two members are skipped. Any
        failure is logged, the transaction is rolled back and 0 is returned.

        Returns:
            Number of clusters created
        """
        if not IGRAPH_AVAILABLE:
            logger.warning("igraph/leidenalg not installed. Graph clustering disabled. Install with: pip3 install python-igraph leidenalg")
            return 0
        import igraph as ig
        import leidenalg

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        active_profile = self._get_active_profile()

        try:
            # Load edges for active profile's memories only
            edges = cursor.execute('''
                SELECT ge.source_memory_id, ge.target_memory_id, ge.weight
                FROM graph_edges ge
                WHERE ge.source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
                  AND ge.target_memory_id IN (SELECT id FROM memories WHERE profile = ?)
            ''', (active_profile, active_profile)).fetchall()

            if not edges:
                logger.warning("No edges found - cannot build clusters")
                return 0

            # Build memory ID mapping: vertex index -> memory id is the sorted
            # list itself, memory id -> vertex index is the reverse dict.
            memory_ids = sorted(
                {source for source, _, _ in edges} | {target for _, target, _ in edges}
            )
            memory_id_to_vertex = {mid: idx for idx, mid in enumerate(memory_ids)}

            # Create igraph graph
            g = ig.Graph()
            g.add_vertices(len(memory_ids))

            edge_list = [
                (memory_id_to_vertex[source], memory_id_to_vertex[target])
                for source, target, _ in edges
            ]
            edge_weights = [weight for _, _, weight in edges]

            g.add_edges(edge_list)

            # Run Leiden algorithm (fixed seed keeps partitions reproducible)
            logger.info(f"Running Leiden on {len(memory_ids)} nodes, {len(edges)} edges")
            partition = leidenalg.find_partition(
                g, leidenalg.ModularityVertexPartition,
                weights=edge_weights, n_iterations=100, seed=42
            )

            clusters_created = 0
            for community in partition:
                if len(community) < 2:
                    continue

                cluster_memory_ids = [memory_ids[v] for v in community]
                avg_importance = self._get_avg_importance(cursor, cluster_memory_ids)
                cluster_name = self._generate_cluster_name(cursor, cluster_memory_ids)

                result = cursor.execute('''
                    INSERT INTO graph_clusters (name, member_count, avg_importance)
                    VALUES (?, ?, ?)
                ''', (cluster_name, len(cluster_memory_ids), avg_importance))

                cluster_id = result.lastrowid
                cursor.executemany('''
                    UPDATE memories SET cluster_id = ? WHERE id = ?
                ''', [(cluster_id, mid) for mid in cluster_memory_ids])

                clusters_created += 1
                logger.info(f"Cluster {cluster_id}: '{cluster_name}' ({len(cluster_memory_ids)} members)")

            conn.commit()
            logger.info(f"Created {clusters_created} clusters")
            return clusters_created

        except Exception as e:
            logger.error(f"Community detection failed: {e}")
            conn.rollback()
            return 0
        finally:
            conn.close()

    def _get_avg_importance(self, cursor, memory_ids: List[int]) -> float:
        """Calculate average importance for cluster.

        Args:
            cursor: Open SQLite cursor.
            memory_ids: IDs of the memories in the cluster.

        Returns:
            ``AVG(importance)`` over the given memories, or 5.0 when there is
            no value to average (no rows, or all importances are NULL).
        """
        placeholders = ','.join('?' * len(memory_ids))
        result = cursor.execute(f'''
            SELECT AVG(importance) FROM memories WHERE id IN ({placeholders})
        ''', memory_ids).fetchone()
        # Compare against None: a real average of 0 must not be replaced by
        # the 5.0 default.
        if result is None or result[0] is None:
            return 5.0
        return result[0]

    def _generate_cluster_name(self, cursor, memory_ids: List[int]) -> str:
        """Generate cluster name from member entities.

        Counts the entities stored (as JSON lists) on the members' graph
        nodes and names the cluster after the most frequent ones.

        Args:
            cursor: Open SQLite cursor.
            memory_ids: IDs of the memories in the cluster.

        Returns:
            A name of at most 100 characters, or a placeholder when no usable
            entities are found.
        """
        placeholders = ','.join('?' * len(memory_ids))
        nodes = cursor.execute(f'''
            SELECT entities FROM graph_nodes WHERE memory_id IN ({placeholders})
        ''', memory_ids).fetchall()

        all_entities = []
        for (raw_entities,) in nodes:
            if not raw_entities:
                continue
            try:
                parsed = json.loads(raw_entities)
            except (TypeError, ValueError):
                # One corrupt row should not abort naming (and with it the
                # whole clustering run); skip it and keep going.
                logger.warning("Skipping malformed entities payload in graph_nodes")
                continue
            if isinstance(parsed, list):
                all_entities.extend(e for e in parsed if isinstance(e, str))

        if not all_entities:
            return "Cluster (ID auto-assigned)"

        top_entities = [e for e, _ in Counter(all_entities).most_common(3)]

        # all_entities is non-empty here, so there is always at least one.
        if len(top_entities) >= 2:
            name = f"{top_entities[0].title()} & {top_entities[1].title()}"
        else:
            name = f"{top_entities[0].title()} Contexts"

        return name[:100]

    def hierarchical_cluster(self, min_subcluster_size: int = 5, max_depth: int = 3) -> Dict[str, Any]:
        """
        Run recursive Leiden clustering -- cluster the clusters.

        Delegates to the hierarchical module.

        Args:
            min_subcluster_size: Minimum members to attempt sub-clustering (default 5)
            max_depth: Maximum recursion depth (default 3)

        Returns:
            Dictionary with hierarchical clustering statistics
        """
        return _hierarchical_cluster(
            self.db_path,
            get_avg_importance_fn=self._get_avg_importance,
            generate_cluster_name_fn=self._generate_cluster_name,
            min_subcluster_size=min_subcluster_size,
            max_depth=max_depth,
        )

    def generate_cluster_summaries(self) -> int:
        """Generate TF-IDF structured summaries for all clusters.

        Delegates to the cluster_summary module and returns its result.
        """
        return _generate_summaries(self.db_path)
|