superlocalmemory 2.8.2 → 2.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +1 -1
- package/CHANGELOG.md +17 -0
- package/README.md +7 -5
- package/api_server.py +5 -0
- package/bin/slm +35 -0
- package/bin/slm.bat +3 -3
- package/docs/SECURITY-QUICK-REFERENCE.md +214 -0
- package/install.ps1 +11 -11
- package/mcp_server.py +78 -10
- package/package.json +2 -2
- package/requirements-core.txt +16 -18
- package/requirements-learning.txt +8 -8
- package/requirements.txt +9 -7
- package/scripts/prepack.js +33 -0
- package/scripts/verify-v27.ps1 +301 -0
- package/src/agent_registry.py +32 -28
- package/src/auto_backup.py +12 -6
- package/src/cache_manager.py +2 -2
- package/src/compression/__init__.py +25 -0
- package/src/compression/cli.py +150 -0
- package/src/compression/cold_storage.py +217 -0
- package/src/compression/config.py +72 -0
- package/src/compression/orchestrator.py +133 -0
- package/src/compression/tier2_compressor.py +228 -0
- package/src/compression/tier3_compressor.py +153 -0
- package/src/compression/tier_classifier.py +148 -0
- package/src/db_connection_manager.py +5 -5
- package/src/event_bus.py +24 -22
- package/src/hnsw_index.py +3 -3
- package/src/learning/__init__.py +5 -4
- package/src/learning/adaptive_ranker.py +14 -265
- package/src/learning/bootstrap/__init__.py +69 -0
- package/src/learning/bootstrap/constants.py +93 -0
- package/src/learning/bootstrap/db_queries.py +316 -0
- package/src/learning/bootstrap/sampling.py +82 -0
- package/src/learning/bootstrap/text_utils.py +71 -0
- package/src/learning/cross_project_aggregator.py +58 -57
- package/src/learning/db/__init__.py +40 -0
- package/src/learning/db/constants.py +44 -0
- package/src/learning/db/schema.py +279 -0
- package/src/learning/learning_db.py +15 -234
- package/src/learning/ranking/__init__.py +33 -0
- package/src/learning/ranking/constants.py +84 -0
- package/src/learning/ranking/helpers.py +278 -0
- package/src/learning/source_quality_scorer.py +66 -65
- package/src/learning/synthetic_bootstrap.py +28 -310
- package/src/memory/__init__.py +36 -0
- package/src/memory/cli.py +205 -0
- package/src/memory/constants.py +39 -0
- package/src/memory/helpers.py +28 -0
- package/src/memory/schema.py +166 -0
- package/src/memory-profiles.py +94 -86
- package/src/memory-reset.py +187 -185
- package/src/memory_compression.py +2 -2
- package/src/memory_store_v2.py +44 -354
- package/src/migrate_v1_to_v2.py +11 -10
- package/src/patterns/analyzers.py +104 -100
- package/src/patterns/learner.py +17 -13
- package/src/patterns/scoring.py +25 -21
- package/src/patterns/store.py +40 -38
- package/src/patterns/terminology.py +53 -51
- package/src/provenance_tracker.py +2 -2
- package/src/qualixar_attribution.py +1 -1
- package/src/search/engine.py +16 -14
- package/src/search/index_loader.py +13 -11
- package/src/setup_validator.py +160 -158
- package/src/subscription_manager.py +20 -18
- package/src/tree/builder.py +66 -64
- package/src/tree/nodes.py +103 -97
- package/src/tree/queries.py +142 -137
- package/src/tree/schema.py +46 -42
- package/src/webhook_dispatcher.py +3 -3
- package/ui_server.py +7 -4
|
@@ -36,9 +36,6 @@ Research Backing:
|
|
|
36
36
|
|
|
37
37
|
import hashlib
|
|
38
38
|
import logging
|
|
39
|
-
import re
|
|
40
|
-
import sqlite3
|
|
41
|
-
from collections import Counter
|
|
42
39
|
from datetime import datetime
|
|
43
40
|
from pathlib import Path
|
|
44
41
|
from typing import Any, Dict, List, Optional, Set
|
|
@@ -59,79 +56,27 @@ except ImportError:
|
|
|
59
56
|
HAS_NUMPY = False
|
|
60
57
|
|
|
61
58
|
from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
|
|
59
|
+
from .bootstrap import (
|
|
60
|
+
MEMORY_DB_PATH,
|
|
61
|
+
MODELS_DIR,
|
|
62
|
+
MODEL_PATH,
|
|
63
|
+
MIN_MEMORIES_FOR_BOOTSTRAP,
|
|
64
|
+
BOOTSTRAP_CONFIG,
|
|
65
|
+
BOOTSTRAP_PARAMS,
|
|
66
|
+
extract_keywords,
|
|
67
|
+
get_memory_count,
|
|
68
|
+
get_memories_by_access,
|
|
69
|
+
get_memories_by_importance,
|
|
70
|
+
get_recent_memories,
|
|
71
|
+
get_learned_patterns,
|
|
72
|
+
search_memories,
|
|
73
|
+
find_negative_memories,
|
|
74
|
+
diverse_sample,
|
|
75
|
+
count_sources,
|
|
76
|
+
)
|
|
62
77
|
|
|
63
78
|
logger = logging.getLogger("superlocalmemory.learning.synthetic_bootstrap")
|
|
64
79
|
|
|
65
|
-
# ============================================================================
|
|
66
|
-
# Constants
|
|
67
|
-
# ============================================================================
|
|
68
|
-
|
|
69
|
-
MEMORY_DB_PATH = Path.home() / ".claude-memory" / "memory.db"
|
|
70
|
-
MODELS_DIR = Path.home() / ".claude-memory" / "models"
|
|
71
|
-
MODEL_PATH = MODELS_DIR / "ranker.txt"
|
|
72
|
-
|
|
73
|
-
# Minimum memories needed before bootstrap makes sense
|
|
74
|
-
MIN_MEMORIES_FOR_BOOTSTRAP = 50
|
|
75
|
-
|
|
76
|
-
# Tiered config — bootstrap model complexity scales with data size
|
|
77
|
-
BOOTSTRAP_CONFIG = {
|
|
78
|
-
'small': {
|
|
79
|
-
'min_memories': 50,
|
|
80
|
-
'max_memories': 499,
|
|
81
|
-
'target_samples': 200,
|
|
82
|
-
'n_estimators': 30,
|
|
83
|
-
'max_depth': 3,
|
|
84
|
-
},
|
|
85
|
-
'medium': {
|
|
86
|
-
'min_memories': 500,
|
|
87
|
-
'max_memories': 4999,
|
|
88
|
-
'target_samples': 1000,
|
|
89
|
-
'n_estimators': 50,
|
|
90
|
-
'max_depth': 4,
|
|
91
|
-
},
|
|
92
|
-
'large': {
|
|
93
|
-
'min_memories': 5000,
|
|
94
|
-
'max_memories': float('inf'),
|
|
95
|
-
'target_samples': 2000,
|
|
96
|
-
'n_estimators': 100,
|
|
97
|
-
'max_depth': 6,
|
|
98
|
-
},
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
# LightGBM bootstrap parameters — MORE aggressive regularization than
|
|
102
|
-
# real training because synthetic data has systematic biases
|
|
103
|
-
BOOTSTRAP_PARAMS = {
|
|
104
|
-
'objective': 'lambdarank',
|
|
105
|
-
'metric': 'ndcg',
|
|
106
|
-
'ndcg_eval_at': [5, 10],
|
|
107
|
-
'learning_rate': 0.1,
|
|
108
|
-
'num_leaves': 8,
|
|
109
|
-
'max_depth': 3,
|
|
110
|
-
'min_child_samples': 5,
|
|
111
|
-
'subsample': 0.7,
|
|
112
|
-
'reg_alpha': 0.5,
|
|
113
|
-
'reg_lambda': 2.0,
|
|
114
|
-
'boosting_type': 'dart',
|
|
115
|
-
'verbose': -1,
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
# English stopwords for keyword extraction (no external deps)
|
|
119
|
-
_STOPWORDS = frozenset({
|
|
120
|
-
'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
|
|
121
|
-
'of', 'with', 'by', 'from', 'is', 'it', 'this', 'that', 'was', 'are',
|
|
122
|
-
'be', 'has', 'have', 'had', 'do', 'does', 'did', 'will', 'would',
|
|
123
|
-
'could', 'should', 'may', 'might', 'can', 'not', 'no', 'if', 'then',
|
|
124
|
-
'so', 'as', 'up', 'out', 'about', 'into', 'over', 'after', 'before',
|
|
125
|
-
'when', 'where', 'how', 'what', 'which', 'who', 'whom', 'why',
|
|
126
|
-
'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other',
|
|
127
|
-
'some', 'such', 'than', 'too', 'very', 'just', 'also', 'now',
|
|
128
|
-
'here', 'there', 'use', 'used', 'using', 'make', 'made',
|
|
129
|
-
'need', 'needed', 'get', 'got', 'set', 'new', 'old', 'one', 'two',
|
|
130
|
-
})
|
|
131
|
-
|
|
132
|
-
# Minimum word length for keyword extraction
|
|
133
|
-
_MIN_KEYWORD_LENGTH = 3
|
|
134
|
-
|
|
135
80
|
|
|
136
81
|
class SyntheticBootstrapper:
|
|
137
82
|
"""
|
|
@@ -232,18 +177,7 @@ class SyntheticBootstrapper:
|
|
|
232
177
|
|
|
233
178
|
def _get_memory_count(self) -> int:
|
|
234
179
|
"""Count total memories in memory.db."""
|
|
235
|
-
|
|
236
|
-
return 0
|
|
237
|
-
try:
|
|
238
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
239
|
-
cursor = conn.cursor()
|
|
240
|
-
cursor.execute('SELECT COUNT(*) FROM memories')
|
|
241
|
-
count = cursor.fetchone()[0]
|
|
242
|
-
conn.close()
|
|
243
|
-
return count
|
|
244
|
-
except Exception as e:
|
|
245
|
-
logger.warning("Failed to count memories: %s", e)
|
|
246
|
-
return 0
|
|
180
|
+
return get_memory_count(self._memory_db)
|
|
247
181
|
|
|
248
182
|
# ========================================================================
|
|
249
183
|
# Synthetic Data Generation
|
|
@@ -699,26 +633,7 @@ class SyntheticBootstrapper:
|
|
|
699
633
|
|
|
700
634
|
These are memories the user keeps coming back to — strong positive signal.
|
|
701
635
|
"""
|
|
702
|
-
|
|
703
|
-
return []
|
|
704
|
-
try:
|
|
705
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
706
|
-
conn.row_factory = sqlite3.Row
|
|
707
|
-
cursor = conn.cursor()
|
|
708
|
-
cursor.execute('''
|
|
709
|
-
SELECT id, content, summary, project_name, tags,
|
|
710
|
-
category, importance, created_at, access_count
|
|
711
|
-
FROM memories
|
|
712
|
-
WHERE access_count >= ?
|
|
713
|
-
ORDER BY access_count DESC
|
|
714
|
-
LIMIT 100
|
|
715
|
-
''', (min_access,))
|
|
716
|
-
results = [dict(row) for row in cursor.fetchall()]
|
|
717
|
-
conn.close()
|
|
718
|
-
return results
|
|
719
|
-
except Exception as e:
|
|
720
|
-
logger.warning("Failed to fetch high-access memories: %s", e)
|
|
721
|
-
return []
|
|
636
|
+
return get_memories_by_access(self._memory_db, min_access)
|
|
722
637
|
|
|
723
638
|
def _get_memories_by_importance(self, min_importance: int = 8) -> List[dict]:
|
|
724
639
|
"""
|
|
@@ -726,48 +641,11 @@ class SyntheticBootstrapper:
|
|
|
726
641
|
|
|
727
642
|
High importance = user explicitly rated these as valuable.
|
|
728
643
|
"""
|
|
729
|
-
|
|
730
|
-
return []
|
|
731
|
-
try:
|
|
732
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
733
|
-
conn.row_factory = sqlite3.Row
|
|
734
|
-
cursor = conn.cursor()
|
|
735
|
-
cursor.execute('''
|
|
736
|
-
SELECT id, content, summary, project_name, tags,
|
|
737
|
-
category, importance, created_at, access_count
|
|
738
|
-
FROM memories
|
|
739
|
-
WHERE importance >= ?
|
|
740
|
-
ORDER BY importance DESC
|
|
741
|
-
LIMIT 100
|
|
742
|
-
''', (min_importance,))
|
|
743
|
-
results = [dict(row) for row in cursor.fetchall()]
|
|
744
|
-
conn.close()
|
|
745
|
-
return results
|
|
746
|
-
except Exception as e:
|
|
747
|
-
logger.warning("Failed to fetch high-importance memories: %s", e)
|
|
748
|
-
return []
|
|
644
|
+
return get_memories_by_importance(self._memory_db, min_importance)
|
|
749
645
|
|
|
750
646
|
def _get_recent_memories(self, limit: int = 30) -> List[dict]:
|
|
751
647
|
"""Fetch the N most recently created memories."""
|
|
752
|
-
|
|
753
|
-
return []
|
|
754
|
-
try:
|
|
755
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
756
|
-
conn.row_factory = sqlite3.Row
|
|
757
|
-
cursor = conn.cursor()
|
|
758
|
-
cursor.execute('''
|
|
759
|
-
SELECT id, content, summary, project_name, tags,
|
|
760
|
-
category, importance, created_at, access_count
|
|
761
|
-
FROM memories
|
|
762
|
-
ORDER BY created_at DESC
|
|
763
|
-
LIMIT ?
|
|
764
|
-
''', (limit,))
|
|
765
|
-
results = [dict(row) for row in cursor.fetchall()]
|
|
766
|
-
conn.close()
|
|
767
|
-
return results
|
|
768
|
-
except Exception as e:
|
|
769
|
-
logger.warning("Failed to fetch recent memories: %s", e)
|
|
770
|
-
return []
|
|
648
|
+
return get_recent_memories(self._memory_db, limit)
|
|
771
649
|
|
|
772
650
|
def _get_learned_patterns(
|
|
773
651
|
self,
|
|
@@ -782,36 +660,7 @@ class SyntheticBootstrapper:
|
|
|
782
660
|
Returns empty list if identity_patterns table doesn't exist
|
|
783
661
|
(backward compatible with pre-v2.3 databases).
|
|
784
662
|
"""
|
|
785
|
-
|
|
786
|
-
return []
|
|
787
|
-
try:
|
|
788
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
789
|
-
conn.row_factory = sqlite3.Row
|
|
790
|
-
cursor = conn.cursor()
|
|
791
|
-
|
|
792
|
-
# Check if table exists (backward compatibility)
|
|
793
|
-
cursor.execute('''
|
|
794
|
-
SELECT name FROM sqlite_master
|
|
795
|
-
WHERE type='table' AND name='identity_patterns'
|
|
796
|
-
''')
|
|
797
|
-
if cursor.fetchone() is None:
|
|
798
|
-
conn.close()
|
|
799
|
-
return []
|
|
800
|
-
|
|
801
|
-
cursor.execute('''
|
|
802
|
-
SELECT id, pattern_type, key, value, confidence,
|
|
803
|
-
evidence_count, category
|
|
804
|
-
FROM identity_patterns
|
|
805
|
-
WHERE confidence >= ?
|
|
806
|
-
ORDER BY confidence DESC
|
|
807
|
-
LIMIT 50
|
|
808
|
-
''', (min_confidence,))
|
|
809
|
-
results = [dict(row) for row in cursor.fetchall()]
|
|
810
|
-
conn.close()
|
|
811
|
-
return results
|
|
812
|
-
except Exception as e:
|
|
813
|
-
logger.warning("Failed to fetch learned patterns: %s", e)
|
|
814
|
-
return []
|
|
663
|
+
return get_learned_patterns(self._memory_db, min_confidence)
|
|
815
664
|
|
|
816
665
|
def _search_memories(self, query: str, limit: int = 20) -> List[dict]:
|
|
817
666
|
"""
|
|
@@ -820,38 +669,7 @@ class SyntheticBootstrapper:
|
|
|
820
669
|
Used to find memories matching synthetic query terms.
|
|
821
670
|
This is a lightweight search — no TF-IDF, no HNSW, just FTS5.
|
|
822
671
|
"""
|
|
823
|
-
|
|
824
|
-
return []
|
|
825
|
-
if not query or not query.strip():
|
|
826
|
-
return []
|
|
827
|
-
|
|
828
|
-
try:
|
|
829
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
830
|
-
conn.row_factory = sqlite3.Row
|
|
831
|
-
cursor = conn.cursor()
|
|
832
|
-
|
|
833
|
-
# Clean query for FTS5 (same approach as memory_store_v2.search)
|
|
834
|
-
fts_tokens = re.findall(r'\w+', query)
|
|
835
|
-
if not fts_tokens:
|
|
836
|
-
conn.close()
|
|
837
|
-
return []
|
|
838
|
-
fts_query = ' OR '.join(fts_tokens)
|
|
839
|
-
|
|
840
|
-
cursor.execute('''
|
|
841
|
-
SELECT m.id, m.content, m.summary, m.project_name, m.tags,
|
|
842
|
-
m.category, m.importance, m.created_at, m.access_count
|
|
843
|
-
FROM memories m
|
|
844
|
-
JOIN memories_fts fts ON m.id = fts.rowid
|
|
845
|
-
WHERE memories_fts MATCH ?
|
|
846
|
-
ORDER BY rank
|
|
847
|
-
LIMIT ?
|
|
848
|
-
''', (fts_query, limit))
|
|
849
|
-
results = [dict(row) for row in cursor.fetchall()]
|
|
850
|
-
conn.close()
|
|
851
|
-
return results
|
|
852
|
-
except Exception as e:
|
|
853
|
-
logger.debug("FTS5 search failed (may not exist yet): %s", e)
|
|
854
|
-
return []
|
|
672
|
+
return search_memories(self._memory_db, query, limit)
|
|
855
673
|
|
|
856
674
|
def _find_negative_memories(
|
|
857
675
|
self,
|
|
@@ -865,51 +683,7 @@ class SyntheticBootstrapper:
|
|
|
865
683
|
Simple heuristic: pick memories from a different category or project.
|
|
866
684
|
Falls back to random sample if no structured differences available.
|
|
867
685
|
"""
|
|
868
|
-
|
|
869
|
-
return []
|
|
870
|
-
exclude_ids = exclude_ids or set()
|
|
871
|
-
|
|
872
|
-
try:
|
|
873
|
-
conn = sqlite3.connect(str(self._memory_db), timeout=5)
|
|
874
|
-
conn.row_factory = sqlite3.Row
|
|
875
|
-
cursor = conn.cursor()
|
|
876
|
-
|
|
877
|
-
anchor_project = anchor_memory.get('project_name', '')
|
|
878
|
-
anchor_category = anchor_memory.get('category', '')
|
|
879
|
-
|
|
880
|
-
# Try to find memories from different project or category
|
|
881
|
-
conditions = []
|
|
882
|
-
params: list = []
|
|
883
|
-
|
|
884
|
-
if anchor_project:
|
|
885
|
-
conditions.append('project_name != ?')
|
|
886
|
-
params.append(anchor_project)
|
|
887
|
-
if anchor_category:
|
|
888
|
-
conditions.append('category != ?')
|
|
889
|
-
params.append(anchor_category)
|
|
890
|
-
|
|
891
|
-
# Exclude specified IDs
|
|
892
|
-
if exclude_ids:
|
|
893
|
-
placeholders = ','.join('?' for _ in exclude_ids)
|
|
894
|
-
conditions.append(f'id NOT IN ({placeholders})')
|
|
895
|
-
params.extend(exclude_ids)
|
|
896
|
-
|
|
897
|
-
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
|
898
|
-
|
|
899
|
-
cursor.execute(f'''
|
|
900
|
-
SELECT id, content, summary, project_name, tags,
|
|
901
|
-
category, importance, created_at, access_count
|
|
902
|
-
FROM memories
|
|
903
|
-
WHERE {where_clause}
|
|
904
|
-
ORDER BY RANDOM()
|
|
905
|
-
LIMIT ?
|
|
906
|
-
''', (*params, limit))
|
|
907
|
-
results = [dict(row) for row in cursor.fetchall()]
|
|
908
|
-
conn.close()
|
|
909
|
-
return results
|
|
910
|
-
except Exception as e:
|
|
911
|
-
logger.debug("Failed to find negative memories: %s", e)
|
|
912
|
-
return []
|
|
686
|
+
return find_negative_memories(self._memory_db, anchor_memory, exclude_ids, limit)
|
|
913
687
|
|
|
914
688
|
# ========================================================================
|
|
915
689
|
# Text Processing
|
|
@@ -926,24 +700,7 @@ class SyntheticBootstrapper:
|
|
|
926
700
|
|
|
927
701
|
No external NLP dependencies — just regex + counter.
|
|
928
702
|
"""
|
|
929
|
-
|
|
930
|
-
return []
|
|
931
|
-
|
|
932
|
-
# Tokenize: extract alphanumeric words
|
|
933
|
-
words = re.findall(r'[a-zA-Z][a-zA-Z0-9_.-]*[a-zA-Z0-9]|[a-zA-Z]', content.lower())
|
|
934
|
-
|
|
935
|
-
# Filter stopwords and short words
|
|
936
|
-
meaningful = [
|
|
937
|
-
w for w in words
|
|
938
|
-
if w not in _STOPWORDS and len(w) >= _MIN_KEYWORD_LENGTH
|
|
939
|
-
]
|
|
940
|
-
|
|
941
|
-
if not meaningful:
|
|
942
|
-
return []
|
|
943
|
-
|
|
944
|
-
# Count and return top N
|
|
945
|
-
counter = Counter(meaningful)
|
|
946
|
-
return [word for word, _count in counter.most_common(top_n)]
|
|
703
|
+
return extract_keywords(content, top_n)
|
|
947
704
|
|
|
948
705
|
# ========================================================================
|
|
949
706
|
# Utility
|
|
@@ -960,50 +717,11 @@ class SyntheticBootstrapper:
|
|
|
960
717
|
Takes proportional samples from each source strategy to ensure
|
|
961
718
|
the training data isn't dominated by one strategy.
|
|
962
719
|
"""
|
|
963
|
-
|
|
964
|
-
return records
|
|
965
|
-
|
|
966
|
-
# Group by source
|
|
967
|
-
by_source: Dict[str, List[dict]] = {}
|
|
968
|
-
for r in records:
|
|
969
|
-
src = r.get('source', 'unknown')
|
|
970
|
-
if src not in by_source:
|
|
971
|
-
by_source[src] = []
|
|
972
|
-
by_source[src].append(r)
|
|
973
|
-
|
|
974
|
-
# Proportional allocation
|
|
975
|
-
n_sources = len(by_source)
|
|
976
|
-
if n_sources == 0:
|
|
977
|
-
return records[:target]
|
|
978
|
-
|
|
979
|
-
per_source = max(1, target // n_sources)
|
|
980
|
-
sampled = []
|
|
981
|
-
|
|
982
|
-
for source, source_records in by_source.items():
|
|
983
|
-
# Take up to per_source from each, or all if fewer
|
|
984
|
-
take = min(len(source_records), per_source)
|
|
985
|
-
sampled.extend(source_records[:take])
|
|
986
|
-
|
|
987
|
-
# If under target, fill from remaining
|
|
988
|
-
if len(sampled) < target:
|
|
989
|
-
used_ids = {(r['query_hash'], r['memory_id']) for r in sampled}
|
|
990
|
-
for r in records:
|
|
991
|
-
if len(sampled) >= target:
|
|
992
|
-
break
|
|
993
|
-
key = (r['query_hash'], r['memory_id'])
|
|
994
|
-
if key not in used_ids:
|
|
995
|
-
sampled.append(r)
|
|
996
|
-
used_ids.add(key)
|
|
997
|
-
|
|
998
|
-
return sampled[:target]
|
|
720
|
+
return diverse_sample(records, target)
|
|
999
721
|
|
|
1000
722
|
def _count_sources(self, records: List[dict]) -> Dict[str, int]:
|
|
1001
723
|
"""Count records by source strategy."""
|
|
1002
|
-
|
|
1003
|
-
for r in records:
|
|
1004
|
-
src = r.get('source', 'unknown')
|
|
1005
|
-
counts[src] = counts.get(src, 0) + 1
|
|
1006
|
-
return counts
|
|
724
|
+
return count_sources(records)
|
|
1007
725
|
|
|
1008
726
|
|
|
1009
727
|
# ============================================================================
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Memory package - Constants, schema, and utilities for MemoryStoreV2.
|
|
6
|
+
|
|
7
|
+
This package contains extracted utilities from memory_store_v2.py to keep
|
|
8
|
+
the main class focused and under the 800-line target.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from .constants import (
|
|
12
|
+
MEMORY_DIR,
|
|
13
|
+
DB_PATH,
|
|
14
|
+
VECTORS_PATH,
|
|
15
|
+
MAX_CONTENT_SIZE,
|
|
16
|
+
MAX_SUMMARY_SIZE,
|
|
17
|
+
MAX_TAG_LENGTH,
|
|
18
|
+
MAX_TAGS,
|
|
19
|
+
CREATOR_METADATA,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from .helpers import format_content
|
|
23
|
+
from .cli import run_cli
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
'MEMORY_DIR',
|
|
27
|
+
'DB_PATH',
|
|
28
|
+
'VECTORS_PATH',
|
|
29
|
+
'MAX_CONTENT_SIZE',
|
|
30
|
+
'MAX_SUMMARY_SIZE',
|
|
31
|
+
'MAX_TAG_LENGTH',
|
|
32
|
+
'MAX_TAGS',
|
|
33
|
+
'CREATOR_METADATA',
|
|
34
|
+
'format_content',
|
|
35
|
+
'run_cli',
|
|
36
|
+
]
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Command-line interface for MemoryStoreV2.
|
|
6
|
+
|
|
7
|
+
This module contains the CLI implementation extracted from memory_store_v2.py
|
|
8
|
+
to reduce file size and improve maintainability.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
import json
|
|
13
|
+
from .helpers import format_content
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _print_usage():
    """Print the command overview shown when the CLI is run without arguments."""
    print("MemoryStore V2 CLI")
    print("\nV1 Compatible Commands:")
    print(" python memory_store_v2.py add <content> [--project <path>] [--tags tag1,tag2]")
    print(" python memory_store_v2.py search <query> [--full]")
    print(" python memory_store_v2.py list [limit] [--full]")
    print(" python memory_store_v2.py get <id>")
    print(" python memory_store_v2.py recent [limit] [--full]")
    print(" python memory_store_v2.py stats")
    print(" python memory_store_v2.py context <query>")
    print(" python memory_store_v2.py delete <id>")
    print("\nV2 Extensions:")
    print(" python memory_store_v2.py tree [parent_id]")
    print(" python memory_store_v2.py cluster <cluster_id> [--full]")
    print("\nOptions:")
    print(" --full Show complete content (default: smart truncation at 5000 chars)")


def _require_arg(argv, message, usage):
    """Exit with status 1 and a usage hint when the command's first argument is missing."""
    if len(argv) < 3:
        print(f"Error: {message}")
        print(f"Usage: {usage}")
        sys.exit(1)


def _parse_int_arg(raw, label, usage):
    """Convert a CLI argument to int; exit with a usage error instead of a traceback."""
    try:
        return int(raw)
    except ValueError:
        print(f"Error: {label} must be an integer, got {raw!r}")
        print(f"Usage: {usage}")
        sys.exit(1)


def _parse_limit(args, default=10):
    """Return the first all-decimal argument in ``args`` as an int, else ``default``.

    ``str.isdecimal`` is used rather than ``str.isdigit`` because ``isdigit``
    also accepts characters such as superscript digits that ``int()`` rejects.
    Flags like ``--full`` are never decimal, so they are skipped naturally.
    """
    for arg in args:
        if arg.isdecimal():
            return int(arg)
    return default


def _parse_add_options(args):
    """Parse the ``--project <path>`` and ``--tags a,b`` options of the ``add`` command.

    Returns a ``(project_path, tags)`` tuple. Unknown arguments, and option
    flags that have no following value, are ignored. Empty tag names (from
    ``a,,b`` or a trailing comma) are dropped.
    """
    project_path = None
    tags = []
    pos = 0
    while pos < len(args):
        has_value = pos + 1 < len(args)
        if args[pos] == '--project' and has_value:
            project_path = args[pos + 1]
            pos += 2
        elif args[pos] == '--tags' and has_value:
            stripped = (t.strip() for t in args[pos + 1].split(','))
            tags = [t for t in stripped if t]
            pos += 2
        else:
            pos += 1
    return project_path, tags


def run_cli():
    """Main CLI entry point for MemoryStoreV2.

    Reads the command from ``sys.argv[1]`` and dispatches to the matching
    ``MemoryStoreV2`` method (``get_tree``, ``get_by_cluster``, ``get_stats``,
    ``add_memory``, ``search``, ``get_recent``, ``get_by_id``,
    ``export_for_context``, ``delete_memory``), printing the result to stdout.

    Exits with status 0 after printing the overview when no command is given,
    and with status 1 when a required argument is missing or a numeric
    argument is not an integer. An unknown command prints a hint and returns
    normally.
    """
    # Import here to avoid circular dependency
    from memory_store_v2 import MemoryStoreV2

    store = MemoryStoreV2()
    argv = sys.argv

    if len(argv) < 2:
        _print_usage()
        sys.exit(0)

    command = argv[1]
    show_full = '--full' in argv

    if command == "tree":
        parent_id = None
        if len(argv) > 2:
            parent_id = _parse_int_arg(
                argv[2], "Parent ID", "python memory_store_v2.py tree [parent_id]"
            )
        results = store.get_tree(parent_id)

        if not results:
            print("No memories in tree.")
        else:
            for r in results:
                indent = " " * r['depth']
                print(f"{indent}[{r['id']}] {r['content'][:50]}...")
                if r.get('category'):
                    print(f"{indent} Category: {r['category']}")

    elif command == "cluster":
        usage = "python memory_store_v2.py cluster <cluster_id> [--full]"
        # A missing ID is a usage error, not an unknown command.
        _require_arg(argv, "Cluster ID required", usage)
        cluster_id = _parse_int_arg(argv[2], "Cluster ID", usage)
        results = store.get_by_cluster(cluster_id)

        if not results:
            print(f"No memories in cluster {cluster_id}.")
        else:
            print(f"Cluster {cluster_id} - {len(results)} memories:")
            for r in results:
                print(f"\n[{r['id']}] Importance: {r['importance']}")
                print(f" {format_content(r['content'], full=show_full)}")

    elif command == "stats":
        print(json.dumps(store.get_stats(), indent=2))

    elif command == "add":
        _require_arg(
            argv,
            "Content required",
            "python memory_store_v2.py add <content> [--project <path>] [--tags tag1,tag2]",
        )
        content = argv[2]
        project_path, tags = _parse_add_options(argv[3:])

        mem_id = store.add_memory(content, project_path=project_path, tags=tags)
        print(f"Memory added with ID: {mem_id}")

    elif command == "search":
        _require_arg(
            argv,
            "Search query required",
            "python memory_store_v2.py search <query> [--full]",
        )
        results = store.search(argv[2], limit=5)

        if not results:
            print("No results found.")
        else:
            for r in results:
                print(f"\n[{r['id']}] Score: {r['score']:.2f}")
                if r.get('project_name'):
                    print(f"Project: {r['project_name']}")
                if r.get('tags'):
                    print(f"Tags: {', '.join(r['tags'])}")
                print(f"Content: {format_content(r['content'], full=show_full)}")
                print(f"Created: {r['created_at']}")

    elif command == "recent":
        results = store.get_recent(_parse_limit(argv[2:]))

        if not results:
            print("No memories found.")
        else:
            for r in results:
                print(f"\n[{r['id']}] {r['created_at']}")
                if r.get('project_name'):
                    print(f"Project: {r['project_name']}")
                if r.get('tags'):
                    print(f"Tags: {', '.join(r['tags'])}")
                print(f"Content: {format_content(r['content'], full=show_full)}")

    elif command == "list":
        # Same get_recent query as "recent", printed as one compact line per memory.
        results = store.get_recent(_parse_limit(argv[2:]))

        if not results:
            print("No memories found.")
        else:
            for r in results:
                print(f"[{r['id']}] {format_content(r['content'], full=show_full)}")

    elif command == "get":
        usage = "python memory_store_v2.py get <id>"
        _require_arg(argv, "Memory ID required", usage)
        mem_id = _parse_int_arg(argv[2], "Memory ID", usage)
        memory = store.get_by_id(mem_id)

        if not memory:
            print(f"Memory {mem_id} not found.")
        else:
            print(f"\nID: {memory['id']}")
            print(f"Content: {memory['content']}")
            if memory.get('summary'):
                print(f"Summary: {memory['summary']}")
            if memory.get('project_name'):
                print(f"Project: {memory['project_name']}")
            if memory.get('tags'):
                print(f"Tags: {', '.join(memory['tags'])}")
            print(f"Created: {memory['created_at']}")
            print(f"Importance: {memory['importance']}")
            print(f"Access Count: {memory['access_count']}")

    elif command == "context":
        _require_arg(
            argv,
            "Query required",
            "python memory_store_v2.py context <query>",
        )
        print(store.export_for_context(argv[2]))

    elif command == "delete":
        usage = "python memory_store_v2.py delete <id>"
        _require_arg(argv, "Memory ID required", usage)
        mem_id = _parse_int_arg(argv[2], "Memory ID", usage)
        # NOTE(review): delete_memory's return value is ignored, so the message
        # below is printed whether or not a row matched; confirm what it returns.
        store.delete_memory(mem_id)
        print(f"Memory {mem_id} deleted.")

    else:
        print(f"Unknown command: {command}")
        print("Run without arguments to see available commands.")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Constants and configuration values for MemoryStoreV2.
|
|
6
|
+
|
|
7
|
+
This module contains all module-level constants extracted from memory_store_v2.py
|
|
8
|
+
to reduce file size and improve maintainability.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
# Database paths
|
|
14
|
+
MEMORY_DIR = Path.home() / ".claude-memory"
|
|
15
|
+
DB_PATH = MEMORY_DIR / "memory.db"
|
|
16
|
+
VECTORS_PATH = MEMORY_DIR / "vectors"
|
|
17
|
+
|
|
18
|
+
# Security: Input validation limits
|
|
19
|
+
MAX_CONTENT_SIZE = 1_000_000 # 1MB max content
|
|
20
|
+
MAX_SUMMARY_SIZE = 10_000 # 10KB max summary
|
|
21
|
+
MAX_TAG_LENGTH = 50 # 50 chars per tag
|
|
22
|
+
MAX_TAGS = 20 # 20 tags max
|
|
23
|
+
|
|
24
|
+
# Creator Attribution Metadata (REQUIRED by MIT License)
|
|
25
|
+
# This data is embedded in the database creator_metadata table
|
|
26
|
+
CREATOR_METADATA = {
|
|
27
|
+
'creator_name': 'Varun Pratap Bhardwaj',
|
|
28
|
+
'creator_role': 'Solution Architect & Original Creator',
|
|
29
|
+
'creator_github': 'varun369',
|
|
30
|
+
'project_name': 'SuperLocalMemory V2',
|
|
31
|
+
'project_url': 'https://github.com/varun369/SuperLocalMemoryV2',
|
|
32
|
+
'license': 'MIT',
|
|
33
|
+
'attribution_required': 'yes',
|
|
34
|
+
'version': '2.5.0',
|
|
35
|
+
'architecture_date': '2026-01-15',
|
|
36
|
+
'release_date': '2026-02-07',
|
|
37
|
+
'signature': 'VBPB-SLM-V2-2026-ARCHITECT',
|
|
38
|
+
'verification_hash': 'sha256:c9f3d1a8b5e2f4c6d8a9b3e7f1c4d6a8b9c3e7f2d5a8c1b4e6f9d2a7c5b8e1'
|
|
39
|
+
}
|