superlocalmemory 2.8.2 → 2.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +1 -1
- package/CHANGELOG.md +17 -0
- package/README.md +7 -5
- package/api_server.py +5 -0
- package/bin/slm +35 -0
- package/bin/slm.bat +3 -3
- package/docs/SECURITY-QUICK-REFERENCE.md +214 -0
- package/install.ps1 +11 -11
- package/mcp_server.py +78 -10
- package/package.json +2 -2
- package/requirements-core.txt +16 -18
- package/requirements-learning.txt +8 -8
- package/requirements.txt +9 -7
- package/scripts/prepack.js +33 -0
- package/scripts/verify-v27.ps1 +301 -0
- package/src/agent_registry.py +32 -28
- package/src/auto_backup.py +12 -6
- package/src/cache_manager.py +2 -2
- package/src/compression/__init__.py +25 -0
- package/src/compression/cli.py +150 -0
- package/src/compression/cold_storage.py +217 -0
- package/src/compression/config.py +72 -0
- package/src/compression/orchestrator.py +133 -0
- package/src/compression/tier2_compressor.py +228 -0
- package/src/compression/tier3_compressor.py +153 -0
- package/src/compression/tier_classifier.py +148 -0
- package/src/db_connection_manager.py +5 -5
- package/src/event_bus.py +24 -22
- package/src/hnsw_index.py +3 -3
- package/src/learning/__init__.py +5 -4
- package/src/learning/adaptive_ranker.py +14 -265
- package/src/learning/bootstrap/__init__.py +69 -0
- package/src/learning/bootstrap/constants.py +93 -0
- package/src/learning/bootstrap/db_queries.py +316 -0
- package/src/learning/bootstrap/sampling.py +82 -0
- package/src/learning/bootstrap/text_utils.py +71 -0
- package/src/learning/cross_project_aggregator.py +58 -57
- package/src/learning/db/__init__.py +40 -0
- package/src/learning/db/constants.py +44 -0
- package/src/learning/db/schema.py +279 -0
- package/src/learning/learning_db.py +15 -234
- package/src/learning/ranking/__init__.py +33 -0
- package/src/learning/ranking/constants.py +84 -0
- package/src/learning/ranking/helpers.py +278 -0
- package/src/learning/source_quality_scorer.py +66 -65
- package/src/learning/synthetic_bootstrap.py +28 -310
- package/src/memory/__init__.py +36 -0
- package/src/memory/cli.py +205 -0
- package/src/memory/constants.py +39 -0
- package/src/memory/helpers.py +28 -0
- package/src/memory/schema.py +166 -0
- package/src/memory-profiles.py +94 -86
- package/src/memory-reset.py +187 -185
- package/src/memory_compression.py +2 -2
- package/src/memory_store_v2.py +44 -354
- package/src/migrate_v1_to_v2.py +11 -10
- package/src/patterns/analyzers.py +104 -100
- package/src/patterns/learner.py +17 -13
- package/src/patterns/scoring.py +25 -21
- package/src/patterns/store.py +40 -38
- package/src/patterns/terminology.py +53 -51
- package/src/provenance_tracker.py +2 -2
- package/src/qualixar_attribution.py +1 -1
- package/src/search/engine.py +16 -14
- package/src/search/index_loader.py +13 -11
- package/src/setup_validator.py +160 -158
- package/src/subscription_manager.py +20 -18
- package/src/tree/builder.py +66 -64
- package/src/tree/nodes.py +103 -97
- package/src/tree/queries.py +142 -137
- package/src/tree/schema.py +46 -42
- package/src/webhook_dispatcher.py +3 -3
- package/ui_server.py +7 -4
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Database query utilities for synthetic bootstrap.
|
|
6
|
+
|
|
7
|
+
All read-only queries against memory.db used by SyntheticBootstrapper.
|
|
8
|
+
These functions are stateless and take db_path as parameter.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import sqlite3
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Optional, Set
|
|
15
|
+
|
|
16
|
+
from .text_utils import clean_fts_query
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("superlocalmemory.learning.bootstrap.db_queries")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_memory_count(db_path: Path) -> int:
    """
    Count total memories in memory.db.

    Args:
        db_path: Path to memory.db.

    Returns:
        Total number of memories, or 0 if error.
    """
    if not db_path.exists():
        return 0
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM memories')
            # COUNT(*) always yields exactly one row.
            return cursor.fetchone()[0]
        finally:
            # Close even when the query raises — the original leaked the
            # connection on the error path. Mirrors get_learned_patterns().
            conn.close()
    except Exception as e:
        logger.warning("Failed to count memories: %s", e)
        return 0
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_memories_by_access(db_path: Path, min_access: int = 5) -> List[dict]:
    """
    Fetch memories with access_count >= min_access from memory.db.

    These are memories the user keeps coming back to — strong positive signal.

    Args:
        db_path: Path to memory.db.
        min_access: Minimum access_count threshold.

    Returns:
        List of memory dicts (at most 100), sorted by access_count DESC,
        or [] on error.
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE access_count >= ?
                ORDER BY access_count DESC
                LIMIT 100
            ''', (min_access,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close even when the query raises — the original leaked the
            # connection on the error path. Mirrors get_learned_patterns().
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch high-access memories: %s", e)
        return []
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_memories_by_importance(db_path: Path, min_importance: int = 8) -> List[dict]:
    """
    Fetch memories with importance >= min_importance from memory.db.

    High importance = user explicitly rated these as valuable.

    Args:
        db_path: Path to memory.db.
        min_importance: Minimum importance threshold.

    Returns:
        List of memory dicts (at most 100), sorted by importance DESC,
        or [] on error.
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE importance >= ?
                ORDER BY importance DESC
                LIMIT 100
            ''', (min_importance,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close even when the query raises — the original leaked the
            # connection on the error path. Mirrors get_learned_patterns().
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch high-importance memories: %s", e)
        return []
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_recent_memories(db_path: Path, limit: int = 30) -> List[dict]:
    """
    Fetch the N most recently created memories.

    Args:
        db_path: Path to memory.db.
        limit: Maximum number of memories to return.

    Returns:
        List of memory dicts, sorted by created_at DESC, or [] on error.
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                ORDER BY created_at DESC
                LIMIT ?
            ''', (limit,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close even when the query raises — the original leaked the
            # connection on the error path. Mirrors get_learned_patterns().
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch recent memories: %s", e)
        return []
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def get_learned_patterns(
    db_path: Path,
    min_confidence: float = 0.7,
) -> List[dict]:
    """
    Return high-confidence identity_patterns rows from memory.db.

    These patterns come from pattern_learner.py (Layer 4) — tech
    preferences, coding style, terminology, and the like.

    Databases created before v2.3 have no identity_patterns table;
    in that case (and on any error) an empty list is returned.

    Args:
        db_path: Path to memory.db.
        min_confidence: Minimum confidence threshold.

    Returns:
        List of pattern dicts (at most 50), highest confidence first.
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            # Backward compatibility: bail out when the table is absent.
            cur.execute('''
                SELECT name FROM sqlite_master
                WHERE type='table' AND name='identity_patterns'
            ''')
            if cur.fetchone() is None:
                return []

            cur.execute('''
                SELECT id, pattern_type, key, value, confidence,
                       evidence_count, category
                FROM identity_patterns
                WHERE confidence >= ?
                ORDER BY confidence DESC
                LIMIT 50
            ''', (min_confidence,))
            return [dict(r) for r in cur.fetchall()]
        finally:
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch learned patterns: %s", e)
        return []
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def search_memories(db_path: Path, query: str, limit: int = 20) -> List[dict]:
    """
    Lightweight FTS5 lookup in memory.db.

    Used to find memories matching synthetic query terms. Deliberately
    minimal — no TF-IDF, no HNSW, just an FTS5 MATCH ranked by FTS5's
    built-in rank.

    Args:
        db_path: Path to memory.db.
        query: Search query string.
        limit: Maximum results to return.

    Returns:
        List of matching memory dicts; [] for blank queries, missing
        databases, or any FTS failure.
    """
    if not db_path.exists():
        return []
    if not query or not query.strip():
        return []

    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            # Sanitize for FTS5 (same approach as memory_store_v2.search).
            fts_query = clean_fts_query(query)
            if not fts_query:
                return []

            cur.execute('''
                SELECT m.id, m.content, m.summary, m.project_name, m.tags,
                       m.category, m.importance, m.created_at, m.access_count
                FROM memories m
                JOIN memories_fts fts ON m.id = fts.rowid
                WHERE memories_fts MATCH ?
                ORDER BY rank
                LIMIT ?
            ''', (fts_query, limit))
            return [dict(r) for r in cur.fetchall()]
        finally:
            conn.close()
    except Exception as e:
        logger.debug("FTS5 search failed (may not exist yet): %s", e)
        return []
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def find_negative_memories(
    db_path: Path,
    anchor_memory: dict,
    exclude_ids: Optional[Set[int]] = None,
    limit: int = 2,
) -> List[dict]:
    """
    Pick memories unlike the anchor, for use as negative examples.

    Heuristic: prefer rows whose project or category differs from the
    anchor's; when the anchor carries neither and nothing is excluded,
    this degrades to a pure random sample.

    Args:
        db_path: Path to memory.db.
        anchor_memory: The reference memory to find negatives for.
        exclude_ids: Set of memory IDs to exclude from results.
        limit: Maximum number of negatives to return.

    Returns:
        List of negative example memory dicts; [] on error.
    """
    if not db_path.exists():
        return []
    excluded = exclude_ids or set()

    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            project = anchor_memory.get('project_name', '')
            category = anchor_memory.get('category', '')

            # Assemble WHERE fragments and their bound params in lockstep.
            filters: list = []
            bound: list = []

            if project:
                filters.append('project_name != ?')
                bound.append(project)
            if category:
                filters.append('category != ?')
                bound.append(category)

            if excluded:
                marks = ','.join('?' for _ in excluded)
                filters.append(f'id NOT IN ({marks})')
                bound.extend(excluded)

            where_clause = ' AND '.join(filters) if filters else '1=1'

            cur.execute(f'''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE {where_clause}
                ORDER BY RANDOM()
                LIMIT ?
            ''', (*bound, limit))
            return [dict(r) for r in cur.fetchall()]
        finally:
            conn.close()
    except Exception as e:
        logger.debug("Failed to find negative memories: %s", e)
        return []
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Sampling utilities for synthetic bootstrap.
|
|
6
|
+
|
|
7
|
+
Functions for diverse sampling and record aggregation.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Dict, List
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def diverse_sample(
    records: List[dict],
    target: int,
) -> List[dict]:
    """
    Down-sample records while keeping every source strategy represented.

    Each source gets an equal share of the budget so no single strategy
    dominates the training data; leftover slots are filled with not-yet-
    chosen records in their original order.

    Args:
        records: Training records, each carrying a 'source' field.
        target: Desired sample size.

    Returns:
        At most ``target`` records.
    """
    if len(records) <= target:
        return records

    # Bucket records by their originating strategy.
    buckets: Dict[str, List[dict]] = {}
    for rec in records:
        buckets.setdefault(rec.get('source', 'unknown'), []).append(rec)

    if not buckets:
        return records[:target]

    # Equal share per strategy, at least one each.
    quota = max(1, target // len(buckets))
    chosen: List[dict] = []
    for bucket in buckets.values():
        chosen.extend(bucket[:quota])

    # Top up from the remaining pool, preserving input order.
    if len(chosen) < target:
        seen = {(rec['query_hash'], rec['memory_id']) for rec in chosen}
        for rec in records:
            if len(chosen) >= target:
                break
            key = (rec['query_hash'], rec['memory_id'])
            if key not in seen:
                chosen.append(rec)
                seen.add(key)

    return chosen[:target]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def count_sources(records: List[dict]) -> Dict[str, int]:
    """
    Tally how many records came from each source strategy.

    Args:
        records: Training records, each carrying a 'source' field
            (records without one count under 'unknown').

    Returns:
        Mapping of source name to occurrence count.
    """
    tally: Dict[str, int] = {}
    for rec in records:
        name = rec.get('source', 'unknown')
        if name in tally:
            tally[name] += 1
        else:
            tally[name] = 1
    return tally
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Text processing utilities for synthetic bootstrap.
|
|
6
|
+
|
|
7
|
+
Simple keyword extraction and text processing functions
|
|
8
|
+
with no external NLP dependencies.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from collections import Counter
|
|
13
|
+
from typing import List
|
|
14
|
+
|
|
15
|
+
from .constants import STOPWORDS, MIN_KEYWORD_LENGTH
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_keywords(content: str, top_n: int = 3) -> List[str]:
    """
    Pull the most frequent meaningful words out of *content*.

    Pipeline: regex tokenization, then stopword/length filtering, then
    frequency ranking — pure stdlib, no external NLP dependencies.

    Args:
        content: Text to mine for keywords.
        top_n: How many keywords to return.

    Returns:
        Up to ``top_n`` keywords ordered by descending frequency.
    """
    if not content:
        return []

    # Tokens start with a letter and may contain digits, '_', '.', '-'.
    tokens = re.findall(
        r'[a-zA-Z][a-zA-Z0-9_.-]*[a-zA-Z0-9]|[a-zA-Z]', content.lower()
    )

    # Drop stopwords and anything shorter than the configured minimum.
    candidates = [
        tok for tok in tokens
        if tok not in STOPWORDS and len(tok) >= MIN_KEYWORD_LENGTH
    ]
    if not candidates:
        return []

    ranked = Counter(candidates).most_common(top_n)
    return [word for word, _ in ranked]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def clean_fts_query(query: str) -> str:
    """
    Turn a raw query string into an FTS5 MATCH expression.

    Word characters are extracted and OR-joined so a document matching
    any single token satisfies the query.

    Args:
        query: Raw query string.

    Returns:
        FTS5-compatible query string; empty when no word tokens exist.
    """
    tokens = re.findall(r'\w+', query)
    return ' OR '.join(tokens) if tokens else ''
|
|
@@ -260,56 +260,55 @@ class CrossProjectAggregator:
|
|
|
260
260
|
|
|
261
261
|
try:
|
|
262
262
|
conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# Get distinct profiles
|
|
267
|
-
cursor.execute(
|
|
268
|
-
"SELECT DISTINCT profile FROM memories "
|
|
269
|
-
"WHERE profile IS NOT NULL ORDER BY profile"
|
|
270
|
-
)
|
|
271
|
-
profiles = [row[0] for row in cursor.fetchall()]
|
|
272
|
-
|
|
273
|
-
if not profiles:
|
|
274
|
-
# Fallback: if no profile column or all NULL, treat as 'default'
|
|
275
|
-
cursor.execute("SELECT id FROM memories ORDER BY created_at")
|
|
276
|
-
all_ids = [row[0] for row in cursor.fetchall()]
|
|
277
|
-
if all_ids:
|
|
278
|
-
# Get the latest timestamp
|
|
279
|
-
cursor.execute(
|
|
280
|
-
"SELECT MAX(created_at) FROM memories"
|
|
281
|
-
)
|
|
282
|
-
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
283
|
-
results.append({
|
|
284
|
-
"profile": "default",
|
|
285
|
-
"memory_ids": all_ids,
|
|
286
|
-
"latest_timestamp": latest,
|
|
287
|
-
})
|
|
288
|
-
conn.close()
|
|
289
|
-
return results
|
|
290
|
-
|
|
291
|
-
# For each profile, get memory IDs and latest timestamp
|
|
292
|
-
for profile in profiles:
|
|
293
|
-
cursor.execute(
|
|
294
|
-
"SELECT id FROM memories WHERE profile = ? ORDER BY created_at",
|
|
295
|
-
(profile,),
|
|
296
|
-
)
|
|
297
|
-
memory_ids = [row[0] for row in cursor.fetchall()]
|
|
263
|
+
try:
|
|
264
|
+
conn.execute("PRAGMA busy_timeout=5000")
|
|
265
|
+
cursor = conn.cursor()
|
|
298
266
|
|
|
267
|
+
# Get distinct profiles
|
|
299
268
|
cursor.execute(
|
|
300
|
-
"SELECT
|
|
301
|
-
|
|
269
|
+
"SELECT DISTINCT profile FROM memories "
|
|
270
|
+
"WHERE profile IS NOT NULL ORDER BY profile"
|
|
302
271
|
)
|
|
303
|
-
|
|
272
|
+
profiles = [row[0] for row in cursor.fetchall()]
|
|
273
|
+
|
|
274
|
+
if not profiles:
|
|
275
|
+
# Fallback: if no profile column or all NULL, treat as 'default'
|
|
276
|
+
cursor.execute("SELECT id FROM memories ORDER BY created_at")
|
|
277
|
+
all_ids = [row[0] for row in cursor.fetchall()]
|
|
278
|
+
if all_ids:
|
|
279
|
+
# Get the latest timestamp
|
|
280
|
+
cursor.execute(
|
|
281
|
+
"SELECT MAX(created_at) FROM memories"
|
|
282
|
+
)
|
|
283
|
+
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
284
|
+
results.append({
|
|
285
|
+
"profile": "default",
|
|
286
|
+
"memory_ids": all_ids,
|
|
287
|
+
"latest_timestamp": latest,
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
# For each profile, get memory IDs and latest timestamp
|
|
291
|
+
for profile in profiles:
|
|
292
|
+
cursor.execute(
|
|
293
|
+
"SELECT id FROM memories WHERE profile = ? ORDER BY created_at",
|
|
294
|
+
(profile,),
|
|
295
|
+
)
|
|
296
|
+
memory_ids = [row[0] for row in cursor.fetchall()]
|
|
304
297
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
})
|
|
298
|
+
cursor.execute(
|
|
299
|
+
"SELECT MAX(created_at) FROM memories WHERE profile = ?",
|
|
300
|
+
(profile,),
|
|
301
|
+
)
|
|
302
|
+
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
311
303
|
|
|
312
|
-
|
|
304
|
+
if memory_ids:
|
|
305
|
+
results.append({
|
|
306
|
+
"profile": profile,
|
|
307
|
+
"memory_ids": memory_ids,
|
|
308
|
+
"latest_timestamp": latest,
|
|
309
|
+
})
|
|
310
|
+
finally:
|
|
311
|
+
conn.close()
|
|
313
312
|
|
|
314
313
|
except sqlite3.OperationalError as e:
|
|
315
314
|
# Handle case where 'profile' column doesn't exist
|
|
@@ -320,18 +319,20 @@ class CrossProjectAggregator:
|
|
|
320
319
|
)
|
|
321
320
|
try:
|
|
322
321
|
conn = sqlite3.connect(str(self.memory_db_path), timeout=10)
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
322
|
+
try:
|
|
323
|
+
cursor = conn.cursor()
|
|
324
|
+
cursor.execute("SELECT id FROM memories ORDER BY created_at")
|
|
325
|
+
all_ids = [row[0] for row in cursor.fetchall()]
|
|
326
|
+
if all_ids:
|
|
327
|
+
cursor.execute("SELECT MAX(created_at) FROM memories")
|
|
328
|
+
latest = cursor.fetchone()[0] or datetime.now().isoformat()
|
|
329
|
+
results.append({
|
|
330
|
+
"profile": "default",
|
|
331
|
+
"memory_ids": all_ids,
|
|
332
|
+
"latest_timestamp": latest,
|
|
333
|
+
})
|
|
334
|
+
finally:
|
|
335
|
+
conn.close()
|
|
335
336
|
except Exception as inner_e:
|
|
336
337
|
logger.error("Failed to read memory.db: %s", inner_e)
|
|
337
338
|
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Learning database utilities and schema management.
|
|
6
|
+
|
|
7
|
+
This package contains extracted modules from learning_db.py to improve
|
|
8
|
+
code organization and maintainability.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from .constants import (
|
|
12
|
+
MEMORY_DIR,
|
|
13
|
+
LEARNING_DB_PATH,
|
|
14
|
+
DEFAULT_PROFILE,
|
|
15
|
+
DEFAULT_CONFIDENCE,
|
|
16
|
+
DEFAULT_LIMIT,
|
|
17
|
+
ALL_TABLES,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .schema import (
|
|
21
|
+
initialize_schema,
|
|
22
|
+
create_all_tables,
|
|
23
|
+
add_profile_columns,
|
|
24
|
+
create_indexes,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
# Constants
|
|
29
|
+
"MEMORY_DIR",
|
|
30
|
+
"LEARNING_DB_PATH",
|
|
31
|
+
"DEFAULT_PROFILE",
|
|
32
|
+
"DEFAULT_CONFIDENCE",
|
|
33
|
+
"DEFAULT_LIMIT",
|
|
34
|
+
"ALL_TABLES",
|
|
35
|
+
# Schema functions
|
|
36
|
+
"initialize_schema",
|
|
37
|
+
"create_all_tables",
|
|
38
|
+
"add_profile_columns",
|
|
39
|
+
"create_indexes",
|
|
40
|
+
]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
+
"""
|
|
5
|
+
Constants and defaults for learning.db.
|
|
6
|
+
|
|
7
|
+
This module contains database paths, configuration defaults, and other
|
|
8
|
+
constants used by the learning database system.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
# Database paths
MEMORY_DIR = Path.home() / ".claude-memory"  # shared data directory in the user's home
LEARNING_DB_PATH = MEMORY_DIR / "learning.db"  # location of the learning database file

# Default values
DEFAULT_PROFILE = "default"  # profile name used when none is specified
DEFAULT_CONFIDENCE = 1.0  # fallback confidence score
DEFAULT_LIMIT = 100  # fallback row limit for queries

# Table names (for reference and testing)
TABLE_TRANSFERABLE_PATTERNS = "transferable_patterns"
TABLE_WORKFLOW_PATTERNS = "workflow_patterns"
TABLE_RANKING_FEEDBACK = "ranking_feedback"
TABLE_RANKING_MODELS = "ranking_models"
TABLE_SOURCE_QUALITY = "source_quality"
TABLE_ENGAGEMENT_METRICS = "engagement_metrics"
TABLE_ACTION_OUTCOMES = "action_outcomes"
TABLE_BEHAVIORAL_PATTERNS = "behavioral_patterns"
TABLE_CROSS_PROJECT_BEHAVIORS = "cross_project_behaviors"

# All table names for iteration
# NOTE(review): presumably kept in sync with the tables created by
# learning/db/schema.py — confirm when that module changes.
ALL_TABLES = [
    TABLE_TRANSFERABLE_PATTERNS,
    TABLE_WORKFLOW_PATTERNS,
    TABLE_RANKING_FEEDBACK,
    TABLE_RANKING_MODELS,
    TABLE_SOURCE_QUALITY,
    TABLE_ENGAGEMENT_METRICS,
    TABLE_ACTION_OUTCOMES,
    TABLE_BEHAVIORAL_PATTERNS,
    TABLE_CROSS_PROJECT_BEHAVIORS,
]
|