superlocalmemory 2.6.0 → 2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +122 -1806
- package/README.md +142 -410
- package/docs/ACCESSIBILITY.md +291 -0
- package/docs/FRAMEWORK-INTEGRATIONS.md +300 -0
- package/package.json +1 -1
- package/src/learning/__init__.py +201 -0
- package/src/learning/adaptive_ranker.py +826 -0
- package/src/learning/cross_project_aggregator.py +866 -0
- package/src/learning/engagement_tracker.py +638 -0
- package/src/learning/feature_extractor.py +461 -0
- package/src/learning/feedback_collector.py +690 -0
- package/src/learning/learning_db.py +842 -0
- package/src/learning/project_context_manager.py +582 -0
- package/src/learning/source_quality_scorer.py +685 -0
- package/src/learning/workflow_pattern_miner.py +665 -0
- package/ui/index.html +346 -13
- package/ui/js/clusters.js +90 -1
- package/ui/js/graph-core.js +445 -0
- package/ui/js/graph-cytoscape-monolithic-backup.js +1168 -0
- package/ui/js/graph-cytoscape.js +1168 -0
- package/ui/js/graph-d3-backup.js +32 -0
- package/ui/js/graph-filters.js +220 -0
- package/ui/js/graph-interactions.js +354 -0
- package/ui/js/graph-ui.js +214 -0
- package/ui/js/memories.js +52 -0
- package/ui/js/modal.js +104 -1
|
@@ -0,0 +1,842 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SuperLocalMemory V2 - Learning Database Manager (v2.7)
|
|
4
|
+
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
+
Licensed under MIT License
|
|
6
|
+
|
|
7
|
+
Repository: https://github.com/varun369/SuperLocalMemoryV2
|
|
8
|
+
Author: Varun Pratap Bhardwaj (Solution Architect)
|
|
9
|
+
|
|
10
|
+
NOTICE: This software is protected by MIT License.
|
|
11
|
+
Attribution must be preserved in all copies or derivatives.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
LearningDB — Manages the separate learning.db for behavioral data.
|
|
16
|
+
|
|
17
|
+
CRITICAL DESIGN DECISIONS:
|
|
18
|
+
1. learning.db is SEPARATE from memory.db (GDPR erasable, security isolation)
|
|
19
|
+
2. All tables use CREATE TABLE IF NOT EXISTS (safe for re-runs)
|
|
20
|
+
3. WAL mode for concurrent read/write from multiple agents
|
|
21
|
+
4. Singleton pattern matches existing DbConnectionManager approach
|
|
22
|
+
5. Thread-safe via threading.Lock on write operations
|
|
23
|
+
|
|
24
|
+
Tables (6):
|
|
25
|
+
transferable_patterns — Layer 1: Cross-project tech preferences
|
|
26
|
+
workflow_patterns — Layer 3: Sequence + temporal patterns
|
|
27
|
+
ranking_feedback — Feedback from all channels (MCP, CLI, dashboard)
|
|
28
|
+
ranking_models — Model metadata and training history
|
|
29
|
+
source_quality — Per-source learning (which tools produce better memories)
|
|
30
|
+
engagement_metrics — Local-only engagement stats (never transmitted)
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
import json
|
|
34
|
+
import logging
|
|
35
|
+
import sqlite3
|
|
36
|
+
import threading
|
|
37
|
+
from datetime import datetime, date
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
from typing import Optional, Dict, List, Any
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger("superlocalmemory.learning.db")

MEMORY_DIR = Path.home() / ".claude-memory"
LEARNING_DB_PATH = MEMORY_DIR / "learning.db"


class LearningDB:
    """
    Manages the learning.db database for behavioral data.

    Singleton per database path. Thread-safe writes (within one process).
    Separate from memory.db for GDPR compliance and security isolation.

    Usage:
        db = LearningDB()
        db.store_feedback(query_hash="abc123", memory_id=42, signal_type="mcp_used")
        stats = db.get_stats()
    """

    # One instance per resolved db path; guarded by _instances_lock.
    _instances: Dict[str, "LearningDB"] = {}
    _instances_lock = threading.Lock()

    @classmethod
    def get_instance(cls, db_path: Optional[Path] = None) -> "LearningDB":
        """Get or create the singleton LearningDB for *db_path*."""
        if db_path is None:
            db_path = LEARNING_DB_PATH
        key = str(db_path)
        with cls._instances_lock:
            if key not in cls._instances:
                cls._instances[key] = cls(db_path)
            return cls._instances[key]

    @classmethod
    def reset_instance(cls, db_path: Optional[Path] = None):
        """Remove one singleton (or all, when *db_path* is None). Used for testing."""
        with cls._instances_lock:
            if db_path is None:
                cls._instances.clear()
            else:
                cls._instances.pop(str(db_path), None)

    def __init__(self, db_path: Optional[Path] = None):
        """Open (creating if needed) the learning database at *db_path*."""
        self.db_path = Path(db_path) if db_path else LEARNING_DB_PATH
        self._write_lock = threading.Lock()
        self._ensure_directory()
        self._init_schema()
        logger.info("LearningDB initialized: %s", self.db_path)

    def _ensure_directory(self):
        """Ensure the parent directory exists."""
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

    def _get_connection(self) -> sqlite3.Connection:
        """Get a new database connection with standard pragmas (WAL, FK on)."""
        conn = sqlite3.connect(str(self.db_path), timeout=10)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=5000")
        conn.execute("PRAGMA foreign_keys=ON")
        return conn

    # ======================================================================
    # Internal connection helpers (shared read/write boilerplate)
    # ======================================================================

    def _read(self, fn):
        """Run *fn(cursor)* on a fresh connection and return its result."""
        conn = self._get_connection()
        try:
            return fn(conn.cursor())
        finally:
            conn.close()

    def _write(self, fn, action: str, reraise: bool = True):
        """
        Run *fn(cursor)* under the write lock, commit, and return its result.

        On failure: rolls back, logs "Failed to {action}: ...", and re-raises
        unless *reraise* is False (then returns None).
        """
        with self._write_lock:
            conn = self._get_connection()
            try:
                result = fn(conn.cursor())
                conn.commit()
                return result
            except Exception as e:
                conn.rollback()
                logger.error("Failed to %s: %s" % ("%s", "%s"), action, e) if False else logger.error("Failed to " + action + ": %s", e)
                if reraise:
                    raise
                return None
            finally:
                conn.close()

    def _init_schema(self):
        """Create all learning tables and indexes if they don't exist."""
        conn = self._get_connection()
        cursor = conn.cursor()
        try:
            # Layer 1: Cross-project transferable patterns
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS transferable_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.0,
                    evidence_count INTEGER DEFAULT 0,
                    profiles_seen INTEGER DEFAULT 0,
                    first_seen TIMESTAMP,
                    last_seen TIMESTAMP,
                    decay_factor REAL DEFAULT 1.0,
                    contradictions TEXT DEFAULT '[]',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(pattern_type, key)
                )
            ''')

            # Layer 3: Workflow patterns (sequences + temporal + style)
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS workflow_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    pattern_key TEXT NOT NULL,
                    pattern_value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.0,
                    evidence_count INTEGER DEFAULT 0,
                    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    metadata TEXT DEFAULT '{}'
                )
            ''')

            # Feedback from all channels (MCP, CLI, dashboard)
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS ranking_feedback (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    query_hash TEXT NOT NULL,
                    query_keywords TEXT,
                    memory_id INTEGER NOT NULL,
                    rank_position INTEGER,
                    signal_type TEXT NOT NULL,
                    signal_value REAL DEFAULT 1.0,
                    channel TEXT NOT NULL,
                    source_tool TEXT,
                    dwell_time REAL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # Model metadata and training history
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS ranking_models (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    model_version TEXT NOT NULL,
                    training_samples INTEGER,
                    synthetic_samples INTEGER DEFAULT 0,
                    real_samples INTEGER DEFAULT 0,
                    ndcg_at_10 REAL,
                    model_path TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # Source quality scores (per-source learning)
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS source_quality (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_id TEXT NOT NULL UNIQUE,
                    positive_signals INTEGER DEFAULT 0,
                    total_memories INTEGER DEFAULT 0,
                    quality_score REAL DEFAULT 0.5,
                    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # Engagement metrics (local only, never transmitted)
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS engagement_metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    metric_date DATE NOT NULL UNIQUE,
                    memories_created INTEGER DEFAULT 0,
                    recalls_performed INTEGER DEFAULT 0,
                    feedback_signals INTEGER DEFAULT 0,
                    patterns_updated INTEGER DEFAULT 0,
                    active_sources TEXT DEFAULT '[]',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # Indexes for the common lookup paths
            index_ddl = (
                'CREATE INDEX IF NOT EXISTS idx_feedback_query '
                'ON ranking_feedback(query_hash)',
                'CREATE INDEX IF NOT EXISTS idx_feedback_memory '
                'ON ranking_feedback(memory_id)',
                'CREATE INDEX IF NOT EXISTS idx_feedback_channel '
                'ON ranking_feedback(channel)',
                'CREATE INDEX IF NOT EXISTS idx_feedback_created '
                'ON ranking_feedback(created_at)',
                'CREATE INDEX IF NOT EXISTS idx_patterns_type '
                'ON transferable_patterns(pattern_type)',
                'CREATE INDEX IF NOT EXISTS idx_workflow_type '
                'ON workflow_patterns(pattern_type)',
                'CREATE INDEX IF NOT EXISTS idx_engagement_date '
                'ON engagement_metrics(metric_date)',
            )
            for ddl in index_ddl:
                cursor.execute(ddl)

            conn.commit()
            logger.info("Learning schema initialized successfully")
        except Exception as e:
            logger.error("Failed to initialize learning schema: %s", e)
            conn.rollback()
            raise
        finally:
            conn.close()

    # ======================================================================
    # Feedback Operations
    # ======================================================================

    def store_feedback(
        self,
        query_hash: str,
        memory_id: int,
        signal_type: str,
        signal_value: float = 1.0,
        channel: str = "mcp",
        query_keywords: Optional[str] = None,
        rank_position: Optional[int] = None,
        source_tool: Optional[str] = None,
        dwell_time: Optional[float] = None,
    ) -> int:
        """
        Store a ranking feedback signal.

        Args:
            query_hash: SHA256[:16] of the query (privacy-preserving)
            memory_id: ID of the memory in memory.db
            signal_type: One of 'mcp_used', 'cli_useful', 'dashboard_click', 'passive_decay'
            signal_value: 1.0=strong positive, 0.5=weak, 0.0=negative
            channel: 'mcp', 'cli', or 'dashboard'
            query_keywords: Top keywords for grouping (optional)
            rank_position: Where it appeared in results (1-50)
            source_tool: Tool that originated the query (e.g., 'claude-desktop')
            dwell_time: Seconds spent viewing (dashboard only)

        Returns:
            Row ID of the inserted feedback record.
        """
        def _insert(cursor):
            cursor.execute('''
                INSERT INTO ranking_feedback
                    (query_hash, memory_id, signal_type, signal_value,
                     channel, query_keywords, rank_position, source_tool,
                     dwell_time)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                query_hash, memory_id, signal_type, signal_value,
                channel, query_keywords, rank_position, source_tool,
                dwell_time,
            ))
            return cursor.lastrowid

        row_id = self._write(_insert, "store feedback")
        logger.debug(
            "Feedback stored: memory=%d, type=%s, value=%.1f",
            memory_id, signal_type, signal_value
        )
        return row_id

    def get_feedback_count(self) -> int:
        """Get total number of feedback signals."""
        def _count(cursor):
            cursor.execute('SELECT COUNT(*) FROM ranking_feedback')
            return cursor.fetchone()[0]
        return self._read(_count)

    def get_unique_query_count(self) -> int:
        """Get number of unique queries with feedback."""
        def _count(cursor):
            cursor.execute(
                'SELECT COUNT(DISTINCT query_hash) FROM ranking_feedback'
            )
            return cursor.fetchone()[0]
        return self._read(_count)

    def get_feedback_for_training(
        self,
        limit: int = 10000,
    ) -> List[Dict[str, Any]]:
        """
        Get feedback records suitable for model training.

        Returns list of dicts with query_hash, memory_id, signal_value, etc.
        Ordered by created_at DESC (newest first).
        """
        def _fetch(cursor):
            cursor.execute('''
                SELECT query_hash, query_keywords, memory_id, rank_position,
                       signal_type, signal_value, channel, source_tool,
                       created_at
                FROM ranking_feedback
                ORDER BY created_at DESC
                LIMIT ?
            ''', (limit,))
            return [dict(row) for row in cursor.fetchall()]
        return self._read(_fetch)

    # ======================================================================
    # Transferable Pattern Operations
    # ======================================================================

    def upsert_transferable_pattern(
        self,
        pattern_type: str,
        key: str,
        value: str,
        confidence: float,
        evidence_count: int,
        profiles_seen: int = 1,
        decay_factor: float = 1.0,
        contradictions: Optional[List[str]] = None,
    ) -> int:
        """
        Insert or update a transferable pattern.

        On update, first_seen/created_at are preserved; last_seen/updated_at
        move to now. Returns the pattern's row ID.
        """
        now = datetime.now().isoformat()
        contradictions_json = json.dumps(contradictions or [])

        def _upsert(cursor):
            # SELECT-then-UPDATE is safe here: _write holds the write lock.
            cursor.execute(
                'SELECT id, first_seen FROM transferable_patterns '
                'WHERE pattern_type = ? AND key = ?',
                (pattern_type, key)
            )
            existing = cursor.fetchone()
            if existing:
                cursor.execute('''
                    UPDATE transferable_patterns
                    SET value = ?, confidence = ?, evidence_count = ?,
                        profiles_seen = ?, last_seen = ?, decay_factor = ?,
                        contradictions = ?, updated_at = ?
                    WHERE id = ?
                ''', (
                    value, confidence, evidence_count,
                    profiles_seen, now, decay_factor,
                    contradictions_json, now, existing['id']
                ))
                return existing['id']
            cursor.execute('''
                INSERT INTO transferable_patterns
                    (pattern_type, key, value, confidence, evidence_count,
                     profiles_seen, first_seen, last_seen, decay_factor,
                     contradictions, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                pattern_type, key, value, confidence, evidence_count,
                profiles_seen, now, now, decay_factor,
                contradictions_json, now, now
            ))
            return cursor.lastrowid

        return self._write(_upsert, "upsert pattern")

    def get_transferable_patterns(
        self,
        min_confidence: float = 0.0,
        pattern_type: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Get transferable patterns filtered by confidence and (optionally) type."""
        def _fetch(cursor):
            if pattern_type:
                cursor.execute('''
                    SELECT * FROM transferable_patterns
                    WHERE confidence >= ? AND pattern_type = ?
                    ORDER BY confidence DESC
                ''', (min_confidence, pattern_type))
            else:
                cursor.execute('''
                    SELECT * FROM transferable_patterns
                    WHERE confidence >= ?
                    ORDER BY confidence DESC
                ''', (min_confidence,))
            return [dict(row) for row in cursor.fetchall()]
        return self._read(_fetch)

    # ======================================================================
    # Workflow Pattern Operations
    # ======================================================================

    def store_workflow_pattern(
        self,
        pattern_type: str,
        pattern_key: str,
        pattern_value: str,
        confidence: float = 0.0,
        evidence_count: int = 0,
        metadata: Optional[Dict] = None,
    ) -> int:
        """Store a workflow pattern (sequence, temporal, or style). Returns row ID."""
        metadata_json = json.dumps(metadata or {})

        def _insert(cursor):
            cursor.execute('''
                INSERT INTO workflow_patterns
                    (pattern_type, pattern_key, pattern_value,
                     confidence, evidence_count, metadata)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (
                pattern_type, pattern_key, pattern_value,
                confidence, evidence_count, metadata_json
            ))
            return cursor.lastrowid

        return self._write(_insert, "store workflow pattern")

    def get_workflow_patterns(
        self,
        pattern_type: Optional[str] = None,
        min_confidence: float = 0.0,
    ) -> List[Dict[str, Any]]:
        """Get workflow patterns filtered by type and confidence."""
        def _fetch(cursor):
            if pattern_type:
                cursor.execute('''
                    SELECT * FROM workflow_patterns
                    WHERE pattern_type = ? AND confidence >= ?
                    ORDER BY confidence DESC
                ''', (pattern_type, min_confidence))
            else:
                cursor.execute('''
                    SELECT * FROM workflow_patterns
                    WHERE confidence >= ?
                    ORDER BY confidence DESC
                ''', (min_confidence,))
            return [dict(row) for row in cursor.fetchall()]
        return self._read(_fetch)

    def clear_workflow_patterns(self, pattern_type: Optional[str] = None):
        """Clear workflow patterns (all, or one type). Used before re-mining."""
        def _clear(cursor):
            if pattern_type:
                cursor.execute(
                    'DELETE FROM workflow_patterns WHERE pattern_type = ?',
                    (pattern_type,)
                )
            else:
                cursor.execute('DELETE FROM workflow_patterns')
        self._write(_clear, "clear workflow patterns")

    # ======================================================================
    # Source Quality Operations
    # ======================================================================

    def update_source_quality(
        self,
        source_id: str,
        positive_signals: int,
        total_memories: int,
    ):
        """
        Update quality score for a memory source.

        Score uses Beta-Binomial smoothing with alpha=beta=1:
        (1 + positives) / (2 + total), so an unseen source scores 0.5.
        """
        quality_score = (1.0 + positive_signals) / (2.0 + total_memories)

        def _upsert(cursor):
            # excluded.* reuses the VALUES row, so each value is bound once.
            cursor.execute('''
                INSERT INTO source_quality
                    (source_id, positive_signals, total_memories,
                     quality_score, last_updated)
                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
                ON CONFLICT(source_id) DO UPDATE SET
                    positive_signals = excluded.positive_signals,
                    total_memories = excluded.total_memories,
                    quality_score = excluded.quality_score,
                    last_updated = CURRENT_TIMESTAMP
            ''', (source_id, positive_signals, total_memories, quality_score))

        self._write(_upsert, "update source quality")

    def get_source_scores(self) -> Dict[str, float]:
        """Get quality scores for all known sources, keyed by source_id."""
        def _fetch(cursor):
            cursor.execute('SELECT source_id, quality_score FROM source_quality')
            return {row['source_id']: row['quality_score'] for row in cursor.fetchall()}
        return self._read(_fetch)

    # ======================================================================
    # Model Metadata Operations
    # ======================================================================

    def record_model_training(
        self,
        model_version: str,
        training_samples: int,
        synthetic_samples: int = 0,
        real_samples: int = 0,
        ndcg_at_10: Optional[float] = None,
        model_path: Optional[str] = None,
    ) -> int:
        """Record metadata about a trained ranking model. Returns row ID."""
        def _insert(cursor):
            cursor.execute('''
                INSERT INTO ranking_models
                    (model_version, training_samples, synthetic_samples,
                     real_samples, ndcg_at_10, model_path)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (
                model_version, training_samples, synthetic_samples,
                real_samples, ndcg_at_10, model_path,
            ))
            return cursor.lastrowid
        return self._write(_insert, "record model training")

    def get_latest_model(self) -> Optional[Dict[str, Any]]:
        """Get metadata for the most recently trained model, or None."""
        def _fetch(cursor):
            cursor.execute('''
                SELECT * FROM ranking_models
                ORDER BY created_at DESC
                LIMIT 1
            ''')
            row = cursor.fetchone()
            return dict(row) if row else None
        return self._read(_fetch)

    # ======================================================================
    # Engagement Metrics Operations
    # ======================================================================

    def increment_engagement(
        self,
        metric_type: str,
        count: int = 1,
        source: Optional[str] = None,
    ):
        """
        Increment a daily engagement metric. Best-effort: failures are
        logged, never raised.

        Args:
            metric_type: One of 'memories_created', 'recalls_performed',
                         'feedback_signals', 'patterns_updated'
            count: Increment amount (default 1)
            source: Source tool identifier to track in active_sources
        """
        today = date.today().isoformat()
        valid_metrics = {
            'memories_created', 'recalls_performed',
            'feedback_signals', 'patterns_updated',
        }
        if metric_type not in valid_metrics:
            logger.warning("Invalid metric type: %s", metric_type)
            return

        def _bump(cursor):
            # Ensure today's row exists before incrementing.
            cursor.execute('''
                INSERT OR IGNORE INTO engagement_metrics (metric_date)
                VALUES (?)
            ''', (today,))

            # metric_type is interpolated as a column name; this is safe only
            # because it was validated against the whitelist above.
            cursor.execute(f'''
                UPDATE engagement_metrics
                SET {metric_type} = {metric_type} + ?
                WHERE metric_date = ?
            ''', (count, today))

            if source:
                cursor.execute('''
                    SELECT active_sources FROM engagement_metrics
                    WHERE metric_date = ?
                ''', (today,))
                row = cursor.fetchone()
                if row:
                    sources = json.loads(row['active_sources'] or '[]')
                    if source not in sources:
                        sources.append(source)
                        cursor.execute('''
                            UPDATE engagement_metrics
                            SET active_sources = ?
                            WHERE metric_date = ?
                        ''', (json.dumps(sources), today))

        self._write(_bump, "update engagement", reraise=False)

    def get_engagement_history(
        self,
        days: int = 30,
    ) -> List[Dict[str, Any]]:
        """
        Get the most recent *days* engagement rows, newest first.

        NOTE: rows exist only for dates with activity, so this is "last N
        recorded days", not strictly "last N calendar days".
        """
        def _fetch(cursor):
            cursor.execute('''
                SELECT * FROM engagement_metrics
                ORDER BY metric_date DESC
                LIMIT ?
            ''', (days,))
            return [dict(row) for row in cursor.fetchall()]
        return self._read(_fetch)

    # ======================================================================
    # Statistics & Diagnostics
    # ======================================================================

    def get_stats(self) -> Dict[str, Any]:
        """Get comprehensive learning database statistics."""
        def _collect(cursor):
            stats: Dict[str, Any] = {}

            # (stat key, scalar COUNT query) pairs
            scalar_queries = (
                ('feedback_count', 'SELECT COUNT(*) FROM ranking_feedback'),
                ('unique_queries',
                 'SELECT COUNT(DISTINCT query_hash) FROM ranking_feedback'),
                ('transferable_patterns',
                 'SELECT COUNT(*) FROM transferable_patterns'),
                ('high_confidence_patterns',
                 'SELECT COUNT(*) FROM transferable_patterns '
                 'WHERE confidence >= 0.6'),
                ('workflow_patterns', 'SELECT COUNT(*) FROM workflow_patterns'),
                ('tracked_sources', 'SELECT COUNT(*) FROM source_quality'),
                ('models_trained', 'SELECT COUNT(*) FROM ranking_models'),
            )
            for stat_key, sql in scalar_queries:
                cursor.execute(sql)
                stats[stat_key] = cursor.fetchone()[0]
            return stats

        stats = self._read(_collect)

        latest_model = self.get_latest_model()
        if latest_model:
            stats['latest_model_version'] = latest_model['model_version']
            stats['latest_model_ndcg'] = latest_model['ndcg_at_10']
        else:
            stats['latest_model_version'] = None
            stats['latest_model_ndcg'] = None

        # DB file size (0 when the file hasn't been created yet)
        if self.db_path.exists():
            stats['db_size_bytes'] = self.db_path.stat().st_size
            stats['db_size_kb'] = round(stats['db_size_bytes'] / 1024, 1)
        else:
            stats['db_size_bytes'] = 0
            stats['db_size_kb'] = 0

        return stats

    # ======================================================================
    # Reset / Cleanup
    # ======================================================================

    def reset(self):
        """
        Delete all learning data. Memories in memory.db are preserved.

        This is the GDPR Article 17 "Right to Erasure" handler for
        behavioral data.
        """
        def _wipe(cursor):
            for table in (
                'ranking_feedback', 'transferable_patterns',
                'workflow_patterns', 'ranking_models',
                'source_quality', 'engagement_metrics',
            ):
                cursor.execute(f'DELETE FROM {table}')  # fixed table names only

        self._write(_wipe, "reset learning data")
        logger.info(
            "Learning data reset. Memories in memory.db preserved."
        )

    def delete_database(self):
        """
        Completely delete learning.db file.
        More aggressive than reset() — removes the file entirely,
        including the WAL/SHM sidecar files.
        """
        with self._write_lock:
            LearningDB.reset_instance(self.db_path)
            if self.db_path.exists():
                self.db_path.unlink()
                logger.info("Learning database deleted: %s", self.db_path)
            # Also clean WAL/SHM files
            wal = self.db_path.with_suffix('.db-wal')
            shm = self.db_path.with_suffix('.db-shm')
            if wal.exists():
                wal.unlink()
            if shm.exists():
                shm.unlink()