superlocalmemory 2.6.0 → 2.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,842 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SuperLocalMemory V2 - Learning Database Manager (v2.7)
4
+ Copyright (c) 2026 Varun Pratap Bhardwaj
5
+ Licensed under MIT License
6
+
7
+ Repository: https://github.com/varun369/SuperLocalMemoryV2
8
+ Author: Varun Pratap Bhardwaj (Solution Architect)
9
+
10
+ NOTICE: This software is protected by MIT License.
11
+ Attribution must be preserved in all copies or derivatives.
12
+ """
13
+
14
+ """
15
+ LearningDB — Manages the separate learning.db for behavioral data.
16
+
17
+ CRITICAL DESIGN DECISIONS:
18
+ 1. learning.db is SEPARATE from memory.db (GDPR erasable, security isolation)
19
+ 2. All tables use CREATE TABLE IF NOT EXISTS (safe for re-runs)
20
+ 3. WAL mode for concurrent read/write from multiple agents
21
+ 4. Singleton pattern matches existing DbConnectionManager approach
22
+ 5. Thread-safe via threading.Lock on write operations
23
+
24
+ Tables (6):
25
+ transferable_patterns — Layer 1: Cross-project tech preferences
26
+ workflow_patterns — Layer 3: Sequence + temporal patterns
27
+ ranking_feedback — Feedback from all channels (MCP, CLI, dashboard)
28
+ ranking_models — Model metadata and training history
29
+ source_quality — Per-source learning (which tools produce better memories)
30
+ engagement_metrics — Local-only engagement stats (never transmitted)
31
+ """
32
+
33
+ import json
34
+ import logging
35
+ import sqlite3
36
+ import threading
37
+ from datetime import datetime, date
38
+ from pathlib import Path
39
+ from typing import Optional, Dict, List, Any
40
+
41
+ logger = logging.getLogger("superlocalmemory.learning.db")
42
+
43
+ MEMORY_DIR = Path.home() / ".claude-memory"
44
+ LEARNING_DB_PATH = MEMORY_DIR / "learning.db"
45
+
46
+
47
+ class LearningDB:
48
+ """
49
+ Manages the learning.db database for behavioral data.
50
+
51
+ Singleton per database path. Thread-safe writes.
52
+ Separate from memory.db for GDPR compliance and security isolation.
53
+
54
+ Usage:
55
+ db = LearningDB()
56
+ db.store_feedback(query_hash="abc123", memory_id=42, signal_type="mcp_used")
57
+ stats = db.get_stats()
58
+ """
59
+
60
+ _instances: Dict[str, "LearningDB"] = {}
61
+ _instances_lock = threading.Lock()
62
+
63
+ @classmethod
64
+ def get_instance(cls, db_path: Optional[Path] = None) -> "LearningDB":
65
+ """Get or create the singleton LearningDB."""
66
+ if db_path is None:
67
+ db_path = LEARNING_DB_PATH
68
+ key = str(db_path)
69
+ with cls._instances_lock:
70
+ if key not in cls._instances:
71
+ cls._instances[key] = cls(db_path)
72
+ return cls._instances[key]
73
+
74
+ @classmethod
75
+ def reset_instance(cls, db_path: Optional[Path] = None):
76
+ """Remove singleton. Used for testing."""
77
+ with cls._instances_lock:
78
+ if db_path is None:
79
+ cls._instances.clear()
80
+ else:
81
+ key = str(db_path)
82
+ if key in cls._instances:
83
+ del cls._instances[key]
84
+
85
    def __init__(self, db_path: Optional[Path] = None):
        """Create (or reopen) the learning database at *db_path*.

        Falls back to ~/.claude-memory/learning.db when no path is given.
        Safe to call repeatedly: schema creation is IF-NOT-EXISTS based.
        """
        # Normalise to Path so str and Path callers behave identically.
        self.db_path = Path(db_path) if db_path else LEARNING_DB_PATH
        # Serialises writers within this process; SQLite's busy timeout
        # handles cross-process contention.
        self._write_lock = threading.Lock()
        self._ensure_directory()
        self._init_schema()
        logger.info("LearningDB initialized: %s", self.db_path)
91
+
92
+ def _ensure_directory(self):
93
+ """Ensure the parent directory exists."""
94
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
95
+
96
+ def _get_connection(self) -> sqlite3.Connection:
97
+ """Get a new database connection with standard pragmas."""
98
+ conn = sqlite3.connect(str(self.db_path), timeout=10)
99
+ conn.row_factory = sqlite3.Row
100
+ conn.execute("PRAGMA journal_mode=WAL")
101
+ conn.execute("PRAGMA busy_timeout=5000")
102
+ conn.execute("PRAGMA foreign_keys=ON")
103
+ return conn
104
+
105
    def _init_schema(self):
        """Create all learning tables and indexes if they don't exist.

        Every statement is idempotent (CREATE ... IF NOT EXISTS), so this
        is safe to run on every startup. On any failure the transaction is
        rolled back and the exception re-raised — a partially built schema
        is never committed.
        """
        conn = self._get_connection()
        cursor = conn.cursor()

        try:
            # ------------------------------------------------------------------
            # Layer 1: Cross-project transferable patterns
            # UNIQUE(pattern_type, key) is what upsert_transferable_pattern()
            # relies on to keep one row per pattern.
            # ------------------------------------------------------------------
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS transferable_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.0,
                    evidence_count INTEGER DEFAULT 0,
                    profiles_seen INTEGER DEFAULT 0,
                    first_seen TIMESTAMP,
                    last_seen TIMESTAMP,
                    decay_factor REAL DEFAULT 1.0,
                    contradictions TEXT DEFAULT '[]',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(pattern_type, key)
                )
            ''')

            # ------------------------------------------------------------------
            # Layer 3: Workflow patterns (sequences + temporal + style)
            # No UNIQUE constraint: rows are append-only and re-mining clears
            # the table first (see clear_workflow_patterns).
            # ------------------------------------------------------------------
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS workflow_patterns (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    pattern_type TEXT NOT NULL,
                    pattern_key TEXT NOT NULL,
                    pattern_value TEXT NOT NULL,
                    confidence REAL DEFAULT 0.0,
                    evidence_count INTEGER DEFAULT 0,
                    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    metadata TEXT DEFAULT '{}'
                )
            ''')

            # ------------------------------------------------------------------
            # Feedback from all channels (MCP, CLI, dashboard)
            # ------------------------------------------------------------------
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS ranking_feedback (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    query_hash TEXT NOT NULL,
                    query_keywords TEXT,
                    memory_id INTEGER NOT NULL,
                    rank_position INTEGER,
                    signal_type TEXT NOT NULL,
                    signal_value REAL DEFAULT 1.0,
                    channel TEXT NOT NULL,
                    source_tool TEXT,
                    dwell_time REAL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # ------------------------------------------------------------------
            # Model metadata (one row per training run)
            # ------------------------------------------------------------------
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS ranking_models (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    model_version TEXT NOT NULL,
                    training_samples INTEGER,
                    synthetic_samples INTEGER DEFAULT 0,
                    real_samples INTEGER DEFAULT 0,
                    ndcg_at_10 REAL,
                    model_path TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # ------------------------------------------------------------------
            # Source quality scores (per-source learning); one row per source,
            # enforced by the UNIQUE constraint used by update_source_quality().
            # ------------------------------------------------------------------
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS source_quality (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_id TEXT NOT NULL UNIQUE,
                    positive_signals INTEGER DEFAULT 0,
                    total_memories INTEGER DEFAULT 0,
                    quality_score REAL DEFAULT 0.5,
                    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # ------------------------------------------------------------------
            # Engagement metrics (local only, never transmitted); one row per
            # calendar day, enforced by UNIQUE(metric_date).
            # ------------------------------------------------------------------
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS engagement_metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    metric_date DATE NOT NULL UNIQUE,
                    memories_created INTEGER DEFAULT 0,
                    recalls_performed INTEGER DEFAULT 0,
                    feedback_signals INTEGER DEFAULT 0,
                    patterns_updated INTEGER DEFAULT 0,
                    active_sources TEXT DEFAULT '[]',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')

            # ------------------------------------------------------------------
            # Indexes for performance
            # ------------------------------------------------------------------
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_feedback_query '
                'ON ranking_feedback(query_hash)'
            )
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_feedback_memory '
                'ON ranking_feedback(memory_id)'
            )
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_feedback_channel '
                'ON ranking_feedback(channel)'
            )
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_feedback_created '
                'ON ranking_feedback(created_at)'
            )
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_patterns_type '
                'ON transferable_patterns(pattern_type)'
            )
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_workflow_type '
                'ON workflow_patterns(pattern_type)'
            )
            cursor.execute(
                'CREATE INDEX IF NOT EXISTS idx_engagement_date '
                'ON engagement_metrics(metric_date)'
            )

            conn.commit()
            logger.info("Learning schema initialized successfully")

        except Exception as e:
            logger.error("Failed to initialize learning schema: %s", e)
            conn.rollback()
            raise
        finally:
            conn.close()
255
+
256
+ # ======================================================================
257
+ # Feedback Operations
258
+ # ======================================================================
259
+
260
+ def store_feedback(
261
+ self,
262
+ query_hash: str,
263
+ memory_id: int,
264
+ signal_type: str,
265
+ signal_value: float = 1.0,
266
+ channel: str = "mcp",
267
+ query_keywords: Optional[str] = None,
268
+ rank_position: Optional[int] = None,
269
+ source_tool: Optional[str] = None,
270
+ dwell_time: Optional[float] = None,
271
+ ) -> int:
272
+ """
273
+ Store a ranking feedback signal.
274
+
275
+ Args:
276
+ query_hash: SHA256[:16] of the query (privacy-preserving)
277
+ memory_id: ID of the memory in memory.db
278
+ signal_type: One of 'mcp_used', 'cli_useful', 'dashboard_click', 'passive_decay'
279
+ signal_value: 1.0=strong positive, 0.5=weak, 0.0=negative
280
+ channel: 'mcp', 'cli', or 'dashboard'
281
+ query_keywords: Top keywords for grouping (optional)
282
+ rank_position: Where it appeared in results (1-50)
283
+ source_tool: Tool that originated the query (e.g., 'claude-desktop')
284
+ dwell_time: Seconds spent viewing (dashboard only)
285
+
286
+ Returns:
287
+ Row ID of the inserted feedback record.
288
+ """
289
+ with self._write_lock:
290
+ conn = self._get_connection()
291
+ try:
292
+ cursor = conn.cursor()
293
+ cursor.execute('''
294
+ INSERT INTO ranking_feedback
295
+ (query_hash, memory_id, signal_type, signal_value,
296
+ channel, query_keywords, rank_position, source_tool,
297
+ dwell_time)
298
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
299
+ ''', (
300
+ query_hash, memory_id, signal_type, signal_value,
301
+ channel, query_keywords, rank_position, source_tool,
302
+ dwell_time,
303
+ ))
304
+ conn.commit()
305
+ row_id = cursor.lastrowid
306
+ logger.debug(
307
+ "Feedback stored: memory=%d, type=%s, value=%.1f",
308
+ memory_id, signal_type, signal_value
309
+ )
310
+ return row_id
311
+ except Exception as e:
312
+ conn.rollback()
313
+ logger.error("Failed to store feedback: %s", e)
314
+ raise
315
+ finally:
316
+ conn.close()
317
+
318
+ def get_feedback_count(self) -> int:
319
+ """Get total number of feedback signals."""
320
+ conn = self._get_connection()
321
+ try:
322
+ cursor = conn.cursor()
323
+ cursor.execute('SELECT COUNT(*) FROM ranking_feedback')
324
+ return cursor.fetchone()[0]
325
+ finally:
326
+ conn.close()
327
+
328
+ def get_unique_query_count(self) -> int:
329
+ """Get number of unique queries with feedback."""
330
+ conn = self._get_connection()
331
+ try:
332
+ cursor = conn.cursor()
333
+ cursor.execute(
334
+ 'SELECT COUNT(DISTINCT query_hash) FROM ranking_feedback'
335
+ )
336
+ return cursor.fetchone()[0]
337
+ finally:
338
+ conn.close()
339
+
340
+ def get_feedback_for_training(
341
+ self,
342
+ limit: int = 10000,
343
+ ) -> List[Dict[str, Any]]:
344
+ """
345
+ Get feedback records suitable for model training.
346
+
347
+ Returns list of dicts with query_hash, memory_id, signal_value, etc.
348
+ Ordered by created_at DESC (newest first).
349
+ """
350
+ conn = self._get_connection()
351
+ try:
352
+ cursor = conn.cursor()
353
+ cursor.execute('''
354
+ SELECT query_hash, query_keywords, memory_id, rank_position,
355
+ signal_type, signal_value, channel, source_tool,
356
+ created_at
357
+ FROM ranking_feedback
358
+ ORDER BY created_at DESC
359
+ LIMIT ?
360
+ ''', (limit,))
361
+ return [dict(row) for row in cursor.fetchall()]
362
+ finally:
363
+ conn.close()
364
+
365
+ # ======================================================================
366
+ # Transferable Pattern Operations
367
+ # ======================================================================
368
+
369
+ def upsert_transferable_pattern(
370
+ self,
371
+ pattern_type: str,
372
+ key: str,
373
+ value: str,
374
+ confidence: float,
375
+ evidence_count: int,
376
+ profiles_seen: int = 1,
377
+ decay_factor: float = 1.0,
378
+ contradictions: Optional[List[str]] = None,
379
+ ) -> int:
380
+ """Insert or update a transferable pattern."""
381
+ now = datetime.now().isoformat()
382
+ contradictions_json = json.dumps(contradictions or [])
383
+
384
+ with self._write_lock:
385
+ conn = self._get_connection()
386
+ try:
387
+ cursor = conn.cursor()
388
+
389
+ # Check if pattern exists
390
+ cursor.execute(
391
+ 'SELECT id, first_seen FROM transferable_patterns '
392
+ 'WHERE pattern_type = ? AND key = ?',
393
+ (pattern_type, key)
394
+ )
395
+ existing = cursor.fetchone()
396
+
397
+ if existing:
398
+ cursor.execute('''
399
+ UPDATE transferable_patterns
400
+ SET value = ?, confidence = ?, evidence_count = ?,
401
+ profiles_seen = ?, last_seen = ?, decay_factor = ?,
402
+ contradictions = ?, updated_at = ?
403
+ WHERE id = ?
404
+ ''', (
405
+ value, confidence, evidence_count,
406
+ profiles_seen, now, decay_factor,
407
+ contradictions_json, now, existing['id']
408
+ ))
409
+ row_id = existing['id']
410
+ else:
411
+ cursor.execute('''
412
+ INSERT INTO transferable_patterns
413
+ (pattern_type, key, value, confidence, evidence_count,
414
+ profiles_seen, first_seen, last_seen, decay_factor,
415
+ contradictions, created_at, updated_at)
416
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
417
+ ''', (
418
+ pattern_type, key, value, confidence, evidence_count,
419
+ profiles_seen, now, now, decay_factor,
420
+ contradictions_json, now, now
421
+ ))
422
+ row_id = cursor.lastrowid
423
+
424
+ conn.commit()
425
+ return row_id
426
+ except Exception as e:
427
+ conn.rollback()
428
+ logger.error("Failed to upsert pattern: %s", e)
429
+ raise
430
+ finally:
431
+ conn.close()
432
+
433
+ def get_transferable_patterns(
434
+ self,
435
+ min_confidence: float = 0.0,
436
+ pattern_type: Optional[str] = None,
437
+ ) -> List[Dict[str, Any]]:
438
+ """Get transferable patterns filtered by confidence and type."""
439
+ conn = self._get_connection()
440
+ try:
441
+ cursor = conn.cursor()
442
+ if pattern_type:
443
+ cursor.execute('''
444
+ SELECT * FROM transferable_patterns
445
+ WHERE confidence >= ? AND pattern_type = ?
446
+ ORDER BY confidence DESC
447
+ ''', (min_confidence, pattern_type))
448
+ else:
449
+ cursor.execute('''
450
+ SELECT * FROM transferable_patterns
451
+ WHERE confidence >= ?
452
+ ORDER BY confidence DESC
453
+ ''', (min_confidence,))
454
+ return [dict(row) for row in cursor.fetchall()]
455
+ finally:
456
+ conn.close()
457
+
458
+ # ======================================================================
459
+ # Workflow Pattern Operations
460
+ # ======================================================================
461
+
462
+ def store_workflow_pattern(
463
+ self,
464
+ pattern_type: str,
465
+ pattern_key: str,
466
+ pattern_value: str,
467
+ confidence: float = 0.0,
468
+ evidence_count: int = 0,
469
+ metadata: Optional[Dict] = None,
470
+ ) -> int:
471
+ """Store a workflow pattern (sequence, temporal, or style)."""
472
+ metadata_json = json.dumps(metadata or {})
473
+
474
+ with self._write_lock:
475
+ conn = self._get_connection()
476
+ try:
477
+ cursor = conn.cursor()
478
+ cursor.execute('''
479
+ INSERT INTO workflow_patterns
480
+ (pattern_type, pattern_key, pattern_value,
481
+ confidence, evidence_count, metadata)
482
+ VALUES (?, ?, ?, ?, ?, ?)
483
+ ''', (
484
+ pattern_type, pattern_key, pattern_value,
485
+ confidence, evidence_count, metadata_json
486
+ ))
487
+ conn.commit()
488
+ return cursor.lastrowid
489
+ except Exception as e:
490
+ conn.rollback()
491
+ logger.error("Failed to store workflow pattern: %s", e)
492
+ raise
493
+ finally:
494
+ conn.close()
495
+
496
+ def get_workflow_patterns(
497
+ self,
498
+ pattern_type: Optional[str] = None,
499
+ min_confidence: float = 0.0,
500
+ ) -> List[Dict[str, Any]]:
501
+ """Get workflow patterns filtered by type and confidence."""
502
+ conn = self._get_connection()
503
+ try:
504
+ cursor = conn.cursor()
505
+ if pattern_type:
506
+ cursor.execute('''
507
+ SELECT * FROM workflow_patterns
508
+ WHERE pattern_type = ? AND confidence >= ?
509
+ ORDER BY confidence DESC
510
+ ''', (pattern_type, min_confidence))
511
+ else:
512
+ cursor.execute('''
513
+ SELECT * FROM workflow_patterns
514
+ WHERE confidence >= ?
515
+ ORDER BY confidence DESC
516
+ ''', (min_confidence,))
517
+ return [dict(row) for row in cursor.fetchall()]
518
+ finally:
519
+ conn.close()
520
+
521
+ def clear_workflow_patterns(self, pattern_type: Optional[str] = None):
522
+ """Clear workflow patterns (used before re-mining)."""
523
+ with self._write_lock:
524
+ conn = self._get_connection()
525
+ try:
526
+ cursor = conn.cursor()
527
+ if pattern_type:
528
+ cursor.execute(
529
+ 'DELETE FROM workflow_patterns WHERE pattern_type = ?',
530
+ (pattern_type,)
531
+ )
532
+ else:
533
+ cursor.execute('DELETE FROM workflow_patterns')
534
+ conn.commit()
535
+ except Exception as e:
536
+ conn.rollback()
537
+ logger.error("Failed to clear workflow patterns: %s", e)
538
+ raise
539
+ finally:
540
+ conn.close()
541
+
542
+ # ======================================================================
543
+ # Source Quality Operations
544
+ # ======================================================================
545
+
546
+ def update_source_quality(
547
+ self,
548
+ source_id: str,
549
+ positive_signals: int,
550
+ total_memories: int,
551
+ ):
552
+ """Update quality score for a memory source."""
553
+ # Beta-Binomial smoothing: (alpha + pos) / (alpha + beta + total)
554
+ quality_score = (1.0 + positive_signals) / (2.0 + total_memories)
555
+
556
+ with self._write_lock:
557
+ conn = self._get_connection()
558
+ try:
559
+ cursor = conn.cursor()
560
+ cursor.execute('''
561
+ INSERT INTO source_quality
562
+ (source_id, positive_signals, total_memories,
563
+ quality_score, last_updated)
564
+ VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
565
+ ON CONFLICT(source_id) DO UPDATE SET
566
+ positive_signals = ?,
567
+ total_memories = ?,
568
+ quality_score = ?,
569
+ last_updated = CURRENT_TIMESTAMP
570
+ ''', (
571
+ source_id, positive_signals, total_memories, quality_score,
572
+ positive_signals, total_memories, quality_score,
573
+ ))
574
+ conn.commit()
575
+ except Exception as e:
576
+ conn.rollback()
577
+ logger.error("Failed to update source quality: %s", e)
578
+ raise
579
+ finally:
580
+ conn.close()
581
+
582
+ def get_source_scores(self) -> Dict[str, float]:
583
+ """Get quality scores for all known sources."""
584
+ conn = self._get_connection()
585
+ try:
586
+ cursor = conn.cursor()
587
+ cursor.execute('SELECT source_id, quality_score FROM source_quality')
588
+ return {row['source_id']: row['quality_score'] for row in cursor.fetchall()}
589
+ finally:
590
+ conn.close()
591
+
592
+ # ======================================================================
593
+ # Model Metadata Operations
594
+ # ======================================================================
595
+
596
+ def record_model_training(
597
+ self,
598
+ model_version: str,
599
+ training_samples: int,
600
+ synthetic_samples: int = 0,
601
+ real_samples: int = 0,
602
+ ndcg_at_10: Optional[float] = None,
603
+ model_path: Optional[str] = None,
604
+ ) -> int:
605
+ """Record metadata about a trained ranking model."""
606
+ with self._write_lock:
607
+ conn = self._get_connection()
608
+ try:
609
+ cursor = conn.cursor()
610
+ cursor.execute('''
611
+ INSERT INTO ranking_models
612
+ (model_version, training_samples, synthetic_samples,
613
+ real_samples, ndcg_at_10, model_path)
614
+ VALUES (?, ?, ?, ?, ?, ?)
615
+ ''', (
616
+ model_version, training_samples, synthetic_samples,
617
+ real_samples, ndcg_at_10, model_path,
618
+ ))
619
+ conn.commit()
620
+ return cursor.lastrowid
621
+ except Exception as e:
622
+ conn.rollback()
623
+ logger.error("Failed to record model training: %s", e)
624
+ raise
625
+ finally:
626
+ conn.close()
627
+
628
+ def get_latest_model(self) -> Optional[Dict[str, Any]]:
629
+ """Get metadata for the most recently trained model."""
630
+ conn = self._get_connection()
631
+ try:
632
+ cursor = conn.cursor()
633
+ cursor.execute('''
634
+ SELECT * FROM ranking_models
635
+ ORDER BY created_at DESC
636
+ LIMIT 1
637
+ ''')
638
+ row = cursor.fetchone()
639
+ return dict(row) if row else None
640
+ finally:
641
+ conn.close()
642
+
643
+ # ======================================================================
644
+ # Engagement Metrics Operations
645
+ # ======================================================================
646
+
647
    def increment_engagement(
        self,
        metric_type: str,
        count: int = 1,
        source: Optional[str] = None,
    ):
        """
        Increment a daily engagement metric.

        Args:
            metric_type: One of 'memories_created', 'recalls_performed',
                         'feedback_signals', 'patterns_updated'
            count: Increment amount (default 1)
            source: Source tool identifier to track in active_sources

        Best-effort: an unknown metric_type is logged and ignored, and
        database errors are swallowed after a rollback — engagement
        tracking must never break the caller.
        """
        today = date.today().isoformat()
        # Whitelist of real column names. This is what makes the f-string
        # SQL below safe: metric_type never reaches the query unchecked.
        valid_metrics = {
            'memories_created', 'recalls_performed',
            'feedback_signals', 'patterns_updated',
        }
        if metric_type not in valid_metrics:
            logger.warning("Invalid metric type: %s", metric_type)
            return

        with self._write_lock:
            conn = self._get_connection()
            try:
                cursor = conn.cursor()

                # Ensure today's row exists (UNIQUE(metric_date) makes this
                # idempotent).
                cursor.execute('''
                    INSERT OR IGNORE INTO engagement_metrics (metric_date)
                    VALUES (?)
                ''', (today,))

                # Increment the specific metric column. Column names cannot
                # be bound as parameters, hence the (whitelisted) f-string.
                cursor.execute(f'''
                    UPDATE engagement_metrics
                    SET {metric_type} = {metric_type} + ?
                    WHERE metric_date = ?
                ''', (count, today))

                # Record the source tool in today's active_sources JSON
                # list, deduplicated (read-modify-write under _write_lock).
                if source:
                    cursor.execute('''
                        SELECT active_sources FROM engagement_metrics
                        WHERE metric_date = ?
                    ''', (today,))
                    row = cursor.fetchone()
                    if row:
                        sources = json.loads(row['active_sources'] or '[]')
                        if source not in sources:
                            sources.append(source)
                            cursor.execute('''
                                UPDATE engagement_metrics
                                SET active_sources = ?
                                WHERE metric_date = ?
                            ''', (json.dumps(sources), today))

                conn.commit()
            except Exception as e:
                conn.rollback()
                # Deliberately no re-raise: engagement stats are optional.
                logger.error("Failed to update engagement: %s", e)
            finally:
                conn.close()
712
+
713
+ def get_engagement_history(
714
+ self,
715
+ days: int = 30,
716
+ ) -> List[Dict[str, Any]]:
717
+ """Get engagement metrics for the last N days."""
718
+ conn = self._get_connection()
719
+ try:
720
+ cursor = conn.cursor()
721
+ cursor.execute('''
722
+ SELECT * FROM engagement_metrics
723
+ ORDER BY metric_date DESC
724
+ LIMIT ?
725
+ ''', (days,))
726
+ return [dict(row) for row in cursor.fetchall()]
727
+ finally:
728
+ conn.close()
729
+
730
+ # ======================================================================
731
+ # Statistics & Diagnostics
732
+ # ======================================================================
733
+
734
    def get_stats(self) -> Dict[str, Any]:
        """Get comprehensive learning database statistics.

        Returns:
            Dict with keys: feedback_count, unique_queries,
            transferable_patterns, high_confidence_patterns
            (confidence >= 0.6), workflow_patterns, tracked_sources,
            models_trained, latest_model_version / latest_model_ndcg
            (both None when no model has been trained), db_size_bytes
            and db_size_kb (both 0 when the file does not exist).
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            stats = {}

            # Feedback stats
            cursor.execute('SELECT COUNT(*) FROM ranking_feedback')
            stats['feedback_count'] = cursor.fetchone()[0]

            cursor.execute(
                'SELECT COUNT(DISTINCT query_hash) FROM ranking_feedback'
            )
            stats['unique_queries'] = cursor.fetchone()[0]

            # Pattern stats
            cursor.execute('SELECT COUNT(*) FROM transferable_patterns')
            stats['transferable_patterns'] = cursor.fetchone()[0]

            # 0.6 is the threshold this module treats as "high confidence".
            cursor.execute(
                'SELECT COUNT(*) FROM transferable_patterns '
                'WHERE confidence >= 0.6'
            )
            stats['high_confidence_patterns'] = cursor.fetchone()[0]

            # Workflow stats
            cursor.execute('SELECT COUNT(*) FROM workflow_patterns')
            stats['workflow_patterns'] = cursor.fetchone()[0]

            # Source quality stats
            cursor.execute('SELECT COUNT(*) FROM source_quality')
            stats['tracked_sources'] = cursor.fetchone()[0]

            # Model stats
            cursor.execute(
                'SELECT COUNT(*) FROM ranking_models'
            )
            stats['models_trained'] = cursor.fetchone()[0]

            # NOTE: get_latest_model opens its own short-lived connection.
            latest_model = self.get_latest_model()
            if latest_model:
                stats['latest_model_version'] = latest_model['model_version']
                stats['latest_model_ndcg'] = latest_model['ndcg_at_10']
            else:
                stats['latest_model_version'] = None
                stats['latest_model_ndcg'] = None

            # DB file size (main file only; WAL/SHM sidecars not counted)
            if self.db_path.exists():
                stats['db_size_bytes'] = self.db_path.stat().st_size
                stats['db_size_kb'] = round(stats['db_size_bytes'] / 1024, 1)
            else:
                stats['db_size_bytes'] = 0
                stats['db_size_kb'] = 0

            return stats
        finally:
            conn.close()
793
+
794
+ # ======================================================================
795
+ # Reset / Cleanup
796
+ # ======================================================================
797
+
798
+ def reset(self):
799
+ """
800
+ Delete all learning data. Memories in memory.db are preserved.
801
+
802
+ This is the GDPR Article 17 "Right to Erasure" handler for
803
+ behavioral data.
804
+ """
805
+ with self._write_lock:
806
+ conn = self._get_connection()
807
+ try:
808
+ cursor = conn.cursor()
809
+ cursor.execute('DELETE FROM ranking_feedback')
810
+ cursor.execute('DELETE FROM transferable_patterns')
811
+ cursor.execute('DELETE FROM workflow_patterns')
812
+ cursor.execute('DELETE FROM ranking_models')
813
+ cursor.execute('DELETE FROM source_quality')
814
+ cursor.execute('DELETE FROM engagement_metrics')
815
+ conn.commit()
816
+ logger.info(
817
+ "Learning data reset. Memories in memory.db preserved."
818
+ )
819
+ except Exception as e:
820
+ conn.rollback()
821
+ logger.error("Failed to reset learning data: %s", e)
822
+ raise
823
+ finally:
824
+ conn.close()
825
+
826
+ def delete_database(self):
827
+ """
828
+ Completely delete learning.db file.
829
+ More aggressive than reset() — removes the file entirely.
830
+ """
831
+ with self._write_lock:
832
+ LearningDB.reset_instance(self.db_path)
833
+ if self.db_path.exists():
834
+ self.db_path.unlink()
835
+ logger.info("Learning database deleted: %s", self.db_path)
836
+ # Also clean WAL/SHM files
837
+ wal = self.db_path.with_suffix('.db-wal')
838
+ shm = self.db_path.with_suffix('.db-shm')
839
+ if wal.exists():
840
+ wal.unlink()
841
+ if shm.exists():
842
+ shm.unlink()