superlocalmemory 2.6.5 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,606 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SuperLocalMemory V2 - Tests for LearningDB (v2.7)
4
+ Copyright (c) 2026 Varun Pratap Bhardwaj
5
+ Licensed under MIT License
6
+ """
7
+
8
+ import json
9
+ import sqlite3
10
+ import threading
11
+ import time
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Fixtures
19
+ # ---------------------------------------------------------------------------
20
+
21
@pytest.fixture(autouse=True)
def reset_singleton():
    """Ensure every test starts and ends with a clean LearningDB singleton.

    Applied automatically (autouse) to all tests in this module so singleton
    state never leaks from one test into the next.
    """
    from src.learning.learning_db import LearningDB

    LearningDB.reset_instance()
    yield
    LearningDB.reset_instance()
28
+
29
+
30
@pytest.fixture
def learning_db(tmp_path):
    """Provide a brand-new LearningDB stored under pytest's temp directory."""
    from src.learning.learning_db import LearningDB

    return LearningDB(db_path=tmp_path / "learning.db")
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Schema Initialisation
41
+ # ---------------------------------------------------------------------------
42
+
43
class TestSchema:
    """Verify all 6 tables and indexes are created correctly."""

    def test_all_tables_exist(self, learning_db):
        connection = learning_db._get_connection()
        cur = connection.cursor()
        cur.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
        )
        found = {record[0] for record in cur.fetchall()}
        connection.close()

        required = {
            "transferable_patterns",
            "workflow_patterns",
            "ranking_feedback",
            "ranking_models",
            "source_quality",
            "engagement_metrics",
        }
        assert required <= found, f"Missing tables: {required - found}"

    def test_indexes_exist(self, learning_db):
        connection = learning_db._get_connection()
        cur = connection.cursor()
        cur.execute(
            "SELECT name FROM sqlite_master WHERE type='index' ORDER BY name"
        )
        found = {record[0] for record in cur.fetchall()}
        connection.close()

        required = {
            "idx_feedback_query",
            "idx_feedback_memory",
            "idx_feedback_channel",
            "idx_feedback_created",
            "idx_patterns_type",
            "idx_workflow_type",
            "idx_engagement_date",
        }
        assert required <= found, (
            f"Missing indexes: {required - found}"
        )

    def test_wal_mode_enabled(self, learning_db):
        # WAL journaling is what makes the concurrent-write tests viable.
        connection = learning_db._get_connection()
        cur = connection.cursor()
        cur.execute("PRAGMA journal_mode")
        journal_mode = cur.fetchone()[0]
        connection.close()
        assert journal_mode.lower() == "wal"
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Feedback Operations
98
+ # ---------------------------------------------------------------------------
99
+
100
class TestFeedback:
    """Tests for store_feedback / get_feedback_count / get_unique_query_count."""

    def test_store_feedback_basic(self, learning_db):
        inserted_id = learning_db.store_feedback(
            query_hash="abc123",
            memory_id=42,
            signal_type="mcp_used",
            signal_value=1.0,
            channel="mcp",
        )
        assert inserted_id is not None
        assert inserted_id >= 1

    def test_store_feedback_with_all_fields(self, learning_db):
        inserted_id = learning_db.store_feedback(
            query_hash="def456",
            memory_id=99,
            signal_type="dashboard_click",
            signal_value=0.8,
            channel="dashboard",
            query_keywords="deploy,fastapi",
            rank_position=3,
            source_tool="cursor",
            dwell_time=12.5,
        )
        assert inserted_id is not None

        # Round-trip: every field we wrote must come back unchanged.
        stored = learning_db.get_feedback_for_training()
        assert len(stored) == 1
        record = stored[0]
        expected_fields = {
            "query_hash": "def456",
            "memory_id": 99,
            "signal_type": "dashboard_click",
            "signal_value": 0.8,
            "channel": "dashboard",
            "query_keywords": "deploy,fastapi",
            "rank_position": 3,
            "source_tool": "cursor",
        }
        for field, expected in expected_fields.items():
            assert record[field] == expected

    def test_feedback_count(self, learning_db):
        assert learning_db.get_feedback_count() == 0

        for idx in range(5):
            learning_db.store_feedback(
                query_hash=f"q{idx}",
                memory_id=idx,
                signal_type="mcp_used",
            )
        assert learning_db.get_feedback_count() == 5

    def test_unique_query_count(self, learning_db):
        # Three rows but only two distinct query hashes.
        learning_db.store_feedback(query_hash="q1", memory_id=1, signal_type="mcp_used")
        learning_db.store_feedback(query_hash="q1", memory_id=2, signal_type="mcp_used")
        learning_db.store_feedback(query_hash="q2", memory_id=3, signal_type="cli_useful")

        assert learning_db.get_unique_query_count() == 2

    def test_feedback_for_training_limit(self, learning_db):
        for idx in range(15):
            learning_db.store_feedback(
                query_hash=f"q{idx % 5}",
                memory_id=idx,
                signal_type="mcp_used",
            )

        # Only `limit` rows should come back.
        assert len(learning_db.get_feedback_for_training(limit=5)) == 5

    def test_feedback_for_training_order(self, learning_db):
        """Newest first ordering."""
        learning_db.store_feedback(query_hash="old", memory_id=1, signal_type="mcp_used")
        # NOTE(review): a 50ms gap assumes sub-second timestamps (or a rowid
        # tiebreak) in get_feedback_for_training's ordering — confirm against
        # the implementation, otherwise this test is timing-flaky.
        time.sleep(0.05)
        learning_db.store_feedback(query_hash="new", memory_id=2, signal_type="cli_useful")

        rows = learning_db.get_feedback_for_training()
        assert len(rows) == 2
        assert rows[0]["query_hash"] == "new"

    def test_signal_value_variations(self, learning_db):
        """Various signal values: 0.0, 0.5, 1.0."""
        for strength in (0.0, 0.4, 0.7, 1.0):
            learning_db.store_feedback(
                query_hash="q",
                memory_id=1,
                signal_type="mcp_used",
                signal_value=strength,
            )
        stored = sorted(
            row["signal_value"] for row in learning_db.get_feedback_for_training()
        )
        assert stored == [0.0, 0.4, 0.7, 1.0]
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # Transferable Patterns
198
+ # ---------------------------------------------------------------------------
199
+
200
class TestTransferablePatterns:
    def test_upsert_insert(self, learning_db):
        new_id = learning_db.upsert_transferable_pattern(
            pattern_type="preference",
            key="frontend_framework",
            value="react",
            confidence=0.85,
            evidence_count=12,
        )
        assert new_id >= 1

        stored = learning_db.get_transferable_patterns()
        assert len(stored) == 1
        assert stored[0]["key"] == "frontend_framework"
        assert stored[0]["value"] == "react"

    def test_upsert_update(self, learning_db):
        """Second upsert with same type+key should UPDATE, not insert."""
        for val, conf, count in (("python", 0.6, 5), ("typescript", 0.8, 10)):
            learning_db.upsert_transferable_pattern(
                pattern_type="preference",
                key="lang",
                value=val,
                confidence=conf,
                evidence_count=count,
            )

        stored = learning_db.get_transferable_patterns()
        assert len(stored) == 1
        assert stored[0]["value"] == "typescript"
        assert stored[0]["confidence"] == 0.8
        assert stored[0]["evidence_count"] == 10

    def test_get_with_confidence_filter(self, learning_db):
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="a", value="v1",
            confidence=0.3, evidence_count=1,
        )
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="b", value="v2",
            confidence=0.9, evidence_count=10,
        )

        confident = learning_db.get_transferable_patterns(min_confidence=0.6)
        assert len(confident) == 1
        assert confident[0]["key"] == "b"

    def test_get_with_type_filter(self, learning_db):
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="k1", value="v1",
            confidence=0.7, evidence_count=5,
        )
        learning_db.upsert_transferable_pattern(
            pattern_type="style", key="k2", value="v2",
            confidence=0.8, evidence_count=8,
        )

        preferences = learning_db.get_transferable_patterns(pattern_type="preference")
        assert len(preferences) == 1
        assert preferences[0]["key"] == "k1"

    def test_contradictions_stored_as_json(self, learning_db):
        learning_db.upsert_transferable_pattern(
            pattern_type="preference",
            key="db",
            value="postgres",
            confidence=0.7,
            evidence_count=5,
            contradictions=["Profile 'work' prefers 'mysql'"],
        )
        # Contradictions may come back either decoded or as a JSON string.
        stored = learning_db.get_transferable_patterns()[0]["contradictions"]
        decoded = json.loads(stored) if isinstance(stored, str) else stored
        assert decoded == ["Profile 'work' prefers 'mysql'"]
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Workflow Patterns
284
+ # ---------------------------------------------------------------------------
285
+
286
class TestWorkflowPatterns:
    def test_store_and_get(self, learning_db):
        learning_db.store_workflow_pattern(
            pattern_type="sequence",
            pattern_key="docs -> code -> test",
            pattern_value='{"sequence": ["docs", "code", "test"]}',
            confidence=0.45,
            evidence_count=12,
        )

        stored = learning_db.get_workflow_patterns()
        assert len(stored) == 1
        assert stored[0]["pattern_key"] == "docs -> code -> test"

    def test_get_with_type_filter(self, learning_db):
        learning_db.store_workflow_pattern(
            pattern_type="sequence", pattern_key="a", pattern_value="v",
            confidence=0.5, evidence_count=5,
        )
        learning_db.store_workflow_pattern(
            pattern_type="temporal", pattern_key="morning", pattern_value="{}",
            confidence=0.6, evidence_count=8,
        )

        sequences = learning_db.get_workflow_patterns(pattern_type="sequence")
        assert len(sequences) == 1
        assert sequences[0]["pattern_type"] == "sequence"

    def test_clear_all(self, learning_db):
        for n in range(3):
            learning_db.store_workflow_pattern(
                pattern_type="sequence",
                pattern_key=f"p{n}",
                pattern_value="{}",
            )
        learning_db.clear_workflow_patterns()
        assert learning_db.get_workflow_patterns() == []

    def test_clear_by_type(self, learning_db):
        learning_db.store_workflow_pattern(
            pattern_type="sequence", pattern_key="a", pattern_value="{}",
        )
        learning_db.store_workflow_pattern(
            pattern_type="temporal", pattern_key="b", pattern_value="{}",
        )
        learning_db.clear_workflow_patterns(pattern_type="sequence")

        leftover = learning_db.get_workflow_patterns()
        assert len(leftover) == 1
        assert leftover[0]["pattern_type"] == "temporal"

    def test_confidence_filter(self, learning_db):
        learning_db.store_workflow_pattern(
            pattern_type="sequence", pattern_key="low",
            pattern_value="{}", confidence=0.2,
        )
        learning_db.store_workflow_pattern(
            pattern_type="sequence", pattern_key="high",
            pattern_value="{}", confidence=0.8,
        )

        confident = learning_db.get_workflow_patterns(min_confidence=0.5)
        assert len(confident) == 1
        assert confident[0]["pattern_key"] == "high"
350
+
351
+
352
+ # ---------------------------------------------------------------------------
353
+ # Source Quality
354
+ # ---------------------------------------------------------------------------
355
+
356
class TestSourceQuality:
    def test_update_and_get(self, learning_db):
        learning_db.update_source_quality("mcp:claude", 8, 10)

        scores = learning_db.get_source_scores()
        assert "mcp:claude" in scores
        # Beta-Binomial posterior mean: (1 + 8) / (2 + 10) = 0.75
        assert abs(scores["mcp:claude"] - 0.75) < 0.001

    def test_beta_binomial_calculation(self, learning_db):
        """Verify the Beta-Binomial formula: (1+pos)/(2+total)."""
        cases = [
            (0, 0, 0.5),           # no evidence -> neutral prior
            (5, 10, 0.5),          # half positive -> 0.5
            (1, 10, 2.0 / 12.0),   # mostly negative
            (9, 10, 10.0 / 12.0),  # mostly positive
        ]
        for positive, total, want in cases:
            source_id = f"src_{positive}_{total}"
            learning_db.update_source_quality(source_id, positive, total)
            got = learning_db.get_source_scores()[source_id]
            assert abs(got - want) < 0.001, (
                f"pos={positive}, total={total}: expected {want}, got {got}"
            )

    def test_upsert_on_conflict(self, learning_db):
        """Updating same source_id should overwrite, not duplicate."""
        learning_db.update_source_quality("mcp:cursor", 2, 10)
        learning_db.update_source_quality("mcp:cursor", 8, 10)

        scores = learning_db.get_source_scores()
        assert abs(scores["mcp:cursor"] - 0.75) < 0.001

    def test_empty_scores(self, learning_db):
        assert learning_db.get_source_scores() == {}
392
+
393
+
394
+ # ---------------------------------------------------------------------------
395
+ # Model Metadata
396
+ # ---------------------------------------------------------------------------
397
+
398
class TestModelMetadata:
    def test_record_and_get_latest(self, learning_db):
        learning_db.record_model_training(
            model_version="v1",
            training_samples=500,
            synthetic_samples=200,
            real_samples=300,
            ndcg_at_10=0.85,
            model_path="/tmp/model.txt",
        )
        newest = learning_db.get_latest_model()
        assert newest is not None
        assert newest["model_version"] == "v1"
        assert newest["training_samples"] == 500
        assert newest["ndcg_at_10"] == 0.85

    def test_latest_model_ordering(self, learning_db):
        """Latest model should be the one with the highest rowid."""
        learning_db.record_model_training("v1", 100)
        # SQLite CURRENT_TIMESTAMP only has second precision, so wait out
        # a full second before the second insert.
        time.sleep(1.1)
        learning_db.record_model_training("v2", 200)

        assert learning_db.get_latest_model()["model_version"] == "v2"

    def test_no_models(self, learning_db):
        assert learning_db.get_latest_model() is None
426
+
427
+
428
+ # ---------------------------------------------------------------------------
429
+ # Engagement Metrics
430
+ # ---------------------------------------------------------------------------
431
+
432
class TestEngagement:
    def test_increment_memories_created(self, learning_db):
        learning_db.increment_engagement("memories_created", count=3)
        rows = learning_db.get_engagement_history(days=1)
        assert len(rows) >= 1
        assert rows[0]["memories_created"] == 3

    def test_increment_multiple_types(self, learning_db):
        learning_db.increment_engagement("memories_created", count=2)
        learning_db.increment_engagement("recalls_performed", count=5)
        learning_db.increment_engagement("feedback_signals", count=1)

        today = learning_db.get_engagement_history(days=1)[0]
        assert today["memories_created"] == 2
        assert today["recalls_performed"] == 5
        assert today["feedback_signals"] == 1

    def test_invalid_metric_type_ignored(self, learning_db):
        """Invalid metric types should be silently ignored."""
        learning_db.increment_engagement("invalid_metric", count=1)
        # No valid metric was touched, so no daily row should exist.
        assert len(learning_db.get_engagement_history(days=1)) == 0

    def test_source_tracking(self, learning_db):
        learning_db.increment_engagement(
            "memories_created", count=1, source="claude-desktop"
        )
        learning_db.increment_engagement(
            "recalls_performed", count=1, source="cursor"
        )

        rows = learning_db.get_engagement_history(days=1)
        active = json.loads(rows[0]["active_sources"])
        assert "claude-desktop" in active
        assert "cursor" in active

    def test_source_deduplication(self, learning_db):
        """Same source added twice should appear only once."""
        learning_db.increment_engagement("memories_created", count=1, source="cli")
        learning_db.increment_engagement("recalls_performed", count=1, source="cli")

        rows = learning_db.get_engagement_history(days=1)
        active = json.loads(rows[0]["active_sources"])
        assert active.count("cli") == 1
478
+
479
+
480
+ # ---------------------------------------------------------------------------
481
+ # Stats & Reset
482
+ # ---------------------------------------------------------------------------
483
+
484
class TestStatsAndReset:
    def test_get_stats_empty(self, learning_db):
        stats = learning_db.get_stats()
        for counter in (
            "feedback_count",
            "unique_queries",
            "transferable_patterns",
            "high_confidence_patterns",
            "workflow_patterns",
            "tracked_sources",
            "models_trained",
        ):
            assert stats[counter] == 0
        assert stats["latest_model_version"] is None
        assert stats["db_size_bytes"] > 0  # the DB file already exists on disk

    def test_get_stats_populated(self, learning_db):
        # One record in each table, then every counter should read 1.
        learning_db.store_feedback(
            query_hash="q1", memory_id=1, signal_type="mcp_used",
        )
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="lang", value="python",
            confidence=0.9, evidence_count=10,
        )
        learning_db.store_workflow_pattern(
            pattern_type="sequence", pattern_key="a -> b",
            pattern_value="{}", confidence=0.5, evidence_count=5,
        )
        learning_db.update_source_quality("cli", 3, 5)
        learning_db.record_model_training("v1", 100, ndcg_at_10=0.8)

        stats = learning_db.get_stats()
        for counter in (
            "feedback_count",
            "transferable_patterns",
            "high_confidence_patterns",
            "workflow_patterns",
            "tracked_sources",
            "models_trained",
        ):
            assert stats[counter] == 1
        assert stats["latest_model_version"] == "v1"
        assert stats["latest_model_ndcg"] == 0.8

    def test_reset_clears_all(self, learning_db):
        # Populate every table, reset, then confirm all counters drop to zero.
        learning_db.store_feedback(query_hash="q", memory_id=1, signal_type="x")
        learning_db.upsert_transferable_pattern(
            pattern_type="p", key="k", value="v", confidence=0.5, evidence_count=1,
        )
        learning_db.store_workflow_pattern(
            pattern_type="s", pattern_key="k", pattern_value="v",
        )
        learning_db.update_source_quality("src", 1, 1)
        learning_db.record_model_training("v1", 10)
        learning_db.increment_engagement("memories_created", count=1)

        learning_db.reset()

        stats = learning_db.get_stats()
        for counter in (
            "feedback_count",
            "transferable_patterns",
            "workflow_patterns",
            "tracked_sources",
            "models_trained",
        ):
            assert stats[counter] == 0
542
+
543
+
544
+ # ---------------------------------------------------------------------------
545
+ # Concurrency
546
+ # ---------------------------------------------------------------------------
547
+
548
class TestConcurrency:
    def test_concurrent_writes(self, learning_db):
        """10 threads writing simultaneously should produce zero errors."""
        failures = []

        def worker(worker_id):
            # Each thread writes 10 distinct feedback rows; any exception is
            # captured rather than raised so the main thread can report it.
            try:
                for seq in range(10):
                    learning_db.store_feedback(
                        query_hash=f"q_t{worker_id}_{seq}",
                        memory_id=worker_id * 100 + seq,
                        signal_type="mcp_used",
                        signal_value=1.0,
                        channel="mcp",
                    )
            except Exception as exc:
                failures.append(str(exc))

        pool = [threading.Thread(target=worker, args=(wid,)) for wid in range(10)]
        for thread in pool:
            thread.start()
        for thread in pool:
            thread.join(timeout=30)

        assert failures == [], f"Concurrent write errors: {failures}"
        assert learning_db.get_feedback_count() == 100
574
+
575
+
576
+ # ---------------------------------------------------------------------------
577
+ # Singleton Pattern
578
+ # ---------------------------------------------------------------------------
579
+
580
class TestSingleton:
    def test_get_instance_returns_same_object(self, tmp_path):
        from src.learning.learning_db import LearningDB
        LearningDB.reset_instance()

        target = tmp_path / "singleton_test.db"
        first = LearningDB.get_instance(target)
        second = LearningDB.get_instance(target)
        assert first is second

    def test_different_paths_different_instances(self, tmp_path):
        from src.learning.learning_db import LearningDB
        LearningDB.reset_instance()

        first = LearningDB.get_instance(tmp_path / "a.db")
        second = LearningDB.get_instance(tmp_path / "b.db")
        assert first is not second

    def test_reset_instance_clears(self, tmp_path):
        from src.learning.learning_db import LearningDB
        LearningDB.reset_instance()

        target = tmp_path / "reset_test.db"
        before = LearningDB.get_instance(target)
        LearningDB.reset_instance(target)
        after = LearningDB.get_instance(target)
        assert before is not after