superlocalmemory 2.6.5 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/README.md +96 -13
- package/bin/slm +179 -3
- package/bin/superlocalmemoryv2:learning +4 -0
- package/bin/superlocalmemoryv2:patterns +4 -0
- package/docs/ARCHITECTURE.md +12 -6
- package/docs/MCP-MANUAL-SETUP.md +14 -4
- package/install.sh +99 -3
- package/mcp_server.py +291 -1
- package/package.json +2 -1
- package/requirements-learning.txt +12 -0
- package/scripts/verify-v27.sh +233 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/learning/synthetic_bootstrap.py +1047 -0
- package/src/learning/tests/__init__.py +0 -0
- package/src/learning/tests/test_adaptive_ranker.py +328 -0
- package/src/learning/tests/test_aggregator.py +309 -0
- package/src/learning/tests/test_feedback_collector.py +295 -0
- package/src/learning/tests/test_learning_db.py +606 -0
- package/src/learning/tests/test_project_context.py +296 -0
- package/src/learning/tests/test_source_quality.py +355 -0
- package/src/learning/tests/test_synthetic_bootstrap.py +433 -0
- package/src/learning/tests/test_workflow_miner.py +322 -0
|
File without changes
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
SuperLocalMemory V2 - Tests for AdaptiveRanker (v2.7)
Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License
"""

import pytest


# Detect optional dependencies at import time
# These flags gate the ML-phase tests below via pytest.mark.skipif, so the
# suite still passes on installs without the optional learning extras.
try:
    import lightgbm
    HAS_LIGHTGBM = True
except ImportError:
    HAS_LIGHTGBM = False

try:
    import numpy as np
    HAS_NUMPY = True
except ImportError:
    # Keep the name bound so later references don't raise NameError.
    np = None
    HAS_NUMPY = False
|
30
|
+
@pytest.fixture(autouse=True)
def reset_singleton():
    """Guarantee a pristine LearningDB singleton around every test."""
    from src.learning.learning_db import LearningDB as _DB

    _DB.reset_instance()
    yield
    _DB.reset_instance()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@pytest.fixture
def learning_db(tmp_path):
    """Provide a LearningDB backed by a throwaway file under tmp_path."""
    from src.learning.learning_db import LearningDB

    return LearningDB(db_path=tmp_path / "learning.db")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pytest.fixture
def ranker(learning_db):
    """Provide an AdaptiveRanker wired to the temporary learning DB."""
    from src.learning.adaptive_ranker import AdaptiveRanker

    return AdaptiveRanker(learning_db=learning_db)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _make_result(memory_id, score=0.5, content="test memory", importance=5,
                 project_name=None, created_at="2026-02-16 10:00:00",
                 access_count=0, match_type="keyword"):
    """Helper to build a search result dict.

    Mirrors the shape the ranker receives from search: core match fields
    plus the metadata columns used for feature extraction.
    """
    result = dict(
        id=memory_id,
        content=content,
        score=score,
        match_type=match_type,
        importance=importance,
        created_at=created_at,
        access_count=access_count,
        project_name=project_name,
    )
    # Fresh containers per call — never share a mutable default.
    result["tags"] = []
    result["created_by"] = None
    return result
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Phase Detection
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
class TestGetPhase:
    """Phase selection: baseline -> rule_based -> ml_model as feedback grows."""

    def test_baseline_with_zero_signals(self, ranker):
        # A brand-new DB has no feedback, so the ranker must start in baseline.
        assert ranker.get_phase() == "baseline"

    def test_baseline_with_few_signals(self, ranker, learning_db):
        """Less than 20 signals should stay in baseline."""
        for i in range(10):
            learning_db.store_feedback(
                query_hash=f"q{i}",
                memory_id=i,
                signal_type="mcp_used",
            )
        assert ranker.get_phase() == "baseline"

    def test_rule_based_at_20_signals(self, ranker, learning_db):
        """20+ signals should enter rule_based phase."""
        for i in range(25):
            learning_db.store_feedback(
                query_hash=f"q{i}",
                memory_id=i,
                signal_type="mcp_used",
            )
        assert ranker.get_phase() == "rule_based"

    @pytest.mark.skipif(not HAS_LIGHTGBM or not HAS_NUMPY,
                        reason="LightGBM/NumPy required for ML phase")
    def test_ml_model_at_200_signals(self, ranker, learning_db):
        """200+ signals across 50+ queries should trigger ml_model."""
        for i in range(250):
            learning_db.store_feedback(
                query_hash=f"q{i % 60}",  # 60 unique queries
                memory_id=i,
                signal_type="mcp_used",
            )
        assert ranker.get_phase() == "ml_model"

    def test_ml_model_requires_enough_unique_queries(self, ranker, learning_db):
        """200+ signals but only 10 unique queries should stay rule_based."""
        for i in range(250):
            learning_db.store_feedback(
                query_hash=f"q{i % 10}",  # Only 10 unique queries
                memory_id=i,
                signal_type="mcp_used",
            )
        # Even with LightGBM available, not enough unique queries
        phase = ranker.get_phase()
        assert phase in ("rule_based", "ml_model")
        if HAS_LIGHTGBM and HAS_NUMPY:
            assert phase == "rule_based"  # 10 < 50 unique queries

    def test_no_learning_db_returns_baseline(self):
        # With no learning DB at all the ranker must degrade to baseline.
        from src.learning.adaptive_ranker import AdaptiveRanker
        ranker = AdaptiveRanker(learning_db=None)
        # Force no lazy init
        ranker._learning_db = None
        assert ranker.get_phase() == "baseline"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
# Phase Info
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
class TestGetPhaseInfo:
    """get_phase_info() returns a diagnostic snapshot of ranker state."""

    def test_phase_info_structure(self, ranker):
        # Every expected key must be present regardless of phase.
        info = ranker.get_phase_info()
        assert "phase" in info
        assert "feedback_count" in info
        assert "unique_queries" in info
        assert "thresholds" in info
        assert "model_loaded" in info
        assert "lightgbm_available" in info
        assert "numpy_available" in info

    def test_phase_info_values(self, ranker):
        # Fresh DB: zero feedback, baseline phase, no model loaded yet.
        info = ranker.get_phase_info()
        assert info["phase"] == "baseline"
        assert info["feedback_count"] == 0
        assert info["unique_queries"] == 0
        assert info["model_loaded"] is False
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
# Rerank Routing
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
class TestRerank:
    """rerank() routing: annotation, ordering, and score bookkeeping."""

    def test_empty_results(self, ranker):
        # Empty input must come back as an empty list, not None or an error.
        result = ranker.rerank([], "query")
        assert result == []

    def test_single_result_baseline(self, ranker):
        """Single result should get baseline phase annotation."""
        results = [_make_result(1, score=0.8)]
        reranked = ranker.rerank(results, "test query")
        assert len(reranked) == 1
        assert reranked[0]["ranking_phase"] == "baseline"
        assert reranked[0]["base_score"] == 0.8

    def test_baseline_preserves_order(self, ranker):
        """In baseline phase, original order should be preserved."""
        results = [
            _make_result(1, score=0.9),
            _make_result(2, score=0.5),
            _make_result(3, score=0.3),
        ]
        reranked = ranker.rerank(results, "test query")
        # All should be baseline
        for r in reranked:
            assert r["ranking_phase"] == "baseline"
        # Order preserved (no re-sorting in baseline)
        assert reranked[0]["id"] == 1
        assert reranked[1]["id"] == 2
        assert reranked[2]["id"] == 3

    def test_base_score_preserved(self, ranker, learning_db):
        """base_score should always contain the original score."""
        # Add enough feedback for rule_based
        for i in range(25):
            learning_db.store_feedback(
                query_hash=f"q{i}", memory_id=i, signal_type="mcp_used",
            )

        results = [
            _make_result(1, score=0.8),
            _make_result(2, score=0.5),
        ]
        reranked = ranker.rerank(results, "test query")
        for r in reranked:
            assert "base_score" in r
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ---------------------------------------------------------------------------
|
|
205
|
+
# Rule-Based Re-ranking
|
|
206
|
+
# ---------------------------------------------------------------------------
|
|
207
|
+
|
|
208
|
+
class TestRuleBasedReranking:
    """Rule-based phase: feature boosts and re-sorting of results.

    Each test first stores 25 feedback signals so the ranker crosses the
    20-signal threshold into the rule_based phase.
    """

    def test_boost_applied(self, ranker, learning_db):
        """Rule-based should modify scores based on features."""
        for i in range(25):
            learning_db.store_feedback(
                query_hash=f"q{i}", memory_id=i, signal_type="mcp_used",
            )

        # Identical base scores; only importance/access_count differ.
        results = [
            _make_result(1, score=0.5, importance=9, access_count=8),
            _make_result(2, score=0.5, importance=2, access_count=0),
        ]
        reranked = ranker.rerank(results, "test query")

        # Both should be rule_based
        assert all(r["ranking_phase"] == "rule_based" for r in reranked)

        # High importance + access should get higher score
        high_imp = next(r for r in reranked if r["id"] == 1)
        low_imp = next(r for r in reranked if r["id"] == 2)
        assert high_imp["score"] > low_imp["score"]

    def test_project_match_boost(self, ranker, learning_db):
        """Memory matching current project should be boosted."""
        for i in range(25):
            learning_db.store_feedback(
                query_hash=f"q{i}", memory_id=i, signal_type="mcp_used",
            )

        results = [
            _make_result(1, score=0.5, project_name="SLM"),
            _make_result(2, score=0.5, project_name="OTHER"),
        ]
        # Context tells the ranker which project the query came from.
        context = {"current_project": "SLM"}
        reranked = ranker.rerank(results, "test query", context=context)

        slm_result = next(r for r in reranked if r["id"] == 1)
        other_result = next(r for r in reranked if r["id"] == 2)
        assert slm_result["score"] > other_result["score"]

    def test_results_resorted(self, ranker, learning_db):
        """Results should be re-sorted by boosted score."""
        for i in range(25):
            learning_db.store_feedback(
                query_hash=f"q{i}", memory_id=i, signal_type="mcp_used",
            )

        # Second result has much higher importance
        results = [
            _make_result(1, score=0.5, importance=2),
            _make_result(2, score=0.5, importance=10, access_count=10),
        ]
        reranked = ranker.rerank(results, "test query")
        # Higher importance should float to top
        assert reranked[0]["id"] == 2
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
# ML Training (skipped if LightGBM not available)
|
|
267
|
+
# ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
class TestTraining:
    """train() guard rails: insufficient data and missing optional deps."""

    @pytest.mark.skipif(not HAS_LIGHTGBM or not HAS_NUMPY,
                        reason="LightGBM/NumPy required")
    def test_train_insufficient_data(self, ranker, learning_db):
        """Training should return None with insufficient data."""
        result = ranker.train()
        assert result is None

    def test_train_without_lightgbm(self, ranker):
        """Should gracefully handle missing LightGBM."""
        from src.learning import adaptive_ranker as ar_module
        # Simulate an install without LightGBM by flipping the module flag;
        # restore it in finally so other tests are unaffected.
        original = ar_module.HAS_LIGHTGBM
        ar_module.HAS_LIGHTGBM = False
        try:
            result = ranker.train()
            assert result is None
        finally:
            ar_module.HAS_LIGHTGBM = original
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# ---------------------------------------------------------------------------
|
|
290
|
+
# Model Loading Fallback
|
|
291
|
+
# ---------------------------------------------------------------------------
|
|
292
|
+
|
|
293
|
+
class TestModelLoading:
    """_load_model() fallback behavior when no model file exists on disk."""

    def test_load_nonexistent_model(self, ranker):
        """Loading a model that doesn't exist should return None."""
        model = ranker._load_model()
        assert model is None

    def test_load_attempt_cached(self, ranker):
        """After first failed load, _model_load_attempted should be True."""
        ranker._load_model()
        assert ranker._model_load_attempted is True

    def test_second_load_returns_cached_none(self, ranker):
        """Second load attempt should return None immediately (cached failure)."""
        ranker._load_model()
        result = ranker._load_model()
        assert result is None

    def test_reload_model_resets_flag(self, ranker):
        """reload_model should reset the _model_load_attempted flag."""
        ranker._load_model()
        assert ranker._model_load_attempted is True
        ranker.reload_model()
        # After reload, the flag should have been reset and tried again
        # (and failed again since no model file exists)
        assert ranker._model is None
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# ---------------------------------------------------------------------------
|
|
321
|
+
# Module-level convenience
|
|
322
|
+
# ---------------------------------------------------------------------------
|
|
323
|
+
|
|
324
|
+
class TestModuleLevel:
    """Smoke-test the module-level convenience API."""

    def test_get_phase_function(self):
        from src.learning.adaptive_ranker import get_phase

        # Whatever phase we are in, it must be one of the three known ones.
        assert get_phase() in ("baseline", "rule_based", "ml_model")
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SuperLocalMemory V2 - Tests for CrossProjectAggregator (v2.7)
|
|
4
|
+
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
+
Licensed under MIT License
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import math
|
|
9
|
+
import sqlite3
|
|
10
|
+
from datetime import datetime, timedelta
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from unittest.mock import patch, MagicMock
|
|
13
|
+
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Fixtures
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
@pytest.fixture(autouse=True)
def reset_singleton():
    """Guarantee a pristine LearningDB singleton around every test."""
    from src.learning.learning_db import LearningDB as _DB

    _DB.reset_instance()
    yield
    _DB.reset_instance()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.fixture
def learning_db(tmp_path):
    """Provide a LearningDB backed by a throwaway file under tmp_path."""
    from src.learning.learning_db import LearningDB

    return LearningDB(db_path=tmp_path / "learning.db")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pytest.fixture
def memory_db(tmp_path):
    """Create a minimal memory.db with test data."""
    # NOTE(review): schema appears to mirror the production memories table —
    # confirm against the main storage module if columns change.
    db_path = tmp_path / "memory.db"
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS memories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL,
            summary TEXT,
            project_path TEXT,
            project_name TEXT,
            tags TEXT DEFAULT '[]',
            category TEXT,
            parent_id INTEGER,
            tree_path TEXT DEFAULT '/',
            depth INTEGER DEFAULT 0,
            memory_type TEXT DEFAULT 'session',
            importance INTEGER DEFAULT 5,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            last_accessed TIMESTAMP,
            access_count INTEGER DEFAULT 0,
            content_hash TEXT,
            cluster_id INTEGER,
            profile TEXT DEFAULT 'default',
            created_by TEXT,
            source_protocol TEXT,
            trust_score REAL DEFAULT 1.0
        )
    ''')
    conn.commit()
    conn.close()
    # Return the path (not a connection): the aggregator opens it itself.
    return db_path
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _insert_memories(db_path, memories):
    """Insert each mapping in *memories* as one row of the memories table.

    Missing keys fall back to the same defaults the original fixture data
    used (content='test', importance=5, profile='default', fixed timestamp).
    """
    rows = [
        (
            m.get('content', 'test'),
            m.get('tags', '[]'),
            m.get('project_name'),
            m.get('project_path'),
            m.get('importance', 5),
            m.get('access_count', 0),
            m.get('profile', 'default'),
            m.get('created_by'),
            m.get('source_protocol'),
            m.get('created_at', '2026-02-16 10:00:00'),
        )
        for m in memories
    ]
    conn = sqlite3.connect(str(db_path))
    try:
        conn.executemany('''
            INSERT INTO memories (content, tags, project_name, project_path,
                                  importance, access_count, profile, created_by,
                                  source_protocol, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', rows)
        conn.commit()
    finally:
        conn.close()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
# Temporal Decay
|
|
99
|
+
# ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
class TestTemporalDecay:
    """_days_since parsing plus the exponential decay-weight formula."""

    def test_days_since_recent(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        now = datetime(2026, 2, 16, 12, 0, 0)
        ts = "2026-02-16T10:00:00"
        days = CrossProjectAggregator._days_since(ts, now)
        # Two hours ago -> a fraction of a day.
        assert 0.0 <= days < 1.0

    def test_days_since_365_days_ago(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        now = datetime(2026, 2, 16, 12, 0, 0)
        old = (now - timedelta(days=365)).isoformat()
        days = CrossProjectAggregator._days_since(old, now)
        assert abs(days - 365.0) < 1.0

    def test_days_since_empty_string(self):
        # Unparseable input degrades to 0.0 rather than raising.
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        assert CrossProjectAggregator._days_since("") == 0.0

    def test_days_since_invalid_string(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        assert CrossProjectAggregator._days_since("not-a-date") == 0.0

    def test_days_since_space_separated(self):
        # SQLite-style "YYYY-MM-DD HH:MM:SS" timestamps must parse too.
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        now = datetime(2026, 2, 16, 12, 0, 0)
        ts = "2026-02-15 12:00:00"
        days = CrossProjectAggregator._days_since(ts, now)
        assert abs(days - 1.0) < 0.01

    def test_decay_weight_recent(self):
        """Recent timestamp -> weight close to 1.0."""
        from src.learning.cross_project_aggregator import DECAY_HALF_LIFE_DAYS
        # 0 days -> exp(0) = 1.0
        weight = math.exp(-0.0 / DECAY_HALF_LIFE_DAYS)
        assert abs(weight - 1.0) < 0.001

    def test_decay_weight_365_days(self):
        """365-day-old pattern -> weight ~ 0.37."""
        # exp(-365/half_life) ~= 1/e when the half-life constant is ~365 days.
        from src.learning.cross_project_aggregator import DECAY_HALF_LIFE_DAYS
        weight = math.exp(-365.0 / DECAY_HALF_LIFE_DAYS)
        assert 0.30 < weight < 0.40
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# Contradiction Detection
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
class TestContradictionDetection:
    """_detect_contradictions: cross-profile disagreement on a pattern value."""

    def test_cross_profile_disagreement(self, learning_db, memory_db):
        """Two profiles with different values should trigger a contradiction."""
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )

        # "work" says react, "personal" says vue -> conflicting evidence.
        pattern_data = {
            "value": "react",
            "profile_history": [
                {"profile": "work", "value": "react", "confidence": 0.8,
                 "weight": 1.0, "timestamp": "2026-02-16"},
                {"profile": "personal", "value": "vue", "confidence": 0.7,
                 "weight": 0.9, "timestamp": "2026-02-15"},
            ],
        }

        contradictions = aggregator._detect_contradictions("frontend", pattern_data)
        assert len(contradictions) >= 1
        # The contradiction message must name both conflicting values.
        assert any("vue" in c and "react" in c for c in contradictions)

    def test_no_contradiction_when_unanimous(self, learning_db, memory_db):
        # Same value across profiles -> no contradiction reported.
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )
        pattern_data = {
            "value": "python",
            "profile_history": [
                {"profile": "work", "value": "python", "confidence": 0.9,
                 "weight": 1.0, "timestamp": "2026-02-16"},
                {"profile": "personal", "value": "python", "confidence": 0.8,
                 "weight": 0.9, "timestamp": "2026-02-15"},
            ],
        }
        contradictions = aggregator._detect_contradictions("lang", pattern_data)
        assert len(contradictions) == 0
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
# get_tech_preferences from learning.db
|
|
194
|
+
# ---------------------------------------------------------------------------
|
|
195
|
+
|
|
196
|
+
class TestGetTechPreferences:
    """get_tech_preferences: reading stored patterns back out of learning.db."""

    def test_empty_db_returns_empty(self, learning_db, memory_db):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )
        # min_confidence=0.0 disables filtering; still nothing stored.
        prefs = aggregator.get_tech_preferences(min_confidence=0.0)
        assert prefs == {}

    def test_stored_patterns_returned(self, learning_db, memory_db):
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        # Pre-populate learning.db with a preference pattern
        learning_db.upsert_transferable_pattern(
            pattern_type="preference",
            key="language",
            value="python",
            confidence=0.85,
            evidence_count=15,
            profiles_seen=2,
        )

        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )
        prefs = aggregator.get_tech_preferences(min_confidence=0.5)
        assert "language" in prefs
        assert prefs["language"]["value"] == "python"
        assert prefs["language"]["confidence"] == 0.85

    def test_confidence_filter(self, learning_db, memory_db):
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        # One pattern below and one above the 0.6 threshold used below.
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="low", value="x",
            confidence=0.3, evidence_count=2,
        )
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="high", value="y",
            confidence=0.9, evidence_count=20,
        )

        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )
        prefs = aggregator.get_tech_preferences(min_confidence=0.6)
        assert "high" in prefs
        assert "low" not in prefs

    def test_no_learning_db(self, memory_db):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=None,
        )
        # Should not crash
        prefs = aggregator.get_tech_preferences()
        assert prefs == {}
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
# is_within_window
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
|
|
263
|
+
class TestIsWithinWindow:
    """Validate the _is_within_window timestamp guard (90-day window)."""

    def test_recent_within_window(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        fresh = datetime.now().isoformat()
        assert CrossProjectAggregator._is_within_window(fresh, 90) is True

    def test_old_outside_window(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        stale = (datetime.now() - timedelta(days=200)).isoformat()
        assert CrossProjectAggregator._is_within_window(stale, 90) is False

    def test_empty_timestamp(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        # Missing timestamps are treated as outside the window, not an error.
        assert CrossProjectAggregator._is_within_window("", 90) is False

    def test_invalid_timestamp(self):
        from src.learning.cross_project_aggregator import CrossProjectAggregator

        assert CrossProjectAggregator._is_within_window("not-a-date", 90) is False
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# ---------------------------------------------------------------------------
|
|
284
|
+
# Preference Context Formatting
|
|
285
|
+
# ---------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
class TestPreferenceContext:
    """get_preference_context: human-readable rendering of learned prefs."""

    def test_no_preferences(self, learning_db, memory_db):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )
        ctx = aggregator.get_preference_context()
        # Empty DB yields the explicit "nothing learned" placeholder text.
        assert "No transferable preferences learned yet" in ctx

    def test_with_preferences(self, learning_db, memory_db):
        from src.learning.cross_project_aggregator import CrossProjectAggregator
        learning_db.upsert_transferable_pattern(
            pattern_type="preference", key="framework", value="FastAPI",
            confidence=0.8, evidence_count=10, profiles_seen=2,
        )
        aggregator = CrossProjectAggregator(
            memory_db_path=memory_db,
            learning_db=learning_db,
        )
        ctx = aggregator.get_preference_context(min_confidence=0.5)
        assert "FastAPI" in ctx
        assert "Framework" in ctx  # Title-cased key
|