npm - superlocalmemory - Versions diffs - 3.4.18 → 3.4.21 - Mend

superlocalmemory 3.4.18 → 3.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172)

package/CHANGELOG.md +35 -0
package/README.md +42 -34
package/bin/slm +11 -0
package/bin/slm.bat +12 -0
package/package.json +4 -3
package/pyproject.toml +3 -2
package/scripts/build-slm-hook.ps1 +40 -0
package/scripts/build-slm-hook.sh +45 -0
package/scripts/build_entry.py +452 -0
package/scripts/ci/stage5b_gate.sh +50 -0
package/scripts/postinstall/validation.js +187 -0
package/scripts/postinstall-interactive.js +756 -0
package/scripts/postinstall_binary.js +287 -0
package/scripts/release_manifest.py +273 -0
package/scripts/slm-hook.spec +56 -0
package/skills/slm-build-graph/SKILL.md +423 -0
package/skills/slm-list-recent/SKILL.md +348 -0
package/skills/slm-recall/SKILL.md +343 -0
package/skills/slm-remember/SKILL.md +194 -0
package/skills/slm-show-patterns/SKILL.md +224 -0
package/skills/slm-status/SKILL.md +363 -0
package/skills/slm-switch-profile/SKILL.md +442 -0
package/src/superlocalmemory/cli/commands.py +219 -79
package/src/superlocalmemory/cli/context_commands.py +192 -0
package/src/superlocalmemory/cli/daemon.py +15 -1
package/src/superlocalmemory/cli/db_migrate.py +80 -0
package/src/superlocalmemory/cli/escape_hatch.py +220 -0
package/src/superlocalmemory/cli/main.py +72 -1
package/src/superlocalmemory/core/context_cache.py +397 -0
package/src/superlocalmemory/core/embeddings.py +8 -2
package/src/superlocalmemory/core/engine.py +38 -2
package/src/superlocalmemory/core/engine_wiring.py +1 -1
package/src/superlocalmemory/core/ram_lock.py +111 -0
package/src/superlocalmemory/core/recall_pipeline.py +433 -3
package/src/superlocalmemory/core/recall_worker.py +8 -3
package/src/superlocalmemory/core/security_primitives.py +635 -0
package/src/superlocalmemory/core/shadow_router.py +319 -0
package/src/superlocalmemory/core/slm_disabled.py +87 -0
package/src/superlocalmemory/core/slmignore.py +125 -0
package/src/superlocalmemory/core/topic_signature.py +143 -0
package/src/superlocalmemory/core/worker_pool.py +14 -3
package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
package/src/superlocalmemory/evolution/budget.py +321 -0
package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
package/src/superlocalmemory/hooks/adapter_base.py +317 -0
package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
package/src/superlocalmemory/hooks/context_payload.py +312 -0
package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
package/src/superlocalmemory/hooks/ide_connector.py +25 -2
package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
package/src/superlocalmemory/hooks/session_registry.py +186 -0
package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
package/src/superlocalmemory/hooks/sync_loop.py +114 -0
package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
package/src/superlocalmemory/infra/backup.py +3 -3
package/src/superlocalmemory/infra/cloud_backup.py +2 -2
package/src/superlocalmemory/infra/event_bus.py +2 -2
package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
package/src/superlocalmemory/learning/arm_catalog.py +99 -0
package/src/superlocalmemory/learning/bandit.py +526 -0
package/src/superlocalmemory/learning/bandit_cache.py +133 -0
package/src/superlocalmemory/learning/behavioral.py +53 -1
package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
package/src/superlocalmemory/learning/database.py +256 -0
package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
package/src/superlocalmemory/learning/ensemble.py +300 -0
package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
package/src/superlocalmemory/learning/labeler.py +87 -0
package/src/superlocalmemory/learning/legacy_migration.py +277 -0
package/src/superlocalmemory/learning/memory_merge.py +160 -0
package/src/superlocalmemory/learning/model_cache.py +269 -0
package/src/superlocalmemory/learning/model_rollback.py +278 -0
package/src/superlocalmemory/learning/outcome_queue.py +284 -0
package/src/superlocalmemory/learning/pattern_miner.py +415 -0
package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
package/src/superlocalmemory/learning/ranker.py +225 -81
package/src/superlocalmemory/learning/ranker_common.py +163 -0
package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
package/src/superlocalmemory/learning/reward.py +777 -0
package/src/superlocalmemory/learning/reward_archive.py +210 -0
package/src/superlocalmemory/learning/reward_boost.py +201 -0
package/src/superlocalmemory/learning/reward_proxy.py +326 -0
package/src/superlocalmemory/learning/shadow_test.py +524 -0
package/src/superlocalmemory/learning/signal_worker.py +270 -0
package/src/superlocalmemory/learning/signals.py +314 -0
package/src/superlocalmemory/learning/trigram_index.py +547 -0
package/src/superlocalmemory/mcp/server.py +5 -5
package/src/superlocalmemory/mcp/tools_context.py +183 -0
package/src/superlocalmemory/mcp/tools_core.py +92 -27
package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
package/src/superlocalmemory/retrieval/engine.py +52 -0
package/src/superlocalmemory/retrieval/reranker.py +4 -2
package/src/superlocalmemory/server/api.py +2 -2
package/src/superlocalmemory/server/bandit_loops.py +140 -0
package/src/superlocalmemory/server/middleware/__init__.py +11 -0
package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
package/src/superlocalmemory/server/routes/backup.py +36 -13
package/src/superlocalmemory/server/routes/behavioral.py +50 -19
package/src/superlocalmemory/server/routes/brain.py +1234 -0
package/src/superlocalmemory/server/routes/data_io.py +4 -4
package/src/superlocalmemory/server/routes/events.py +2 -2
package/src/superlocalmemory/server/routes/helpers.py +1 -1
package/src/superlocalmemory/server/routes/learning.py +192 -7
package/src/superlocalmemory/server/routes/memories.py +189 -1
package/src/superlocalmemory/server/routes/prewarm.py +171 -0
package/src/superlocalmemory/server/routes/profiles.py +3 -3
package/src/superlocalmemory/server/routes/token.py +88 -0
package/src/superlocalmemory/server/routes/ws.py +5 -5
package/src/superlocalmemory/server/security_middleware.py +13 -7
package/src/superlocalmemory/server/ui.py +2 -2
package/src/superlocalmemory/server/unified_daemon.py +335 -3
package/src/superlocalmemory/storage/migration_runner.py +545 -0
package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
package/src/superlocalmemory/storage/models.py +4 -0
package/src/superlocalmemory/ui/css/brain.css +409 -0
package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
package/src/superlocalmemory/ui/index.html +459 -1345
package/src/superlocalmemory/ui/js/brain.js +1321 -0
package/src/superlocalmemory/ui/js/clusters.js +123 -4
package/src/superlocalmemory/ui/js/init.js +48 -39
package/src/superlocalmemory/ui/js/memories.js +88 -2
package/src/superlocalmemory/ui/js/modal.js +71 -1
package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
package/src/superlocalmemory/ui/js/behavioral.js +0 -447
package/src/superlocalmemory/ui/js/graph-core.js +0 -447
package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
package/src/superlocalmemory/ui/js/learning.js +0 -435
package/src/superlocalmemory/ui/js/patterns.js +0 -93
package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
package/src/superlocalmemory.egg-info/requires.txt +0 -58
package/src/superlocalmemory.egg-info/top_level.txt +0 -1

package/src/superlocalmemory/learning/pattern_miner_constants.py ADDED

@@ -0,0 +1,47 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under AGPL-3.0-or-later - see LICENSE file
+# Part of SuperLocalMemory v3.4.21 — F4.A Stage-8 H-01 fix
+"""Static dictionaries used by ``pattern_miner`` — extracted so the
+main module stays under the 400-LOC cap.
+"""
+from __future__ import annotations
+TECH_KEYWORDS: dict[str, str] = {
+    "python": "Python", "javascript": "JavaScript",
+    "typescript": "TypeScript", "react": "React",
+    "vue": "Vue", "angular": "Angular",
+    "postgresql": "PostgreSQL", "mysql": "MySQL",
+    "sqlite": "SQLite", "docker": "Docker",
+    "kubernetes": "Kubernetes", "aws": "AWS",
+    "azure": "Azure", "gcp": "GCP",
+    "node": "Node.js", "fastapi": "FastAPI",
+    "django": "Django", "flask": "Flask",
+    "rust": "Rust", "go": "Go", "java": "Java",
+    "git": "Git", "npm": "npm", "pip": "pip",
+    "langchain": "LangChain", "ollama": "Ollama",
+    "pytorch": "PyTorch", "claude": "Claude",
+    "openai": "OpenAI", "anthropic": "Anthropic",
+    "redis": "Redis", "mongodb": "MongoDB",
+    "graphql": "GraphQL", "nextjs": "Next.js",
+    "terraform": "Terraform", "nginx": "Nginx",
+    "linux": "Linux", "macos": "macOS",
+    "vscode": "VS Code", "neovim": "Neovim",
+}
+STOPWORDS: frozenset[str] = frozenset({
+    "the", "is", "a", "an", "in", "on", "at", "to", "for",
+    "of", "and", "or", "not", "with", "that", "this", "was",
+    "are", "be", "has", "had", "have", "from", "by", "it",
+    "its", "as", "but", "were", "been", "being", "would",
+    "could", "should", "will", "may", "might", "can", "do",
+    "does", "did", "about", "into", "over", "after", "before",
+    "then", "than", "also", "just", "like", "more", "some",
+    "only", "other", "such", "each", "every", "both", "most",
+})
+__all__ = ("TECH_KEYWORDS", "STOPWORDS")

package/src/superlocalmemory/learning/ranker.py CHANGED

@@ -4,11 +4,19 @@
 """3-phase adaptive ranker — from heuristic to ML.
+LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
+Sections 4.4 + 4.5.
 Phase 1: cross-encoder score only (cold start)
 Phase 2: heuristic boosts (some data)
-Phase 3: LightGBM model (enough training data)
+Phase 3: LightGBM **lambdarank** Booster (native, not LGBMRanker sklearn
+         wrapper) scoring on numpy feature matrices.
-Transitions are automatic based on accumulated training data.
+Transitions are automatic based on accumulated training data. Feature-name
+drift is handled per LLD-02 §4.5 (``drift_mode``):
+    - ``aligned`` — score normally.
+    - ``subset``  — pad missing features with 0.0 in FEATURE_NAMES order.
+    - ``unknown`` — refuse to score; fall back to pre-model order.
 """
 from __future__ import annotations
@@ -17,7 +25,12 @@ import logging
 import math
 from typing import Any
-from superlocalmemory.learning.features import FeatureExtractor, FeatureVector, FEATURE_DIM
+from superlocalmemory.learning.features import (
+    FEATURE_DIM,
+    FEATURE_NAMES,
+    FeatureExtractor,
+    FeatureVector,
+)
 logger = logging.getLogger(__name__)
@@ -29,15 +42,32 @@ PHASE_3_THRESHOLD = 200  # signals needed to enter Phase 3
 class AdaptiveRanker:
     """3-phase adaptive re-ranker for V3 retrieval results."""
-    def __init__(self, signal_count: int = 0, model_state: bytes | None = None) -> None:
+    def __init__(
+        self,
+        signal_count: int = 0,
+        model_state: bytes | None = None,
+        *,
+        active_model: Any = None,
+    ) -> None:
+        """Build a ranker.
+        ``active_model`` (``model_cache.ActiveModel``) is preferred when
+        available — it carries verified booster + feature_names. The legacy
+        ``model_state`` bytes path remains for backward compatibility with
+        3.4.20 callers; it does NOT perform SHA-256 verification and should
+        not be used by the 3.4.21 recall path.
+        """
         self._signal_count = signal_count
-        self._model = None
-        if model_state:
-            self._load_model(model_state)
+        self._active = active_model
+        # Back-compat: only fill in from raw bytes when no active_model given.
+        if active_model is None and model_state:
+            self._load_legacy_bytes(model_state)
+    # --- public properties ---------------------------------------------
     @property
     def phase(self) -> int:
-        if self._signal_count >= PHASE_3_THRESHOLD and self._model is not None:
+        if self._signal_count >= PHASE_3_THRESHOLD and self._active is not None:
             return 3
         if self._signal_count >= PHASE_2_THRESHOLD:
             return 2
@@ -51,6 +81,12 @@ class AdaptiveRanker:
     def signal_count(self, value: int) -> None:
         self._signal_count = value
+    @property
+    def active_model(self) -> Any:
+        return self._active
+    # --- re-rank entry points ------------------------------------------
     def rerank(self, results: list[dict], query_context: dict) -> list[dict]:
         """Re-rank retrieval results based on current phase."""
         if not results:
@@ -58,98 +94,206 @@ class AdaptiveRanker:
         if self.phase == 3:
             return self._rerank_ml(results, query_context)
-        elif self.phase == 2:
+        if self.phase == 2:
             return self._rerank_heuristic(results, query_context)
-        else:
-            return self._rerank_baseline(results)
+        return self._rerank_baseline(results)
-    def train(self, training_data: list[dict]) -> bool:
-        """Train LightGBM model on labeled data. Returns True if model was trained."""
-        if len(training_data) < PHASE_3_THRESHOLD:
-            return False
+    def rank(self, candidates: list, query_context: dict) -> list:
+        """LLD-02 §4.5 native inference path.
-        try:
-            import lightgbm as lgb
-        except ImportError:
-            logger.warning("LightGBM not installed. Phase 3 ranking unavailable.")
-            return False
+        Accepts an iterable of objects that implement ``to_result_dict()``
+        (the signal-pipeline candidates) AND plain dicts (legacy).
+        """
+        if self._active is None or not candidates:
+            return list(candidates)
-        features_list = []
-        labels = []
-        for item in training_data:
-            fv = item.get("features", {})
-            label = item.get("label", 0.0)
-            # Convert feature dict to ordered list
-            vec = [fv.get(name, 0.0) for name in FeatureExtractor.extract(
-                {"channel_scores": {}, "fact": {}}, {"query_type": ""}
-            ).features.keys()]
-            # Simpler: just use the feature values in order
-            from superlocalmemory.learning.features import FEATURE_NAMES
-            vec = [float(fv.get(name, 0.0)) for name in FEATURE_NAMES]
-            features_list.append(vec)
-            labels.append(float(label))
-        if not features_list:
-            return False
+        # Build result dicts in a uniform shape.
+        result_dicts: list[dict] = []
+        for c in candidates:
+            if hasattr(c, "to_result_dict"):
+                result_dicts.append(c.to_result_dict())
+            elif isinstance(c, dict):
+                result_dicts.append(c)
+            else:
+                # Unknown candidate type — return original order.
+                return list(candidates)
-        dataset = lgb.Dataset(features_list, label=labels)
-        params = {
-            "objective": "binary",
-            "metric": "binary_logloss",
-            "num_leaves": 15,
-            "learning_rate": 0.1,
-            "verbose": -1,
-        }
-        self._model = lgb.train(params, dataset, num_boost_round=50)
-        logger.info("LightGBM model trained with %d examples", len(features_list))
-        return True
+        from superlocalmemory.learning.model_cache import drift_mode
-    def get_model_state(self) -> bytes | None:
-        """Serialize model for persistence."""
-        if self._model is None:
-            return None
-        return self._model.model_to_string().encode("utf-8")
+        mode = drift_mode(self._active)
+        if mode == "unknown":
+            logger.info(
+                "ranker.rank: feature-name drift unknown; "
+                "falling back to pre-model order",
+            )
+            return list(candidates)
+        # Order matrix by CURRENT FEATURE_NAMES; if subset, missing names
+        # pad with 0.0 (FeatureExtractor already does this via .get(name, 0)).
+        try:
+            import numpy as np
+        except ImportError:  # pragma: no cover — numpy is required dep
+            return list(candidates)
+        try:
+            rows = []
+            for rd in result_dicts:
+                fv = FeatureExtractor.extract(rd, query_context)
+                rows.append(fv.to_list())
+            X = np.asarray(rows, dtype=np.float32)
+            scores = self._active.booster.predict(X)
+        except Exception as exc:  # pragma: no cover — booster.predict path
+            logger.warning("ranker.rank: booster.predict failed: %s", exc)
+            return list(candidates)
+        order = np.argsort(-scores, kind="stable")
+        return [candidates[int(i)] for i in order]
-    # -- Phase implementations --
+    # --- phase implementations -----------------------------------------
     def _rerank_baseline(self, results: list[dict]) -> list[dict]:
-        """Phase 1: rank by cross-encoder score."""
-        return sorted(results, key=lambda r: r.get("cross_encoder_score", r.get("score", 0)), reverse=True)
+        return sorted(
+            results,
+            key=lambda r: r.get("cross_encoder_score", r.get("score", 0)),
+            reverse=True,
+        )
-    def _rerank_heuristic(self, results: list[dict], query_context: dict) -> list[dict]:
-        """Phase 2: heuristic boosts on top of cross-encoder."""
-        scored = []
+    def _rerank_heuristic(
+        self, results: list[dict], query_context: dict,
+    ) -> list[dict]:
+        scored: list[dict] = []
         for r in results:
             base = r.get("cross_encoder_score", r.get("score", 0))
-            # Boosts
-            recency_boost = 0.1 * math.exp(-r.get("fact", {}).get("age_days", 30) / 30)
-            access_boost = 0.05 * min(r.get("fact", {}).get("access_count", 0) / 10, 1.0)
+            age_days = r.get("fact", {}).get("age_days", 30)
+            access_count = r.get("fact", {}).get("access_count", 0)
+            recency_boost = 0.1 * math.exp(-age_days / 30)
+            access_boost = 0.05 * min(access_count / 10, 1.0)
             trust_boost = 0.1 * (r.get("trust_score", 0.5) - 0.5)
             final = base + recency_boost + access_boost + trust_boost
             scored.append({**r, "_adaptive_score": final})
         return sorted(scored, key=lambda r: r["_adaptive_score"], reverse=True)
-    def _rerank_ml(self, results: list[dict], query_context: dict) -> list[dict]:
-        """Phase 3: LightGBM prediction."""
-        if self._model is None:
+    def _rerank_ml(
+        self, results: list[dict], query_context: dict,
+    ) -> list[dict]:
+        """Phase 3 prediction via native Booster."""
+        if self._active is None:  # pragma: no cover — guarded by phase()
             return self._rerank_heuristic(results, query_context)
-        feature_vectors = FeatureExtractor.extract_batch(results, query_context)
-        predictions = []
-        for fv in feature_vectors:
-            vec = [fv.to_list()]
-            pred = self._model.predict(vec)[0]
-            predictions.append(pred)
+        from superlocalmemory.learning.model_cache import drift_mode
-        paired = list(zip(results, predictions))
-        paired.sort(key=lambda x: x[1], reverse=True)
-        return [r for r, _ in paired]
+        mode = drift_mode(self._active)
+        if mode == "unknown":
+            logger.info(
+                "ranker._rerank_ml: unknown drift → heuristic fallback",
+            )
+            return self._rerank_heuristic(results, query_context)
+        try:
+            import numpy as np
+        except ImportError:  # pragma: no cover
+            return self._rerank_heuristic(results, query_context)
-    def _load_model(self, state: bytes) -> None:
-        """Load model from serialized state."""
         try:
-            import lightgbm as lgb
-            self._model = lgb.Booster(model_str=state.decode("utf-8"))
-        except (ImportError, Exception) as exc:
-            logger.warning("Could not load LightGBM model: %s", exc)
-            self._model = None
+            feature_vectors = FeatureExtractor.extract_batch(
+                results, query_context,
+            )
+            X = np.asarray(
+                [fv.to_list() for fv in feature_vectors],
+                dtype=np.float32,
+            )
+            scores = self._active.booster.predict(X)
+        except Exception as exc:  # pragma: no cover — booster.predict path
+            logger.warning("_rerank_ml failed: %s", exc)
+            return self._rerank_heuristic(results, query_context)
+        order = np.argsort(-scores, kind="stable")
+        return [results[int(i)] for i in order]
+    # --- legacy load path (back-compat) --------------------------------
+    def _load_legacy_bytes(self, state: bytes) -> None:
+        """Best-effort load from raw bytes — NO SHA-256 verify.
+        Kept for 3.4.20 callers. The 3.4.21 recall path uses
+        ``model_cache.load_active`` which enforces verification.
+        """
+        try:
+            import lightgbm as lgb  # noqa: PLC0415
+            booster = lgb.Booster(model_str=state.decode("utf-8"))
+        except Exception as exc:
+            logger.warning("Legacy model load failed: %s", exc)
+            self._active = None
+            return
+        from superlocalmemory.learning.model_cache import ActiveModel
+        self._active = ActiveModel(
+            profile_id="legacy",
+            booster=booster,
+            feature_names=tuple(FEATURE_NAMES),
+            trained_at="",
+            sha256="",
+        )
+    # --- legacy train() shim (3.4.20 API) ------------------------------
+    def train(self, training_data: list) -> bool:
+        """Deprecated — v3.4.21 training lives in ``consolidation_worker``.
+        Kept as a guard for 3.4.20 callers: returns False when
+        training_data is below the Phase-3 threshold, True after a best-
+        effort native booster fit on the legacy feature dict shape
+        (never persists to disk). Production training must go through
+        ``consolidation_worker._retrain_ranker`` which uses real features
+        + ``lambdarank`` + group + integrity persistence.
+        """
+        if not training_data or len(training_data) < PHASE_3_THRESHOLD:
+            return False
+        # Best-effort legacy path — does NOT persist, does NOT promote.
+        try:
+            import lightgbm as lgb  # noqa: PLC0415
+            import numpy as np
+        except ImportError:
+            return False
+        X = np.asarray(
+            [[float((d.get("features") or {}).get(n, 0.0))
+              for n in FEATURE_NAMES]
+             for d in training_data],
+            dtype=np.float32,
+        )
+        y = np.asarray(
+            [float(d.get("label", 0.0)) for d in training_data],
+            dtype=np.float32,
+        )
+        ds = lgb.Dataset(X, label=y, feature_name=list(FEATURE_NAMES),
+                         free_raw_data=False)
+        try:
+            booster = lgb.train(
+                {"objective": "regression", "metric": "rmse",
+                 "verbosity": -1, "min_data_in_leaf": 1},
+                ds, num_boost_round=10,
+            )
+        except Exception:  # pragma: no cover — defensive
+            return False
+        from superlocalmemory.learning.model_cache import ActiveModel
+        self._active = ActiveModel(
+            profile_id="legacy",
+            booster=booster,
+            feature_names=tuple(FEATURE_NAMES),
+            trained_at="",
+            sha256="",
+        )
+        return True
+    # --- legacy serialiser (used by external code in 3.4.20) -----------
+    def get_model_state(self) -> bytes | None:
+        if self._active is None:
+            return None
+        try:
+            return self._active.booster.model_to_string().encode("utf-8")
+        except Exception:  # pragma: no cover — defensive
+            return None

package/src/superlocalmemory/learning/ranker_common.py ADDED

@@ -0,0 +1,163 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under AGPL-3.0-or-later - see LICENSE file
+# Part of SuperLocalMemory v3.4.21 — F4.A Stage-8 H-01 fix
+"""Ranker retraining helpers shared by legacy + online paths.
+These functions predate the LLD-10 online retrain wiring and remain
+identical in behaviour; they are factored out so both
+``ranker_retrain_legacy.py`` and ``ranker_retrain_online.py`` can call
+them without importing from each other.
+Contract refs:
+  - LLD-02 §4.6 — lambdarank retraining groups + shadow gate.
+  - LLD-10 §3.2 — in-sample NDCG gate before persisting a candidate.
+  - Stage 8 H-01 (architect) — file split.
+"""
+from __future__ import annotations
+import logging
+logger = logging.getLogger(__name__)
+__all__ = (
+    "_build_training_matrix",
+    "_shadow_test_improved",
+    "_compute_eval_metrics",
+)
+def _build_training_matrix(rows: list[dict], feature_names):
+    """Group rows by ``query_id``, preserve order by ``position``.
+    Returns ``(X, y_int, group_counts)``. ``group_counts`` is ``None``
+    when no groups are discoverable (empty input).
+    """
+    import numpy as np
+    from superlocalmemory.learning.labeler import label_for_row
+    grouped: dict[str, list[dict]] = {}
+    for row in rows:
+        qid = row.get("query_id") or ""
+        grouped.setdefault(qid, []).append(row)
+    if not grouped:
+        return np.zeros((0, len(feature_names)), dtype=np.float32), [], None
+    xs: list[list[float]] = []
+    ys: list[int] = []
+    group_counts: list[int] = []
+    for qid, group_rows in grouped.items():
+        # Sort by position ascending; missing positions land at the end.
+        group_rows = sorted(
+            group_rows,
+            key=lambda r: (
+                r.get("position") if r.get("position") is not None else 10**9
+            ),
+        )
+        for r in group_rows:
+            feats = r.get("features") or {}
+            xs.append([float(feats.get(n, 0.0)) for n in feature_names])
+            ys.append(label_for_row(r))
+        group_counts.append(len(group_rows))
+    X = np.asarray(xs, dtype=np.float32)
+    y = np.asarray(ys, dtype=np.int32)
+    return X, y, group_counts
+def _shadow_test_improved(prior_row, booster_new, rows, feature_names) -> bool:
+    """Return True iff new booster beats prior on NDCG@10 with p<0.05.
+    Lightweight paired t-test across per-query NDCG@10 scores.
+    ``prior_row`` is the dict returned by ``load_active_model`` — it
+    may be unusable (missing state_bytes / unparseable); in that case
+    we promote.
+    """
+    try:
+        import numpy as np
+        import lightgbm as lgb
+    except ImportError:  # pragma: no cover
+        return True
+    try:
+        prior_booster = lgb.Booster(
+            model_str=bytes(prior_row["state_bytes"]).decode("utf-8"),
+        )
+    except Exception:
+        return True  # prior unusable → promote new.
+    X, y, groups = _build_training_matrix(rows, feature_names)
+    if groups is None or not groups:
+        return True
+    offsets = [0]
+    for g in groups:
+        offsets.append(offsets[-1] + g)
+    def _ndcg_at_k(scores, labels, k=10):
+        order = np.argsort(-scores)
+        gains_map = [0, 1, 3, 7, 15]
+        dcg = 0.0
+        for i, idx in enumerate(order[:k]):
+            l = int(labels[idx])
+            if 0 <= l < len(gains_map):
+                dcg += gains_map[l] / np.log2(i + 2)
+        ideal = sorted(labels.tolist(), reverse=True)[:k]
+        idcg = sum(
+            (gains_map[int(l)] if 0 <= int(l) < len(gains_map) else 0)
+            / np.log2(i + 2)
+            for i, l in enumerate(ideal)
+        )
+        return dcg / idcg if idcg > 0 else 0.0
+    old_ndcgs: list[float] = []
+    new_ndcgs: list[float] = []
+    for i in range(len(groups)):
+        lo, hi = offsets[i], offsets[i + 1]
+        if hi - lo < 2:
+            continue
+        Xg, yg = X[lo:hi], y[lo:hi]
+        try:
+            s_old = prior_booster.predict(Xg)
+            s_new = booster_new.predict(Xg)
+        except Exception:
+            return False
+        old_ndcgs.append(_ndcg_at_k(s_old, yg))
+        new_ndcgs.append(_ndcg_at_k(s_new, yg))
+    if not old_ndcgs:
+        return True
+    old_arr = np.asarray(old_ndcgs)
+    new_arr = np.asarray(new_ndcgs)
+    delta = float(np.mean(new_arr - old_arr))
+    if delta < 0.02:
+        return False
+    # Paired t-test — small-sample safe.
+    diff = new_arr - old_arr
+    n = len(diff)
+    if n < 2:
+        return True
+    mean = float(np.mean(diff))
+    std = float(np.std(diff, ddof=1))
+    if std == 0.0:
+        return mean > 0
+    t_stat = mean / (std / np.sqrt(n))
+    # Rough threshold: t > 2.0 (~p<0.05 for n ≥ 10 two-tailed).
+    return t_stat > 2.0
+def _compute_eval_metrics(booster, rows, feature_names) -> dict:
+    """Lightweight training metrics snapshot."""
+    try:
+        import numpy as np
+        X, y, groups = _build_training_matrix(rows, feature_names)
+        preds = booster.predict(X) if X.size else np.zeros(0)
+        return {
+            "n_rows": int(X.shape[0]),
+            "n_groups": int(len(groups or [])),
+            "mean_score": float(np.mean(preds)) if preds.size else 0.0,
+        }
+    except Exception:  # pragma: no cover
+        return {}