npm - superlocalmemory - Versions diffs - 3.4.18 → 3.4.21 - Mend

superlocalmemory 3.4.18 → 3.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172)

package/CHANGELOG.md +35 -0
package/README.md +42 -34
package/bin/slm +11 -0
package/bin/slm.bat +12 -0
package/package.json +4 -3
package/pyproject.toml +3 -2
package/scripts/build-slm-hook.ps1 +40 -0
package/scripts/build-slm-hook.sh +45 -0
package/scripts/build_entry.py +452 -0
package/scripts/ci/stage5b_gate.sh +50 -0
package/scripts/postinstall/validation.js +187 -0
package/scripts/postinstall-interactive.js +756 -0
package/scripts/postinstall_binary.js +287 -0
package/scripts/release_manifest.py +273 -0
package/scripts/slm-hook.spec +56 -0
package/skills/slm-build-graph/SKILL.md +423 -0
package/skills/slm-list-recent/SKILL.md +348 -0
package/skills/slm-recall/SKILL.md +343 -0
package/skills/slm-remember/SKILL.md +194 -0
package/skills/slm-show-patterns/SKILL.md +224 -0
package/skills/slm-status/SKILL.md +363 -0
package/skills/slm-switch-profile/SKILL.md +442 -0
package/src/superlocalmemory/cli/commands.py +219 -79
package/src/superlocalmemory/cli/context_commands.py +192 -0
package/src/superlocalmemory/cli/daemon.py +15 -1
package/src/superlocalmemory/cli/db_migrate.py +80 -0
package/src/superlocalmemory/cli/escape_hatch.py +220 -0
package/src/superlocalmemory/cli/main.py +72 -1
package/src/superlocalmemory/core/context_cache.py +397 -0
package/src/superlocalmemory/core/embeddings.py +8 -2
package/src/superlocalmemory/core/engine.py +38 -2
package/src/superlocalmemory/core/engine_wiring.py +1 -1
package/src/superlocalmemory/core/ram_lock.py +111 -0
package/src/superlocalmemory/core/recall_pipeline.py +433 -3
package/src/superlocalmemory/core/recall_worker.py +8 -3
package/src/superlocalmemory/core/security_primitives.py +635 -0
package/src/superlocalmemory/core/shadow_router.py +319 -0
package/src/superlocalmemory/core/slm_disabled.py +87 -0
package/src/superlocalmemory/core/slmignore.py +125 -0
package/src/superlocalmemory/core/topic_signature.py +143 -0
package/src/superlocalmemory/core/worker_pool.py +14 -3
package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
package/src/superlocalmemory/evolution/budget.py +321 -0
package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
package/src/superlocalmemory/hooks/adapter_base.py +317 -0
package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
package/src/superlocalmemory/hooks/context_payload.py +312 -0
package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
package/src/superlocalmemory/hooks/ide_connector.py +25 -2
package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
package/src/superlocalmemory/hooks/session_registry.py +186 -0
package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
package/src/superlocalmemory/hooks/sync_loop.py +114 -0
package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
package/src/superlocalmemory/infra/backup.py +3 -3
package/src/superlocalmemory/infra/cloud_backup.py +2 -2
package/src/superlocalmemory/infra/event_bus.py +2 -2
package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
package/src/superlocalmemory/learning/arm_catalog.py +99 -0
package/src/superlocalmemory/learning/bandit.py +526 -0
package/src/superlocalmemory/learning/bandit_cache.py +133 -0
package/src/superlocalmemory/learning/behavioral.py +53 -1
package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
package/src/superlocalmemory/learning/database.py +256 -0
package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
package/src/superlocalmemory/learning/ensemble.py +300 -0
package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
package/src/superlocalmemory/learning/labeler.py +87 -0
package/src/superlocalmemory/learning/legacy_migration.py +277 -0
package/src/superlocalmemory/learning/memory_merge.py +160 -0
package/src/superlocalmemory/learning/model_cache.py +269 -0
package/src/superlocalmemory/learning/model_rollback.py +278 -0
package/src/superlocalmemory/learning/outcome_queue.py +284 -0
package/src/superlocalmemory/learning/pattern_miner.py +415 -0
package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
package/src/superlocalmemory/learning/ranker.py +225 -81
package/src/superlocalmemory/learning/ranker_common.py +163 -0
package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
package/src/superlocalmemory/learning/reward.py +777 -0
package/src/superlocalmemory/learning/reward_archive.py +210 -0
package/src/superlocalmemory/learning/reward_boost.py +201 -0
package/src/superlocalmemory/learning/reward_proxy.py +326 -0
package/src/superlocalmemory/learning/shadow_test.py +524 -0
package/src/superlocalmemory/learning/signal_worker.py +270 -0
package/src/superlocalmemory/learning/signals.py +314 -0
package/src/superlocalmemory/learning/trigram_index.py +547 -0
package/src/superlocalmemory/mcp/server.py +5 -5
package/src/superlocalmemory/mcp/tools_context.py +183 -0
package/src/superlocalmemory/mcp/tools_core.py +92 -27
package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
package/src/superlocalmemory/retrieval/engine.py +52 -0
package/src/superlocalmemory/retrieval/reranker.py +4 -2
package/src/superlocalmemory/server/api.py +2 -2
package/src/superlocalmemory/server/bandit_loops.py +140 -0
package/src/superlocalmemory/server/middleware/__init__.py +11 -0
package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
package/src/superlocalmemory/server/routes/backup.py +36 -13
package/src/superlocalmemory/server/routes/behavioral.py +50 -19
package/src/superlocalmemory/server/routes/brain.py +1234 -0
package/src/superlocalmemory/server/routes/data_io.py +4 -4
package/src/superlocalmemory/server/routes/events.py +2 -2
package/src/superlocalmemory/server/routes/helpers.py +1 -1
package/src/superlocalmemory/server/routes/learning.py +192 -7
package/src/superlocalmemory/server/routes/memories.py +189 -1
package/src/superlocalmemory/server/routes/prewarm.py +171 -0
package/src/superlocalmemory/server/routes/profiles.py +3 -3
package/src/superlocalmemory/server/routes/token.py +88 -0
package/src/superlocalmemory/server/routes/ws.py +5 -5
package/src/superlocalmemory/server/security_middleware.py +13 -7
package/src/superlocalmemory/server/ui.py +2 -2
package/src/superlocalmemory/server/unified_daemon.py +335 -3
package/src/superlocalmemory/storage/migration_runner.py +545 -0
package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
package/src/superlocalmemory/storage/models.py +4 -0
package/src/superlocalmemory/ui/css/brain.css +409 -0
package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
package/src/superlocalmemory/ui/index.html +459 -1345
package/src/superlocalmemory/ui/js/brain.js +1321 -0
package/src/superlocalmemory/ui/js/clusters.js +123 -4
package/src/superlocalmemory/ui/js/init.js +48 -39
package/src/superlocalmemory/ui/js/memories.js +88 -2
package/src/superlocalmemory/ui/js/modal.js +71 -1
package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
package/src/superlocalmemory/ui/js/behavioral.js +0 -447
package/src/superlocalmemory/ui/js/graph-core.js +0 -447
package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
package/src/superlocalmemory/ui/js/learning.js +0 -435
package/src/superlocalmemory/ui/js/patterns.js +0 -93
package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
package/src/superlocalmemory.egg-info/requires.txt +0 -58
package/src/superlocalmemory.egg-info/top_level.txt +0 -1

package/src/superlocalmemory/learning/consolidation_worker.py CHANGED

@@ -2,15 +2,24 @@
 # Licensed under AGPL-3.0-or-later - see LICENSE file
 # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
-"""Sleep-Time Consolidation Worker — background memory maintenance.
+"""Sleep-Time Consolidation Worker — **back-compat shim**.
-Runs periodically (every 6 hours or on-demand) to:
-1. Decay confidence on unused facts (floor 0.1)
-2. Deduplicate near-identical facts
-3. Auto-retrain the adaptive ranker when signal threshold is met
-4. Report consolidation stats
+As of v3.4.21 (F4.A Stage-8 H-01 fix), the 1344-LOC god-module was
+split into five cohesive files:
-Inspired by: Letta's sleep-time compute, neuroscience memory consolidation.
+  - ``consolidation_cycle.py``     — :class:`ConsolidationWorker`.
+  - ``pattern_miner.py``           — :func:`generate_patterns`.
+  - ``ranker_retrain_legacy.py``   — deprecated cold-start path.
+  - ``ranker_retrain_online.py``   — LLD-10 candidate seams.
+  - ``ranker_common.py``           — training-matrix + NDCG helpers.
+This shim exists so that the 3830 live tests + dashboard + MCP tools +
+managed server routes keep importing from
+``superlocalmemory.learning.consolidation_worker`` with zero churn.
+Tests that ``monkeypatch.setattr(cw_mod, "_train_booster", fake)`` work
+unchanged because :func:`_run_shadow_cycle` is defined here and resolves
+its helper names through *this* module's globals.
 Part of Qualixar | Author: Varun Pratap Bhardwaj
 """
@@ -19,521 +28,180 @@ from __future__ import annotations
 import logging
 import sqlite3
-from datetime import datetime, timezone
-from pathlib import Path
 logger = logging.getLogger(__name__)
-class ConsolidationWorker:
-    """Background memory maintenance worker.
-    Call `run()` periodically or via dashboard button.
-    All operations are safe — they improve quality without losing data.
+# ---------------------------------------------------------------------------
+# Re-exports — anything tests or production code reached for on the old
+# module stays reachable here under the same name.
+# ---------------------------------------------------------------------------
+from superlocalmemory.learning.consolidation_cycle import (  # noqa: E402
+    ConsolidationWorker,
+)
+from superlocalmemory.learning.ranker_common import (  # noqa: E402
+    _build_training_matrix,
+    _compute_eval_metrics,
+    _shadow_test_improved,
+)
+from superlocalmemory.learning.ranker_retrain_legacy import (  # noqa: E402
+    _retrain_ranker_impl,
+    _LEGACY_RETRAIN_DEPRECATED,
+)
+from superlocalmemory.learning.ranker_retrain_online import (  # noqa: E402
+    RETRAIN_HOURS_THRESHOLD,
+    RETRAIN_HYPERPARAM_CAPS,
+    RETRAIN_MODEL_SIZE_BYTES_CAP,
+    RETRAIN_NEW_OUTCOMES_THRESHOLD,
+    RETRAIN_WALL_TIME_BUDGET_SEC,
+    RetrainWallTimeExceeded,
+    _check_rollback,
+    _feature_names,
+    _fetch_training_rows,
+    _measure_serialized_size,
+    _persist_candidate,
+    _promote_candidate,
+    _train_booster,
+)
+__all__ = (
+    "ConsolidationWorker",
+    "RetrainWallTimeExceeded",
+    "RETRAIN_HYPERPARAM_CAPS",
+    "RETRAIN_WALL_TIME_BUDGET_SEC",
+    "RETRAIN_MODEL_SIZE_BYTES_CAP",
+    "RETRAIN_NEW_OUTCOMES_THRESHOLD",
+    "RETRAIN_HOURS_THRESHOLD",
+    "_retrain_ranker_impl",
+    "_LEGACY_RETRAIN_DEPRECATED",
+    "_build_training_matrix",
+    "_compute_eval_metrics",
+    "_shadow_test_improved",
+    "_feature_names",
+    "_fetch_training_rows",
+    "_measure_serialized_size",
+    "_train_booster",
+    "_persist_candidate",
+    "_promote_candidate",
+    "_check_rollback",
+    "_run_shadow_cycle",
+)
+# ---------------------------------------------------------------------------
+# Orchestrator — defined HERE so tests patching ``cw_mod._train_booster``
+# actually intercept the helper call.
+# ---------------------------------------------------------------------------
+def _run_shadow_cycle(
+    *,
+    memory_db_path: str,
+    learning_db_path: str,
+    profile_id: str,
+) -> dict:
+    """Top-level online retrain cycle — runs inside the consolidation
+    worker.
+    Orchestrates: fetch rows → train → size-check → persist candidate
+    (NOT auto-promote). Promotion happens separately once the live
+    shadow-router accumulates enough observations (see
+    :mod:`superlocalmemory.core.shadow_router`).
+    Helper functions are looked up via this module's namespace so test
+    monkey-patches on ``consolidation_worker`` take effect without any
+    test churn.
+    Returns a dict with keys:
+      * ``aborted``: reason string if aborted (``'insufficient_data'``,
+        ``'model_too_large'``, ``'wall_time_exceeded'``, ``'train_error'``).
+      * ``candidate_persisted``: True if a candidate row was written.
+      * ``promoted``: False (always — promotion is a separate step).
+      * ``metrics``: training metrics dict on success.
     """
-    def __init__(self, memory_db: str | Path, learning_db: str | Path) -> None:
-        self._memory_db = str(memory_db)
-        self._learning_db = str(learning_db)
-    def run(self, profile_id: str, dry_run: bool = False) -> dict:
-        """Run full consolidation cycle. Returns stats."""
-        stats = {
-            "decayed": 0,
-            "deduped": 0,
-            "retrained": False,
-            "signal_count": 0,
-            "ranker_phase": 1,
-            "timestamp": datetime.now(timezone.utc).isoformat(),
-        }
-        # 1. Confidence decay on unused facts
-        try:
-            from superlocalmemory.learning.signals import LearningSignals
-            decayed = LearningSignals.decay_confidence(
-                self._memory_db, profile_id, rate=0.001,
-            )
-            stats["decayed"] = decayed
-            if not dry_run:
-                logger.info("Confidence decay: %d facts affected", decayed)
-        except Exception as exc:
-            logger.debug("Decay failed: %s", exc)
-        # 2. Deduplication (mark near-identical facts)
-        try:
-            deduped = self._deduplicate(profile_id, dry_run)
-            stats["deduped"] = deduped
-        except Exception as exc:
-            logger.debug("Dedup failed: %s", exc)
-        # 3. Generate behavioral patterns from memories
-        try:
-            patterns = self._generate_patterns(profile_id, dry_run)
-            stats["patterns_generated"] = patterns
-        except Exception as exc:
-            logger.debug("Pattern generation failed: %s", exc)
-        # 4. Recompute graph intelligence (v3.4.2: wired into learning pipeline)
-        try:
-            from superlocalmemory.core.graph_analyzer import GraphAnalyzer
-            conn_ga = sqlite3.connect(self._memory_db, timeout=10)
-            conn_ga.execute("PRAGMA busy_timeout=5000")
-            conn_ga.row_factory = sqlite3.Row
-            class _DBProxy:
-                """Minimal DB proxy for GraphAnalyzer compatibility."""
-                def __init__(self, connection: sqlite3.Connection) -> None:
-                    self._conn = connection
-                def execute(self, sql: str, params: tuple = ()) -> list:
-                    cursor = self._conn.execute(sql, params)
-                    if sql.strip().upper().startswith(("INSERT", "UPDATE", "DELETE", "ALTER", "CREATE")):
-                        self._conn.commit()
-                        return []
-                    return cursor.fetchall()
-            ga = GraphAnalyzer(_DBProxy(conn_ga))
-            if not dry_run:
-                ga_result = ga.compute_and_store(profile_id)
-                stats["graph_nodes"] = ga_result.get("node_count", 0)
-                stats["graph_communities"] = ga_result.get("community_count", 0)
-                logger.info(
-                    "Graph analysis: %d nodes, %d communities",
-                    stats["graph_nodes"], stats["graph_communities"],
-                )
-            conn_ga.close()
-        except Exception as exc:
-            logger.debug("Graph analysis failed: %s", exc)
-        # 5. Check if ranker should retrain
-        try:
-            from superlocalmemory.learning.feedback import FeedbackCollector
-            collector = FeedbackCollector(Path(self._learning_db))
-            signal_count = collector.get_feedback_count(profile_id)
-            stats["signal_count"] = signal_count
-            stats["ranker_phase"] = 1 if signal_count < 50 else (2 if signal_count < 200 else 3)
-            # Auto-retrain at threshold crossings
-            if signal_count >= 200 and not dry_run:
-                retrained = self._retrain_ranker(profile_id, signal_count)
-                stats["retrained"] = retrained
-        except Exception as exc:
-            logger.debug("Retrain check failed: %s", exc)
-        # 6. Entity compilation (v3.4.3: compiled truth per entity)
-        if not dry_run:
-            try:
-                from superlocalmemory.learning.entity_compiler import EntityCompiler
-                from superlocalmemory.core.config import SLMConfig
-                config = SLMConfig.load()
-                compiler = EntityCompiler(self._memory_db, config)
-                ec_result = compiler.compile_all(profile_id)
-                stats["entities_compiled"] = ec_result.get("compiled", 0)
-                if ec_result["compiled"] > 0:
-                    logger.info("Entity compilation: %d entities compiled",
-                                ec_result["compiled"])
-            except Exception as exc:
-                logger.debug("Entity compilation failed: %s", exc)
-        return stats
-    def _deduplicate(self, profile_id: str, dry_run: bool) -> int:
-        """Find and mark near-duplicate facts.
-        Uses content similarity (exact prefix match for now).
-        Does NOT delete — marks with lower confidence.
-        """
-        try:
-            conn = sqlite3.connect(self._memory_db, timeout=10)
-            conn.execute("PRAGMA busy_timeout=5000")
-            conn.row_factory = sqlite3.Row
-            rows = conn.execute(
-                "SELECT fact_id, content FROM atomic_facts "
-                "WHERE profile_id = ? ORDER BY created_at",
-                (profile_id,),
-            ).fetchall()
-            seen_prefixes: dict[str, str] = {}
-            duplicates = []
-            for r in rows:
-                d = dict(r)
-                prefix = d["content"][:100].strip().lower()
-                if prefix in seen_prefixes:
-                    duplicates.append(d["fact_id"])
-                else:
-                    seen_prefixes[prefix] = d["fact_id"]
-            if duplicates and not dry_run:
-                for fid in duplicates:
-                    conn.execute(
-                        "UPDATE atomic_facts SET confidence = MAX(0.1, confidence * 0.5) "
-                        "WHERE fact_id = ?",
-                        (fid,),
-                    )
-                conn.commit()
-            conn.close()
-            return len(duplicates)
-        except Exception:
-            return 0
-    def _generate_patterns(self, profile_id: str, dry_run: bool) -> int:
-        """Mine behavioral patterns from ALL memory sources.
-        v3.4.1: Expanded from 3 to 7 pattern types. No 500-fact cap.
-        Analyzes: facts, signals, co-retrieval edges, channel credits,
-        entities, sessions, graph communities.
-        """
-        try:
-            from superlocalmemory.learning.behavioral import BehavioralPatternStore
-            import re
-            from collections import Counter, defaultdict
-            conn = sqlite3.connect(self._memory_db, timeout=10)
-            conn.execute("PRAGMA busy_timeout=5000")
-            conn.row_factory = sqlite3.Row
-            # v3.4.1: No cap — analyze ALL facts
-            facts = conn.execute(
-                "SELECT fact_id, content, fact_type, created_at, session_id, "
-                "confidence, canonical_entities_json "
-                "FROM atomic_facts "
-                "WHERE profile_id = ? AND lifecycle = 'active' "
-                "ORDER BY created_at DESC",
-                (profile_id,),
-            ).fetchall()
-            if len(facts) < 5:
-                conn.close()
-                return 0
-            store = BehavioralPatternStore(self._learning_db)
-            generated = 0
-            # ── 1. Tech Preferences (expanded keyword list) ───────────
-            tech_keywords = {
-                "python": "Python", "javascript": "JavaScript",
-                "typescript": "TypeScript", "react": "React",
-                "vue": "Vue", "angular": "Angular",
-                "postgresql": "PostgreSQL", "mysql": "MySQL",
-                "sqlite": "SQLite", "docker": "Docker",
-                "kubernetes": "Kubernetes", "aws": "AWS",
-                "azure": "Azure", "gcp": "GCP",
-                "node": "Node.js", "fastapi": "FastAPI",
-                "django": "Django", "flask": "Flask",
-                "rust": "Rust", "go": "Go", "java": "Java",
-                "git": "Git", "npm": "npm", "pip": "pip",
-                "langchain": "LangChain", "ollama": "Ollama",
-                "pytorch": "PyTorch", "claude": "Claude",
-                "openai": "OpenAI", "anthropic": "Anthropic",
-                "redis": "Redis", "mongodb": "MongoDB",
-                "graphql": "GraphQL", "nextjs": "Next.js",
-                "terraform": "Terraform", "nginx": "Nginx",
-                "linux": "Linux", "macos": "macOS",
-                "vscode": "VS Code", "neovim": "Neovim",
-            }
-            tech_counts: Counter = Counter()
-            for f in facts:
-                content = dict(f)["content"].lower()
-                for keyword, label in tech_keywords.items():
-                    if keyword in content:
-                        tech_counts[label] += 1
-            for tech, count in tech_counts.most_common(20):
-                if count >= 2 and not dry_run:
-                    confidence = min(1.0, count / max(len(facts) * 0.1, 10))
-                    store.record_pattern(
-                        profile_id=profile_id,
-                        pattern_type="tech_preference",
-                        data={"topic": tech, "pattern_key": tech,
-                              "value": tech, "key": "tech",
-                              "evidence": count},
-                        success_rate=confidence,
-                        confidence=confidence,
-                    )
-                    generated += 1
-            # ── 2. Topic Interests (word frequency) ───────────────────
-            stopwords = frozenset({
-                "the", "is", "a", "an", "in", "on", "at", "to", "for",
-                "of", "and", "or", "not", "with", "that", "this", "was",
-                "are", "be", "has", "had", "have", "from", "by", "it",
-                "its", "as", "but", "were", "been", "being", "would",
-                "could", "should", "will", "may", "might", "can", "do",
-                "does", "did", "about", "into", "over", "after", "before",
-                "then", "than", "also", "just", "like", "more", "some",
-                "only", "other", "such", "each", "every", "both", "most",
-            })
-            word_counts: Counter = Counter()
-            for f in facts:
-                words = re.findall(r'\b[a-zA-Z]{4,}\b', dict(f)["content"].lower())
-                for w in words:
-                    if w not in stopwords:
-                        word_counts[w] += 1
-            for topic, count in word_counts.most_common(15):
-                if count >= 3 and not dry_run:
-                    confidence = min(1.0, count / max(len(facts) * 0.05, 15))
-                    store.record_pattern(
-                        profile_id=profile_id,
-                        pattern_type="interest",
-                        data={"topic": topic, "pattern_key": topic,
-                              "count": count, "evidence": count},
-                        success_rate=confidence,
-                        confidence=confidence,
-                    )
-                    generated += 1
-            # ── 3. Temporal Activity Patterns ─────────────────────────
-            hour_counts: Counter = Counter()
-            for f in facts:
-                created = dict(f).get("created_at", "")
-                try:
-                    if "T" in created:
-                        hour = int(created.split("T")[1][:2])
-                    elif " " in created:
-                        hour = int(created.split(" ")[1][:2])
-                    else:
-                        continue
-                    period = ("morning" if 6 <= hour < 12 else
-                              "afternoon" if 12 <= hour < 18 else
-                              "evening" if 18 <= hour < 22 else "night")
-                    hour_counts[period] += 1
-                except (ValueError, IndexError):
-                    pass
-            total_hours = sum(hour_counts.values())
-            for period, count in hour_counts.most_common():
-                if count >= 2 and total_hours > 0 and not dry_run:
-                    pct = round(count / total_hours * 100)
-                    store.record_pattern(
-                        profile_id=profile_id,
-                        pattern_type="temporal",
-                        data={"topic": period, "pattern_key": period,
-                              "value": f"{period} ({pct}%)",
-                              "evidence": count, "key": period,
-                              "distribution": dict(hour_counts)},
-                        success_rate=pct / 100,
-                        confidence=min(1.0, count / max(total_hours * 0.1, 5)),
-                    )
-                    generated += 1
-            # ── 4. Entity Preferences (v3.4.1 NEW) ───────────────────
-            import json as _json
-            entity_counts: Counter = Counter()
-            for f in facts:
-                raw = dict(f).get("canonical_entities_json", "")
-                if raw:
-                    try:
-                        for ent in _json.loads(raw):
-                            entity_counts[ent] += 1
-                    except (ValueError, TypeError):
-                        pass
-            # v3.4.7: Resolve entity IDs to readable canonical names
-            entity_names: dict = {}
-            try:
-                eid_list = list(entity_counts.keys())
-                if eid_list:
-                    placeholders = ",".join("?" * len(eid_list))
-                    name_rows = conn.execute(
-                        f"SELECT entity_id, canonical_name FROM canonical_entities "
-                        f"WHERE entity_id IN ({placeholders})",
-                        eid_list,
-                    ).fetchall()
-                    entity_names = {dict(r)["entity_id"]: dict(r)["canonical_name"] for r in name_rows}
-            except Exception:
-                pass
-            for entity, count in entity_counts.most_common(15):
-                if count >= 3 and not dry_run:
-                    readable = entity_names.get(entity, entity)
-                    confidence = min(1.0, count / max(len(facts) * 0.05, 10))
-                    store.record_pattern(
-                        profile_id=profile_id,
-                        pattern_type="entity_preferences",
-                        data={"topic": readable, "pattern_key": f"entity:{readable}",
-                              "value": readable, "evidence": count,
-                              "source": "entity_frequency"},
-                        success_rate=confidence,
-                        confidence=confidence,
-                    )
-                    generated += 1
-            # ── 5. Session Activity Patterns (v3.4.1 NEW) ────────────
-            session_counts: Counter = Counter()
-            for f in facts:
-                sid = dict(f).get("session_id", "")
-                if sid:
-                    session_counts[sid] += 1
-            if session_counts:
-                avg_facts_per_session = sum(session_counts.values()) / len(session_counts)
-                heavy_sessions = [s for s, c in session_counts.items() if c > avg_facts_per_session * 2]
-                if heavy_sessions and not dry_run:
-                    store.record_pattern(
-                        profile_id=profile_id,
-                        pattern_type="session_activity",
-                        data={"pattern_key": "heavy_session_usage",
-                              "value": f"{len(heavy_sessions)} intensive sessions",
-                              "evidence": len(heavy_sessions),
-                              "avg_facts": round(avg_facts_per_session, 1),
-                              "total_sessions": len(session_counts)},
-                        success_rate=0.8,
-                        confidence=min(1.0, len(heavy_sessions) / 5),
-                    )
-                    generated += 1
-            # ── 6. Fact Type Distribution (v3.4.1 NEW) ────────────────
-            type_counts: Counter = Counter()
-            for f in facts:
-                ft = dict(f).get("fact_type", "semantic")
-                type_counts[ft] += 1
-            total_ft = sum(type_counts.values())
-            if total_ft > 0 and not dry_run:
-                dominant_type = type_counts.most_common(1)[0]
-                pct = round(dominant_type[1] / total_ft * 100)
-                store.record_pattern(
-                    profile_id=profile_id,
-                    pattern_type="fact_type_distribution",
-                    data={"pattern_key": "memory_style",
-                          "value": f"{dominant_type[0]} dominant ({pct}%)",
-                          "evidence": dominant_type[1],
-                          "distribution": dict(type_counts)},
-                    success_rate=pct / 100,
-                    confidence=min(1.0, dominant_type[1] / 20),
-                )
-                generated += 1
-            # ── 7. Channel Performance (v3.4.1 NEW — from signals) ────
-            try:
-                learn_conn = sqlite3.connect(self._learning_db, timeout=10)
-                learn_conn.row_factory = sqlite3.Row
-                # Retrieval usage patterns from learning_feedback
-                channel_rows = learn_conn.execute(
-                    "SELECT channel, COUNT(*) AS cnt, "
-                    "AVG(signal_value) AS avg_signal "
-                    "FROM learning_feedback "
-                    "WHERE profile_id = ? "
-                    "GROUP BY channel ORDER BY cnt DESC",
-                    (profile_id,),
-                ).fetchall()
-                for row in channel_rows:
-                    d = dict(row)
-                    ch = d.get("channel", "unknown")
-                    cnt = d.get("cnt", 0)
-                    avg_sig = round(float(d.get("avg_signal", 0) or 0), 3)
-                    if cnt >= 5 and not dry_run:
-                        store.record_pattern(
-                            profile_id=profile_id,
-                            pattern_type="channel_performance",
-                            data={"pattern_key": f"channel:{ch}",
-                                  "value": f"{ch} ({cnt} hits, {avg_sig} avg)",
-                                  "evidence": cnt,
-                                  "avg_signal": avg_sig},
-                            success_rate=avg_sig,
-                            confidence=min(1.0, cnt / 50),
-                        )
-                        generated += 1
-                # Co-retrieval cluster patterns
-                try:
-                    coret_rows = learn_conn.execute(
-                        "SELECT fact_a, fact_b, co_access_count "
-                        "FROM co_retrieval_edges "
-                        "WHERE profile_id = ? AND co_access_count >= 3 "
-                        "ORDER BY co_access_count DESC LIMIT 20",
-                        (profile_id,),
-                    ).fetchall()
-                    if coret_rows and not dry_run:
-                        store.record_pattern(
-                            profile_id=profile_id,
-                            pattern_type="co_retrieval_clusters",
-                            data={"pattern_key": "co_retrieval_clusters",
-                                  "value": f"{len(coret_rows)} strong fact pairs",
-                                  "evidence": len(coret_rows),
-                                  "top_pair_count": dict(coret_rows[0]).get("co_access_count", 0) if coret_rows else 0},
-                            success_rate=0.7,
-                            confidence=min(1.0, len(coret_rows) / 10),
-                        )
-                        generated += 1
-                except Exception:
-                    pass
-                learn_conn.close()
-            except Exception as exc:
-                logger.debug("Signal pattern mining failed: %s", exc)
-            # ── 8. Community Membership (v3.4.1 NEW — from graph) ─────
-            try:
-                comm_rows = conn.execute(
-                    "SELECT community_id, COUNT(*) AS cnt "
-                    "FROM fact_importance "
-                    "WHERE profile_id = ? AND community_id IS NOT NULL "
-                    "GROUP BY community_id ORDER BY cnt DESC",
-                    (profile_id,),
-                ).fetchall()
-                if comm_rows and not dry_run:
-                    total_comm = sum(dict(r)["cnt"] for r in comm_rows)
-                    store.record_pattern(
-                        profile_id=profile_id,
-                        pattern_type="knowledge_structure",
-                        data={"pattern_key": "knowledge_structure",
-                              "value": f"{len(comm_rows)} topic communities, {total_comm} classified facts",
-                              "evidence": total_comm,
-                              "community_count": len(comm_rows)},
-                        success_rate=0.8,
-                        confidence=min(1.0, len(comm_rows) / 5),
-                    )
-                    generated += 1
-            except Exception:
-                pass
-            conn.close()
-            logger.info(
-                "Pattern mining: %d patterns generated for profile %s "
-                "from %d facts",
-                generated, profile_id, len(facts),
-            )
-            return generated
-        except Exception as exc:
-            logger.warning("Pattern generation error: %s", exc)
-            return 0
-    def _retrain_ranker(self, profile_id: str, signal_count: int) -> bool:
-        """Retrain the adaptive ranker from accumulated feedback."""
-        try:
-            from superlocalmemory.learning.feedback import FeedbackCollector
-            from superlocalmemory.learning.ranker import AdaptiveRanker
-            collector = FeedbackCollector(Path(self._learning_db))
-            feedback = collector.get_feedback(profile_id, limit=500)
-            if len(feedback) < 200:
-                return False
-            # Build training data from feedback
-            training_data = []
-            for f in feedback:
-                label = f.get("signal_value", 0.5)
-                training_data.append({
-                    "features": {"signal_value": label},
-                    "label": label,
-                })
-            ranker = AdaptiveRanker(signal_count=signal_count)
-            trained = ranker.train(training_data)
-            if trained:
-                logger.info("Ranker retrained with %d examples (Phase 3)", len(training_data))
-            return trained
-        except Exception as exc:
-            logger.debug("Retrain failed: %s", exc)
-            return False
+    out: dict = {
+        "aborted": None, "candidate_persisted": False,
+        "promoted": False, "metrics": None,
+    }
+    try:
+        rows, _qids = _fetch_training_rows(learning_db_path, profile_id)
+    except Exception as exc:
+        logger.debug("fetch_training_rows failed: %s", exc)
+        out["aborted"] = "fetch_error"
+        return out
+    if len(rows) < 20:
+        out["aborted"] = "insufficient_data"
+        return out
+    # Load prior active for in-sample shadow.
+    try:
+        from superlocalmemory.learning.database import LearningDatabase
+        db = LearningDatabase(learning_db_path)
+        prior_row = db.load_active_model(profile_id)
+    except Exception:
+        prior_row = None
+    feature_names = _feature_names()
+    try:
+        booster, metrics = _train_booster(
+            learning_db_path, profile_id,
+            training_rows=rows, feature_names=feature_names,
+            prior_row=prior_row,
+        )
+    except RetrainWallTimeExceeded as exc:
+        out["aborted"] = "wall_time_exceeded"
+        out["metrics"] = {"wall_time_sec": exc.elapsed_sec}
+        return out
+    except Exception as exc:
+        logger.debug("train_booster failed: %s", exc)
+        out["aborted"] = "train_error"
+        return out
+    # Model-size guardrail (LLD-10 §3.2 post-train check).
+    size_bytes = _measure_serialized_size(booster)
+    if size_bytes > RETRAIN_MODEL_SIZE_BYTES_CAP:
+        logger.warning(
+            "retrain: candidate %.2f MB exceeds %.1f MB cap — rejecting",
+            size_bytes / 1e6, RETRAIN_MODEL_SIZE_BYTES_CAP / 1e6,
+        )
+        out["aborted"] = "model_too_large"
+        out["metrics"] = metrics
+        return out
+    # In-sample shadow gate — cheap filter before spending live recalls.
+    if prior_row is not None:
+        if not _shadow_test_improved(
+            prior_row, booster, rows, feature_names,
+        ):
+            out["aborted"] = "insample_shadow_fail"
+            out["metrics"] = metrics
+            return out
+    try:
+        state_bytes = booster.model_to_string().encode("utf-8")
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("model serialise failed: %s", exc)
+        out["aborted"] = "serialise_error"
+        return out
+    try:
+        _persist_candidate(
+            learning_db_path, profile_id=profile_id,
+            state_bytes=state_bytes, feature_names=feature_names,
+            trained_on_count=len(rows), metrics=metrics,
+            shadow_results={"in_sample_pass": prior_row is not None},
+        )
+    except sqlite3.Error as exc:
+        logger.warning("persist_candidate failed: %s", exc)
+        out["aborted"] = "persist_error"
+        return out
+    out["candidate_persisted"] = True
+    out["promoted"] = False  # Promotion is a separate live-shadow step.
+    out["metrics"] = metrics
+    return out