superlocalmemory 3.4.18 → 3.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +3 -2
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +219 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/embeddings.py +8 -2
  31. package/src/superlocalmemory/core/engine.py +38 -2
  32. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  33. package/src/superlocalmemory/core/ram_lock.py +111 -0
  34. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  35. package/src/superlocalmemory/core/recall_worker.py +8 -3
  36. package/src/superlocalmemory/core/security_primitives.py +635 -0
  37. package/src/superlocalmemory/core/shadow_router.py +319 -0
  38. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  39. package/src/superlocalmemory/core/slmignore.py +125 -0
  40. package/src/superlocalmemory/core/topic_signature.py +143 -0
  41. package/src/superlocalmemory/core/worker_pool.py +14 -3
  42. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  43. package/src/superlocalmemory/evolution/budget.py +321 -0
  44. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  45. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  46. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  47. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  48. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  49. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  50. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  51. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  52. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  53. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  54. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  55. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  56. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  57. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  58. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  59. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  60. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  61. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  62. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  63. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  64. package/src/superlocalmemory/infra/backup.py +3 -3
  65. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  66. package/src/superlocalmemory/infra/event_bus.py +2 -2
  67. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  68. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  69. package/src/superlocalmemory/learning/bandit.py +526 -0
  70. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  71. package/src/superlocalmemory/learning/behavioral.py +53 -1
  72. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  73. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  74. package/src/superlocalmemory/learning/database.py +256 -0
  75. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  76. package/src/superlocalmemory/learning/ensemble.py +300 -0
  77. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  78. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  79. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  80. package/src/superlocalmemory/learning/labeler.py +87 -0
  81. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  82. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  83. package/src/superlocalmemory/learning/model_cache.py +269 -0
  84. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  85. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  86. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  87. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  88. package/src/superlocalmemory/learning/ranker.py +225 -81
  89. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  91. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  92. package/src/superlocalmemory/learning/reward.py +777 -0
  93. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  94. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  95. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  96. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  97. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  98. package/src/superlocalmemory/learning/signals.py +314 -0
  99. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  100. package/src/superlocalmemory/mcp/server.py +5 -5
  101. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  102. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  103. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  104. package/src/superlocalmemory/retrieval/engine.py +52 -0
  105. package/src/superlocalmemory/retrieval/reranker.py +4 -2
  106. package/src/superlocalmemory/server/api.py +2 -2
  107. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  108. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  109. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  110. package/src/superlocalmemory/server/routes/backup.py +36 -13
  111. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  112. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  113. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  114. package/src/superlocalmemory/server/routes/events.py +2 -2
  115. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  116. package/src/superlocalmemory/server/routes/learning.py +192 -7
  117. package/src/superlocalmemory/server/routes/memories.py +189 -1
  118. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  119. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  120. package/src/superlocalmemory/server/routes/token.py +88 -0
  121. package/src/superlocalmemory/server/routes/ws.py +5 -5
  122. package/src/superlocalmemory/server/security_middleware.py +13 -7
  123. package/src/superlocalmemory/server/ui.py +2 -2
  124. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  125. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  126. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  127. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  128. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  129. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  130. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  131. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  132. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  133. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  134. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  135. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  136. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  137. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  138. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  139. package/src/superlocalmemory/storage/models.py +4 -0
  140. package/src/superlocalmemory/ui/css/brain.css +409 -0
  141. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  142. package/src/superlocalmemory/ui/index.html +459 -1345
  143. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  144. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  145. package/src/superlocalmemory/ui/js/init.js +48 -39
  146. package/src/superlocalmemory/ui/js/memories.js +88 -2
  147. package/src/superlocalmemory/ui/js/modal.js +71 -1
  148. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  149. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  150. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  151. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  152. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  153. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  154. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  155. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  156. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  157. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  158. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  159. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  160. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  161. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  162. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  163. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  164. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  165. package/src/superlocalmemory/ui/js/learning.js +0 -435
  166. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  167. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  168. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  169. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  170. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  171. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  172. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,47 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.21 — F4.A Stage-8 H-01 fix
4
+
5
+ """Static dictionaries used by ``pattern_miner`` — extracted so the
6
+ main module stays under the 400-LOC cap.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+
12
# Lower-case token -> canonical display name for technologies that
# ``pattern_miner`` recognises.  Insertion order is kept as-is.
TECH_KEYWORDS: dict[str, str] = {
    "python": "Python",
    "javascript": "JavaScript",
    "typescript": "TypeScript",
    "react": "React",
    "vue": "Vue",
    "angular": "Angular",
    "postgresql": "PostgreSQL",
    "mysql": "MySQL",
    "sqlite": "SQLite",
    "docker": "Docker",
    "kubernetes": "Kubernetes",
    "aws": "AWS",
    "azure": "Azure",
    "gcp": "GCP",
    "node": "Node.js",
    "fastapi": "FastAPI",
    "django": "Django",
    "flask": "Flask",
    "rust": "Rust",
    "go": "Go",
    "java": "Java",
    "git": "Git",
    "npm": "npm",
    "pip": "pip",
    "langchain": "LangChain",
    "ollama": "Ollama",
    "pytorch": "PyTorch",
    "claude": "Claude",
    "openai": "OpenAI",
    "anthropic": "Anthropic",
    "redis": "Redis",
    "mongodb": "MongoDB",
    "graphql": "GraphQL",
    "nextjs": "Next.js",
    "terraform": "Terraform",
    "nginx": "Nginx",
    "linux": "Linux",
    "macos": "macOS",
    "vscode": "VS Code",
    "neovim": "Neovim",
}


# Common English function words, stored as an immutable set for O(1)
# membership tests.
STOPWORDS: frozenset[str] = frozenset(
    (
        "the is a an in on at to for "
        "of and or not with that this was "
        "are be has had have from by it "
        "its as but were been being would "
        "could should will may might can do "
        "does did about into over after before "
        "then than also just like more some "
        "only other such each every both most"
    ).split()
)


__all__ = ("TECH_KEYWORDS", "STOPWORDS")
@@ -4,11 +4,19 @@
4
4
 
5
5
  """3-phase adaptive ranker — from heuristic to ML.
6
6
 
7
+ LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
8
+ Sections 4.4 + 4.5.
9
+
7
10
  Phase 1: cross-encoder score only (cold start)
8
11
  Phase 2: heuristic boosts (some data)
9
- Phase 3: LightGBM model (enough training data)
12
+ Phase 3: LightGBM **lambdarank** Booster (native, not LGBMRanker sklearn
13
+ wrapper) scoring on numpy feature matrices.
10
14
 
11
- Transitions are automatic based on accumulated training data.
15
+ Transitions are automatic based on accumulated training data. Feature-name
16
+ drift is handled per LLD-02 §4.5 (``drift_mode``):
17
+ - ``aligned`` — score normally.
18
+ - ``subset`` — pad missing features with 0.0 in FEATURE_NAMES order.
19
+ - ``unknown`` — refuse to score; fall back to pre-model order.
12
20
  """
13
21
 
14
22
  from __future__ import annotations
@@ -17,7 +25,12 @@ import logging
17
25
  import math
18
26
  from typing import Any
19
27
 
20
- from superlocalmemory.learning.features import FeatureExtractor, FeatureVector, FEATURE_DIM
28
+ from superlocalmemory.learning.features import (
29
+ FEATURE_DIM,
30
+ FEATURE_NAMES,
31
+ FeatureExtractor,
32
+ FeatureVector,
33
+ )
21
34
 
22
35
  logger = logging.getLogger(__name__)
23
36
 
@@ -29,15 +42,32 @@ PHASE_3_THRESHOLD = 200 # signals needed to enter Phase 3
29
42
  class AdaptiveRanker:
30
43
  """3-phase adaptive re-ranker for V3 retrieval results."""
31
44
 
32
- def __init__(self, signal_count: int = 0, model_state: bytes | None = None) -> None:
45
+ def __init__(
46
+ self,
47
+ signal_count: int = 0,
48
+ model_state: bytes | None = None,
49
+ *,
50
+ active_model: Any = None,
51
+ ) -> None:
52
+ """Build a ranker.
53
+
54
+ ``active_model`` (``model_cache.ActiveModel``) is preferred when
55
+ available — it carries verified booster + feature_names. The legacy
56
+ ``model_state`` bytes path remains for backward compatibility with
57
+ 3.4.20 callers; it does NOT perform SHA-256 verification and should
58
+ not be used by the 3.4.21 recall path.
59
+ """
33
60
  self._signal_count = signal_count
34
- self._model = None
35
- if model_state:
36
- self._load_model(model_state)
61
+ self._active = active_model
62
+ # Back-compat: only fill in from raw bytes when no active_model given.
63
+ if active_model is None and model_state:
64
+ self._load_legacy_bytes(model_state)
65
+
66
+ # --- public properties ---------------------------------------------
37
67
 
38
68
  @property
39
69
  def phase(self) -> int:
40
- if self._signal_count >= PHASE_3_THRESHOLD and self._model is not None:
70
+ if self._signal_count >= PHASE_3_THRESHOLD and self._active is not None:
41
71
  return 3
42
72
  if self._signal_count >= PHASE_2_THRESHOLD:
43
73
  return 2
@@ -51,6 +81,12 @@ class AdaptiveRanker:
51
81
  def signal_count(self, value: int) -> None:
52
82
  self._signal_count = value
53
83
 
84
+ @property
85
+ def active_model(self) -> Any:
86
+ return self._active
87
+
88
+ # --- re-rank entry points ------------------------------------------
89
+
54
90
  def rerank(self, results: list[dict], query_context: dict) -> list[dict]:
55
91
  """Re-rank retrieval results based on current phase."""
56
92
  if not results:
@@ -58,98 +94,206 @@ class AdaptiveRanker:
58
94
 
59
95
  if self.phase == 3:
60
96
  return self._rerank_ml(results, query_context)
61
- elif self.phase == 2:
97
+ if self.phase == 2:
62
98
  return self._rerank_heuristic(results, query_context)
63
- else:
64
- return self._rerank_baseline(results)
99
+ return self._rerank_baseline(results)
65
100
 
66
- def train(self, training_data: list[dict]) -> bool:
67
- """Train LightGBM model on labeled data. Returns True if model was trained."""
68
- if len(training_data) < PHASE_3_THRESHOLD:
69
- return False
101
+ def rank(self, candidates: list, query_context: dict) -> list:
102
+ """LLD-02 §4.5 native inference path.
70
103
 
71
- try:
72
- import lightgbm as lgb
73
- except ImportError:
74
- logger.warning("LightGBM not installed. Phase 3 ranking unavailable.")
75
- return False
104
+ Accepts an iterable of objects that implement ``to_result_dict()``
105
+ (the signal-pipeline candidates) AND plain dicts (legacy).
106
+ """
107
+ if self._active is None or not candidates:
108
+ return list(candidates)
76
109
 
77
- features_list = []
78
- labels = []
79
- for item in training_data:
80
- fv = item.get("features", {})
81
- label = item.get("label", 0.0)
82
- # Convert feature dict to ordered list
83
- vec = [fv.get(name, 0.0) for name in FeatureExtractor.extract(
84
- {"channel_scores": {}, "fact": {}}, {"query_type": ""}
85
- ).features.keys()]
86
- # Simpler: just use the feature values in order
87
- from superlocalmemory.learning.features import FEATURE_NAMES
88
- vec = [float(fv.get(name, 0.0)) for name in FEATURE_NAMES]
89
- features_list.append(vec)
90
- labels.append(float(label))
91
-
92
- if not features_list:
93
- return False
110
+ # Build result dicts in a uniform shape.
111
+ result_dicts: list[dict] = []
112
+ for c in candidates:
113
+ if hasattr(c, "to_result_dict"):
114
+ result_dicts.append(c.to_result_dict())
115
+ elif isinstance(c, dict):
116
+ result_dicts.append(c)
117
+ else:
118
+ # Unknown candidate type — return original order.
119
+ return list(candidates)
94
120
 
95
- dataset = lgb.Dataset(features_list, label=labels)
96
- params = {
97
- "objective": "binary",
98
- "metric": "binary_logloss",
99
- "num_leaves": 15,
100
- "learning_rate": 0.1,
101
- "verbose": -1,
102
- }
103
- self._model = lgb.train(params, dataset, num_boost_round=50)
104
- logger.info("LightGBM model trained with %d examples", len(features_list))
105
- return True
121
+ from superlocalmemory.learning.model_cache import drift_mode
106
122
 
107
- def get_model_state(self) -> bytes | None:
108
- """Serialize model for persistence."""
109
- if self._model is None:
110
- return None
111
- return self._model.model_to_string().encode("utf-8")
123
+ mode = drift_mode(self._active)
124
+ if mode == "unknown":
125
+ logger.info(
126
+ "ranker.rank: feature-name drift unknown; "
127
+ "falling back to pre-model order",
128
+ )
129
+ return list(candidates)
130
+
131
+ # Order matrix by CURRENT FEATURE_NAMES; if subset, missing names
132
+ # pad with 0.0 (FeatureExtractor already does this via .get(name, 0)).
133
+ try:
134
+ import numpy as np
135
+ except ImportError: # pragma: no cover — numpy is required dep
136
+ return list(candidates)
137
+
138
+ try:
139
+ rows = []
140
+ for rd in result_dicts:
141
+ fv = FeatureExtractor.extract(rd, query_context)
142
+ rows.append(fv.to_list())
143
+ X = np.asarray(rows, dtype=np.float32)
144
+ scores = self._active.booster.predict(X)
145
+ except Exception as exc: # pragma: no cover — booster.predict path
146
+ logger.warning("ranker.rank: booster.predict failed: %s", exc)
147
+ return list(candidates)
148
+
149
+ order = np.argsort(-scores, kind="stable")
150
+ return [candidates[int(i)] for i in order]
112
151
 
113
- # -- Phase implementations --
152
+ # --- phase implementations -----------------------------------------
114
153
 
115
154
  def _rerank_baseline(self, results: list[dict]) -> list[dict]:
116
- """Phase 1: rank by cross-encoder score."""
117
- return sorted(results, key=lambda r: r.get("cross_encoder_score", r.get("score", 0)), reverse=True)
155
+ return sorted(
156
+ results,
157
+ key=lambda r: r.get("cross_encoder_score", r.get("score", 0)),
158
+ reverse=True,
159
+ )
118
160
 
119
- def _rerank_heuristic(self, results: list[dict], query_context: dict) -> list[dict]:
120
- """Phase 2: heuristic boosts on top of cross-encoder."""
121
- scored = []
161
+ def _rerank_heuristic(
162
+ self, results: list[dict], query_context: dict,
163
+ ) -> list[dict]:
164
+ scored: list[dict] = []
122
165
  for r in results:
123
166
  base = r.get("cross_encoder_score", r.get("score", 0))
124
- # Boosts
125
- recency_boost = 0.1 * math.exp(-r.get("fact", {}).get("age_days", 30) / 30)
126
- access_boost = 0.05 * min(r.get("fact", {}).get("access_count", 0) / 10, 1.0)
167
+ age_days = r.get("fact", {}).get("age_days", 30)
168
+ access_count = r.get("fact", {}).get("access_count", 0)
169
+ recency_boost = 0.1 * math.exp(-age_days / 30)
170
+ access_boost = 0.05 * min(access_count / 10, 1.0)
127
171
  trust_boost = 0.1 * (r.get("trust_score", 0.5) - 0.5)
128
172
  final = base + recency_boost + access_boost + trust_boost
129
173
  scored.append({**r, "_adaptive_score": final})
130
174
  return sorted(scored, key=lambda r: r["_adaptive_score"], reverse=True)
131
175
 
132
- def _rerank_ml(self, results: list[dict], query_context: dict) -> list[dict]:
133
- """Phase 3: LightGBM prediction."""
134
- if self._model is None:
176
+ def _rerank_ml(
177
+ self, results: list[dict], query_context: dict,
178
+ ) -> list[dict]:
179
+ """Phase 3 prediction via native Booster."""
180
+ if self._active is None: # pragma: no cover — guarded by phase()
135
181
  return self._rerank_heuristic(results, query_context)
136
182
 
137
- feature_vectors = FeatureExtractor.extract_batch(results, query_context)
138
- predictions = []
139
- for fv in feature_vectors:
140
- vec = [fv.to_list()]
141
- pred = self._model.predict(vec)[0]
142
- predictions.append(pred)
183
+ from superlocalmemory.learning.model_cache import drift_mode
143
184
 
144
- paired = list(zip(results, predictions))
145
- paired.sort(key=lambda x: x[1], reverse=True)
146
- return [r for r, _ in paired]
185
+ mode = drift_mode(self._active)
186
+ if mode == "unknown":
187
+ logger.info(
188
+ "ranker._rerank_ml: unknown drift → heuristic fallback",
189
+ )
190
+ return self._rerank_heuristic(results, query_context)
191
+
192
+ try:
193
+ import numpy as np
194
+ except ImportError: # pragma: no cover
195
+ return self._rerank_heuristic(results, query_context)
147
196
 
148
- def _load_model(self, state: bytes) -> None:
149
- """Load model from serialized state."""
150
197
  try:
151
- import lightgbm as lgb
152
- self._model = lgb.Booster(model_str=state.decode("utf-8"))
153
- except (ImportError, Exception) as exc:
154
- logger.warning("Could not load LightGBM model: %s", exc)
155
- self._model = None
198
+ feature_vectors = FeatureExtractor.extract_batch(
199
+ results, query_context,
200
+ )
201
+ X = np.asarray(
202
+ [fv.to_list() for fv in feature_vectors],
203
+ dtype=np.float32,
204
+ )
205
+ scores = self._active.booster.predict(X)
206
+ except Exception as exc: # pragma: no cover — booster.predict path
207
+ logger.warning("_rerank_ml failed: %s", exc)
208
+ return self._rerank_heuristic(results, query_context)
209
+
210
+ order = np.argsort(-scores, kind="stable")
211
+ return [results[int(i)] for i in order]
212
+
213
+ # --- legacy load path (back-compat) --------------------------------
214
+
215
def _load_legacy_bytes(self, state: bytes) -> None:
    """Best-effort booster load from raw bytes — NO SHA-256 verify.

    Retained for 3.4.20 callers; the 3.4.21 recall path goes through
    ``model_cache.load_active``, which enforces verification.  On any
    failure (LightGBM missing, undecodable or unparseable state) a
    warning is logged and the ranker is left without a model.
    """
    try:
        import lightgbm as lgb  # noqa: PLC0415

        model_text = state.decode("utf-8")
        booster = lgb.Booster(model_str=model_text)
    except Exception as exc:
        logger.warning("Legacy model load failed: %s", exc)
        self._active = None
        return

    from superlocalmemory.learning.model_cache import ActiveModel

    # No provenance exists for raw bytes, so the metadata fields are
    # placeholders and the current FEATURE_NAMES are assumed.
    legacy_fields = {
        "profile_id": "legacy",
        "booster": booster,
        "feature_names": tuple(FEATURE_NAMES),
        "trained_at": "",
        "sha256": "",
    }
    self._active = ActiveModel(**legacy_fields)
239
+
240
+ # --- legacy train() shim (3.4.20 API) ------------------------------
241
+
242
+ def train(self, training_data: list) -> bool:
243
+ """Deprecated — v3.4.21 training lives in ``consolidation_worker``.
244
+
245
+ Kept as a guard for 3.4.20 callers: returns False when
246
+ training_data is below the Phase-3 threshold, True after a best-
247
+ effort native booster fit on the legacy feature dict shape
248
+ (never persists to disk). Production training must go through
249
+ ``consolidation_worker._retrain_ranker`` which uses real features
250
+ + ``lambdarank`` + group + integrity persistence.
251
+ """
252
+ if not training_data or len(training_data) < PHASE_3_THRESHOLD:
253
+ return False
254
+ # Best-effort legacy path — does NOT persist, does NOT promote.
255
+ try:
256
+ import lightgbm as lgb # noqa: PLC0415
257
+ import numpy as np
258
+ except ImportError:
259
+ return False
260
+ X = np.asarray(
261
+ [[float((d.get("features") or {}).get(n, 0.0))
262
+ for n in FEATURE_NAMES]
263
+ for d in training_data],
264
+ dtype=np.float32,
265
+ )
266
+ y = np.asarray(
267
+ [float(d.get("label", 0.0)) for d in training_data],
268
+ dtype=np.float32,
269
+ )
270
+ ds = lgb.Dataset(X, label=y, feature_name=list(FEATURE_NAMES),
271
+ free_raw_data=False)
272
+ try:
273
+ booster = lgb.train(
274
+ {"objective": "regression", "metric": "rmse",
275
+ "verbosity": -1, "min_data_in_leaf": 1},
276
+ ds, num_boost_round=10,
277
+ )
278
+ except Exception: # pragma: no cover — defensive
279
+ return False
280
+ from superlocalmemory.learning.model_cache import ActiveModel
281
+
282
+ self._active = ActiveModel(
283
+ profile_id="legacy",
284
+ booster=booster,
285
+ feature_names=tuple(FEATURE_NAMES),
286
+ trained_at="",
287
+ sha256="",
288
+ )
289
+ return True
290
+
291
+ # --- legacy serialiser (used by external code in 3.4.20) -----------
292
+
293
+ def get_model_state(self) -> bytes | None:
294
+ if self._active is None:
295
+ return None
296
+ try:
297
+ return self._active.booster.model_to_string().encode("utf-8")
298
+ except Exception: # pragma: no cover — defensive
299
+ return None
@@ -0,0 +1,163 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.21 — F4.A Stage-8 H-01 fix
4
+
5
+ """Ranker retraining helpers shared by legacy + online paths.
6
+
7
+ These functions predate the LLD-10 online retrain wiring and remain
8
+ identical in behaviour; they are factored out so both
9
+ ``ranker_retrain_legacy.py`` and ``ranker_retrain_online.py`` can call
10
+ them without importing from each other.
11
+
12
+ Contract refs:
13
+ - LLD-02 §4.6 — lambdarank retraining groups + shadow gate.
14
+ - LLD-10 §3.2 — in-sample NDCG gate before persisting a candidate.
15
+ - Stage 8 H-01 (architect) — file split.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ __all__ = (
25
+ "_build_training_matrix",
26
+ "_shadow_test_improved",
27
+ "_compute_eval_metrics",
28
+ )
29
+
30
+
31
def _build_training_matrix(rows: list[dict], feature_names):
    """Assemble a grouped feature matrix, labels and group sizes.

    Rows are bucketed by ``query_id`` (a missing or falsy id maps to
    ``""``) in first-seen order; inside each bucket rows are sorted by
    ascending ``position``, with rows whose position is ``None`` placed
    last.  A feature absent from a row's ``features`` dict becomes
    ``0.0``.

    Returns ``(X, y_int, group_counts)``: a float32 matrix with one
    column per entry of ``feature_names``, an int32 label vector built
    with ``label_for_row``, and the list of per-query row counts in the
    same order as the matrix.  For empty input the result is a
    ``(0, len(feature_names))`` matrix, an empty list and ``None``.
    """
    import numpy as np
    from superlocalmemory.learning.labeler import label_for_row

    by_query: dict[str, list[dict]] = {}
    for row in rows:
        key = row.get("query_id") or ""
        if key not in by_query:
            by_query[key] = []
        by_query[key].append(row)

    if not by_query:
        empty = np.zeros((0, len(feature_names)), dtype=np.float32)
        return empty, [], None

    def _position_key(r: dict):
        # Missing positions land at the end of their group.
        pos = r.get("position")
        return 10**9 if pos is None else pos

    feature_rows: list[list[float]] = []
    labels: list[int] = []
    group_counts: list[int] = []
    for members in by_query.values():
        ordered = sorted(members, key=_position_key)
        for member in ordered:
            feats = member.get("features") or {}
            feature_rows.append(
                [float(feats.get(n, 0.0)) for n in feature_names],
            )
            labels.append(label_for_row(member))
        group_counts.append(len(ordered))

    X = np.asarray(feature_rows, dtype=np.float32)
    y = np.asarray(labels, dtype=np.int32)
    return X, y, group_counts
67
+
68
+
69
+ def _shadow_test_improved(prior_row, booster_new, rows, feature_names) -> bool:
70
+ """Return True iff new booster beats prior on NDCG@10 with p<0.05.
71
+
72
+ Lightweight paired t-test across per-query NDCG@10 scores.
73
+ ``prior_row`` is the dict returned by ``load_active_model`` — it
74
+ may be unusable (missing state_bytes / unparseable); in that case
75
+ we promote.
76
+ """
77
+ try:
78
+ import numpy as np
79
+ import lightgbm as lgb
80
+ except ImportError: # pragma: no cover
81
+ return True
82
+
83
+ try:
84
+ prior_booster = lgb.Booster(
85
+ model_str=bytes(prior_row["state_bytes"]).decode("utf-8"),
86
+ )
87
+ except Exception:
88
+ return True # prior unusable → promote new.
89
+
90
+ X, y, groups = _build_training_matrix(rows, feature_names)
91
+ if groups is None or not groups:
92
+ return True
93
+
94
+ offsets = [0]
95
+ for g in groups:
96
+ offsets.append(offsets[-1] + g)
97
+
98
+ def _ndcg_at_k(scores, labels, k=10):
99
+ order = np.argsort(-scores)
100
+ gains_map = [0, 1, 3, 7, 15]
101
+ dcg = 0.0
102
+ for i, idx in enumerate(order[:k]):
103
+ l = int(labels[idx])
104
+ if 0 <= l < len(gains_map):
105
+ dcg += gains_map[l] / np.log2(i + 2)
106
+ ideal = sorted(labels.tolist(), reverse=True)[:k]
107
+ idcg = sum(
108
+ (gains_map[int(l)] if 0 <= int(l) < len(gains_map) else 0)
109
+ / np.log2(i + 2)
110
+ for i, l in enumerate(ideal)
111
+ )
112
+ return dcg / idcg if idcg > 0 else 0.0
113
+
114
+ old_ndcgs: list[float] = []
115
+ new_ndcgs: list[float] = []
116
+ for i in range(len(groups)):
117
+ lo, hi = offsets[i], offsets[i + 1]
118
+ if hi - lo < 2:
119
+ continue
120
+ Xg, yg = X[lo:hi], y[lo:hi]
121
+ try:
122
+ s_old = prior_booster.predict(Xg)
123
+ s_new = booster_new.predict(Xg)
124
+ except Exception:
125
+ return False
126
+ old_ndcgs.append(_ndcg_at_k(s_old, yg))
127
+ new_ndcgs.append(_ndcg_at_k(s_new, yg))
128
+
129
+ if not old_ndcgs:
130
+ return True
131
+ old_arr = np.asarray(old_ndcgs)
132
+ new_arr = np.asarray(new_ndcgs)
133
+ delta = float(np.mean(new_arr - old_arr))
134
+ if delta < 0.02:
135
+ return False
136
+
137
+ # Paired t-test — small-sample safe.
138
+ diff = new_arr - old_arr
139
+ n = len(diff)
140
+ if n < 2:
141
+ return True
142
+ mean = float(np.mean(diff))
143
+ std = float(np.std(diff, ddof=1))
144
+ if std == 0.0:
145
+ return mean > 0
146
+ t_stat = mean / (std / np.sqrt(n))
147
+ # Rough threshold: t > 2.0 (~p<0.05 for n ≥ 10 two-tailed).
148
+ return t_stat > 2.0
149
+
150
+
151
def _compute_eval_metrics(booster, rows, feature_names) -> dict:
    """Return a small snapshot of training metrics.

    The result holds ``n_rows`` (matrix row count), ``n_groups`` (number
    of query groups, ``0`` when none) and ``mean_score`` (mean booster
    prediction, ``0.0`` when there are no predictions).  Any exception
    while building the matrix or predicting yields an empty dict.
    """
    try:
        import numpy as np

        X, _labels, groups = _build_training_matrix(rows, feature_names)
        if X.size:
            preds = booster.predict(X)
        else:
            preds = np.zeros(0)

        n_rows = int(X.shape[0])
        n_groups = int(len(groups or []))
        mean_score = float(np.mean(preds)) if preds.size else 0.0
        return {
            "n_rows": n_rows,
            "n_groups": n_groups,
            "mean_score": mean_score,
        }
    except Exception:  # pragma: no cover
        return {}