superlocalmemory 3.4.19 → 3.4.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +42 -34
- package/bin/slm +11 -0
- package/bin/slm.bat +12 -0
- package/package.json +4 -3
- package/pyproject.toml +4 -3
- package/scripts/build-slm-hook.ps1 +40 -0
- package/scripts/build-slm-hook.sh +45 -0
- package/scripts/build_entry.py +452 -0
- package/scripts/ci/stage5b_gate.sh +50 -0
- package/scripts/postinstall/validation.js +187 -0
- package/scripts/postinstall-interactive.js +756 -0
- package/scripts/postinstall_binary.js +287 -0
- package/scripts/release_manifest.py +273 -0
- package/scripts/slm-hook.spec +56 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +343 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/cli/commands.py +254 -79
- package/src/superlocalmemory/cli/context_commands.py +192 -0
- package/src/superlocalmemory/cli/daemon.py +15 -1
- package/src/superlocalmemory/cli/db_migrate.py +80 -0
- package/src/superlocalmemory/cli/escape_hatch.py +220 -0
- package/src/superlocalmemory/cli/main.py +72 -1
- package/src/superlocalmemory/core/context_cache.py +397 -0
- package/src/superlocalmemory/core/engine.py +38 -2
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/ram_lock.py +111 -0
- package/src/superlocalmemory/core/recall_pipeline.py +433 -3
- package/src/superlocalmemory/core/recall_worker.py +8 -3
- package/src/superlocalmemory/core/security_primitives.py +635 -0
- package/src/superlocalmemory/core/shadow_router.py +319 -0
- package/src/superlocalmemory/core/slm_disabled.py +87 -0
- package/src/superlocalmemory/core/slmignore.py +125 -0
- package/src/superlocalmemory/core/topic_signature.py +143 -0
- package/src/superlocalmemory/core/worker_pool.py +14 -3
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
- package/src/superlocalmemory/evolution/budget.py +321 -0
- package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
- package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
- package/src/superlocalmemory/hooks/adapter_base.py +317 -0
- package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
- package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
- package/src/superlocalmemory/hooks/context_payload.py +312 -0
- package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
- package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
- package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
- package/src/superlocalmemory/hooks/ide_connector.py +25 -2
- package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
- package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
- package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
- package/src/superlocalmemory/hooks/session_registry.py +186 -0
- package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
- package/src/superlocalmemory/hooks/sync_loop.py +114 -0
- package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
- package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
- package/src/superlocalmemory/infra/backup.py +3 -3
- package/src/superlocalmemory/infra/cloud_backup.py +2 -2
- package/src/superlocalmemory/infra/event_bus.py +2 -2
- package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
- package/src/superlocalmemory/learning/arm_catalog.py +99 -0
- package/src/superlocalmemory/learning/bandit.py +526 -0
- package/src/superlocalmemory/learning/bandit_cache.py +133 -0
- package/src/superlocalmemory/learning/behavioral.py +53 -1
- package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
- package/src/superlocalmemory/learning/database.py +256 -0
- package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
- package/src/superlocalmemory/learning/ensemble.py +300 -0
- package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
- package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
- package/src/superlocalmemory/learning/labeler.py +87 -0
- package/src/superlocalmemory/learning/legacy_migration.py +277 -0
- package/src/superlocalmemory/learning/memory_merge.py +160 -0
- package/src/superlocalmemory/learning/model_cache.py +269 -0
- package/src/superlocalmemory/learning/model_rollback.py +278 -0
- package/src/superlocalmemory/learning/outcome_queue.py +284 -0
- package/src/superlocalmemory/learning/pattern_miner.py +415 -0
- package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
- package/src/superlocalmemory/learning/ranker.py +225 -81
- package/src/superlocalmemory/learning/ranker_common.py +163 -0
- package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
- package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
- package/src/superlocalmemory/learning/reward.py +777 -0
- package/src/superlocalmemory/learning/reward_archive.py +210 -0
- package/src/superlocalmemory/learning/reward_boost.py +201 -0
- package/src/superlocalmemory/learning/reward_proxy.py +326 -0
- package/src/superlocalmemory/learning/shadow_test.py +524 -0
- package/src/superlocalmemory/learning/signal_worker.py +270 -0
- package/src/superlocalmemory/learning/signals.py +314 -0
- package/src/superlocalmemory/learning/trigram_index.py +547 -0
- package/src/superlocalmemory/mcp/server.py +5 -5
- package/src/superlocalmemory/mcp/tools_context.py +183 -0
- package/src/superlocalmemory/mcp/tools_core.py +92 -27
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
- package/src/superlocalmemory/retrieval/engine.py +52 -0
- package/src/superlocalmemory/server/api.py +2 -2
- package/src/superlocalmemory/server/bandit_loops.py +140 -0
- package/src/superlocalmemory/server/middleware/__init__.py +11 -0
- package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
- package/src/superlocalmemory/server/routes/backup.py +36 -13
- package/src/superlocalmemory/server/routes/behavioral.py +50 -19
- package/src/superlocalmemory/server/routes/brain.py +1234 -0
- package/src/superlocalmemory/server/routes/data_io.py +4 -4
- package/src/superlocalmemory/server/routes/events.py +2 -2
- package/src/superlocalmemory/server/routes/helpers.py +1 -1
- package/src/superlocalmemory/server/routes/learning.py +192 -7
- package/src/superlocalmemory/server/routes/memories.py +189 -1
- package/src/superlocalmemory/server/routes/prewarm.py +171 -0
- package/src/superlocalmemory/server/routes/profiles.py +3 -3
- package/src/superlocalmemory/server/routes/token.py +88 -0
- package/src/superlocalmemory/server/routes/ws.py +5 -5
- package/src/superlocalmemory/server/security_middleware.py +13 -7
- package/src/superlocalmemory/server/ui.py +2 -2
- package/src/superlocalmemory/server/unified_daemon.py +335 -3
- package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
- package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
- package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
- package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
- package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
- package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/storage/migration_runner.py +545 -0
- package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
- package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
- package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
- package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
- package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
- package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
- package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
- package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
- package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
- package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
- package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
- package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
- package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
- package/src/superlocalmemory/storage/models.py +4 -0
- package/src/superlocalmemory/ui/css/brain.css +409 -0
- package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
- package/src/superlocalmemory/ui/index.html +459 -1345
- package/src/superlocalmemory/ui/js/brain.js +1321 -0
- package/src/superlocalmemory/ui/js/clusters.js +123 -4
- package/src/superlocalmemory/ui/js/init.js +48 -39
- package/src/superlocalmemory/ui/js/memories.js +88 -2
- package/src/superlocalmemory/ui/js/modal.js +71 -1
- package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
- package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
- package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
- package/src/superlocalmemory/ui/js/behavioral.js +0 -447
- package/src/superlocalmemory/ui/js/graph-core.js +0 -447
- package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
- package/src/superlocalmemory/ui/js/learning.js +0 -435
- package/src/superlocalmemory/ui/js/patterns.js +0 -93
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.22 — F4.A Stage-8 H-01 fix
|
|
4
|
+
|
|
5
|
+
"""Static dictionaries used by ``pattern_miner`` — extracted so the
|
|
6
|
+
main module stays under the 400-LOC cap.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Lower-case keyword -> canonical display name for a technology.
# Insertion order is kept exactly as originally declared.
TECH_KEYWORDS: dict[str, str] = {
    "python": "Python",
    "javascript": "JavaScript",
    "typescript": "TypeScript",
    "react": "React",
    "vue": "Vue",
    "angular": "Angular",
    "postgresql": "PostgreSQL",
    "mysql": "MySQL",
    "sqlite": "SQLite",
    "docker": "Docker",
    "kubernetes": "Kubernetes",
    "aws": "AWS",
    "azure": "Azure",
    "gcp": "GCP",
    "node": "Node.js",
    "fastapi": "FastAPI",
    "django": "Django",
    "flask": "Flask",
    "rust": "Rust",
    "go": "Go",
    "java": "Java",
    "git": "Git",
    "npm": "npm",
    "pip": "pip",
    "langchain": "LangChain",
    "ollama": "Ollama",
    "pytorch": "PyTorch",
    "claude": "Claude",
    "openai": "OpenAI",
    "anthropic": "Anthropic",
    "redis": "Redis",
    "mongodb": "MongoDB",
    "graphql": "GraphQL",
    "nextjs": "Next.js",
    "terraform": "Terraform",
    "nginx": "Nginx",
    "linux": "Linux",
    "macos": "macOS",
    "vscode": "VS Code",
    "neovim": "Neovim",
}


# Common English function words, stored lower-case in an immutable set.
STOPWORDS: frozenset[str] = frozenset(
    (
        "the is a an in on at to for "
        "of and or not with that this was "
        "are be has had have from by it "
        "its as but were been being would "
        "could should will may might can do "
        "does did about into over after before "
        "then than also just like more some "
        "only other such each every both most"
    ).split()
)


__all__ = ("TECH_KEYWORDS", "STOPWORDS")
|
|
@@ -4,11 +4,19 @@
|
|
|
4
4
|
|
|
5
5
|
"""3-phase adaptive ranker — from heuristic to ML.
|
|
6
6
|
|
|
7
|
+
LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
|
|
8
|
+
Sections 4.4 + 4.5.
|
|
9
|
+
|
|
7
10
|
Phase 1: cross-encoder score only (cold start)
|
|
8
11
|
Phase 2: heuristic boosts (some data)
|
|
9
|
-
Phase 3: LightGBM
|
|
12
|
+
Phase 3: LightGBM **lambdarank** Booster (native, not LGBMRanker sklearn
|
|
13
|
+
wrapper) scoring on numpy feature matrices.
|
|
10
14
|
|
|
11
|
-
Transitions are automatic based on accumulated training data.
|
|
15
|
+
Transitions are automatic based on accumulated training data. Feature-name
|
|
16
|
+
drift is handled per LLD-02 §4.5 (``drift_mode``):
|
|
17
|
+
- ``aligned`` — score normally.
|
|
18
|
+
- ``subset`` — pad missing features with 0.0 in FEATURE_NAMES order.
|
|
19
|
+
- ``unknown`` — refuse to score; fall back to pre-model order.
|
|
12
20
|
"""
|
|
13
21
|
|
|
14
22
|
from __future__ import annotations
|
|
@@ -17,7 +25,12 @@ import logging
|
|
|
17
25
|
import math
|
|
18
26
|
from typing import Any
|
|
19
27
|
|
|
20
|
-
from superlocalmemory.learning.features import
|
|
28
|
+
from superlocalmemory.learning.features import (
|
|
29
|
+
FEATURE_DIM,
|
|
30
|
+
FEATURE_NAMES,
|
|
31
|
+
FeatureExtractor,
|
|
32
|
+
FeatureVector,
|
|
33
|
+
)
|
|
21
34
|
|
|
22
35
|
logger = logging.getLogger(__name__)
|
|
23
36
|
|
|
@@ -29,15 +42,32 @@ PHASE_3_THRESHOLD = 200 # signals needed to enter Phase 3
|
|
|
29
42
|
class AdaptiveRanker:
|
|
30
43
|
"""3-phase adaptive re-ranker for V3 retrieval results."""
|
|
31
44
|
|
|
32
|
-
def __init__(
|
|
45
|
+
def __init__(
|
|
46
|
+
self,
|
|
47
|
+
signal_count: int = 0,
|
|
48
|
+
model_state: bytes | None = None,
|
|
49
|
+
*,
|
|
50
|
+
active_model: Any = None,
|
|
51
|
+
) -> None:
|
|
52
|
+
"""Build a ranker.
|
|
53
|
+
|
|
54
|
+
``active_model`` (``model_cache.ActiveModel``) is preferred when
|
|
55
|
+
available — it carries verified booster + feature_names. The legacy
|
|
56
|
+
``model_state`` bytes path remains for backward compatibility with
|
|
57
|
+
3.4.20 callers; it does NOT perform SHA-256 verification and should
|
|
58
|
+
not be used by the 3.4.22 recall path.
|
|
59
|
+
"""
|
|
33
60
|
self._signal_count = signal_count
|
|
34
|
-
self.
|
|
35
|
-
|
|
36
|
-
|
|
61
|
+
self._active = active_model
|
|
62
|
+
# Back-compat: only fill in from raw bytes when no active_model given.
|
|
63
|
+
if active_model is None and model_state:
|
|
64
|
+
self._load_legacy_bytes(model_state)
|
|
65
|
+
|
|
66
|
+
# --- public properties ---------------------------------------------
|
|
37
67
|
|
|
38
68
|
@property
|
|
39
69
|
def phase(self) -> int:
|
|
40
|
-
if self._signal_count >= PHASE_3_THRESHOLD and self.
|
|
70
|
+
if self._signal_count >= PHASE_3_THRESHOLD and self._active is not None:
|
|
41
71
|
return 3
|
|
42
72
|
if self._signal_count >= PHASE_2_THRESHOLD:
|
|
43
73
|
return 2
|
|
@@ -51,6 +81,12 @@ class AdaptiveRanker:
|
|
|
51
81
|
def signal_count(self, value: int) -> None:
|
|
52
82
|
self._signal_count = value
|
|
53
83
|
|
|
84
|
+
@property
|
|
85
|
+
def active_model(self) -> Any:
|
|
86
|
+
return self._active
|
|
87
|
+
|
|
88
|
+
# --- re-rank entry points ------------------------------------------
|
|
89
|
+
|
|
54
90
|
def rerank(self, results: list[dict], query_context: dict) -> list[dict]:
|
|
55
91
|
"""Re-rank retrieval results based on current phase."""
|
|
56
92
|
if not results:
|
|
@@ -58,98 +94,206 @@ class AdaptiveRanker:
|
|
|
58
94
|
|
|
59
95
|
if self.phase == 3:
|
|
60
96
|
return self._rerank_ml(results, query_context)
|
|
61
|
-
|
|
97
|
+
if self.phase == 2:
|
|
62
98
|
return self._rerank_heuristic(results, query_context)
|
|
63
|
-
|
|
64
|
-
return self._rerank_baseline(results)
|
|
99
|
+
return self._rerank_baseline(results)
|
|
65
100
|
|
|
66
|
-
def
|
|
67
|
-
"""
|
|
68
|
-
if len(training_data) < PHASE_3_THRESHOLD:
|
|
69
|
-
return False
|
|
101
|
+
def rank(self, candidates: list, query_context: dict) -> list:
|
|
102
|
+
"""LLD-02 §4.5 native inference path.
|
|
70
103
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
return
|
|
104
|
+
Accepts an iterable of objects that implement ``to_result_dict()``
|
|
105
|
+
(the signal-pipeline candidates) AND plain dicts (legacy).
|
|
106
|
+
"""
|
|
107
|
+
if self._active is None or not candidates:
|
|
108
|
+
return list(candidates)
|
|
76
109
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
for
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
from superlocalmemory.learning.features import FEATURE_NAMES
|
|
88
|
-
vec = [float(fv.get(name, 0.0)) for name in FEATURE_NAMES]
|
|
89
|
-
features_list.append(vec)
|
|
90
|
-
labels.append(float(label))
|
|
91
|
-
|
|
92
|
-
if not features_list:
|
|
93
|
-
return False
|
|
110
|
+
# Build result dicts in a uniform shape.
|
|
111
|
+
result_dicts: list[dict] = []
|
|
112
|
+
for c in candidates:
|
|
113
|
+
if hasattr(c, "to_result_dict"):
|
|
114
|
+
result_dicts.append(c.to_result_dict())
|
|
115
|
+
elif isinstance(c, dict):
|
|
116
|
+
result_dicts.append(c)
|
|
117
|
+
else:
|
|
118
|
+
# Unknown candidate type — return original order.
|
|
119
|
+
return list(candidates)
|
|
94
120
|
|
|
95
|
-
|
|
96
|
-
params = {
|
|
97
|
-
"objective": "binary",
|
|
98
|
-
"metric": "binary_logloss",
|
|
99
|
-
"num_leaves": 15,
|
|
100
|
-
"learning_rate": 0.1,
|
|
101
|
-
"verbose": -1,
|
|
102
|
-
}
|
|
103
|
-
self._model = lgb.train(params, dataset, num_boost_round=50)
|
|
104
|
-
logger.info("LightGBM model trained with %d examples", len(features_list))
|
|
105
|
-
return True
|
|
121
|
+
from superlocalmemory.learning.model_cache import drift_mode
|
|
106
122
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
123
|
+
mode = drift_mode(self._active)
|
|
124
|
+
if mode == "unknown":
|
|
125
|
+
logger.info(
|
|
126
|
+
"ranker.rank: feature-name drift unknown; "
|
|
127
|
+
"falling back to pre-model order",
|
|
128
|
+
)
|
|
129
|
+
return list(candidates)
|
|
130
|
+
|
|
131
|
+
# Order matrix by CURRENT FEATURE_NAMES; if subset, missing names
|
|
132
|
+
# pad with 0.0 (FeatureExtractor already does this via .get(name, 0)).
|
|
133
|
+
try:
|
|
134
|
+
import numpy as np
|
|
135
|
+
except ImportError: # pragma: no cover — numpy is required dep
|
|
136
|
+
return list(candidates)
|
|
137
|
+
|
|
138
|
+
try:
|
|
139
|
+
rows = []
|
|
140
|
+
for rd in result_dicts:
|
|
141
|
+
fv = FeatureExtractor.extract(rd, query_context)
|
|
142
|
+
rows.append(fv.to_list())
|
|
143
|
+
X = np.asarray(rows, dtype=np.float32)
|
|
144
|
+
scores = self._active.booster.predict(X)
|
|
145
|
+
except Exception as exc: # pragma: no cover — booster.predict path
|
|
146
|
+
logger.warning("ranker.rank: booster.predict failed: %s", exc)
|
|
147
|
+
return list(candidates)
|
|
148
|
+
|
|
149
|
+
order = np.argsort(-scores, kind="stable")
|
|
150
|
+
return [candidates[int(i)] for i in order]
|
|
112
151
|
|
|
113
|
-
#
|
|
152
|
+
# --- phase implementations -----------------------------------------
|
|
114
153
|
|
|
115
154
|
def _rerank_baseline(self, results: list[dict]) -> list[dict]:
|
|
116
|
-
|
|
117
|
-
|
|
155
|
+
return sorted(
|
|
156
|
+
results,
|
|
157
|
+
key=lambda r: r.get("cross_encoder_score", r.get("score", 0)),
|
|
158
|
+
reverse=True,
|
|
159
|
+
)
|
|
118
160
|
|
|
119
|
-
def _rerank_heuristic(
|
|
120
|
-
|
|
121
|
-
|
|
161
|
+
def _rerank_heuristic(
|
|
162
|
+
self, results: list[dict], query_context: dict,
|
|
163
|
+
) -> list[dict]:
|
|
164
|
+
scored: list[dict] = []
|
|
122
165
|
for r in results:
|
|
123
166
|
base = r.get("cross_encoder_score", r.get("score", 0))
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
167
|
+
age_days = r.get("fact", {}).get("age_days", 30)
|
|
168
|
+
access_count = r.get("fact", {}).get("access_count", 0)
|
|
169
|
+
recency_boost = 0.1 * math.exp(-age_days / 30)
|
|
170
|
+
access_boost = 0.05 * min(access_count / 10, 1.0)
|
|
127
171
|
trust_boost = 0.1 * (r.get("trust_score", 0.5) - 0.5)
|
|
128
172
|
final = base + recency_boost + access_boost + trust_boost
|
|
129
173
|
scored.append({**r, "_adaptive_score": final})
|
|
130
174
|
return sorted(scored, key=lambda r: r["_adaptive_score"], reverse=True)
|
|
131
175
|
|
|
132
|
-
def _rerank_ml(
|
|
133
|
-
|
|
134
|
-
|
|
176
|
+
def _rerank_ml(
|
|
177
|
+
self, results: list[dict], query_context: dict,
|
|
178
|
+
) -> list[dict]:
|
|
179
|
+
"""Phase 3 prediction via native Booster."""
|
|
180
|
+
if self._active is None: # pragma: no cover — guarded by phase()
|
|
135
181
|
return self._rerank_heuristic(results, query_context)
|
|
136
182
|
|
|
137
|
-
|
|
138
|
-
predictions = []
|
|
139
|
-
for fv in feature_vectors:
|
|
140
|
-
vec = [fv.to_list()]
|
|
141
|
-
pred = self._model.predict(vec)[0]
|
|
142
|
-
predictions.append(pred)
|
|
183
|
+
from superlocalmemory.learning.model_cache import drift_mode
|
|
143
184
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
185
|
+
mode = drift_mode(self._active)
|
|
186
|
+
if mode == "unknown":
|
|
187
|
+
logger.info(
|
|
188
|
+
"ranker._rerank_ml: unknown drift → heuristic fallback",
|
|
189
|
+
)
|
|
190
|
+
return self._rerank_heuristic(results, query_context)
|
|
191
|
+
|
|
192
|
+
try:
|
|
193
|
+
import numpy as np
|
|
194
|
+
except ImportError: # pragma: no cover
|
|
195
|
+
return self._rerank_heuristic(results, query_context)
|
|
147
196
|
|
|
148
|
-
def _load_model(self, state: bytes) -> None:
|
|
149
|
-
"""Load model from serialized state."""
|
|
150
197
|
try:
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
198
|
+
feature_vectors = FeatureExtractor.extract_batch(
|
|
199
|
+
results, query_context,
|
|
200
|
+
)
|
|
201
|
+
X = np.asarray(
|
|
202
|
+
[fv.to_list() for fv in feature_vectors],
|
|
203
|
+
dtype=np.float32,
|
|
204
|
+
)
|
|
205
|
+
scores = self._active.booster.predict(X)
|
|
206
|
+
except Exception as exc: # pragma: no cover — booster.predict path
|
|
207
|
+
logger.warning("_rerank_ml failed: %s", exc)
|
|
208
|
+
return self._rerank_heuristic(results, query_context)
|
|
209
|
+
|
|
210
|
+
order = np.argsort(-scores, kind="stable")
|
|
211
|
+
return [results[int(i)] for i in order]
|
|
212
|
+
|
|
213
|
+
# --- legacy load path (back-compat) --------------------------------
|
|
214
|
+
|
|
215
|
+
def _load_legacy_bytes(self, state: bytes) -> None:
    """Best-effort load of a booster from raw bytes — NO SHA-256 verify.

    Retained for 3.4.20 callers. ``state`` is decoded as UTF-8 and parsed
    as a LightGBM model string. On any failure (including LightGBM being
    unavailable) a warning is logged and ``self._active`` is set to
    ``None``; on success ``self._active`` becomes an ``ActiveModel`` tagged
    ``profile_id="legacy"`` with an empty ``sha256``.
    """
    try:
        import lightgbm as lgb  # noqa: PLC0415

        legacy_booster = lgb.Booster(model_str=state.decode("utf-8"))
    except Exception as exc:
        logger.warning("Legacy model load failed: %s", exc)
        self._active = None
    else:
        from superlocalmemory.learning.model_cache import ActiveModel

        self._active = ActiveModel(
            profile_id="legacy",
            booster=legacy_booster,
            feature_names=tuple(FEATURE_NAMES),
            trained_at="",
            sha256="",
        )
|
|
239
|
+
|
|
240
|
+
# --- legacy train() shim (3.4.20 API) ------------------------------
|
|
241
|
+
|
|
242
|
+
def train(self, training_data: list) -> bool:
|
|
243
|
+
"""Deprecated — v3.4.22 training lives in ``consolidation_worker``.
|
|
244
|
+
|
|
245
|
+
Kept as a guard for 3.4.20 callers: returns False when
|
|
246
|
+
training_data is below the Phase-3 threshold, True after a best-
|
|
247
|
+
effort native booster fit on the legacy feature dict shape
|
|
248
|
+
(never persists to disk). Production training must go through
|
|
249
|
+
``consolidation_worker._retrain_ranker`` which uses real features
|
|
250
|
+
+ ``lambdarank`` + group + integrity persistence.
|
|
251
|
+
"""
|
|
252
|
+
if not training_data or len(training_data) < PHASE_3_THRESHOLD:
|
|
253
|
+
return False
|
|
254
|
+
# Best-effort legacy path — does NOT persist, does NOT promote.
|
|
255
|
+
try:
|
|
256
|
+
import lightgbm as lgb # noqa: PLC0415
|
|
257
|
+
import numpy as np
|
|
258
|
+
except ImportError:
|
|
259
|
+
return False
|
|
260
|
+
X = np.asarray(
|
|
261
|
+
[[float((d.get("features") or {}).get(n, 0.0))
|
|
262
|
+
for n in FEATURE_NAMES]
|
|
263
|
+
for d in training_data],
|
|
264
|
+
dtype=np.float32,
|
|
265
|
+
)
|
|
266
|
+
y = np.asarray(
|
|
267
|
+
[float(d.get("label", 0.0)) for d in training_data],
|
|
268
|
+
dtype=np.float32,
|
|
269
|
+
)
|
|
270
|
+
ds = lgb.Dataset(X, label=y, feature_name=list(FEATURE_NAMES),
|
|
271
|
+
free_raw_data=False)
|
|
272
|
+
try:
|
|
273
|
+
booster = lgb.train(
|
|
274
|
+
{"objective": "regression", "metric": "rmse",
|
|
275
|
+
"verbosity": -1, "min_data_in_leaf": 1},
|
|
276
|
+
ds, num_boost_round=10,
|
|
277
|
+
)
|
|
278
|
+
except Exception: # pragma: no cover — defensive
|
|
279
|
+
return False
|
|
280
|
+
from superlocalmemory.learning.model_cache import ActiveModel
|
|
281
|
+
|
|
282
|
+
self._active = ActiveModel(
|
|
283
|
+
profile_id="legacy",
|
|
284
|
+
booster=booster,
|
|
285
|
+
feature_names=tuple(FEATURE_NAMES),
|
|
286
|
+
trained_at="",
|
|
287
|
+
sha256="",
|
|
288
|
+
)
|
|
289
|
+
return True
|
|
290
|
+
|
|
291
|
+
# --- legacy serialiser (used by external code in 3.4.20) -----------
|
|
292
|
+
|
|
293
|
+
def get_model_state(self) -> bytes | None:
|
|
294
|
+
if self._active is None:
|
|
295
|
+
return None
|
|
296
|
+
try:
|
|
297
|
+
return self._active.booster.model_to_string().encode("utf-8")
|
|
298
|
+
except Exception: # pragma: no cover — defensive
|
|
299
|
+
return None
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.22 — F4.A Stage-8 H-01 fix
|
|
4
|
+
|
|
5
|
+
"""Ranker retraining helpers shared by legacy + online paths.
|
|
6
|
+
|
|
7
|
+
These functions predate the LLD-10 online retrain wiring and remain
|
|
8
|
+
identical in behaviour; they are factored out so both
|
|
9
|
+
``ranker_retrain_legacy.py`` and ``ranker_retrain_online.py`` can call
|
|
10
|
+
them without importing from each other.
|
|
11
|
+
|
|
12
|
+
Contract refs:
|
|
13
|
+
- LLD-02 §4.6 — lambdarank retraining groups + shadow gate.
|
|
14
|
+
- LLD-10 §3.2 — in-sample NDCG gate before persisting a candidate.
|
|
15
|
+
- Stage 8 H-01 (architect) — file split.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
__all__ = (
|
|
25
|
+
"_build_training_matrix",
|
|
26
|
+
"_shadow_test_improved",
|
|
27
|
+
"_compute_eval_metrics",
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _build_training_matrix(rows: list[dict], feature_names):
    """Assemble a grouped feature matrix from raw training rows.

    Rows are bucketed by ``query_id`` (missing/falsy ids share the ``""``
    bucket) in first-seen order; inside each bucket rows are sorted by
    ascending ``position`` with ``None`` positions placed last.

    Returns:
        ``(X, y, group_counts)`` where ``X`` is float32 with one column per
        entry of ``feature_names`` (absent features become 0.0), ``y`` is
        an int32 array of ``label_for_row`` results, and ``group_counts``
        lists the bucket sizes in matrix order. For empty input the result
        is ``(empty (0, n_features) array, [], None)``.
    """
    import numpy as np
    from superlocalmemory.learning.labeler import label_for_row

    by_query: dict[str, list[dict]] = {}
    for row in rows:
        key = row.get("query_id") or ""
        by_query.setdefault(key, []).append(row)
    if not by_query:
        return np.zeros((0, len(feature_names)), dtype=np.float32), [], None

    def _position_key(row: dict):
        # Missing positions sort after every real position.
        position = row.get("position")
        return 10**9 if position is None else position

    feature_rows: list[list[float]] = []
    labels: list[int] = []
    group_counts: list[int] = []
    for members in by_query.values():
        ordered = sorted(members, key=_position_key)
        for row in ordered:
            feats = row.get("features") or {}
            feature_rows.append(
                [float(feats.get(name, 0.0)) for name in feature_names]
            )
            labels.append(label_for_row(row))
        group_counts.append(len(ordered))

    X = np.asarray(feature_rows, dtype=np.float32)
    y = np.asarray(labels, dtype=np.int32)
    return X, y, group_counts
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _shadow_test_improved(prior_row, booster_new, rows, feature_names) -> bool:
|
|
70
|
+
"""Return True iff new booster beats prior on NDCG@10 with p<0.05.
|
|
71
|
+
|
|
72
|
+
Lightweight paired t-test across per-query NDCG@10 scores.
|
|
73
|
+
``prior_row`` is the dict returned by ``load_active_model`` — it
|
|
74
|
+
may be unusable (missing state_bytes / unparseable); in that case
|
|
75
|
+
we promote.
|
|
76
|
+
"""
|
|
77
|
+
try:
|
|
78
|
+
import numpy as np
|
|
79
|
+
import lightgbm as lgb
|
|
80
|
+
except ImportError: # pragma: no cover
|
|
81
|
+
return True
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
prior_booster = lgb.Booster(
|
|
85
|
+
model_str=bytes(prior_row["state_bytes"]).decode("utf-8"),
|
|
86
|
+
)
|
|
87
|
+
except Exception:
|
|
88
|
+
return True # prior unusable → promote new.
|
|
89
|
+
|
|
90
|
+
X, y, groups = _build_training_matrix(rows, feature_names)
|
|
91
|
+
if groups is None or not groups:
|
|
92
|
+
return True
|
|
93
|
+
|
|
94
|
+
offsets = [0]
|
|
95
|
+
for g in groups:
|
|
96
|
+
offsets.append(offsets[-1] + g)
|
|
97
|
+
|
|
98
|
+
def _ndcg_at_k(scores, labels, k=10):
|
|
99
|
+
order = np.argsort(-scores)
|
|
100
|
+
gains_map = [0, 1, 3, 7, 15]
|
|
101
|
+
dcg = 0.0
|
|
102
|
+
for i, idx in enumerate(order[:k]):
|
|
103
|
+
l = int(labels[idx])
|
|
104
|
+
if 0 <= l < len(gains_map):
|
|
105
|
+
dcg += gains_map[l] / np.log2(i + 2)
|
|
106
|
+
ideal = sorted(labels.tolist(), reverse=True)[:k]
|
|
107
|
+
idcg = sum(
|
|
108
|
+
(gains_map[int(l)] if 0 <= int(l) < len(gains_map) else 0)
|
|
109
|
+
/ np.log2(i + 2)
|
|
110
|
+
for i, l in enumerate(ideal)
|
|
111
|
+
)
|
|
112
|
+
return dcg / idcg if idcg > 0 else 0.0
|
|
113
|
+
|
|
114
|
+
old_ndcgs: list[float] = []
|
|
115
|
+
new_ndcgs: list[float] = []
|
|
116
|
+
for i in range(len(groups)):
|
|
117
|
+
lo, hi = offsets[i], offsets[i + 1]
|
|
118
|
+
if hi - lo < 2:
|
|
119
|
+
continue
|
|
120
|
+
Xg, yg = X[lo:hi], y[lo:hi]
|
|
121
|
+
try:
|
|
122
|
+
s_old = prior_booster.predict(Xg)
|
|
123
|
+
s_new = booster_new.predict(Xg)
|
|
124
|
+
except Exception:
|
|
125
|
+
return False
|
|
126
|
+
old_ndcgs.append(_ndcg_at_k(s_old, yg))
|
|
127
|
+
new_ndcgs.append(_ndcg_at_k(s_new, yg))
|
|
128
|
+
|
|
129
|
+
if not old_ndcgs:
|
|
130
|
+
return True
|
|
131
|
+
old_arr = np.asarray(old_ndcgs)
|
|
132
|
+
new_arr = np.asarray(new_ndcgs)
|
|
133
|
+
delta = float(np.mean(new_arr - old_arr))
|
|
134
|
+
if delta < 0.02:
|
|
135
|
+
return False
|
|
136
|
+
|
|
137
|
+
# Paired t-test — small-sample safe.
|
|
138
|
+
diff = new_arr - old_arr
|
|
139
|
+
n = len(diff)
|
|
140
|
+
if n < 2:
|
|
141
|
+
return True
|
|
142
|
+
mean = float(np.mean(diff))
|
|
143
|
+
std = float(np.std(diff, ddof=1))
|
|
144
|
+
if std == 0.0:
|
|
145
|
+
return mean > 0
|
|
146
|
+
t_stat = mean / (std / np.sqrt(n))
|
|
147
|
+
# Rough threshold: t > 2.0 (~p<0.05 for n ≥ 10 two-tailed).
|
|
148
|
+
return t_stat > 2.0
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _compute_eval_metrics(booster, rows, feature_names) -> dict:
    """Summarise a booster's predictions over ``rows``.

    Returns a dict with ``n_rows``, ``n_groups`` and ``mean_score`` (0.0
    when there is nothing to score), or an empty dict if anything raises.
    """
    try:
        import numpy as np

        X, y, groups = _build_training_matrix(rows, feature_names)
        if X.size:
            preds = booster.predict(X)
        else:
            preds = np.zeros(0)

        row_total = int(X.shape[0])
        group_total = int(len(groups or []))
        mean_score = float(np.mean(preds)) if preds.size else 0.0
        return {
            "n_rows": row_total,
            "n_groups": group_total,
            "mean_score": mean_score,
        }
    except Exception:  # pragma: no cover
        return {}
|