superlocalmemory 3.4.19 → 3.4.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +42 -34
- package/bin/slm +11 -0
- package/bin/slm.bat +12 -0
- package/package.json +4 -3
- package/pyproject.toml +4 -3
- package/scripts/build-slm-hook.ps1 +40 -0
- package/scripts/build-slm-hook.sh +45 -0
- package/scripts/build_entry.py +452 -0
- package/scripts/ci/stage5b_gate.sh +50 -0
- package/scripts/postinstall/validation.js +187 -0
- package/scripts/postinstall-interactive.js +756 -0
- package/scripts/postinstall_binary.js +287 -0
- package/scripts/release_manifest.py +273 -0
- package/scripts/slm-hook.spec +56 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +343 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/cli/commands.py +254 -79
- package/src/superlocalmemory/cli/context_commands.py +192 -0
- package/src/superlocalmemory/cli/daemon.py +15 -1
- package/src/superlocalmemory/cli/db_migrate.py +80 -0
- package/src/superlocalmemory/cli/escape_hatch.py +220 -0
- package/src/superlocalmemory/cli/main.py +72 -1
- package/src/superlocalmemory/core/context_cache.py +397 -0
- package/src/superlocalmemory/core/engine.py +38 -2
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/ram_lock.py +111 -0
- package/src/superlocalmemory/core/recall_pipeline.py +433 -3
- package/src/superlocalmemory/core/recall_worker.py +8 -3
- package/src/superlocalmemory/core/security_primitives.py +635 -0
- package/src/superlocalmemory/core/shadow_router.py +319 -0
- package/src/superlocalmemory/core/slm_disabled.py +87 -0
- package/src/superlocalmemory/core/slmignore.py +125 -0
- package/src/superlocalmemory/core/topic_signature.py +143 -0
- package/src/superlocalmemory/core/worker_pool.py +14 -3
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
- package/src/superlocalmemory/evolution/budget.py +321 -0
- package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
- package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
- package/src/superlocalmemory/hooks/adapter_base.py +317 -0
- package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
- package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
- package/src/superlocalmemory/hooks/context_payload.py +312 -0
- package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
- package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
- package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
- package/src/superlocalmemory/hooks/ide_connector.py +25 -2
- package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
- package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
- package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
- package/src/superlocalmemory/hooks/session_registry.py +186 -0
- package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
- package/src/superlocalmemory/hooks/sync_loop.py +114 -0
- package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
- package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
- package/src/superlocalmemory/infra/backup.py +3 -3
- package/src/superlocalmemory/infra/cloud_backup.py +2 -2
- package/src/superlocalmemory/infra/event_bus.py +2 -2
- package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
- package/src/superlocalmemory/learning/arm_catalog.py +99 -0
- package/src/superlocalmemory/learning/bandit.py +526 -0
- package/src/superlocalmemory/learning/bandit_cache.py +133 -0
- package/src/superlocalmemory/learning/behavioral.py +53 -1
- package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
- package/src/superlocalmemory/learning/database.py +256 -0
- package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
- package/src/superlocalmemory/learning/ensemble.py +300 -0
- package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
- package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
- package/src/superlocalmemory/learning/labeler.py +87 -0
- package/src/superlocalmemory/learning/legacy_migration.py +277 -0
- package/src/superlocalmemory/learning/memory_merge.py +160 -0
- package/src/superlocalmemory/learning/model_cache.py +269 -0
- package/src/superlocalmemory/learning/model_rollback.py +278 -0
- package/src/superlocalmemory/learning/outcome_queue.py +284 -0
- package/src/superlocalmemory/learning/pattern_miner.py +415 -0
- package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
- package/src/superlocalmemory/learning/ranker.py +225 -81
- package/src/superlocalmemory/learning/ranker_common.py +163 -0
- package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
- package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
- package/src/superlocalmemory/learning/reward.py +777 -0
- package/src/superlocalmemory/learning/reward_archive.py +210 -0
- package/src/superlocalmemory/learning/reward_boost.py +201 -0
- package/src/superlocalmemory/learning/reward_proxy.py +326 -0
- package/src/superlocalmemory/learning/shadow_test.py +524 -0
- package/src/superlocalmemory/learning/signal_worker.py +270 -0
- package/src/superlocalmemory/learning/signals.py +314 -0
- package/src/superlocalmemory/learning/trigram_index.py +547 -0
- package/src/superlocalmemory/mcp/server.py +5 -5
- package/src/superlocalmemory/mcp/tools_context.py +183 -0
- package/src/superlocalmemory/mcp/tools_core.py +92 -27
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
- package/src/superlocalmemory/retrieval/engine.py +52 -0
- package/src/superlocalmemory/server/api.py +2 -2
- package/src/superlocalmemory/server/bandit_loops.py +140 -0
- package/src/superlocalmemory/server/middleware/__init__.py +11 -0
- package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
- package/src/superlocalmemory/server/routes/backup.py +36 -13
- package/src/superlocalmemory/server/routes/behavioral.py +50 -19
- package/src/superlocalmemory/server/routes/brain.py +1234 -0
- package/src/superlocalmemory/server/routes/data_io.py +4 -4
- package/src/superlocalmemory/server/routes/events.py +2 -2
- package/src/superlocalmemory/server/routes/helpers.py +1 -1
- package/src/superlocalmemory/server/routes/learning.py +192 -7
- package/src/superlocalmemory/server/routes/memories.py +189 -1
- package/src/superlocalmemory/server/routes/prewarm.py +171 -0
- package/src/superlocalmemory/server/routes/profiles.py +3 -3
- package/src/superlocalmemory/server/routes/token.py +88 -0
- package/src/superlocalmemory/server/routes/ws.py +5 -5
- package/src/superlocalmemory/server/security_middleware.py +13 -7
- package/src/superlocalmemory/server/ui.py +2 -2
- package/src/superlocalmemory/server/unified_daemon.py +335 -3
- package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
- package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
- package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
- package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
- package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
- package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/storage/migration_runner.py +545 -0
- package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
- package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
- package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
- package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
- package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
- package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
- package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
- package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
- package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
- package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
- package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
- package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
- package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
- package/src/superlocalmemory/storage/models.py +4 -0
- package/src/superlocalmemory/ui/css/brain.css +409 -0
- package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
- package/src/superlocalmemory/ui/index.html +459 -1345
- package/src/superlocalmemory/ui/js/brain.js +1321 -0
- package/src/superlocalmemory/ui/js/clusters.js +123 -4
- package/src/superlocalmemory/ui/js/init.js +48 -39
- package/src/superlocalmemory/ui/js/memories.js +88 -2
- package/src/superlocalmemory/ui/js/modal.js +71 -1
- package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
- package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
- package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
- package/src/superlocalmemory/ui/js/behavioral.js +0 -447
- package/src/superlocalmemory/ui/js/graph-core.js +0 -447
- package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
- package/src/superlocalmemory/ui/js/learning.js +0 -435
- package/src/superlocalmemory/ui/js/patterns.js +0 -93
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.22 — F4.A Stage-8 H-07 deprecation
|
|
4
|
+
|
|
5
|
+
"""Legacy ranker retrain path (signal_count >= 200 cold-start).
|
|
6
|
+
|
|
7
|
+
**DEPRECATED** as of v3.4.22: superseded by the LLD-10 online retrain
|
|
8
|
+
cycle in ``ranker_retrain_online.py``. Retained for two reasons:
|
|
9
|
+
|
|
10
|
+
1. Back-compat — ``tests/test_learning/test_ranker_v2.py`` and
|
|
11
|
+
``tests/test_api/test_dashboard_phase_truth.py`` still import
|
|
12
|
+
``_retrain_ranker_impl`` directly.
|
|
13
|
+
2. Cold-start — profiles with signals but no active model yet
|
|
14
|
+
(first-ever training) still need a path to bootstrap lineage.
|
|
15
|
+
|
|
16
|
+
Gating (Stage 8 H-07 fix): the `ConsolidationWorker.run` call site
|
|
17
|
+
invokes this path ONLY when ``_should_retrain`` returns False (no
|
|
18
|
+
active model) AND raw signal_count ≥ 200. Once a profile has outcomes
|
|
19
|
+
and an active model, the online retrain wins unconditionally.
|
|
20
|
+
|
|
21
|
+
Every public entry point here emits a one-shot DeprecationWarning the
|
|
22
|
+
first time it runs per process so operators see the signal without log
|
|
23
|
+
spam.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import hashlib
|
|
29
|
+
import logging
|
|
30
|
+
import os
|
|
31
|
+
import warnings
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
from superlocalmemory.learning.ranker_common import (
|
|
35
|
+
_build_training_matrix,
|
|
36
|
+
_compute_eval_metrics,
|
|
37
|
+
_shadow_test_improved,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
_LEGACY_RETRAIN_DEPRECATED = True
|
|
43
|
+
|
|
44
|
+
# Process-wide latch: flipped to True the first time the warning is issued so
# every later call stays silent.
_warned = False


def _emit_deprecation_once() -> None:
    """Issue the legacy-retrain ``DeprecationWarning`` at most once per process.

    The first call sets the module-level ``_warned`` latch and emits the
    warning; any subsequent call returns without doing anything.
    """
    global _warned
    if not _warned:
        _warned = True
        # M-P-10: HARD removal target — v3.5.0. The legacy path must not
        # survive into v3.5.x tech debt. Remove with the dashboard lineage
        # UI ship (Stage 7+) per MANIFEST-DEVIATION A.3 #3.
        #
        # NOTE(review): the online module's docstring states that
        # ``_run_shadow_cycle`` lives in ``consolidation_worker`` — confirm
        # the module path quoted in this message is importable.
        message = (
            "ranker_retrain_legacy is deprecated as of SLM v3.4.22 and will "
            "be REMOVED in v3.5.0 — use ranker_retrain_online._run_shadow_cycle "
            "instead. The legacy path is kept only for cold-start profiles "
            "with no active model."
        )
        # stacklevel=3 attributes the warning to the caller of the function
        # that invoked this helper, not to the helper or its direct caller.
        warnings.warn(message, DeprecationWarning, stacklevel=3)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
__all__ = (
|
|
68
|
+
"_retrain_ranker_impl",
|
|
69
|
+
"_LEGACY_RETRAIN_DEPRECATED",
|
|
70
|
+
"_build_training_matrix", # re-export for callers that imported here
|
|
71
|
+
"_compute_eval_metrics",
|
|
72
|
+
"_shadow_test_improved",
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _retrain_ranker_impl(
    learning_db: str | Path,
    profile_id: str,
    *,
    include_synthetic: bool = False,
) -> bool:
    """Legacy cold-start training path.

    DEPRECATED: prefer ``ranker_retrain_online._run_shadow_cycle``.

    Fetches up to 2000 training examples for ``profile_id``, trains a
    LightGBM ranking booster for 50 rounds and persists it through
    ``LearningDatabase.persist_model``. When the profile already has an
    active model, the new booster is persisted only if
    ``_shadow_test_improved`` reports an improvement.

    Args:
        learning_db: Path of the learning database handed to
            ``LearningDatabase``.
        profile_id: Profile whose examples are fetched and whose model is
            persisted.
        include_synthetic: Forwarded to
            :meth:`LearningDatabase.fetch_training_examples` so migrated
            legacy rows (``is_synthetic=1``) participate in training when
            the user opts in via the dashboard "Migrate legacy data" flow.

    Returns:
        ``True`` when a new model was persisted. ``False`` when numpy or
        lightgbm is missing, fewer than 200 rows or fewer than 2 query
        groups are available, training fails, the shadow test rejects the
        new model, or persisting raises.

    Raises:
        AssertionError: if the group sizes do not add up to the number of
            matrix rows (only when assertions are enabled).
    """
    _emit_deprecation_once()

    try:
        import numpy as np
        import lightgbm as lgb  # noqa: PLC0415
    except ImportError:
        logger.info("lightgbm or numpy missing; skipping retrain")
        return False

    from superlocalmemory.learning.database import LearningDatabase
    from superlocalmemory.learning.features import FEATURE_NAMES
    from superlocalmemory.learning.labeler import label_gain

    db = LearningDatabase(learning_db)
    rows = db.fetch_training_examples(
        profile_id=profile_id,
        limit=2000,
        min_outcome_age_sec=60,
        include_synthetic=include_synthetic,
    )
    if len(rows) < 200:
        logger.info(
            "retrain: need ≥200 rows, have %d — deferring", len(rows),
        )
        return False

    X, y_int, groups = _build_training_matrix(rows, FEATURE_NAMES)
    if groups is None or len(groups) < 2:
        logger.info(
            "retrain: insufficient query groups (%s) — deferring",
            None if groups is None else len(groups),
        )
        return False
    # Invariant check on the helper's output: the per-query group sizes must
    # cover every row of the feature matrix exactly once.
    assert sum(groups) == X.shape[0], (
        f"group sum mismatch: {sum(groups)} != {X.shape[0]}"
    )

    gain = label_gain()
    # Defensive: clamp any out-of-range label so it indexes into ``gain``.
    y_int = np.clip(y_int, 0, len(gain) - 1)

    ds_train = lgb.Dataset(
        X,
        label=y_int,
        group=groups,
        feature_name=list(FEATURE_NAMES),
        free_raw_data=False,
    )

    # MKT-v2-M-01: allow switching between ``lambdarank`` (default,
    # LLD-02 CR1) and ``rank_xendcg`` via ``SLM_LGBM_OBJECTIVE``.
    _allowed_objectives = {"lambdarank", "rank_xendcg"}
    objective = os.environ.get("SLM_LGBM_OBJECTIVE", "lambdarank").strip()
    if objective not in _allowed_objectives:
        logger.warning(
            "SLM_LGBM_OBJECTIVE=%r not in %s; defaulting to lambdarank",
            objective, sorted(_allowed_objectives),
        )
        objective = "lambdarank"
    params = {
        "objective": objective,
        "metric": "ndcg",
        "ndcg_eval_at": [1, 3, 5, 10],
        "label_gain": gain,
        "learning_rate": 0.05,
        "num_leaves": 31,
        "min_data_in_leaf": 20,
        "verbosity": -1,
        # Leave one core free; fall back to a single thread when the CPU
        # count cannot be determined.
        "num_threads": max(1, (os.cpu_count() or 2) - 1),
    }
    try:
        booster_new = lgb.train(params, ds_train, num_boost_round=50)
    except lgb.basic.LightGBMError as exc:
        logger.warning("retrain: lightgbm train failed: %s", exc)
        return False

    # Shadow test: only promote if better than prior active model.
    prior = db.load_active_model(profile_id)
    if prior is not None:
        if not _shadow_test_improved(prior, booster_new, rows, FEATURE_NAMES):
            logger.info("Shadow test: new model did not beat prior; keeping")
            return False

    model_str = booster_new.model_to_string()
    state_bytes = model_str.encode("utf-8")
    sha = hashlib.sha256(state_bytes).hexdigest()
    try:
        db.persist_model(
            profile_id=profile_id,
            state_bytes=state_bytes,
            bytes_sha256=sha,
            feature_names=list(FEATURE_NAMES),
            trained_on_count=len(rows),
            metrics=_compute_eval_metrics(booster_new, rows, FEATURE_NAMES),
        )
    except Exception as exc:
        logger.warning("persist_model failed: %s", exc)
        return False

    # Invalidate in-process cache so new model is picked up. Best-effort:
    # the model is already persisted, so a failure here is only logged.
    try:
        from superlocalmemory.learning.model_cache import invalidate
        invalidate(profile_id)
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("model cache invalidate failed (non-fatal): %s", exc)

    # Report the objective that was actually used; it may be rank_xendcg
    # when SLM_LGBM_OBJECTIVE overrides the default.
    logger.info(
        "Ranker retrained (legacy, %s): %d rows, %d groups, "
        "promoted=True",
        objective, len(rows), len(groups),
    )
    return True
|
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.22 — F4.A Stage-8 H-01 + LLD-10
|
|
4
|
+
|
|
5
|
+
"""LLD-10 online retrain: hyperparam-capped LightGBM training + candidate
|
|
6
|
+
persist + atomic lineage flip.
|
|
7
|
+
|
|
8
|
+
All seams (``_train_booster``, ``_fetch_training_rows``,
|
|
9
|
+
``_measure_serialized_size``, ``_persist_candidate``,
|
|
10
|
+
``_promote_candidate``, ``_feature_names``) are module-level functions so
|
|
11
|
+
tests can monkey-patch them via the shim (``consolidation_worker``)
|
|
12
|
+
which re-exports them with the shim's own module bindings.
|
|
13
|
+
|
|
14
|
+
The orchestrator ``_run_shadow_cycle`` lives in the shim
|
|
15
|
+
(``consolidation_worker``) so patches on ``consolidation_worker`` keep
|
|
16
|
+
working without any test churn.
|
|
17
|
+
|
|
18
|
+
Contract refs:
|
|
19
|
+
- LLD-10 §2 (triggers), §3.2 (caps), §5 (lineage flip).
|
|
20
|
+
- IMPLEMENTATION-MANIFEST v3.4.22 FINAL A.3.
|
|
21
|
+
- Stage 8 H-01 (architect).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import hashlib
|
|
27
|
+
import json
|
|
28
|
+
import logging
|
|
29
|
+
import os
|
|
30
|
+
import sqlite3
|
|
31
|
+
import time
|
|
32
|
+
from datetime import datetime, timezone
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Final
|
|
35
|
+
|
|
36
|
+
from superlocalmemory.learning.ranker_common import (
|
|
37
|
+
_build_training_matrix,
|
|
38
|
+
_compute_eval_metrics,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# LLD-10 retrain constants
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
#: Contractual LightGBM hyperparameter caps — violating these is a
#: Stage-8 audit failure. Manifest A.3 names: num_leaves ≤ 31,
#: max_depth ≤ 7, feature_fraction ≤ 0.8.
RETRAIN_HYPERPARAM_CAPS: Final[dict] = dict(
    num_leaves=31,
    max_depth=7,
    feature_fraction=0.8,
    bagging_fraction=0.8,
    min_data_in_leaf=20,
    num_boost_round=50,
    learning_rate=0.05,
    metric="ndcg",
    ndcg_eval_at=[1, 3, 5, 10],
    verbosity=-1,
)

#: Maximum wall-clock seconds a single retrain cycle may take.
RETRAIN_WALL_TIME_BUDGET_SEC: Final[float] = 30.0

#: Upper bound, in bytes, on the serialised booster blob (10 MB).
RETRAIN_MODEL_SIZE_BYTES_CAP: Final[int] = 1024 * 1024 * 10

#: Retrain trigger: at least this many new outcomes since the last retrain.
RETRAIN_NEW_OUTCOMES_THRESHOLD: Final[int] = 50

#: Retrain trigger: at least this many hours since the last retrain.
RETRAIN_HOURS_THRESHOLD: Final[float] = 24.0
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class RetrainWallTimeExceeded(Exception):
    """Raised by ``_train_booster`` when the retrain wall-time budget is blown.

    The measured duration is kept on the ``elapsed_sec`` attribute so
    handlers can report it without parsing the message.
    """

    def __init__(self, *, elapsed_sec: float) -> None:
        # Compose the human-readable message first, then hand it to the
        # base ``Exception`` constructor.
        message = (
            f"retrain wall-time exceeded: {elapsed_sec:.1f}s > "
            f"{RETRAIN_WALL_TIME_BUDGET_SEC:.1f}s"
        )
        super().__init__(message)
        self.elapsed_sec = elapsed_sec
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
__all__ = (
|
|
89
|
+
"RetrainWallTimeExceeded",
|
|
90
|
+
"RETRAIN_HYPERPARAM_CAPS",
|
|
91
|
+
"RETRAIN_WALL_TIME_BUDGET_SEC",
|
|
92
|
+
"RETRAIN_MODEL_SIZE_BYTES_CAP",
|
|
93
|
+
"RETRAIN_NEW_OUTCOMES_THRESHOLD",
|
|
94
|
+
"RETRAIN_HOURS_THRESHOLD",
|
|
95
|
+
"_feature_names",
|
|
96
|
+
"_fetch_training_rows",
|
|
97
|
+
"_measure_serialized_size",
|
|
98
|
+
"_train_booster",
|
|
99
|
+
"_persist_candidate",
|
|
100
|
+
"_promote_candidate",
|
|
101
|
+
"_check_rollback",
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# Seams: module-level functions so tests can monkey-patch via the shim.
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _feature_names() -> list[str]:
    """Return a fresh list copy of the ranker's ``FEATURE_NAMES``.

    Kept as a module-level indirection so tests can substitute it. The
    import happens at call time rather than at module import.
    """
    from superlocalmemory.learning import features as _features

    names = list(_features.FEATURE_NAMES)
    return names
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _fetch_training_rows(
    learning_db_path: str, profile_id: str,
) -> tuple[list[dict], list[str]]:
    """Load non-synthetic training rows and the query ids they carry.

    Returns ``(rows, candidate_ids)``. ``rows`` is whatever
    ``LearningDatabase.fetch_training_examples`` yields for ``profile_id``
    (at most 5000 rows, outcomes at least 60 s old, synthetic rows
    excluded) and is passed on to ``_build_training_matrix`` by the
    trainer. ``candidate_ids`` holds the truthy ``query_id`` of each row,
    in row order, one entry per row.

    Tests monkey-patch this seam to inject deterministic fixtures.
    """
    from superlocalmemory.learning.database import LearningDatabase

    database = LearningDatabase(learning_db_path)
    rows = database.fetch_training_examples(
        profile_id=profile_id,
        limit=5000,
        min_outcome_age_sec=60,
        include_synthetic=False,
    )

    # Keep only rows that actually carry a query id.
    candidate_ids = []
    for row in rows:
        query_id = row.get("query_id")
        if query_id:
            candidate_ids.append(query_id)
    return rows, candidate_ids
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _measure_serialized_size(booster) -> int:
    """Return the serialised booster size in bytes. Seam for tests.

    The size is the UTF-8 byte length of ``booster.model_to_string()``.

    Args:
        booster: Any object exposing ``model_to_string()`` that returns a
            ``str``.

    Returns:
        The byte length, or ``0`` when serialising raises. The failure is
        logged instead of being swallowed silently.
    """
    try:
        return len(booster.model_to_string().encode("utf-8"))
    except Exception as exc:  # noqa: BLE001 — best-effort seam, never raises
        # NOTE(review): a 0 result is indistinguishable from "tiny model" for
        # any caller comparing against a size cap — confirm that is intended.
        logger.warning("could not measure serialised booster size: %s", exc)
        return 0
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _train_booster(
    learning_db_path: str,
    profile_id: str,
    *,
    training_rows: list[dict],
    feature_names: list[str],
    prior_row: dict | None,
):
    """Train a LightGBM booster with the HARD hyperparam caps + wall-time
    guard.

    Tests monkey-patch this seam; production invocation uses the real path.

    Args:
        learning_db_path: Not read by this implementation; part of the seam
            signature.
        profile_id: Not read by this implementation; part of the seam
            signature.
        training_rows: Rows handed to ``_build_training_matrix`` and
            ``_compute_eval_metrics``.
        feature_names: Feature column names, in matrix column order.
        prior_row: Not read by this implementation; part of the seam
            signature.

    Returns:
        ``(booster, metrics_dict)`` where ``metrics_dict`` is the result of
        ``_compute_eval_metrics`` plus a ``"wall_time_sec"`` entry.

    Raises:
        RuntimeError: numpy/lightgbm cannot be imported, or ``lgb.train``
            raises ``LightGBMError`` (on either the callback or the
            fallback invocation).
        ValueError: fewer than two query groups are available.
        RetrainWallTimeExceeded: training ran for
            ``RETRAIN_WALL_TIME_BUDGET_SEC`` seconds or longer.
        AssertionError: group sizes do not add up to the matrix row count
            (only when assertions are enabled).
    """
    try:
        import numpy as np
        import lightgbm as lgb
    except ImportError as exc:  # pragma: no cover — platform guard
        raise RuntimeError(f"lightgbm unavailable: {exc}") from exc

    from superlocalmemory.learning.labeler import label_gain

    X, y_int, groups = _build_training_matrix(training_rows, feature_names)
    if groups is None or len(groups) < 2:
        raise ValueError(
            f"insufficient query groups for retrain "
            f"(got {None if groups is None else len(groups)})",
        )
    # Invariant check on the helper's output: group sizes must cover every
    # matrix row exactly once.
    assert sum(groups) == X.shape[0], (
        f"group sum mismatch: {sum(groups)} != {X.shape[0]}")

    gain = label_gain()
    # Clamp labels so each one indexes into ``gain``.
    y_int = np.clip(y_int, 0, len(gain) - 1)

    ds_train = lgb.Dataset(
        X,
        label=y_int,
        group=groups,
        feature_name=list(feature_names),
        free_raw_data=False,
    )

    _allowed_objectives = {"lambdarank", "rank_xendcg"}
    objective = os.environ.get("SLM_LGBM_OBJECTIVE", "lambdarank").strip()
    if objective not in _allowed_objectives:
        # Same diagnostic as the legacy retrain path so a mistyped override
        # is visible to operators instead of being replaced silently.
        logger.warning(
            "SLM_LGBM_OBJECTIVE=%r not in %s; defaulting to lambdarank",
            objective, sorted(_allowed_objectives),
        )
        objective = "lambdarank"

    # CAPS — values are enforced both in the params dict (trainer side)
    # and in RETRAIN_HYPERPARAM_CAPS (surface constant for tests + ops).
    params = dict(RETRAIN_HYPERPARAM_CAPS)
    params["objective"] = objective
    params["label_gain"] = gain
    params["num_threads"] = max(1, (os.cpu_count() or 2) - 1)
    # ``num_boost_round`` is an argument of ``lgb.train``, not a params key.
    num_boost_round = int(params.pop("num_boost_round"))

    start = time.monotonic()

    # S9-defer H-P-08: before v3.4.22 the wall-time check fired only
    # AFTER ``lgb.train`` returned, which could take minutes on a
    # pathological dataset before we noticed. LightGBM's ``callbacks``
    # parameter accepts a per-iteration hook that can stop training
    # early. We install a callback that raises once the cumulative
    # wall-time exceeds ``RETRAIN_WALL_TIME_BUDGET_SEC`` so overrun
    # aborts mid-training, not post-training.
    class _WallTimeAbort(Exception):
        pass

    def _timeout_cb(_env):
        if time.monotonic() - start >= RETRAIN_WALL_TIME_BUDGET_SEC:
            raise _WallTimeAbort

    try:
        try:
            booster = lgb.train(
                params, ds_train, num_boost_round=num_boost_round,
                callbacks=[_timeout_cb],
            )
        except TypeError:
            # Older lightgbm binaries don't accept callbacks= in lgb.train.
            # Fall back to the original post-hoc check (still bounded by
            # num_boost_round * per-round work, which params cap tightly).
            # The fallback stays inside the outer ``try`` so a LightGBMError
            # raised here is wrapped exactly like one from the first call.
            booster = lgb.train(
                params, ds_train, num_boost_round=num_boost_round,
            )
    except _WallTimeAbort:
        elapsed = time.monotonic() - start
        # The private abort marker carries no information; drop it from the
        # exception chain.
        raise RetrainWallTimeExceeded(elapsed_sec=elapsed) from None
    except lgb.basic.LightGBMError as exc:  # pragma: no cover
        raise RuntimeError(f"lgb.train failed: {exc}") from exc

    # Post-hoc check covers the fallback path and the final boosting round.
    elapsed = time.monotonic() - start
    if elapsed >= RETRAIN_WALL_TIME_BUDGET_SEC:
        raise RetrainWallTimeExceeded(elapsed_sec=elapsed)

    metrics = _compute_eval_metrics(booster, training_rows, feature_names)
    metrics["wall_time_sec"] = elapsed
    return booster, metrics
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _persist_candidate(
    learning_db_path: str,
    *,
    profile_id: str,
    state_bytes: bytes,
    feature_names: list[str],
    trained_on_count: int,
    metrics: dict,
    shadow_results: dict | None,
) -> int:
    """Insert a fresh candidate row with is_candidate=1 + is_active=0.

    Honours the one-candidate-per-profile contract (partial unique index
    ``idx_model_candidate_one``) by deleting any existing candidate row for
    ``profile_id`` in the same transaction as the insert.

    Args:
        learning_db_path: SQLite database file holding
            ``learning_model_state``.
        profile_id: Profile the candidate belongs to.
        state_bytes: Serialised model; its SHA-256 hex digest is stored
            alongside it.
        feature_names: Stored as a compact JSON array.
        trained_on_count: Stored as an integer.
        metrics: Stored as compact JSON; a falsy value is stored as ``{}``.
        shadow_results: Stored as compact JSON; a falsy value is stored as
            ``{}``.

    Returns:
        The row id of the inserted candidate (``0`` if SQLite reports none).

    Raises:
        sqlite3.Error: any database failure, after the transaction has been
            rolled back.
    """
    now = datetime.now(timezone.utc).isoformat()
    sha = hashlib.sha256(state_bytes).hexdigest()
    metrics_json = json.dumps(metrics or {}, separators=(",", ":"))
    fn_json = json.dumps(list(feature_names), separators=(",", ":"))
    shadow_json = json.dumps(shadow_results or {}, separators=(",", ":"))

    # The sqlite3 connection context manager only commits or rolls back; it
    # never closes the connection. Manage the handle explicitly so every
    # call releases it.
    conn = sqlite3.connect(learning_db_path, timeout=10)
    try:
        conn.execute("BEGIN IMMEDIATE")
        # Wipe any stale candidate first (one-at-a-time contract).
        conn.execute(
            "DELETE FROM learning_model_state "
            "WHERE profile_id = ? AND is_candidate = 1",
            (profile_id,),
        )
        cur = conn.execute(
            "INSERT INTO learning_model_state "
            "(profile_id, model_version, state_bytes, bytes_sha256, "
            " trained_on_count, feature_names, metrics_json, "
            " is_active, is_candidate, shadow_results_json, "
            " trained_at, updated_at) "
            "VALUES (?, '3.4.22', ?, ?, ?, ?, ?, 0, 1, ?, ?, ?)",
            (
                profile_id, state_bytes, sha, int(trained_on_count),
                fn_json, metrics_json, shadow_json, now, now,
            ),
        )
        conn.commit()
        candidate_id = int(cur.lastrowid or 0)
    except sqlite3.Error:
        conn.rollback()
        raise
    finally:
        # Closing also discards any transaction still open after a
        # non-sqlite exception.
        conn.close()

    # S9-W1 C1: attach the new candidate to the shadow router so live
    # recall-settled signals start accumulating into its ShadowTest.
    # Without this call, ``ShadowTest.decide()`` would sit at
    # ``insufficient_data`` forever and the candidate would never be
    # promoted or rejected — LLD-10 would be shipped-but-cold (exactly
    # the failure mode Stage 8 SB-1 claimed to close but only wired the
    # consumer side for). Fail-soft so persist itself never regresses.
    try:
        from superlocalmemory.core import shadow_router as _sr
        # memory_db path is not available here; the router accepts empty
        # string — attach_candidate only touches learning_db state.
        router = _sr.get_shadow_router(
            memory_db="",
            learning_db=learning_db_path,
            profile_id=profile_id,
        )
        router.attach_candidate(candidate_id)
    except Exception as exc:  # noqa: BLE001 — defence in depth
        logger.debug("attach_candidate failed (non-fatal): %s", exc)

    return candidate_id
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _promote_candidate(
    learning_db_path: str, *, profile_id: str, candidate_id: int,
) -> bool:
    """Atomic lineage flip (LLD-10 §5 / §6.1).

    Invariants (enforced by M009 partial unique indexes):
      * Exactly one ``is_active=1`` per profile at any instant.
      * Exactly one ``is_candidate=1`` per profile at any instant.

    Flip order inside one BEGIN IMMEDIATE:
      0. Verify that ``candidate_id`` is a pending candidate of
         ``profile_id``; abort without touching anything otherwise.
      1. Clear existing is_previous (it becomes is_rollback).
      2. Current active → is_active=0, is_previous=1.
      3. Candidate → is_active=1, is_candidate=0, promoted_at=now.
      4. Reset the retrain bookkeeping in the candidate's metadata_json.

    Args:
        learning_db_path: Path of the SQLite learning database.
        profile_id: Profile whose model lineage is being flipped.
        candidate_id: ``learning_model_state.id`` of the row to promote.

    Returns:
        True when the flip was committed. False when the candidate row
        does not exist for this profile (nothing is modified) or when
        SQLite raised an error (the transaction is rolled back).
    """
    # Local import: sqlite3's own connection context manager only scopes the
    # transaction and never closes the handle, so wrap it in ``closing``.
    from contextlib import closing

    now = datetime.now(timezone.utc).isoformat()
    with closing(sqlite3.connect(learning_db_path, timeout=10)) as conn:
        conn.row_factory = sqlite3.Row
        try:
            conn.execute("BEGIN IMMEDIATE")

            # Step 0 — validate the candidate BEFORE demoting anything.
            # Without this guard an unknown id would demote the current
            # active model, promote nothing, and still report success,
            # leaving the profile with no active model at all.
            candidate = conn.execute(
                "SELECT metadata_json FROM learning_model_state "
                "WHERE id = ? AND profile_id = ? AND is_candidate = 1",
                (candidate_id, profile_id),
            ).fetchone()
            if candidate is None:
                conn.rollback()
                logger.error(
                    "_promote_candidate: no candidate id=%s for profile %s",
                    candidate_id, profile_id,
                )
                return False

            # Step 1 — demote the existing previous to rollback, if any.
            existing_prev = conn.execute(
                "SELECT id FROM learning_model_state "
                "WHERE profile_id = ? AND is_previous = 1",
                (profile_id,),
            ).fetchone()
            if existing_prev is not None:
                conn.execute(
                    "UPDATE learning_model_state "
                    "SET is_previous = 0, is_rollback = 1 "
                    "WHERE id = ?",
                    (existing_prev["id"],),
                )

            # Step 2 — demote current active. Clear is_active first so
            # the partial unique index on is_active=1 never sees two
            # rows simultaneously.
            conn.execute(
                "UPDATE learning_model_state "
                "SET is_active = 0, is_previous = 1 "
                "WHERE profile_id = ? AND is_active = 1",
                (profile_id,),
            )

            # Step 3 — promote candidate.
            conn.execute(
                "UPDATE learning_model_state "
                "SET is_active = 1, is_candidate = 0, promoted_at = ? "
                "WHERE id = ?",
                (now, candidate_id),
            )

            # Step 4 — reset the outcome counter on the new active.
            # Missing, malformed, or non-object metadata starts from empty.
            try:
                meta = json.loads(candidate["metadata_json"] or "{}")
            except (TypeError, ValueError):
                meta = {}
            if not isinstance(meta, dict):
                meta = {}
            meta["new_outcomes_since_last_retrain"] = 0
            meta["last_retrain_at"] = now
            conn.execute(
                "UPDATE learning_model_state SET metadata_json = ? "
                "WHERE id = ?",
                (json.dumps(meta), candidate_id),
            )
            conn.commit()
            return True
        except sqlite3.Error as exc:
            conn.rollback()
            logger.error("_promote_candidate sqlite error: %s", exc)
            return False
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _check_rollback(
    *,
    learning_db_path: str,
    profile_id: str,
    observations: list[float],
    baseline_ndcg: float,
) -> bool:
    """Feed watch-window observations to ModelRollback and act on its verdict.

    Every value in ``observations`` is recorded, in order, as a
    post-promotion NDCG@10 sample under the query id ``watch-<index>``.
    Once all samples are recorded, the rollback helper is asked whether
    a rollback is warranted.

    Args:
        learning_db_path: Path handed to ``ModelRollback``.
        profile_id: Profile handed to ``ModelRollback``.
        observations: NDCG@10 values observed after promotion.
        baseline_ndcg: Baseline handed to ``ModelRollback``.

    Returns:
        The result of ``execute_rollback`` when the helper requests a
        rollback; ``False`` when no rollback was requested.
    """
    from superlocalmemory.learning.model_rollback import ModelRollback

    watcher = ModelRollback(
        learning_db_path=learning_db_path,
        profile_id=profile_id,
        baseline_ndcg=baseline_ndcg,
    )

    for position, ndcg in enumerate(observations):
        watcher.record_post_promotion(
            query_id=f"watch-{position}", ndcg_at_10=ndcg,
        )

    # Guard clause: nothing to do unless the helper flags a regression.
    if not watcher.should_rollback():
        return False
    return watcher.execute_rollback(reason="watch_window_regression")
|