superlocalmemory 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +167 -1803
- package/README.md +212 -397
- package/bin/slm +179 -3
- package/bin/superlocalmemoryv2:learning +4 -0
- package/bin/superlocalmemoryv2:patterns +4 -0
- package/docs/ACCESSIBILITY.md +291 -0
- package/docs/ARCHITECTURE.md +12 -6
- package/docs/FRAMEWORK-INTEGRATIONS.md +300 -0
- package/docs/MCP-MANUAL-SETUP.md +14 -4
- package/install.sh +99 -3
- package/mcp_server.py +291 -1
- package/package.json +2 -1
- package/requirements-learning.txt +12 -0
- package/scripts/verify-v27.sh +233 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/learning/__init__.py +201 -0
- package/src/learning/adaptive_ranker.py +826 -0
- package/src/learning/cross_project_aggregator.py +866 -0
- package/src/learning/engagement_tracker.py +638 -0
- package/src/learning/feature_extractor.py +461 -0
- package/src/learning/feedback_collector.py +690 -0
- package/src/learning/learning_db.py +842 -0
- package/src/learning/project_context_manager.py +582 -0
- package/src/learning/source_quality_scorer.py +685 -0
- package/src/learning/synthetic_bootstrap.py +1047 -0
- package/src/learning/tests/__init__.py +0 -0
- package/src/learning/tests/test_adaptive_ranker.py +328 -0
- package/src/learning/tests/test_aggregator.py +309 -0
- package/src/learning/tests/test_feedback_collector.py +295 -0
- package/src/learning/tests/test_learning_db.py +606 -0
- package/src/learning/tests/test_project_context.py +296 -0
- package/src/learning/tests/test_source_quality.py +355 -0
- package/src/learning/tests/test_synthetic_bootstrap.py +433 -0
- package/src/learning/tests/test_workflow_miner.py +322 -0
- package/src/learning/workflow_pattern_miner.py +665 -0
- package/ui/index.html +346 -13
- package/ui/js/clusters.js +90 -1
- package/ui/js/graph-core.js +445 -0
- package/ui/js/graph-cytoscape-monolithic-backup.js +1168 -0
- package/ui/js/graph-cytoscape.js +1168 -0
- package/ui/js/graph-d3-backup.js +32 -0
- package/ui/js/graph-filters.js +220 -0
- package/ui/js/graph-interactions.js +354 -0
- package/ui/js/graph-ui.js +214 -0
- package/ui/js/memories.js +52 -0
- package/ui/js/modal.js +104 -1
|
@@ -0,0 +1,826 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SuperLocalMemory V2 - Adaptive Ranker (v2.7)
|
|
4
|
+
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
+
Licensed under MIT License
|
|
6
|
+
|
|
7
|
+
Repository: https://github.com/varun369/SuperLocalMemoryV2
|
|
8
|
+
Author: Varun Pratap Bhardwaj (Solution Architect)
|
|
9
|
+
|
|
10
|
+
NOTICE: This software is protected by MIT License.
|
|
11
|
+
Attribution must be preserved in all copies or derivatives.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
AdaptiveRanker — Three-phase adaptive re-ranking engine.
|
|
16
|
+
|
|
17
|
+
This is the core ranking engine for v2.7 "Your AI Learns You". It sits
|
|
18
|
+
between the existing search methods (FTS5 + TF-IDF + HNSW) and the final
|
|
19
|
+
result list, re-ordering candidates based on learned user preferences.
|
|
20
|
+
|
|
21
|
+
Three Phases (progressive adaptation):
|
|
22
|
+
|
|
23
|
+
Phase 0 — Baseline (< 20 feedback signals):
|
|
24
|
+
Pure v2.6 behavior. No re-ranking applied. Results returned as-is
|
|
25
|
+
from the existing search pipeline. Zero risk of degradation.
|
|
26
|
+
|
|
27
|
+
Phase 1 — Rule-Based (20-199 signals):
|
|
28
|
+
Applies learned-pattern boosting to search results. Uses feature
|
|
29
|
+
extraction to compute boost multipliers for tech match, project
|
|
30
|
+
match, recency, and source quality. Deterministic and interpretable.
|
|
31
|
+
|
|
32
|
+
Phase 2 — ML Model (200+ signals across 50+ unique queries):
|
|
33
|
+
LightGBM LambdaRank re-ranker. Trained on real feedback data
|
|
34
|
+
(and optionally bootstrapped from synthetic data). Produces ML
|
|
35
|
+
scores that replace the original ranking order.
|
|
36
|
+
|
|
37
|
+
Design Principles:
|
|
38
|
+
- LightGBM is OPTIONAL. If not installed, falls back to rule-based.
|
|
39
|
+
- Any exception in re-ranking falls back to original v2.6 results.
|
|
40
|
+
- Model is loaded lazily and cached in memory.
|
|
41
|
+
- Training is explicit (called by user or scheduled), never implicit.
|
|
42
|
+
- Original scores are preserved as 'base_score' for diagnostics.
|
|
43
|
+
|
|
44
|
+
Research Backing:
|
|
45
|
+
- eKNOW 2025: BM25 -> re-ranker pipeline for personal collections
|
|
46
|
+
- MACLA (arXiv:2512.18950): Bayesian confidence scoring
|
|
47
|
+
- FCS LREC 2024: Cold-start mitigation via synthetic bootstrap
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
import logging
|
|
51
|
+
import threading
|
|
52
|
+
from datetime import datetime
|
|
53
|
+
from pathlib import Path
|
|
54
|
+
from typing import Any, Dict, List, Optional
|
|
55
|
+
|
|
56
|
+
# LightGBM is OPTIONAL — graceful fallback to rule-based ranking
|
|
57
|
+
try:
|
|
58
|
+
import lightgbm as lgb
|
|
59
|
+
HAS_LIGHTGBM = True
|
|
60
|
+
except ImportError:
|
|
61
|
+
lgb = None
|
|
62
|
+
HAS_LIGHTGBM = False
|
|
63
|
+
|
|
64
|
+
# NumPy is used for feature matrix construction (comes with sklearn)
|
|
65
|
+
try:
|
|
66
|
+
import numpy as np
|
|
67
|
+
HAS_NUMPY = True
|
|
68
|
+
except ImportError:
|
|
69
|
+
np = None
|
|
70
|
+
HAS_NUMPY = False
|
|
71
|
+
|
|
72
|
+
from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
|
|
73
|
+
|
|
74
|
+
logger = logging.getLogger("superlocalmemory.learning.adaptive_ranker")
|
|
75
|
+
|
|
76
|
+
# ============================================================================
|
|
77
|
+
# Constants
|
|
78
|
+
# ============================================================================
|
|
79
|
+
|
|
80
|
+
MODELS_DIR = Path.home() / ".claude-memory" / "models"
|
|
81
|
+
MODEL_PATH = MODELS_DIR / "ranker.txt"
|
|
82
|
+
|
|
83
|
+
# Phase thresholds — how many feedback signals to trigger each phase
|
|
84
|
+
PHASE_THRESHOLDS = {
|
|
85
|
+
'baseline': 0, # 0 feedback samples -> no re-ranking
|
|
86
|
+
'rule_based': 20, # 20+ feedback -> rule-based boosting
|
|
87
|
+
'ml_model': 200, # 200+ feedback across 50+ unique queries -> ML
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
# Minimum unique queries required for ML phase (prevents overfitting
|
|
91
|
+
# to a small number of repeated queries)
|
|
92
|
+
MIN_UNIQUE_QUERIES_FOR_ML = 50
|
|
93
|
+
|
|
94
|
+
# Rule-based boost multipliers (Phase 1)
|
|
95
|
+
# These are conservative — they nudge the ranking without flipping order
|
|
96
|
+
_RULE_BOOST = {
|
|
97
|
+
'tech_match_strong': 1.3, # Memory matches 2+ preferred techs
|
|
98
|
+
'tech_match_weak': 1.1, # Memory matches 1 preferred tech
|
|
99
|
+
'project_match': 1.5, # Memory from current project
|
|
100
|
+
'project_unknown': 1.0, # No project context — no boost
|
|
101
|
+
'project_mismatch': 0.9, # Memory from different project
|
|
102
|
+
'source_quality_high': 1.2, # Source quality > 0.7
|
|
103
|
+
'source_quality_low': 0.85, # Source quality < 0.3
|
|
104
|
+
'recency_boost_max': 1.2, # Recent memory (< 7 days)
|
|
105
|
+
'recency_penalty_max': 0.8, # Old memory (> 365 days)
|
|
106
|
+
'high_importance': 1.15, # Importance >= 8
|
|
107
|
+
'high_access': 1.1, # Accessed 5+ times
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
# LightGBM training parameters — tuned for small, personal datasets
|
|
111
|
+
# Aggressive regularization prevents overfitting on < 10K samples
|
|
112
|
+
TRAINING_PARAMS = {
|
|
113
|
+
'objective': 'lambdarank',
|
|
114
|
+
'metric': 'ndcg',
|
|
115
|
+
'ndcg_eval_at': [5, 10],
|
|
116
|
+
'learning_rate': 0.05,
|
|
117
|
+
'num_leaves': 16,
|
|
118
|
+
'max_depth': 4,
|
|
119
|
+
'min_child_samples': 10,
|
|
120
|
+
'subsample': 0.8,
|
|
121
|
+
'reg_alpha': 0.1,
|
|
122
|
+
'reg_lambda': 1.0,
|
|
123
|
+
'boosting_type': 'dart',
|
|
124
|
+
'n_estimators': 50,
|
|
125
|
+
'verbose': -1,
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class AdaptiveRanker:
    """
    Three-phase adaptive re-ranking engine.

    Usage (called by memory_store_v2.search or mcp_server recall):
        ranker = AdaptiveRanker()
        results = ranker.rerank(search_results, query, context={
            'tech_preferences': {...},
            'current_project': 'MyProject',
            'source_scores': {...},
            'workflow_phase': 'testing',
        })

    The caller wraps this in try/except — any exception here causes
    fallback to original v2.6 results. Zero risk of degradation.
    """

    PHASE_THRESHOLDS = PHASE_THRESHOLDS
    MODEL_PATH = MODEL_PATH

    def __init__(self, learning_db=None):
        """
        Initialize AdaptiveRanker.

        Args:
            learning_db: Optional LearningDB instance. If None, imports
                and creates one lazily.
        """
        self._learning_db = learning_db
        self._feature_extractor = FeatureExtractor()
        self._model = None  # lgb.Booster — loaded lazily on first ML rerank
        self._model_load_attempted = False  # avoid repeated failed loads
        self._lock = threading.Lock()  # guards model load/reload

    # ========================================================================
    # LearningDB Access
    # ========================================================================

    def _get_learning_db(self):
        """Get or lazily create the LearningDB instance (None on failure)."""
        if self._learning_db is None:
            try:
                from .learning_db import LearningDB
                self._learning_db = LearningDB()
            except Exception as e:
                logger.warning("Cannot access LearningDB: %s", e)
                return None
        return self._learning_db

    def _get_feedback_stats(self) -> tuple:
        """
        Return (feedback_count, unique_query_count) from the LearningDB.

        Returns (0, 0) if the DB is unavailable or the lookup fails, which
        callers interpret as "not enough data" (i.e. baseline behavior).
        """
        ldb = self._get_learning_db()
        if ldb is None:
            return 0, 0
        try:
            return ldb.get_feedback_count(), ldb.get_unique_query_count()
        except Exception as e:
            logger.warning("Failed to check feedback counts: %s", e)
            return 0, 0

    # ========================================================================
    # Phase Detection
    # ========================================================================

    def get_phase(self) -> str:
        """
        Determine the current ranking phase based on feedback data.

        Returns:
            'baseline' — Not enough data for personalization
            'rule_based' — Enough data for rule-based boosting
            'ml_model' — Enough data for ML ranking (if LightGBM available)
        """
        feedback_count, unique_queries = self._get_feedback_stats()

        # Phase 2: ML model — requires enough data AND LightGBM AND numpy
        if (
            feedback_count >= PHASE_THRESHOLDS['ml_model']
            and unique_queries >= MIN_UNIQUE_QUERIES_FOR_ML
            and HAS_LIGHTGBM
            and HAS_NUMPY
        ):
            return 'ml_model'

        # Phase 1: Rule-based — just needs minimum feedback
        if feedback_count >= PHASE_THRESHOLDS['rule_based']:
            return 'rule_based'

        # Phase 0: Not enough data yet
        return 'baseline'

    def get_phase_info(self) -> Dict[str, Any]:
        """
        Return detailed phase information for diagnostics.

        Returns:
            Dict with phase, feedback_count, unique_queries, thresholds,
            model_loaded, lightgbm_available.
        """
        feedback_count, unique_queries = self._get_feedback_stats()

        return {
            'phase': self.get_phase(),
            'feedback_count': feedback_count,
            'unique_queries': unique_queries,
            'thresholds': dict(PHASE_THRESHOLDS),
            'min_unique_queries_for_ml': MIN_UNIQUE_QUERIES_FOR_ML,
            'model_loaded': self._model is not None,
            'model_path_exists': MODEL_PATH.exists(),
            'lightgbm_available': HAS_LIGHTGBM,
            'numpy_available': HAS_NUMPY,
        }

    # ========================================================================
    # Main Re-ranking Entry Point
    # ========================================================================

    @staticmethod
    def _annotate_passthrough(results: List[dict], phase: str) -> List[dict]:
        """Annotate results in place without re-ordering (pass-through phases)."""
        for r in results:
            r['base_score'] = r.get('score', 0.0)
            r['ranking_phase'] = phase
        return results

    def rerank(
        self,
        results: List[dict],
        query: str,
        context: Optional[dict] = None,
    ) -> List[dict]:
        """
        Re-rank search results based on learned user preferences.

        This is the main entry point, called after the search pipeline
        produces initial results. It determines the current phase and
        routes to the appropriate ranking strategy.

        Args:
            results: List of memory dicts from search (with 'score' field).
            query: The recall query string.
            context: Optional context dict with:
                - tech_preferences: Dict[str, dict] — user's tech prefs
                - current_project: str — active project name
                - source_scores: Dict[str, float] — source quality scores
                - workflow_phase: str — current workflow phase

        Returns:
            Re-ranked list of memory dicts. Each memory gets:
                - 'base_score': Original score from search pipeline
                - 'ranking_phase': Which phase was used
                - 'score': Updated score (may differ from base_score)

        CRITICAL: The caller wraps this in try/except. Any exception
        causes fallback to original v2.6 results. This method must
        never corrupt the results list.
        """
        if not results:
            return results

        # Short-circuit: a 0/1-element list cannot be re-ordered
        if len(results) <= 1:
            return self._annotate_passthrough(results, 'baseline')

        context = context or {}

        # Set up feature extraction context (once per query)
        self._feature_extractor.set_context(
            source_scores=context.get('source_scores'),
            tech_preferences=context.get('tech_preferences'),
            current_project=context.get('current_project'),
            workflow_phase=context.get('workflow_phase'),
        )

        # Determine phase and route
        phase = self.get_phase()

        if phase == 'baseline':
            # Phase 0: No re-ranking — preserve original order
            return self._annotate_passthrough(results, 'baseline')

        if phase == 'rule_based':
            return self._rerank_rule_based(results, query, context)

        if phase == 'ml_model':
            # Try ML first, fall back to rule-based if model fails
            try:
                return self._rerank_ml(results, query, context)
            except Exception as e:
                logger.warning(
                    "ML re-ranking failed, falling back to rule-based: %s", e
                )
                return self._rerank_rule_based(results, query, context)

        # Defensive: unknown phase -> no re-ranking
        return self._annotate_passthrough(results, 'unknown')

    # ========================================================================
    # Phase 1: Rule-Based Re-ranking
    # ========================================================================

    def _rerank_rule_based(
        self,
        results: List[dict],
        query: str,
        context: dict,
    ) -> List[dict]:
        """
        Phase 1: Apply rule-based boosting using extracted features.

        Each result's score is multiplied by boost factors derived from
        feature values. The boosts are conservative — they nudge the
        ranking order without dramatically flipping results.
        """
        feature_vectors = self._feature_extractor.extract_batch(results, query)

        for i, result in enumerate(results):
            base_score = result.get('score', 0.0)
            result['base_score'] = base_score
            result['ranking_phase'] = 'rule_based'

            # Defensive: extractor may return fewer vectors than results
            if i >= len(feature_vectors):
                continue

            features = feature_vectors[i]
            boost = 1.0

            # Feature [2]: tech_match
            tech_match = features[2]
            if tech_match >= 0.8:
                boost *= _RULE_BOOST['tech_match_strong']
            elif tech_match >= 0.4:
                boost *= _RULE_BOOST['tech_match_weak']

            # Feature [3]: project_match
            project_match = features[3]
            if project_match >= 0.9:
                boost *= _RULE_BOOST['project_match']
            elif project_match <= 0.35:
                boost *= _RULE_BOOST['project_mismatch']

            # Feature [5]: source_quality
            source_quality = features[5]
            if source_quality >= 0.7:
                boost *= _RULE_BOOST['source_quality_high']
            elif source_quality < 0.3:
                boost *= _RULE_BOOST['source_quality_low']

            # Feature [7]: recency_score — linear interpolation between
            # penalty (recency=0, oldest) and boost (recency=1, newest)
            recency = features[7]
            recency_factor = (
                _RULE_BOOST['recency_penalty_max']
                + recency * (
                    _RULE_BOOST['recency_boost_max']
                    - _RULE_BOOST['recency_penalty_max']
                )
            )
            boost *= recency_factor

            # Feature [6]: importance_norm
            if features[6] >= 0.8:
                boost *= _RULE_BOOST['high_importance']

            # Feature [8]: access_frequency
            if features[8] >= 0.5:
                boost *= _RULE_BOOST['high_access']

            # Apply boost to score
            result['score'] = base_score * boost

        # Re-sort by boosted score (highest first)
        results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
        return results

    # ========================================================================
    # Phase 2: ML Re-ranking (LightGBM)
    # ========================================================================

    def _rerank_ml(
        self,
        results: List[dict],
        query: str,
        context: dict,
    ) -> List[dict]:
        """
        Phase 2: LightGBM LambdaRank re-ranking.

        Extracts features, runs the trained model, and sorts by ML scores.
        Preserves original score as 'base_score' and adds 'ml_score'.

        Raises on any failure — the caller (rerank) catches and falls back
        to rule-based ranking.
        """
        if not HAS_LIGHTGBM or not HAS_NUMPY:
            raise RuntimeError("LightGBM or NumPy not available for ML ranking")

        # Load model if not cached
        model = self._load_model()
        if model is None:
            raise RuntimeError("No trained ranking model available")

        # Extract features
        feature_vectors = self._feature_extractor.extract_batch(results, query)
        if not feature_vectors:
            raise ValueError("Feature extraction returned empty results")

        # Build feature matrix
        X = np.array(feature_vectors, dtype=np.float64)

        # Validate shape before handing to the model
        if X.shape[1] != NUM_FEATURES:
            raise ValueError(
                f"Feature dimension mismatch: expected {NUM_FEATURES}, "
                f"got {X.shape[1]}"
            )

        # Predict scores
        ml_scores = model.predict(X)

        # Annotate results with ML scores
        for i, result in enumerate(results):
            result['base_score'] = result.get('score', 0.0)
            result['ranking_phase'] = 'ml_model'
            if i < len(ml_scores):
                result['ml_score'] = float(ml_scores[i])
                result['score'] = float(ml_scores[i])
            else:
                result['ml_score'] = 0.0

        # Re-sort by ML score (highest first)
        results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
        return results

    # ========================================================================
    # Model Management
    # ========================================================================

    def _load_model(self):
        """
        Load LightGBM model from disk (lazy, cached, thread-safe).

        Returns:
            lgb.Booster instance or None if unavailable.
        """
        # Fast path: cached model or known-failed load (no lock needed)
        if self._model is not None:
            return self._model
        if self._model_load_attempted:
            return None

        with self._lock:
            # Double-check after acquiring lock
            if self._model is not None:
                return self._model
            if self._model_load_attempted:
                return None

            self._model_load_attempted = True

            if not HAS_LIGHTGBM:
                logger.info("LightGBM not installed — ML ranking unavailable")
                return None

            if not MODEL_PATH.exists():
                logger.info(
                    "No ranking model at %s — ML ranking unavailable",
                    MODEL_PATH
                )
                return None

            try:
                self._model = lgb.Booster(model_file=str(MODEL_PATH))
                logger.info("Loaded ranking model from %s", MODEL_PATH)
                return self._model
            except Exception as e:
                logger.warning("Failed to load ranking model: %s", e)
                return None

    def reload_model(self):
        """
        Force reload of the ranking model from disk.

        Call this after training a new model to pick up the updated weights.
        """
        with self._lock:
            self._model = None
            self._model_load_attempted = False
        # Trigger fresh load (outside the lock — _load_model re-acquires it)
        return self._load_model()

    # ========================================================================
    # Model Training
    # ========================================================================

    def train(self, force: bool = False) -> Optional[Dict[str, Any]]:
        """
        Train or retrain the LightGBM ranking model.

        Uses continued training (init_model) if a model already exists,
        incorporating new feedback data incrementally.

        Args:
            force: If True, train even if below ML threshold.
                Useful for synthetic bootstrap training.

        Returns:
            Training metadata dict, or None if training not possible.
            Metadata includes: model_version, training_samples, ndcg_at_10,
            model_path, created_at.
        """
        if not HAS_LIGHTGBM or not HAS_NUMPY:
            logger.warning(
                "Cannot train: LightGBM=%s, NumPy=%s",
                HAS_LIGHTGBM, HAS_NUMPY
            )
            return None

        ldb = self._get_learning_db()
        if ldb is None:
            logger.warning("Cannot train: LearningDB unavailable")
            return None

        # Check if we have enough data (unless forced)
        if not force:
            feedback_count, unique_queries = self._get_feedback_stats()
            if (
                feedback_count < PHASE_THRESHOLDS['ml_model']
                or unique_queries < MIN_UNIQUE_QUERIES_FOR_ML
            ):
                logger.info(
                    "Insufficient data for training: %d feedback / %d queries "
                    "(need %d / %d)",
                    feedback_count, unique_queries,
                    PHASE_THRESHOLDS['ml_model'], MIN_UNIQUE_QUERIES_FOR_ML,
                )
                return None

        # Prepare training data
        training_data = self._prepare_training_data()
        if training_data is None:
            logger.warning("No usable training data available")
            return None

        X, y, groups = training_data
        total_samples = X.shape[0]

        if total_samples < 10:
            logger.warning("Too few training samples: %d", total_samples)
            return None

        logger.info(
            "Training ranking model: %d samples, %d groups",
            total_samples, len(groups)
        )

        # Create LightGBM dataset
        train_dataset = lgb.Dataset(
            X, label=y, group=groups,
            feature_name=list(FEATURE_NAMES),
            free_raw_data=False,
        )

        # Training parameters — n_estimators is lgb.train's
        # num_boost_round, not a params key
        params = dict(TRAINING_PARAMS)
        n_estimators = params.pop('n_estimators', 50)

        # Check for existing model (continued training)
        init_model = None
        if MODEL_PATH.exists():
            try:
                init_model = lgb.Booster(model_file=str(MODEL_PATH))
                logger.info("Continuing training from existing model")
            except Exception:
                logger.info("Starting fresh training (existing model unreadable)")
                init_model = None

        # Train
        try:
            booster = lgb.train(
                params,
                train_dataset,
                num_boost_round=n_estimators,
                init_model=init_model,
                valid_sets=[train_dataset],
                valid_names=['train'],
                callbacks=[lgb.log_evaluation(period=0)],  # Silent training
            )
        except Exception as e:
            logger.error("LightGBM training failed: %s", e)
            return None

        # Save model
        MODELS_DIR.mkdir(parents=True, exist_ok=True)
        try:
            booster.save_model(str(MODEL_PATH))
            logger.info("Ranking model saved to %s", MODEL_PATH)
        except Exception as e:
            logger.error("Failed to save ranking model: %s", e)
            return None

        # Extract NDCG@10 from training evaluation (if available).
        # FIX: eval_train() takes no dataset argument, and its result
        # tuples are (dataset_name, metric_name, value, is_higher_better)
        # — the original checked 'ndcg@10' against the dataset name, so
        # the metric was never found.
        ndcg_at_10 = None
        try:
            for _data_name, metric_name, value, _higher in booster.eval_train():
                if 'ndcg@10' in metric_name:
                    ndcg_at_10 = value
                    break
        except Exception:
            pass

        # Record metadata in learning_db
        model_version = datetime.now().strftime("v%Y%m%d_%H%M%S")
        try:
            ldb.record_model_training(
                model_version=model_version,
                training_samples=total_samples,
                real_samples=total_samples,
                synthetic_samples=0,
                ndcg_at_10=ndcg_at_10,
                model_path=str(MODEL_PATH),
            )
        except Exception as e:
            logger.warning("Failed to record training metadata: %s", e)

        # Reload model into cache
        self.reload_model()

        metadata = {
            'model_version': model_version,
            'training_samples': total_samples,
            'query_groups': len(groups),
            'n_estimators': n_estimators,
            'ndcg_at_10': ndcg_at_10,
            'model_path': str(MODEL_PATH),
            'continued_from': init_model is not None,
            'created_at': datetime.now().isoformat(),
        }
        logger.info("Training complete: %s", metadata)
        return metadata

    def _prepare_training_data(self) -> Optional[tuple]:
        """
        Prepare training data from feedback records.

        For each unique query (grouped by query_hash):
        - Fetch all feedback entries for that query
        - Look up the corresponding memory from memory.db
        - Extract features for each memory
        - Use signal_value as the relevance label

        Returns:
            Tuple of (X, y, groups) for LGBMRanker, or None if insufficient.
            X: numpy array (n_samples, NUM_FEATURES)
            y: numpy array (n_samples,) — relevance labels
            groups: list of ints — samples per query group
        """
        import sqlite3

        ldb = self._get_learning_db()
        if ldb is None:
            return None

        feedback = ldb.get_feedback_for_training()
        if not feedback:
            return None

        # Group feedback by query_hash
        query_groups: Dict[str, List[dict]] = {}
        for entry in feedback:
            query_groups.setdefault(entry['query_hash'], []).append(entry)

        # Filter: only keep groups with 2+ items (ranking requires pairs)
        query_groups = {
            qh: entries for qh, entries in query_groups.items()
            if len(entries) >= 2
        }

        if not query_groups:
            logger.info("No query groups with 2+ feedback entries")
            return None

        # Collect memory IDs we need to look up
        memory_ids_needed = {
            entry['memory_id']
            for entries in query_groups.values()
            for entry in entries
        }

        # Fetch memories from memory.db
        memory_db_path = Path.home() / ".claude-memory" / "memory.db"
        if not memory_db_path.exists():
            logger.warning("memory.db not found at %s", memory_db_path)
            return None

        memories_by_id = {}
        try:
            conn = sqlite3.connect(str(memory_db_path), timeout=5)
            # FIX: close the connection even if a query raises (the
            # original leaked the handle on any exception after connect)
            try:
                conn.row_factory = sqlite3.Row
                cursor = conn.cursor()

                # Batch fetch in chunks to stay under SQLite's bound-
                # variable limit
                id_list = list(memory_ids_needed)
                chunk_size = 500
                for i in range(0, len(id_list), chunk_size):
                    chunk = id_list[i:i + chunk_size]
                    placeholders = ','.join('?' for _ in chunk)
                    cursor.execute(f'''
                        SELECT id, content, summary, project_path, project_name,
                               tags, category, memory_type, importance, created_at,
                               last_accessed, access_count
                        FROM memories
                        WHERE id IN ({placeholders})
                    ''', chunk)
                    for row in cursor.fetchall():
                        memories_by_id[row['id']] = dict(row)
            finally:
                conn.close()
        except Exception as e:
            logger.error("Failed to fetch memories for training: %s", e)
            return None

        # Build feature matrix and labels
        all_features = []
        all_labels = []
        groups = []

        # Set a neutral context for training (we don't have query-time context)
        self._feature_extractor.set_context()

        for qh, entries in query_groups.items():
            group_features = []
            group_labels = []

            for entry in entries:
                memory = memories_by_id.get(entry['memory_id'])
                if memory is None:
                    continue  # Memory may have been deleted

                # Use query_keywords as proxy for query text
                query_text = entry.get('query_keywords', '') or ''

                features = self._feature_extractor.extract_features(
                    memory, query_text
                )
                group_features.append(features)
                group_labels.append(float(entry['signal_value']))

            # Only include groups with 2+ valid entries
            if len(group_features) >= 2:
                all_features.extend(group_features)
                all_labels.extend(group_labels)
                groups.append(len(group_features))

        if not groups or len(all_features) < 4:
            logger.info(
                "Insufficient valid training data: %d features, %d groups",
                len(all_features), len(groups)
            )
            return None

        X = np.array(all_features, dtype=np.float64)
        y = np.array(all_labels, dtype=np.float64)

        logger.info(
            "Prepared training data: %d samples, %d groups, %d features",
            X.shape[0], len(groups), X.shape[1]
        )

        return X, y, groups
|
814
|
+
|
|
815
|
+
|
|
816
|
+
# ============================================================================
|
|
817
|
+
# Module-level convenience
|
|
818
|
+
# ============================================================================
|
|
819
|
+
|
|
820
|
+
def get_phase() -> str:
    """Quick check of the current ranking phase via a throwaway ranker.

    Any failure (missing DB, import error, etc.) is treated as the safe
    default: 'baseline'.
    """
    try:
        return AdaptiveRanker().get_phase()
    except Exception:
        return 'baseline'