superlocalmemory 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +167 -1803
  2. package/README.md +212 -397
  3. package/bin/slm +179 -3
  4. package/bin/superlocalmemoryv2:learning +4 -0
  5. package/bin/superlocalmemoryv2:patterns +4 -0
  6. package/docs/ACCESSIBILITY.md +291 -0
  7. package/docs/ARCHITECTURE.md +12 -6
  8. package/docs/FRAMEWORK-INTEGRATIONS.md +300 -0
  9. package/docs/MCP-MANUAL-SETUP.md +14 -4
  10. package/install.sh +99 -3
  11. package/mcp_server.py +291 -1
  12. package/package.json +2 -1
  13. package/requirements-learning.txt +12 -0
  14. package/scripts/verify-v27.sh +233 -0
  15. package/skills/slm-show-patterns/SKILL.md +224 -0
  16. package/src/learning/__init__.py +201 -0
  17. package/src/learning/adaptive_ranker.py +826 -0
  18. package/src/learning/cross_project_aggregator.py +866 -0
  19. package/src/learning/engagement_tracker.py +638 -0
  20. package/src/learning/feature_extractor.py +461 -0
  21. package/src/learning/feedback_collector.py +690 -0
  22. package/src/learning/learning_db.py +842 -0
  23. package/src/learning/project_context_manager.py +582 -0
  24. package/src/learning/source_quality_scorer.py +685 -0
  25. package/src/learning/synthetic_bootstrap.py +1047 -0
  26. package/src/learning/tests/__init__.py +0 -0
  27. package/src/learning/tests/test_adaptive_ranker.py +328 -0
  28. package/src/learning/tests/test_aggregator.py +309 -0
  29. package/src/learning/tests/test_feedback_collector.py +295 -0
  30. package/src/learning/tests/test_learning_db.py +606 -0
  31. package/src/learning/tests/test_project_context.py +296 -0
  32. package/src/learning/tests/test_source_quality.py +355 -0
  33. package/src/learning/tests/test_synthetic_bootstrap.py +433 -0
  34. package/src/learning/tests/test_workflow_miner.py +322 -0
  35. package/src/learning/workflow_pattern_miner.py +665 -0
  36. package/ui/index.html +346 -13
  37. package/ui/js/clusters.js +90 -1
  38. package/ui/js/graph-core.js +445 -0
  39. package/ui/js/graph-cytoscape-monolithic-backup.js +1168 -0
  40. package/ui/js/graph-cytoscape.js +1168 -0
  41. package/ui/js/graph-d3-backup.js +32 -0
  42. package/ui/js/graph-filters.js +220 -0
  43. package/ui/js/graph-interactions.js +354 -0
  44. package/ui/js/graph-ui.js +214 -0
  45. package/ui/js/memories.js +52 -0
  46. package/ui/js/modal.js +104 -1
@@ -0,0 +1,826 @@
1
#!/usr/bin/env python3
"""
SuperLocalMemory V2 - Adaptive Ranker (v2.7)
Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License

Repository: https://github.com/varun369/SuperLocalMemoryV2
Author: Varun Pratap Bhardwaj (Solution Architect)

NOTICE: This software is protected by MIT License.
Attribution must be preserved in all copies or derivatives.

AdaptiveRanker — Three-phase adaptive re-ranking engine.

This is the core ranking engine for v2.7 "Your AI Learns You". It sits
between the existing search methods (FTS5 + TF-IDF + HNSW) and the final
result list, re-ordering candidates based on learned user preferences.

Three Phases (progressive adaptation):

Phase 0 — Baseline (< 20 feedback signals):
    Pure v2.6 behavior. No re-ranking applied. Results returned as-is
    from the existing search pipeline. Zero risk of degradation.

Phase 1 — Rule-Based (20-199 signals):
    Applies learned-pattern boosting to search results. Uses feature
    extraction to compute boost multipliers for tech match, project
    match, recency, and source quality. Deterministic and interpretable.

Phase 2 — ML Model (200+ signals across 50+ unique queries):
    LightGBM LambdaRank re-ranker. Trained on real feedback data
    (and optionally bootstrapped from synthetic data). Produces ML
    scores that replace the original ranking order.

Design Principles:
- LightGBM is OPTIONAL. If not installed, falls back to rule-based.
- Any exception in re-ranking falls back to original v2.6 results.
- Model is loaded lazily and cached in memory.
- Training is explicit (called by user or scheduled), never implicit.
- Original scores are preserved as 'base_score' for diagnostics.

Research Backing:
- eKNOW 2025: BM25 -> re-ranker pipeline for personal collections
- MACLA (arXiv:2512.18950): Bayesian confidence scoring
- FCS LREC 2024: Cold-start mitigation via synthetic bootstrap

NOTE: this module previously carried two back-to-back triple-quoted
strings; only the first became ``__doc__`` and the second was a dead
no-op expression. They are merged into one real module docstring so
``help()`` and doc tooling see the full description.
"""
49
+
50
+ import logging
51
+ import threading
52
+ from datetime import datetime
53
+ from pathlib import Path
54
+ from typing import Any, Dict, List, Optional
55
+
56
+ # LightGBM is OPTIONAL — graceful fallback to rule-based ranking
57
+ try:
58
+ import lightgbm as lgb
59
+ HAS_LIGHTGBM = True
60
+ except ImportError:
61
+ lgb = None
62
+ HAS_LIGHTGBM = False
63
+
64
+ # NumPy is used for feature matrix construction (comes with sklearn)
65
+ try:
66
+ import numpy as np
67
+ HAS_NUMPY = True
68
+ except ImportError:
69
+ np = None
70
+ HAS_NUMPY = False
71
+
72
+ from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
73
+
74
# Module logger; callers configure handlers — this module only emits records.
logger = logging.getLogger("superlocalmemory.learning.adaptive_ranker")

# ============================================================================
# Constants
# ============================================================================

# On-disk location of the trained LightGBM ranking model (text dump format).
MODELS_DIR = Path.home() / ".claude-memory" / "models"
MODEL_PATH = MODELS_DIR / "ranker.txt"

# Phase thresholds — how many feedback signals to trigger each phase
PHASE_THRESHOLDS = {
    'baseline': 0,      # 0 feedback samples -> no re-ranking
    'rule_based': 20,   # 20+ feedback -> rule-based boosting
    'ml_model': 200,    # 200+ feedback across 50+ unique queries -> ML
}

# Minimum unique queries required for ML phase (prevents overfitting
# to a small number of repeated queries)
MIN_UNIQUE_QUERIES_FOR_ML = 50

# Rule-based boost multipliers (Phase 1)
# These are conservative — they nudge the ranking without flipping order.
# Thresholds in the comments mirror the checks in _rerank_rule_based;
# the day-based recency comments describe the upstream feature scaling
# (computed by FeatureExtractor — confirm there if the mapping changes).
_RULE_BOOST = {
    'tech_match_strong': 1.3,    # tech_match feature >= 0.8 (2+ preferred techs)
    'tech_match_weak': 1.1,      # tech_match feature >= 0.4 (1 preferred tech)
    'project_match': 1.5,        # Memory from current project
    'project_unknown': 1.0,      # No project context — no boost
    'project_mismatch': 0.9,     # Memory from different project
    'source_quality_high': 1.2,  # Source quality >= 0.7
    'source_quality_low': 0.85,  # Source quality < 0.3
    'recency_boost_max': 1.2,    # Recent memory (< 7 days)
    'recency_penalty_max': 0.8,  # Old memory (> 365 days)
    'high_importance': 1.15,     # Importance >= 8
    'high_access': 1.1,          # Accessed 5+ times
}

# LightGBM training parameters — tuned for small, personal datasets
# Aggressive regularization prevents overfitting on < 10K samples.
# 'n_estimators' is popped off before lgb.train() and used as
# num_boost_round (see AdaptiveRanker.train).
TRAINING_PARAMS = {
    'objective': 'lambdarank',
    'metric': 'ndcg',
    'ndcg_eval_at': [5, 10],
    'learning_rate': 0.05,
    'num_leaves': 16,
    'max_depth': 4,
    'min_child_samples': 10,
    'subsample': 0.8,
    'reg_alpha': 0.1,
    'reg_lambda': 1.0,
    'boosting_type': 'dart',
    'n_estimators': 50,
    'verbose': -1,
}
127
+
128
+
129
class AdaptiveRanker:
    """
    Three-phase adaptive re-ranking engine.

    Usage (called by memory_store_v2.search or mcp_server recall):
        ranker = AdaptiveRanker()
        results = ranker.rerank(search_results, query, context={
            'tech_preferences': {...},
            'current_project': 'MyProject',
            'source_scores': {...},
            'workflow_phase': 'testing',
        })

    The caller wraps this in try/except — any exception here causes
    fallback to original v2.6 results. Zero risk of degradation.
    """

    # Re-exported for callers/tests that only hold the class object.
    PHASE_THRESHOLDS = PHASE_THRESHOLDS
    MODEL_PATH = MODEL_PATH

    def __init__(self, learning_db=None):
        """
        Initialize AdaptiveRanker.

        Args:
            learning_db: Optional LearningDB instance. If None, imports
                         and creates one lazily.
        """
        self._learning_db = learning_db
        self._feature_extractor = FeatureExtractor()
        self._model = None  # Loaded lazily on first ML rerank
        self._model_load_attempted = False
        # NOTE: plain threading.Lock is NON-reentrant. Never call a
        # lock-acquiring method (e.g. _load_model) while holding it.
        self._lock = threading.Lock()

    # ========================================================================
    # LearningDB Access
    # ========================================================================

    def _get_learning_db(self):
        """Get or create the LearningDB instance (None if unavailable)."""
        if self._learning_db is None:
            try:
                from .learning_db import LearningDB
                self._learning_db = LearningDB()
            except Exception as e:
                logger.warning("Cannot access LearningDB: %s", e)
                return None
        return self._learning_db

    # ========================================================================
    # Phase Detection
    # ========================================================================

    def get_phase(self) -> str:
        """
        Determine the current ranking phase based on feedback data.

        Returns:
            'baseline'   — Not enough data for personalization
            'rule_based' — Enough data for rule-based boosting
            'ml_model'   — Enough data for ML ranking (if LightGBM available)
        """
        ldb = self._get_learning_db()
        if ldb is None:
            return 'baseline'

        try:
            feedback_count = ldb.get_feedback_count()
            unique_queries = ldb.get_unique_query_count()
        except Exception as e:
            logger.warning("Failed to check feedback counts: %s", e)
            return 'baseline'

        # Phase 2: ML model — requires enough data AND LightGBM AND numpy
        if (
            feedback_count >= PHASE_THRESHOLDS['ml_model']
            and unique_queries >= MIN_UNIQUE_QUERIES_FOR_ML
            and HAS_LIGHTGBM
            and HAS_NUMPY
        ):
            return 'ml_model'

        # Phase 1: Rule-based — just needs minimum feedback
        if feedback_count >= PHASE_THRESHOLDS['rule_based']:
            return 'rule_based'

        # Phase 0: Not enough data yet
        return 'baseline'

    def get_phase_info(self) -> Dict[str, Any]:
        """
        Return detailed phase information for diagnostics.

        Returns:
            Dict with phase, feedback_count, unique_queries, thresholds,
            model_loaded, lightgbm_available.
        """
        ldb = self._get_learning_db()
        feedback_count = 0
        unique_queries = 0

        if ldb is not None:
            try:
                feedback_count = ldb.get_feedback_count()
                unique_queries = ldb.get_unique_query_count()
            except Exception:
                # Diagnostics must never raise; report zeros instead.
                pass

        phase = self.get_phase()

        return {
            'phase': phase,
            'feedback_count': feedback_count,
            'unique_queries': unique_queries,
            'thresholds': dict(PHASE_THRESHOLDS),
            'min_unique_queries_for_ml': MIN_UNIQUE_QUERIES_FOR_ML,
            'model_loaded': self._model is not None,
            'model_path_exists': MODEL_PATH.exists(),
            'lightgbm_available': HAS_LIGHTGBM,
            'numpy_available': HAS_NUMPY,
        }

    # ========================================================================
    # Main Re-ranking Entry Point
    # ========================================================================

    def rerank(
        self,
        results: List[dict],
        query: str,
        context: Optional[dict] = None,
    ) -> List[dict]:
        """
        Re-rank search results based on learned user preferences.

        This is the main entry point, called after the search pipeline
        produces initial results. It determines the current phase and
        routes to the appropriate ranking strategy.

        Args:
            results: List of memory dicts from search (with 'score' field).
            query: The recall query string.
            context: Optional context dict with:
                - tech_preferences: Dict[str, dict] — user's tech prefs
                - current_project: str — active project name
                - source_scores: Dict[str, float] — source quality scores
                - workflow_phase: str — current workflow phase

        Returns:
            Re-ranked list of memory dicts. Each memory gets:
            - 'base_score': Original score from search pipeline
            - 'ranking_phase': Which phase was used
            - 'score': Updated score (may differ from base_score)

        CRITICAL: The caller wraps this in try/except. Any exception
        causes fallback to original v2.6 results. This method must
        never corrupt the results list.
        """
        if not results:
            return results

        # Short-circuit: don't re-rank trivially small result sets
        if len(results) <= 1:
            for r in results:
                r['base_score'] = r.get('score', 0.0)
                r['ranking_phase'] = 'baseline'
            return results

        context = context or {}

        # Set up feature extraction context (once per query)
        self._feature_extractor.set_context(
            source_scores=context.get('source_scores'),
            tech_preferences=context.get('tech_preferences'),
            current_project=context.get('current_project'),
            workflow_phase=context.get('workflow_phase'),
        )

        # Determine phase and route
        phase = self.get_phase()

        if phase == 'baseline':
            # Phase 0: No re-ranking — preserve original order
            for r in results:
                r['base_score'] = r.get('score', 0.0)
                r['ranking_phase'] = 'baseline'
            return results

        elif phase == 'rule_based':
            return self._rerank_rule_based(results, query, context)

        elif phase == 'ml_model':
            # Try ML first, fall back to rule-based if model fails
            try:
                return self._rerank_ml(results, query, context)
            except Exception as e:
                logger.warning(
                    "ML re-ranking failed, falling back to rule-based: %s", e
                )
                return self._rerank_rule_based(results, query, context)

        # Defensive: unknown phase -> no re-ranking
        for r in results:
            r['base_score'] = r.get('score', 0.0)
            r['ranking_phase'] = 'unknown'
        return results

    # ========================================================================
    # Phase 1: Rule-Based Re-ranking
    # ========================================================================

    def _rerank_rule_based(
        self,
        results: List[dict],
        query: str,
        context: dict,
    ) -> List[dict]:
        """
        Phase 1: Apply rule-based boosting using extracted features.

        Each result's score is multiplied by boost factors derived from
        feature values. The boosts are conservative — they nudge the
        ranking order without dramatically flipping results.

        Feature indices below follow FEATURE_NAMES ordering from
        feature_extractor — confirm there if the layout changes.
        """
        feature_vectors = self._feature_extractor.extract_batch(results, query)

        for i, result in enumerate(results):
            base_score = result.get('score', 0.0)
            result['base_score'] = base_score
            result['ranking_phase'] = 'rule_based'

            # Defensive: extractor may return fewer vectors than results
            if i >= len(feature_vectors):
                continue

            features = feature_vectors[i]
            boost = 1.0

            # Feature [2]: tech_match
            tech_match = features[2]
            if tech_match >= 0.8:
                boost *= _RULE_BOOST['tech_match_strong']
            elif tech_match >= 0.4:
                boost *= _RULE_BOOST['tech_match_weak']

            # Feature [3]: project_match
            project_match = features[3]
            if project_match >= 0.9:
                boost *= _RULE_BOOST['project_match']
            elif project_match <= 0.35:
                boost *= _RULE_BOOST['project_mismatch']

            # Feature [5]: source_quality
            source_quality = features[5]
            if source_quality >= 0.7:
                boost *= _RULE_BOOST['source_quality_high']
            elif source_quality < 0.3:
                boost *= _RULE_BOOST['source_quality_low']

            # Feature [7]: recency_score (exponential decay)
            recency = features[7]
            # Linear interpolation between penalty and boost
            recency_factor = (
                _RULE_BOOST['recency_penalty_max']
                + recency * (
                    _RULE_BOOST['recency_boost_max']
                    - _RULE_BOOST['recency_penalty_max']
                )
            )
            boost *= recency_factor

            # Feature [6]: importance_norm
            importance_norm = features[6]
            if importance_norm >= 0.8:
                boost *= _RULE_BOOST['high_importance']

            # Feature [8]: access_frequency
            access_freq = features[8]
            if access_freq >= 0.5:
                boost *= _RULE_BOOST['high_access']

            # Apply boost to score
            result['score'] = base_score * boost

        # Re-sort by boosted score (highest first)
        results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
        return results

    # ========================================================================
    # Phase 2: ML Re-ranking (LightGBM)
    # ========================================================================

    def _rerank_ml(
        self,
        results: List[dict],
        query: str,
        context: dict,
    ) -> List[dict]:
        """
        Phase 2: LightGBM LambdaRank re-ranking.

        Extracts features, runs the trained model, and sorts by ML scores.
        Preserves original score as 'base_score' and adds 'ml_score'.

        Raises on any failure — rerank() catches and falls back to
        rule-based ranking.
        """
        if not HAS_LIGHTGBM or not HAS_NUMPY:
            raise RuntimeError("LightGBM or NumPy not available for ML ranking")

        # Load model if not cached
        model = self._load_model()
        if model is None:
            raise RuntimeError("No trained ranking model available")

        # Extract features
        feature_vectors = self._feature_extractor.extract_batch(results, query)
        if not feature_vectors:
            raise ValueError("Feature extraction returned empty results")

        # Build feature matrix
        X = np.array(feature_vectors, dtype=np.float64)

        # Validate shape against the extractor's declared feature count
        if X.shape[1] != NUM_FEATURES:
            raise ValueError(
                f"Feature dimension mismatch: expected {NUM_FEATURES}, "
                f"got {X.shape[1]}"
            )

        # Predict scores
        ml_scores = model.predict(X)

        # Annotate results with ML scores
        for i, result in enumerate(results):
            result['base_score'] = result.get('score', 0.0)
            result['ranking_phase'] = 'ml_model'
            if i < len(ml_scores):
                result['ml_score'] = float(ml_scores[i])
                result['score'] = float(ml_scores[i])
            else:
                result['ml_score'] = 0.0

        # Re-sort by ML score (highest first)
        results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
        return results

    # ========================================================================
    # Model Management
    # ========================================================================

    def _load_model(self):
        """
        Load LightGBM model from disk (lazy, cached, thread-safe).

        Acquires self._lock; callers must NOT hold the lock when
        calling this method (the lock is non-reentrant).

        Returns:
            lgb.Booster instance or None if unavailable.
        """
        # Return cached model if already loaded
        if self._model is not None:
            return self._model

        # Avoid repeated failed load attempts
        if self._model_load_attempted:
            return None

        with self._lock:
            # Double-check after acquiring lock
            if self._model is not None:
                return self._model
            if self._model_load_attempted:
                return None

            self._model_load_attempted = True

            if not HAS_LIGHTGBM:
                logger.info("LightGBM not installed — ML ranking unavailable")
                return None

            if not MODEL_PATH.exists():
                logger.info(
                    "No ranking model at %s — ML ranking unavailable",
                    MODEL_PATH
                )
                return None

            try:
                self._model = lgb.Booster(model_file=str(MODEL_PATH))
                logger.info("Loaded ranking model from %s", MODEL_PATH)
                return self._model
            except Exception as e:
                logger.warning("Failed to load ranking model: %s", e)
                return None

    def reload_model(self):
        """
        Force reload of the ranking model from disk.

        Call this after training a new model to pick up the updated weights.

        BUGFIX: the previous version invoked _load_model() while still
        holding self._lock; _load_model() re-acquires the same
        non-reentrant threading.Lock, which deadlocks the calling
        thread. The cache reset now happens under the lock and the
        (lock-taking) load happens after release — same observable
        behavior, no self-deadlock.
        """
        with self._lock:
            self._model = None
            self._model_load_attempted = False
        # Trigger fresh load (acquires the lock internally)
        return self._load_model()

    # ========================================================================
    # Model Training
    # ========================================================================

    def train(self, force: bool = False) -> Optional[Dict[str, Any]]:
        """
        Train or retrain the LightGBM ranking model.

        Uses continued training (init_model) if a model already exists,
        incorporating new feedback data incrementally.

        Args:
            force: If True, train even if below ML threshold.
                   Useful for synthetic bootstrap training.

        Returns:
            Training metadata dict, or None if training not possible.
            Metadata includes: model_version, training_samples, ndcg_at_10,
            model_path, created_at.
        """
        if not HAS_LIGHTGBM or not HAS_NUMPY:
            logger.warning(
                "Cannot train: LightGBM=%s, NumPy=%s",
                HAS_LIGHTGBM, HAS_NUMPY
            )
            return None

        ldb = self._get_learning_db()
        if ldb is None:
            logger.warning("Cannot train: LearningDB unavailable")
            return None

        # Check if we have enough data (unless forced)
        if not force:
            feedback_count = ldb.get_feedback_count()
            unique_queries = ldb.get_unique_query_count()
            if (
                feedback_count < PHASE_THRESHOLDS['ml_model']
                or unique_queries < MIN_UNIQUE_QUERIES_FOR_ML
            ):
                logger.info(
                    "Insufficient data for training: %d feedback / %d queries "
                    "(need %d / %d)",
                    feedback_count, unique_queries,
                    PHASE_THRESHOLDS['ml_model'], MIN_UNIQUE_QUERIES_FOR_ML,
                )
                return None

        # Prepare training data
        training_data = self._prepare_training_data()
        if training_data is None:
            logger.warning("No usable training data available")
            return None

        X, y, groups = training_data
        total_samples = X.shape[0]

        if total_samples < 10:
            logger.warning("Too few training samples: %d", total_samples)
            return None

        logger.info(
            "Training ranking model: %d samples, %d groups",
            total_samples, len(groups)
        )

        # Create LightGBM dataset (free_raw_data=False so the train-set
        # evaluation below can re-read the raw matrix)
        train_dataset = lgb.Dataset(
            X, label=y, group=groups,
            feature_name=list(FEATURE_NAMES),
            free_raw_data=False,
        )

        # Training parameters — n_estimators becomes num_boost_round
        params = dict(TRAINING_PARAMS)
        n_estimators = params.pop('n_estimators', 50)

        # Check for existing model (continued training)
        init_model = None
        if MODEL_PATH.exists():
            try:
                init_model = lgb.Booster(model_file=str(MODEL_PATH))
                logger.info("Continuing training from existing model")
            except Exception:
                logger.info("Starting fresh training (existing model unreadable)")
                init_model = None

        # Train
        try:
            booster = lgb.train(
                params,
                train_dataset,
                num_boost_round=n_estimators,
                init_model=init_model,
                valid_sets=[train_dataset],
                valid_names=['train'],
                callbacks=[lgb.log_evaluation(period=0)],  # Silent training
            )
        except Exception as e:
            logger.error("LightGBM training failed: %s", e)
            return None

        # Save model
        MODELS_DIR.mkdir(parents=True, exist_ok=True)
        try:
            booster.save_model(str(MODEL_PATH))
            logger.info("Ranking model saved to %s", MODEL_PATH)
        except Exception as e:
            logger.error("Failed to save ranking model: %s", e)
            return None

        # Extract NDCG@10 from training evaluation (if available).
        # BUGFIX: Booster.eval_train() takes no Dataset argument — its only
        # parameter is an optional feval callable — and its result tuples
        # are (data_name, eval_name, value, is_higher_better). The old code
        # passed a fresh Dataset and matched 'ndcg@10' against the DATA
        # name, so ndcg_at_10 was silently never populated.
        ndcg_at_10 = None
        try:
            for _data_name, eval_name, value, _is_higher_better in booster.eval_train():
                if 'ndcg@10' in eval_name:
                    ndcg_at_10 = value
                    break
        except Exception:
            pass

        # Record metadata in learning_db
        model_version = datetime.now().strftime("v%Y%m%d_%H%M%S")
        try:
            ldb.record_model_training(
                model_version=model_version,
                training_samples=total_samples,
                real_samples=total_samples,
                synthetic_samples=0,
                ndcg_at_10=ndcg_at_10,
                model_path=str(MODEL_PATH),
            )
        except Exception as e:
            logger.warning("Failed to record training metadata: %s", e)

        # Reload model into cache
        self.reload_model()

        metadata = {
            'model_version': model_version,
            'training_samples': total_samples,
            'query_groups': len(groups),
            'n_estimators': n_estimators,
            'ndcg_at_10': ndcg_at_10,
            'model_path': str(MODEL_PATH),
            'continued_from': init_model is not None,
            'created_at': datetime.now().isoformat(),
        }
        logger.info("Training complete: %s", metadata)
        return metadata

    def _prepare_training_data(self) -> Optional[tuple]:
        """
        Prepare training data from feedback records.

        For each unique query (grouped by query_hash):
        - Fetch all feedback entries for that query
        - Look up the corresponding memory from memory.db
        - Extract features for each memory
        - Use signal_value as the relevance label

        Returns:
            Tuple of (X, y, groups) for LGBMRanker, or None if insufficient.
            X: numpy array (n_samples, NUM_FEATURES)
            y: numpy array (n_samples,) — relevance labels
            groups: list of ints — samples per query group
        """
        import sqlite3

        ldb = self._get_learning_db()
        if ldb is None:
            return None

        feedback = ldb.get_feedback_for_training()
        if not feedback:
            return None

        # Group feedback by query_hash
        query_groups: Dict[str, List[dict]] = {}
        for entry in feedback:
            qh = entry['query_hash']
            if qh not in query_groups:
                query_groups[qh] = []
            query_groups[qh].append(entry)

        # Filter: only keep groups with 2+ items (ranking requires pairs)
        query_groups = {
            qh: entries for qh, entries in query_groups.items()
            if len(entries) >= 2
        }

        if not query_groups:
            logger.info("No query groups with 2+ feedback entries")
            return None

        # Collect memory IDs we need to look up
        memory_ids_needed = set()
        for entries in query_groups.values():
            for entry in entries:
                memory_ids_needed.add(entry['memory_id'])

        # Fetch memories from memory.db
        memory_db_path = Path.home() / ".claude-memory" / "memory.db"
        if not memory_db_path.exists():
            logger.warning("memory.db not found at %s", memory_db_path)
            return None

        memories_by_id = {}
        try:
            conn = sqlite3.connect(str(memory_db_path), timeout=5)
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Batch fetch memories (in chunks to avoid SQLite variable limit)
            id_list = list(memory_ids_needed)
            chunk_size = 500
            for i in range(0, len(id_list), chunk_size):
                chunk = id_list[i:i + chunk_size]
                placeholders = ','.join('?' for _ in chunk)
                cursor.execute(f'''
                    SELECT id, content, summary, project_path, project_name,
                           tags, category, memory_type, importance, created_at,
                           last_accessed, access_count
                    FROM memories
                    WHERE id IN ({placeholders})
                ''', chunk)
                for row in cursor.fetchall():
                    memories_by_id[row['id']] = dict(row)
            conn.close()
        except Exception as e:
            logger.error("Failed to fetch memories for training: %s", e)
            return None

        # Build feature matrix and labels
        all_features = []
        all_labels = []
        groups = []

        # Set a neutral context for training (we don't have query-time context)
        self._feature_extractor.set_context()

        for qh, entries in query_groups.items():
            group_features = []
            group_labels = []

            for entry in entries:
                mid = entry['memory_id']
                memory = memories_by_id.get(mid)
                if memory is None:
                    continue  # Memory may have been deleted

                # Use query_keywords as proxy for query text
                query_text = entry.get('query_keywords', '') or ''

                features = self._feature_extractor.extract_features(
                    memory, query_text
                )
                group_features.append(features)
                group_labels.append(float(entry['signal_value']))

            # Only include groups with 2+ valid entries
            if len(group_features) >= 2:
                all_features.extend(group_features)
                all_labels.extend(group_labels)
                groups.append(len(group_features))

        if not groups or len(all_features) < 4:
            logger.info(
                "Insufficient valid training data: %d features, %d groups",
                len(all_features), len(groups)
            )
            return None

        X = np.array(all_features, dtype=np.float64)
        y = np.array(all_labels, dtype=np.float64)

        logger.info(
            "Prepared training data: %d samples, %d groups, %d features",
            X.shape[0], len(groups), X.shape[1]
        )

        return X, y, groups
814
+
815
+
816
+ # ============================================================================
817
+ # Module-level convenience
818
+ # ============================================================================
819
+
820
def get_phase() -> str:
    """Quick check of current ranking phase (creates temporary ranker)."""
    try:
        return AdaptiveRanker().get_phase()
    except Exception:
        # Any failure (missing DBs, import errors) means no personalization.
        return 'baseline'