superlocalmemory 2.7.2 → 2.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -1
- package/README.md +1 -1
- package/docs/ARCHITECTURE.md +8 -8
- package/docs/COMPRESSION-README.md +1 -1
- package/docs/SEARCH-ENGINE-V2.2.0.md +1 -0
- package/hooks/post-recall-hook.js +53 -0
- package/mcp_server.py +425 -17
- package/package.json +1 -1
- package/skills/slm-recall/SKILL.md +1 -0
- package/src/agent_registry.py +3 -3
- package/src/auto_backup.py +64 -31
- package/src/graph_engine.py +15 -11
- package/src/learning/adaptive_ranker.py +70 -1
- package/src/learning/feature_extractor.py +131 -16
- package/src/learning/feedback_collector.py +114 -0
- package/src/learning/learning_db.py +158 -34
- package/src/learning/tests/test_adaptive_ranker.py +5 -4
- package/src/learning/tests/test_aggregator.py +4 -3
- package/src/learning/tests/test_feedback_collector.py +7 -4
- package/src/learning/tests/test_signal_inference.py +399 -0
- package/src/learning/tests/test_synthetic_bootstrap.py +1 -1
- package/src/trust_scorer.py +288 -74
- package/ui/app.js +4 -4
- package/ui/index.html +38 -0
- package/ui/js/agents.js +4 -4
- package/ui/js/feedback.js +333 -0
- package/ui/js/learning.js +117 -0
- package/ui/js/modal.js +22 -1
- package/ui/js/profiles.js +8 -0
- package/ui/js/settings.js +58 -1
package/src/auto_backup.py
CHANGED
|
@@ -175,6 +175,22 @@ class AutoBackup:
|
|
|
175
175
|
|
|
176
176
|
logger.info(f"Backup created: {backup_name} ({size_mb:.1f} MB)")
|
|
177
177
|
|
|
178
|
+
# v2.7.4: Also backup learning.db if it exists
|
|
179
|
+
learning_db = self.db_path.parent / "learning.db"
|
|
180
|
+
if learning_db.exists():
|
|
181
|
+
try:
|
|
182
|
+
learning_backup_name = f"learning-{timestamp}{label_suffix}.db"
|
|
183
|
+
learning_backup_path = self.backup_dir / learning_backup_name
|
|
184
|
+
l_source = sqlite3.connect(learning_db)
|
|
185
|
+
l_backup = sqlite3.connect(learning_backup_path)
|
|
186
|
+
l_source.backup(l_backup)
|
|
187
|
+
l_backup.close()
|
|
188
|
+
l_source.close()
|
|
189
|
+
l_size = learning_backup_path.stat().st_size / (1024 * 1024)
|
|
190
|
+
logger.info(f"Learning backup created: {learning_backup_name} ({l_size:.1f} MB)")
|
|
191
|
+
except Exception as le:
|
|
192
|
+
logger.warning(f"Learning DB backup failed (non-critical): {le}")
|
|
193
|
+
|
|
178
194
|
# Enforce retention policy
|
|
179
195
|
self._enforce_retention()
|
|
180
196
|
|
|
@@ -191,24 +207,24 @@ class AutoBackup:
|
|
|
191
207
|
"""Remove old backups exceeding max_backups limit."""
|
|
192
208
|
max_backups = self.config.get('max_backups', DEFAULT_MAX_BACKUPS)
|
|
193
209
|
|
|
194
|
-
#
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
210
|
+
# Enforce for both memory and learning backups (v2.7.4)
|
|
211
|
+
for pattern in ['memory-*.db', 'learning-*.db']:
|
|
212
|
+
backups = sorted(
|
|
213
|
+
self.backup_dir.glob(pattern),
|
|
214
|
+
key=lambda f: f.stat().st_mtime
|
|
215
|
+
)
|
|
199
216
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
logger.error(f"Failed to remove old backup {oldest.name}: {e}")
|
|
217
|
+
while len(backups) > max_backups:
|
|
218
|
+
oldest = backups.pop(0)
|
|
219
|
+
try:
|
|
220
|
+
oldest.unlink()
|
|
221
|
+
logger.info(f"Removed old backup: {oldest.name}")
|
|
222
|
+
except OSError as e:
|
|
223
|
+
logger.error(f"Failed to remove old backup {oldest.name}: {e}")
|
|
208
224
|
|
|
209
225
|
def list_backups(self) -> List[Dict]:
|
|
210
226
|
"""
|
|
211
|
-
List all available backups.
|
|
227
|
+
List all available backups (memory.db + learning.db).
|
|
212
228
|
|
|
213
229
|
Returns:
|
|
214
230
|
List of backup info dictionaries
|
|
@@ -218,20 +234,26 @@ class AutoBackup:
|
|
|
218
234
|
if not self.backup_dir.exists():
|
|
219
235
|
return backups
|
|
220
236
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
'
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
237
|
+
# v2.7.4: List both memory and learning backups
|
|
238
|
+
for pattern in ['memory-*.db', 'learning-*.db']:
|
|
239
|
+
for backup_file in sorted(
|
|
240
|
+
self.backup_dir.glob(pattern),
|
|
241
|
+
key=lambda f: f.stat().st_mtime,
|
|
242
|
+
reverse=True
|
|
243
|
+
):
|
|
244
|
+
stat = backup_file.stat()
|
|
245
|
+
db_type = 'learning' if backup_file.name.startswith('learning-') else 'memory'
|
|
246
|
+
backups.append({
|
|
247
|
+
'filename': backup_file.name,
|
|
248
|
+
'path': str(backup_file),
|
|
249
|
+
'size_mb': round(stat.st_size / (1024 * 1024), 2),
|
|
250
|
+
'created': datetime.fromtimestamp(stat.st_mtime).isoformat(),
|
|
251
|
+
'age_hours': round((datetime.now() - datetime.fromtimestamp(stat.st_mtime)).total_seconds() / 3600, 1),
|
|
252
|
+
'type': db_type,
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
# Sort all by creation time (newest first)
|
|
256
|
+
backups.sort(key=lambda b: b['created'], reverse=True)
|
|
235
257
|
return backups
|
|
236
258
|
|
|
237
259
|
def restore_backup(self, filename: str) -> bool:
|
|
@@ -254,14 +276,20 @@ class AutoBackup:
|
|
|
254
276
|
# Create a safety backup of current state first
|
|
255
277
|
self.create_backup(label='pre-restore')
|
|
256
278
|
|
|
279
|
+
# Determine target DB based on filename prefix
|
|
280
|
+
if filename.startswith('learning-'):
|
|
281
|
+
target_db = self.db_path.parent / "learning.db"
|
|
282
|
+
else:
|
|
283
|
+
target_db = self.db_path
|
|
284
|
+
|
|
257
285
|
# Restore using SQLite backup API
|
|
258
286
|
source_conn = sqlite3.connect(backup_path)
|
|
259
|
-
target_conn = sqlite3.connect(
|
|
287
|
+
target_conn = sqlite3.connect(target_db)
|
|
260
288
|
source_conn.backup(target_conn)
|
|
261
289
|
target_conn.close()
|
|
262
290
|
source_conn.close()
|
|
263
291
|
|
|
264
|
-
logger.info(f"Restored from backup: {filename}")
|
|
292
|
+
logger.info(f"Restored from backup: {filename} → {target_db.name}")
|
|
265
293
|
return True
|
|
266
294
|
|
|
267
295
|
except Exception as e:
|
|
@@ -299,6 +327,10 @@ class AutoBackup:
|
|
|
299
327
|
else:
|
|
300
328
|
interval_display = f"{hours} hour(s)"
|
|
301
329
|
|
|
330
|
+
# v2.7.4: Separate counts for memory vs learning backups
|
|
331
|
+
memory_backups = [b for b in backups if b.get('type') == 'memory']
|
|
332
|
+
learning_backups = [b for b in backups if b.get('type') == 'learning']
|
|
333
|
+
|
|
302
334
|
return {
|
|
303
335
|
'enabled': self.config.get('enabled', True),
|
|
304
336
|
'interval_hours': hours,
|
|
@@ -307,7 +339,8 @@ class AutoBackup:
|
|
|
307
339
|
'last_backup': self.config.get('last_backup'),
|
|
308
340
|
'last_backup_file': self.config.get('last_backup_file'),
|
|
309
341
|
'next_backup': next_backup,
|
|
310
|
-
'backup_count': len(
|
|
342
|
+
'backup_count': len(memory_backups),
|
|
343
|
+
'learning_backup_count': len(learning_backups),
|
|
311
344
|
'total_size_mb': round(sum(b['size_mb'] for b in backups), 2),
|
|
312
345
|
'backups': backups,
|
|
313
346
|
}
|
package/src/graph_engine.py
CHANGED
|
@@ -297,12 +297,11 @@ class ClusterBuilder:
|
|
|
297
297
|
Returns:
|
|
298
298
|
Number of clusters created
|
|
299
299
|
"""
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
raise ImportError("python-igraph and leidenalg required. Install: pip install python-igraph leidenalg")
|
|
300
|
+
if not IGRAPH_AVAILABLE:
|
|
301
|
+
logger.warning("igraph/leidenalg not installed. Graph clustering disabled. Install with: pip3 install python-igraph leidenalg")
|
|
302
|
+
return 0
|
|
303
|
+
import igraph as ig
|
|
304
|
+
import leidenalg
|
|
306
305
|
|
|
307
306
|
conn = sqlite3.connect(self.db_path)
|
|
308
307
|
cursor = conn.cursor()
|
|
@@ -457,11 +456,11 @@ class ClusterBuilder:
|
|
|
457
456
|
Returns:
|
|
458
457
|
Dictionary with hierarchical clustering statistics
|
|
459
458
|
"""
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
459
|
+
if not IGRAPH_AVAILABLE:
|
|
460
|
+
logger.warning("igraph/leidenalg not installed. Hierarchical clustering disabled. Install with: pip3 install python-igraph leidenalg")
|
|
461
|
+
return {'subclusters_created': 0, 'depth_reached': 0}
|
|
462
|
+
import igraph as ig
|
|
463
|
+
import leidenalg
|
|
465
464
|
|
|
466
465
|
conn = sqlite3.connect(self.db_path)
|
|
467
466
|
cursor = conn.cursor()
|
|
@@ -512,6 +511,8 @@ class ClusterBuilder:
|
|
|
512
511
|
profile: str, min_size: int, max_depth: int,
|
|
513
512
|
current_depth: int) -> Tuple[int, int]:
|
|
514
513
|
"""Recursively sub-cluster a community using Leiden."""
|
|
514
|
+
if not IGRAPH_AVAILABLE:
|
|
515
|
+
return 0, current_depth - 1
|
|
515
516
|
import igraph as ig
|
|
516
517
|
import leidenalg
|
|
517
518
|
|
|
@@ -1038,6 +1039,9 @@ class GraphEngine:
|
|
|
1038
1039
|
'summaries_generated': summaries,
|
|
1039
1040
|
'time_seconds': round(elapsed, 2)
|
|
1040
1041
|
}
|
|
1042
|
+
if not IGRAPH_AVAILABLE:
|
|
1043
|
+
stats['warning'] = 'igraph/leidenalg not installed — graph built without clustering. Install with: pip3 install python-igraph leidenalg'
|
|
1044
|
+
|
|
1041
1045
|
|
|
1042
1046
|
logger.info(f"Graph build complete: {stats}")
|
|
1043
1047
|
return stats
|
|
@@ -296,12 +296,24 @@ class AdaptiveRanker:
|
|
|
296
296
|
|
|
297
297
|
context = context or {}
|
|
298
298
|
|
|
299
|
+
# Fetch signal stats for features [10-11] (v2.7.4)
|
|
300
|
+
signal_stats = {}
|
|
301
|
+
ldb = self._get_learning_db()
|
|
302
|
+
if ldb:
|
|
303
|
+
try:
|
|
304
|
+
memory_ids = [r.get('id') for r in results if r.get('id')]
|
|
305
|
+
if memory_ids:
|
|
306
|
+
signal_stats = ldb.get_signal_stats_for_memories(memory_ids)
|
|
307
|
+
except Exception:
|
|
308
|
+
pass # Signal stats failure is not critical
|
|
309
|
+
|
|
299
310
|
# Set up feature extraction context (once per query)
|
|
300
311
|
self._feature_extractor.set_context(
|
|
301
312
|
source_scores=context.get('source_scores'),
|
|
302
313
|
tech_preferences=context.get('tech_preferences'),
|
|
303
314
|
current_project=context.get('current_project'),
|
|
304
315
|
workflow_phase=context.get('workflow_phase'),
|
|
316
|
+
signal_stats=signal_stats,
|
|
305
317
|
)
|
|
306
318
|
|
|
307
319
|
# Determine phase and route
|
|
@@ -406,6 +418,20 @@ class AdaptiveRanker:
|
|
|
406
418
|
if access_freq >= 0.5:
|
|
407
419
|
boost *= _RULE_BOOST['high_access']
|
|
408
420
|
|
|
421
|
+
# Feature [10]: signal_count (v2.7.4 — feedback volume)
|
|
422
|
+
if len(features) > 10:
|
|
423
|
+
signal_count = features[10]
|
|
424
|
+
if signal_count >= 0.3: # 3+ signals
|
|
425
|
+
boost *= 1.1 # Mild boost for well-known memories
|
|
426
|
+
|
|
427
|
+
# Feature [11]: avg_signal_value (v2.7.4 — feedback quality)
|
|
428
|
+
if len(features) > 11:
|
|
429
|
+
avg_signal = features[11]
|
|
430
|
+
if avg_signal >= 0.7:
|
|
431
|
+
boost *= 1.15 # Boost memories with positive feedback
|
|
432
|
+
elif avg_signal < 0.3 and avg_signal > 0.0:
|
|
433
|
+
boost *= 0.85 # Penalize memories with negative feedback
|
|
434
|
+
|
|
409
435
|
# Apply boost to score
|
|
410
436
|
result['score'] = base_score * boost
|
|
411
437
|
|
|
@@ -509,13 +535,56 @@ class AdaptiveRanker:
|
|
|
509
535
|
return None
|
|
510
536
|
|
|
511
537
|
try:
|
|
512
|
-
|
|
538
|
+
model = lgb.Booster(model_file=str(MODEL_PATH))
|
|
539
|
+
|
|
540
|
+
# v2.7.4: Check for feature dimension mismatch (10→12 upgrade)
|
|
541
|
+
model_num_features = model.num_feature()
|
|
542
|
+
if model_num_features != NUM_FEATURES:
|
|
543
|
+
logger.info(
|
|
544
|
+
"Feature mismatch: model has %d features, expected %d. "
|
|
545
|
+
"Triggering auto-retrain in background.",
|
|
546
|
+
model_num_features, NUM_FEATURES,
|
|
547
|
+
)
|
|
548
|
+
# Delete old model and trigger re-bootstrap
|
|
549
|
+
MODEL_PATH.unlink(missing_ok=True)
|
|
550
|
+
self._trigger_retrain_background()
|
|
551
|
+
return None
|
|
552
|
+
|
|
553
|
+
self._model = model
|
|
513
554
|
logger.info("Loaded ranking model from %s", MODEL_PATH)
|
|
514
555
|
return self._model
|
|
515
556
|
except Exception as e:
|
|
516
557
|
logger.warning("Failed to load ranking model: %s", e)
|
|
517
558
|
return None
|
|
518
559
|
|
|
560
|
+
def _trigger_retrain_background(self):
    """
    Start a best-effort model re-bootstrap on a daemon thread (v2.7.4).

    The worker lazily imports SyntheticBootstrapper, asks it whether a
    bootstrap should run and, when bootstrap_model() returns a truthy
    result, clears the cached model state under self._lock so that the
    next load attempt is not short-circuited by the old state.

    Nothing is raised to the caller: failures inside the worker and
    failures to start the thread are logged instead of being dropped.
    """

    def _retrain():
        try:
            # Imported lazily so that the dependency is only resolved
            # when a retrain is actually requested.
            from .synthetic_bootstrap import SyntheticBootstrapper

            bootstrapper = SyntheticBootstrapper()
            if not bootstrapper.should_bootstrap():
                logger.debug("Auto-retrain skipped: bootstrap not required")
                return

            result = bootstrapper.bootstrap_model()
            if not result:
                logger.debug("Auto-retrain produced no model")
                return

            logger.info(
                "Auto-retrain complete with %d-feature model",
                NUM_FEATURES,
            )
            # Reset cached state so the new model is picked up on next load
            with self._lock:
                self._model = None
                self._model_load_attempted = False
        except Exception as e:
            logger.warning("Auto-retrain failed: %s", e)

    try:
        import threading

        thread = threading.Thread(target=_retrain, daemon=True)
        thread.start()
    except Exception as e:
        # Still best-effort, but leave a trace: previously this failure
        # was swallowed silently and no retrain ever happened.
        logger.warning("Could not start auto-retrain thread: %s", e)
|
|
587
|
+
|
|
519
588
|
def reload_model(self):
|
|
520
589
|
"""
|
|
521
590
|
Force reload of the ranking model from disk.
|
|
@@ -12,22 +12,25 @@ Attribution must be preserved in all copies or derivatives.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
"""
|
|
15
|
-
FeatureExtractor — Extracts
|
|
15
|
+
FeatureExtractor — Extracts 12-dimensional feature vectors for candidate memories.
|
|
16
16
|
|
|
17
17
|
Each memory retrieved during recall gets a feature vector that feeds into
|
|
18
18
|
the AdaptiveRanker. In Phase 1 (rule-based), features drive boosting weights.
|
|
19
19
|
In Phase 2 (ML), features become LightGBM input columns.
|
|
20
20
|
|
|
21
|
-
Feature Vector (
|
|
22
|
-
[0]
|
|
23
|
-
[1]
|
|
24
|
-
[2]
|
|
25
|
-
[3]
|
|
26
|
-
[4]
|
|
27
|
-
[5]
|
|
28
|
-
[6]
|
|
29
|
-
[7]
|
|
30
|
-
[8]
|
|
21
|
+
Feature Vector (12 dimensions):
|
|
22
|
+
[0] bm25_score — Existing retrieval score from search results
|
|
23
|
+
[1] tfidf_score — TF-IDF cosine similarity from search results
|
|
24
|
+
[2] tech_match — Does memory match user's tech preferences?
|
|
25
|
+
[3] project_match — Is memory from the current project?
|
|
26
|
+
[4] workflow_fit — Does memory fit current workflow phase?
|
|
27
|
+
[5] source_quality — Quality score of the source that created this memory
|
|
28
|
+
[6] importance_norm — Normalized importance (importance / 10.0)
|
|
29
|
+
[7] recency_score — Exponential decay based on age (180-day half-life)
|
|
30
|
+
[8] access_frequency — How often this memory was accessed (capped at 1.0)
|
|
31
|
+
[9] pattern_confidence — Max Beta-Binomial confidence from learned patterns
|
|
32
|
+
[10] signal_count — Number of feedback signals for this memory (v2.7.4)
|
|
33
|
+
[11] avg_signal_value — Average signal value for this memory (v2.7.4)
|
|
31
34
|
|
|
32
35
|
Design Principles:
|
|
33
36
|
- All features normalized to [0.0, 1.0] range for ML compatibility
|
|
@@ -35,6 +38,8 @@ Design Principles:
|
|
|
35
38
|
- No external API calls — everything computed locally
|
|
36
39
|
- Context (tech preferences, current project) set once per recall batch
|
|
37
40
|
- Thread-safe: no shared mutable state after set_context()
|
|
41
|
+
|
|
42
|
+
v2.7.4: Expanded from 10 to 12 features. Auto-retrain triggered on mismatch.
|
|
38
43
|
"""
|
|
39
44
|
|
|
40
45
|
import logging
|
|
@@ -59,6 +64,9 @@ FEATURE_NAMES = [
|
|
|
59
64
|
'importance_norm', # 6: Normalized importance (importance / 10.0)
|
|
60
65
|
'recency_score', # 7: Exponential decay based on age
|
|
61
66
|
'access_frequency', # 8: How often this memory was accessed (capped at 1.0)
|
|
67
|
+
'pattern_confidence', # 9: Max Beta-Binomial confidence from learned patterns
|
|
68
|
+
'signal_count', # 10: Number of feedback signals for this memory (v2.7.4)
|
|
69
|
+
'avg_signal_value', # 11: Average signal value for this memory (v2.7.4)
|
|
62
70
|
]
|
|
63
71
|
|
|
64
72
|
NUM_FEATURES = len(FEATURE_NAMES)
|
|
@@ -100,7 +108,7 @@ _MAX_ACCESS_COUNT = 10
|
|
|
100
108
|
|
|
101
109
|
class FeatureExtractor:
|
|
102
110
|
"""
|
|
103
|
-
Extracts
|
|
111
|
+
Extracts 12-dimensional feature vectors for candidate memories.
|
|
104
112
|
|
|
105
113
|
Usage:
|
|
106
114
|
extractor = FeatureExtractor()
|
|
@@ -109,9 +117,10 @@ class FeatureExtractor:
|
|
|
109
117
|
tech_preferences={'python': {'confidence': 0.9}, 'react': {'confidence': 0.7}},
|
|
110
118
|
current_project='SuperLocalMemoryV2',
|
|
111
119
|
workflow_phase='testing',
|
|
120
|
+
signal_stats={'42': {'count': 5, 'avg_value': 0.8}},
|
|
112
121
|
)
|
|
113
122
|
features = extractor.extract_batch(memories, query="search optimization")
|
|
114
|
-
# features is List[List[float]], shape (n_memories,
|
|
123
|
+
# features is List[List[float]], shape (n_memories, 12)
|
|
115
124
|
"""
|
|
116
125
|
|
|
117
126
|
FEATURE_NAMES = FEATURE_NAMES
|
|
@@ -125,6 +134,10 @@ class FeatureExtractor:
|
|
|
125
134
|
self._current_project_lower: Optional[str] = None
|
|
126
135
|
self._workflow_phase: Optional[str] = None
|
|
127
136
|
self._workflow_keywords: List[str] = []
|
|
137
|
+
# Pattern confidence cache: maps lowercased pattern value -> confidence
|
|
138
|
+
self._pattern_cache: Dict[str, float] = {}
|
|
139
|
+
# Signal stats cache: maps str(memory_id) -> {count, avg_value} (v2.7.4)
|
|
140
|
+
self._signal_stats: Dict[str, Dict[str, float]] = {}
|
|
128
141
|
|
|
129
142
|
def set_context(
|
|
130
143
|
self,
|
|
@@ -132,6 +145,8 @@ class FeatureExtractor:
|
|
|
132
145
|
tech_preferences: Optional[Dict[str, dict]] = None,
|
|
133
146
|
current_project: Optional[str] = None,
|
|
134
147
|
workflow_phase: Optional[str] = None,
|
|
148
|
+
pattern_confidences: Optional[Dict[str, float]] = None,
|
|
149
|
+
signal_stats: Optional[Dict[str, Dict[str, float]]] = None,
|
|
135
150
|
):
|
|
136
151
|
"""
|
|
137
152
|
Set context for feature extraction. Called once per recall query.
|
|
@@ -146,6 +161,11 @@ class FeatureExtractor:
|
|
|
146
161
|
From cross_project_aggregator or pattern_learner.
|
|
147
162
|
current_project: Name of the currently active project (if detected).
|
|
148
163
|
workflow_phase: Current workflow phase (planning, coding, testing, etc).
|
|
164
|
+
pattern_confidences: Map of lowercased pattern value -> confidence (0.0-1.0).
|
|
165
|
+
From pattern_learner.PatternStore.get_patterns().
|
|
166
|
+
Used for feature [9] pattern_confidence.
|
|
167
|
+
signal_stats: Map of str(memory_id) -> {count: int, avg_value: float}.
|
|
168
|
+
From learning_db feedback aggregation. Used for features [10-11].
|
|
149
169
|
"""
|
|
150
170
|
self._source_scores = source_scores or {}
|
|
151
171
|
self._tech_preferences = tech_preferences or {}
|
|
@@ -166,9 +186,15 @@ class FeatureExtractor:
|
|
|
166
186
|
if workflow_phase else []
|
|
167
187
|
)
|
|
168
188
|
|
|
189
|
+
# Cache pattern confidences for feature [9]
|
|
190
|
+
self._pattern_cache = pattern_confidences or {}
|
|
191
|
+
|
|
192
|
+
# Cache signal stats for features [10-11] (v2.7.4)
|
|
193
|
+
self._signal_stats = signal_stats or {}
|
|
194
|
+
|
|
169
195
|
def extract_features(self, memory: dict, query: str) -> List[float]:
|
|
170
196
|
"""
|
|
171
|
-
Extract
|
|
197
|
+
Extract 12-dimensional feature vector for a single memory.
|
|
172
198
|
|
|
173
199
|
Args:
|
|
174
200
|
memory: Memory dict from search results. Expected keys:
|
|
@@ -177,7 +203,7 @@ class FeatureExtractor:
|
|
|
177
203
|
query: The recall query string.
|
|
178
204
|
|
|
179
205
|
Returns:
|
|
180
|
-
List of
|
|
206
|
+
List of 12 floats in [0.0, 1.0] range, one per feature.
|
|
181
207
|
"""
|
|
182
208
|
return [
|
|
183
209
|
self._compute_bm25_score(memory),
|
|
@@ -189,6 +215,9 @@ class FeatureExtractor:
|
|
|
189
215
|
self._compute_importance_norm(memory),
|
|
190
216
|
self._compute_recency_score(memory),
|
|
191
217
|
self._compute_access_frequency(memory),
|
|
218
|
+
self._compute_pattern_confidence(memory),
|
|
219
|
+
self._compute_signal_count(memory),
|
|
220
|
+
self._compute_avg_signal_value(memory),
|
|
192
221
|
]
|
|
193
222
|
|
|
194
223
|
def extract_batch(
|
|
@@ -204,7 +233,7 @@ class FeatureExtractor:
|
|
|
204
233
|
query: The recall query string.
|
|
205
234
|
|
|
206
235
|
Returns:
|
|
207
|
-
List of feature vectors (List[List[float]]), shape (n,
|
|
236
|
+
List of feature vectors (List[List[float]]), shape (n, 12).
|
|
208
237
|
Returns empty list if memories is empty.
|
|
209
238
|
"""
|
|
210
239
|
if not memories:
|
|
@@ -447,6 +476,92 @@ class FeatureExtractor:
|
|
|
447
476
|
return min(access_count / float(_MAX_ACCESS_COUNT), 1.0)
|
|
448
477
|
|
|
449
478
|
|
|
479
|
+
def _compute_signal_count(self, memory: dict) -> float:
|
|
480
|
+
"""
|
|
481
|
+
Number of feedback signals for this memory, normalized to [0, 1].
|
|
482
|
+
|
|
483
|
+
Uses cached signal_stats from learning.db. Capped at 10 signals.
|
|
484
|
+
Memories with more feedback signals are more "known" to the system.
|
|
485
|
+
|
|
486
|
+
Returns:
|
|
487
|
+
min(count / 10.0, 1.0) — 0.0 if no signals, 1.0 if 10+ signals
|
|
488
|
+
0.0 if no signal stats available (v2.7.3 or earlier)
|
|
489
|
+
"""
|
|
490
|
+
memory_id = str(memory.get('id', ''))
|
|
491
|
+
if not memory_id or not self._signal_stats:
|
|
492
|
+
return 0.0
|
|
493
|
+
|
|
494
|
+
stats = self._signal_stats.get(memory_id, {})
|
|
495
|
+
count = stats.get('count', 0)
|
|
496
|
+
return min(count / 10.0, 1.0)
|
|
497
|
+
|
|
498
|
+
def _compute_avg_signal_value(self, memory: dict) -> float:
|
|
499
|
+
"""
|
|
500
|
+
Average signal value for this memory.
|
|
501
|
+
|
|
502
|
+
Uses cached signal_stats from learning.db. Gives the ranker a direct
|
|
503
|
+
view of whether this memory's feedback is positive (>0.5) or negative (<0.5).
|
|
504
|
+
|
|
505
|
+
Returns:
|
|
506
|
+
Average signal value (0.0-1.0), or 0.5 (neutral) if no data.
|
|
507
|
+
"""
|
|
508
|
+
memory_id = str(memory.get('id', ''))
|
|
509
|
+
if not memory_id or not self._signal_stats:
|
|
510
|
+
return 0.5 # Neutral default
|
|
511
|
+
|
|
512
|
+
stats = self._signal_stats.get(memory_id, {})
|
|
513
|
+
avg = stats.get('avg_value', 0.5)
|
|
514
|
+
return max(0.0, min(float(avg), 1.0))
|
|
515
|
+
|
|
516
|
+
def _compute_pattern_confidence(self, memory: dict) -> float:
|
|
517
|
+
"""
|
|
518
|
+
Compute max Beta-Binomial confidence from learned patterns matching this memory.
|
|
519
|
+
|
|
520
|
+
Looks up the cached pattern_confidences (set via set_context) and checks
|
|
521
|
+
if any pattern value appears in the memory's content or tags. Returns the
|
|
522
|
+
maximum confidence among all matching patterns.
|
|
523
|
+
|
|
524
|
+
Returns:
|
|
525
|
+
Max confidence (0.0-1.0) from matching patterns
|
|
526
|
+
0.5 if no patterns loaded (neutral — unknown)
|
|
527
|
+
0.0 if patterns loaded but none match
|
|
528
|
+
"""
|
|
529
|
+
if not self._pattern_cache:
|
|
530
|
+
return 0.5 # No patterns available — neutral
|
|
531
|
+
|
|
532
|
+
content = memory.get('content', '')
|
|
533
|
+
if not content:
|
|
534
|
+
return 0.0
|
|
535
|
+
|
|
536
|
+
content_lower = content.lower()
|
|
537
|
+
|
|
538
|
+
# Also check tags
|
|
539
|
+
tags_str = ''
|
|
540
|
+
tags = memory.get('tags', [])
|
|
541
|
+
if isinstance(tags, list):
|
|
542
|
+
tags_str = ' '.join(t.lower() for t in tags)
|
|
543
|
+
elif isinstance(tags, str):
|
|
544
|
+
tags_str = tags.lower()
|
|
545
|
+
|
|
546
|
+
searchable = content_lower + ' ' + tags_str
|
|
547
|
+
|
|
548
|
+
max_confidence = 0.0
|
|
549
|
+
for pattern_value, confidence in self._pattern_cache.items():
|
|
550
|
+
# Pattern values are already lowercased in the cache
|
|
551
|
+
pattern_lower = pattern_value.lower() if pattern_value else ''
|
|
552
|
+
if not pattern_lower:
|
|
553
|
+
continue
|
|
554
|
+
# Word-boundary check for short patterns to avoid false positives
|
|
555
|
+
if len(pattern_lower) <= 3:
|
|
556
|
+
if re.search(r'\b' + re.escape(pattern_lower) + r'\b', searchable):
|
|
557
|
+
max_confidence = max(max_confidence, confidence)
|
|
558
|
+
else:
|
|
559
|
+
if pattern_lower in searchable:
|
|
560
|
+
max_confidence = max(max_confidence, confidence)
|
|
561
|
+
|
|
562
|
+
return max(0.0, min(max_confidence, 1.0))
|
|
563
|
+
|
|
564
|
+
|
|
450
565
|
# ============================================================================
|
|
451
566
|
# Module-level convenience functions
|
|
452
567
|
# ============================================================================
|
|
@@ -108,6 +108,17 @@ class FeedbackCollector:
|
|
|
108
108
|
"mcp_used_low": 0.4,
|
|
109
109
|
"cli_useful": 0.9,
|
|
110
110
|
"dashboard_click": 0.8,
|
|
111
|
+
"dashboard_thumbs_up": 1.0,
|
|
112
|
+
"dashboard_thumbs_down": 0.0,
|
|
113
|
+
"dashboard_pin": 1.0,
|
|
114
|
+
"dashboard_dwell_positive": 0.7,
|
|
115
|
+
"dashboard_dwell_negative": 0.1,
|
|
116
|
+
"implicit_positive_timegap": 0.6,
|
|
117
|
+
"implicit_negative_requick": 0.1,
|
|
118
|
+
"implicit_positive_reaccess": 0.7,
|
|
119
|
+
"implicit_positive_post_update": 0.8,
|
|
120
|
+
"implicit_negative_post_delete": 0.0,
|
|
121
|
+
"implicit_positive_cross_tool": 0.8,
|
|
111
122
|
"passive_decay": 0.0,
|
|
112
123
|
}
|
|
113
124
|
|
|
@@ -294,6 +305,109 @@ class FeedbackCollector:
|
|
|
294
305
|
dwell_time=dwell_time,
|
|
295
306
|
)
|
|
296
307
|
|
|
308
|
+
# ======================================================================
|
|
309
|
+
# Channel 4: Implicit Signals (v2.7.4 — auto-collected, zero user effort)
|
|
310
|
+
# ======================================================================
|
|
311
|
+
|
|
312
|
+
def record_implicit_signal(
    self,
    memory_id: int,
    query: str,
    signal_type: str,
    source_tool: Optional[str] = None,
    rank_position: Optional[int] = None,
) -> Optional[int]:
    """
    Store a feedback signal that was inferred from user behavior.

    The signal's numeric value is looked up in SIGNAL_VALUES. Only the
    hash of the query and its extracted keywords are handed to storage,
    under the "implicit" channel.

    Args:
        memory_id: ID of the memory the signal refers to.
        query: The recall query; must be non-empty.
        signal_type: A key of SIGNAL_VALUES (the implicit_* types are
            the intended ones).
        source_tool: Which tool originated the query.
        rank_position: Where the memory appeared in the results.

    Returns:
        Whatever _store_feedback returns (the feedback row ID), or None
        when the query is empty or the signal type is unknown.
    """
    request_ok = bool(query) and signal_type in self.SIGNAL_VALUES
    if not request_ok:
        logger.warning(
            "record_implicit_signal: invalid query or signal_type=%s",
            signal_type,
        )
        return None

    value = self.SIGNAL_VALUES[signal_type]
    hashed = self._hash_query(query)
    kw = self._extract_keywords(query)

    row_id = self._store_feedback(
        query_hash=hashed,
        query_keywords=kw,
        memory_id=memory_id,
        signal_type=signal_type,
        signal_value=value,
        channel="implicit",
        source_tool=source_tool,
        rank_position=rank_position,
    )
    return row_id
|
|
357
|
+
|
|
358
|
+
def record_dashboard_feedback(
    self,
    memory_id: int,
    query: str,
    feedback_type: str,
    dwell_time: Optional[float] = None,
) -> Optional[int]:
    """
    Store explicit feedback given in the dashboard.

    The short feedback name is translated to its dashboard_* signal
    type, whose numeric value comes from SIGNAL_VALUES. When no query
    is supplied, a synthetic "__dashboard__:<memory_id>" query is used
    so the record can still be hashed and keyworded.

    Args:
        memory_id: ID of the memory the feedback refers to.
        query: Search query active when the feedback was given; may be
            empty.
        feedback_type: One of 'thumbs_up', 'thumbs_down', 'pin',
            'dwell_positive', 'dwell_negative'.
        dwell_time: Seconds spent viewing (for dwell signals).

    Returns:
        Whatever _store_feedback returns (the feedback row ID), or None
        when the feedback type is not recognised.
    """
    dashboard_signals = {
        "thumbs_up": "dashboard_thumbs_up",
        "thumbs_down": "dashboard_thumbs_down",
        "pin": "dashboard_pin",
        "dwell_positive": "dashboard_dwell_positive",
        "dwell_negative": "dashboard_dwell_negative",
    }
    signal_type = dashboard_signals.get(feedback_type)

    recognised = bool(signal_type) and signal_type in self.SIGNAL_VALUES
    if not recognised:
        logger.warning(
            "record_dashboard_feedback: invalid feedback_type=%s",
            feedback_type,
        )
        return None

    # Feedback without an active search still needs something to hash
    effective_query = query if query else f"__dashboard__:{memory_id}"

    value = self.SIGNAL_VALUES[signal_type]
    hashed = self._hash_query(effective_query)
    kw = self._extract_keywords(effective_query)

    return self._store_feedback(
        query_hash=hashed,
        query_keywords=kw,
        memory_id=memory_id,
        signal_type=signal_type,
        signal_value=value,
        channel="dashboard",
        dwell_time=dwell_time,
    )
|
|
410
|
+
|
|
297
411
|
# ======================================================================
|
|
298
412
|
# Passive Decay Tracking
|
|
299
413
|
# ======================================================================
|