superlocalmemory 3.4.10 → 3.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -11
- package/docs/skill-evolution.md +77 -10
- package/ide/hooks/tool-event-hook.sh +4 -4
- package/package.json +1 -1
- package/pyproject.toml +3 -2
- package/src/superlocalmemory/cli/commands.py +170 -0
- package/src/superlocalmemory/cli/main.py +21 -0
- package/src/superlocalmemory/cli/setup_wizard.py +54 -11
- package/src/superlocalmemory/core/config.py +35 -0
- package/src/superlocalmemory/core/consolidation_engine.py +128 -0
- package/src/superlocalmemory/core/embedding_worker.py +1 -1
- package/src/superlocalmemory/core/engine.py +12 -0
- package/src/superlocalmemory/core/fact_consolidator.py +425 -0
- package/src/superlocalmemory/core/graph_pruner.py +290 -0
- package/src/superlocalmemory/core/maintenance_scheduler.py +20 -0
- package/src/superlocalmemory/core/recall_pipeline.py +9 -0
- package/src/superlocalmemory/core/tier_manager.py +325 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +6 -5
- package/src/superlocalmemory/evolution/__init__.py +29 -0
- package/src/superlocalmemory/evolution/blind_verifier.py +115 -0
- package/src/superlocalmemory/evolution/evolution_store.py +302 -0
- package/src/superlocalmemory/evolution/mutation_generator.py +181 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +555 -0
- package/src/superlocalmemory/evolution/triggers.py +367 -0
- package/src/superlocalmemory/evolution/types.py +92 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +13 -0
- package/src/superlocalmemory/learning/skill_performance_miner.py +44 -11
- package/src/superlocalmemory/mcp/server.py +4 -0
- package/src/superlocalmemory/mcp/tools_evolution.py +338 -0
- package/src/superlocalmemory/retrieval/engine.py +98 -11
- package/src/superlocalmemory/retrieval/entity_channel.py +118 -0
- package/src/superlocalmemory/retrieval/forgetting_filter.py +22 -7
- package/src/superlocalmemory/retrieval/strategy.py +2 -2
- package/src/superlocalmemory/server/routes/behavioral.py +19 -15
- package/src/superlocalmemory/server/routes/evolution.py +213 -0
- package/src/superlocalmemory/server/routes/tiers.py +195 -0
- package/src/superlocalmemory/server/unified_daemon.py +39 -5
- package/src/superlocalmemory/storage/schema_v3411.py +149 -0
- package/src/superlocalmemory/ui/index.html +5 -2
- package/src/superlocalmemory/ui/js/lifecycle.js +83 -0
- package/src/superlocalmemory/ui/js/ng-skills.js +394 -10
- package/src/superlocalmemory.egg-info/PKG-INFO +614 -0
- package/src/superlocalmemory.egg-info/SOURCES.txt +335 -0
- package/src/superlocalmemory.egg-info/dependency_links.txt +1 -0
- package/src/superlocalmemory.egg-info/entry_points.txt +2 -0
- package/src/superlocalmemory.egg-info/requires.txt +55 -0
- package/src/superlocalmemory.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SLM v3.4.11 "Skill Evolution" — Evolution MCP Tools.
|
|
6
|
+
|
|
7
|
+
Three evolution tools:
|
|
8
|
+
- evolve_skill: Manually trigger evolution for a specific skill
|
|
9
|
+
- skill_health: Get health metrics for a skill or all skills
|
|
10
|
+
- skill_lineage: Get evolution lineage for a skill
|
|
11
|
+
|
|
12
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
import sqlite3
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Callable
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
MEMORY_DB = Path.home() / ".superlocalmemory" / "memory.db"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _escape_like(value: str) -> str:
    """Escape ``\\``, ``%`` and ``_`` so *value* matches literally in ``LIKE ? ESCAPE '\\'``."""
    # The backslash must be escaped first, otherwise the escapes added for
    # '%' and '_' would themselves be doubled.
    return value.replace("\\", "\\\\").replace("%", r"\%").replace("_", r"\_")


def _health_status(effective_rate: float) -> str:
    """Bucket an effective rate: 'healthy' (>= 0.7), 'degraded' (>= 0.4), else 'critical'."""
    if effective_rate >= 0.7:
        return "healthy"
    if effective_rate >= 0.4:
        return "degraded"
    return "critical"


def _skill_name_from_summary(summary) -> str:
    """Extract a skill name from a ``tool_events.input_summary`` value.

    A JSON object contributes its "skill" (or, failing that, "name") value.
    Anything else is treated as plain text: text containing a colon yields the
    first double-quoted segment, or the stripped text when it has no quotes.
    Returns "" when no name can be determined (NULL, empty, no colon, ...).
    """
    if not isinstance(summary, str) or not summary:
        return ""
    try:
        parsed = json.loads(summary)
    except (json.JSONDecodeError, TypeError):
        parsed = None
    if isinstance(parsed, dict):
        name = parsed.get("skill") or parsed.get("name") or ""
        return name if isinstance(name, str) else ""
    # Not a JSON object (invalid JSON, or a JSON scalar/array): plain-text heuristic.
    if ":" not in summary:
        return ""
    return summary.split('"')[1] if '"' in summary else summary.strip()


def register_evolution_tools(server, get_engine: Callable) -> None:
    """Register evolution MCP tools for skill evolution intelligence.

    Args:
        server: Object exposing a ``tool()`` decorator factory; each tool below
            is registered through it.
        get_engine: Zero-argument callable returning the active engine (or a
            falsy value). Its ``profile_id`` selects the profile; "default" is
            used when no engine is available.
    """

    @server.tool()
    async def evolve_skill(
        skill_name: str,
        evolution_type: str = "fix",
        reason: str = "",
    ) -> dict:
        """Manually trigger evolution for a specific skill.

        Runs the full evolution pipeline: screen -> confirm -> mutate ->
        blind verify -> persist. Requires evolution to be enabled in config.

        Args:
            skill_name: Name of the skill to evolve (e.g. "brainstorming")
            evolution_type: One of 'fix', 'derived', 'captured'
                (unknown values fall back to 'fix')
            reason: Optional reason / evidence for the evolution
        """
        try:
            # Evolution is opt-in: the flag lives in the user's config file.
            config_path = Path.home() / ".superlocalmemory" / "config.json"
            evo_cfg = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    cfg = json.load(f)
                evo_cfg = cfg.get("evolution", {}) or {}

            if not evo_cfg.get("enabled", False):
                return {
                    "success": False,
                    "error": "Evolution is disabled. Enable via: slm config set evolution.enabled true",
                }

            # Imported lazily so the evolution package is only loaded when enabled.
            from superlocalmemory.evolution.skill_evolver import SkillEvolver
            from superlocalmemory.evolution.types import (
                EvolutionCandidate,
                EvolutionType,
                TriggerType,
            )

            # Map string to enum
            type_map = {
                "fix": EvolutionType.FIX,
                "derived": EvolutionType.DERIVED,
                "captured": EvolutionType.CAPTURED,
            }
            if evolution_type not in type_map:
                # Keep the lenient fallback, but report the type that is
                # actually applied instead of echoing the invalid input.
                logger.warning(
                    "evolve_skill: unknown evolution_type %r, falling back to 'fix'",
                    evolution_type,
                )
                evolution_type = "fix"
            evo_type = type_map[evolution_type]

            # Build a minimal config for the evolver
            class _EvoCfg:
                enabled = True
                backend = evo_cfg.get("backend", "auto")
                max_evolutions_per_cycle = evo_cfg.get("max_evolutions_per_cycle", 3)

            class _Cfg:
                evolution = _EvoCfg()

            db_path = str(MEMORY_DB)
            evolver = SkillEvolver(db_path, _Cfg())

            # Build candidate from manual trigger
            evidence = (reason,) if reason else ("Manual evolution trigger via MCP",)
            candidate = EvolutionCandidate(
                skill_name=skill_name,
                evolution_type=evo_type,
                trigger=TriggerType.HEALTH_CHECK,
                evidence=evidence,
                effective_score=0.0,
                invocation_count=0,
            )

            # Process through the pipeline
            engine = get_engine()
            profile_id = engine.profile_id if engine else "default"

            evolver._store.reset_cycle()
            outcome = evolver._process_candidate(candidate, profile_id)

            # Fetch the latest record for this skill to return details
            recent = evolver._store.get_skill_history(skill_name, limit=1)
            record_info = {}
            if recent:
                r = recent[0]
                record_info = {
                    "id": r.id,
                    "status": r.status.value,
                    "mutation_summary": r.mutation_summary,
                    "blind_verified": r.blind_verified,
                    "rejection_reason": r.rejection_reason,
                }

            return {
                "success": outcome == "evolved",
                "outcome": outcome,
                "skill_name": skill_name,
                "evolution_type": evolution_type,
                **record_info,
            }
        except Exception as exc:
            logger.debug("evolve_skill failed: %s", exc)
            return {"success": False, "error": str(exc)}

    @server.tool()
    async def skill_health(
        skill_name: str = "",
        include_history: bool = False,
    ) -> dict:
        """Get health metrics for a skill or all skills.

        Queries behavioral assertions (skill_performance category) and
        tool_events to compute per-skill invocation counts, effective
        rates, and status.

        Args:
            skill_name: Specific skill name (empty = all skills)
            include_history: Include recent tool event history per skill
        """
        try:
            engine = get_engine()
            profile_id = engine.profile_id if engine else "default"

            conn = sqlite3.connect(str(MEMORY_DB), timeout=10)
            try:
                conn.row_factory = sqlite3.Row

                # Skills are logged as tool_name='Skill' with the actual skill
                # name inside input_summary.
                if skill_name:
                    # M-LIKE: Escape LIKE wildcards in user-provided skill_name
                    event_rows = conn.execute(
                        "SELECT input_summary, event_type, created_at, duration_ms "
                        "FROM tool_events "
                        "WHERE profile_id = ? AND tool_name = 'Skill' "
                        "AND input_summary LIKE ? ESCAPE '\\' "
                        "ORDER BY created_at DESC",
                        (profile_id, f"%{_escape_like(skill_name)}%"),
                    ).fetchall()
                else:
                    event_rows = conn.execute(
                        "SELECT input_summary, event_type, created_at "
                        "FROM tool_events "
                        "WHERE profile_id = ? AND tool_name = 'Skill' "
                        "ORDER BY created_at DESC LIMIT 500",
                        (profile_id,),
                    ).fetchall()

                # Gather skill_performance assertions
                assertion_query = (
                    "SELECT trigger_condition, action, confidence "
                    "FROM behavioral_assertions "
                    "WHERE profile_id = ? AND category = 'skill_performance'"
                )
                assertion_params = [profile_id]
                if skill_name:
                    assertion_query += " AND trigger_condition LIKE ? ESCAPE '\\'"
                    assertion_params.append(f"%{_escape_like(skill_name)}%")
                assertion_rows = conn.execute(
                    assertion_query, tuple(assertion_params)
                ).fetchall()
            finally:
                # Always release the connection, including when a query fails.
                conn.close()

            if skill_name:
                invocations = len(event_rows)
                errors = sum(1 for row in event_rows if row["event_type"] == "error")
                # Rows are ordered newest first.
                last_invoked = event_rows[0]["created_at"] if event_rows else ""
                effective_rate = ((invocations - errors) / invocations) if invocations > 0 else 0.0
                # NOTE(review): a skill with zero recorded invocations has rate
                # 0.0 and is therefore reported as "critical" — confirm intended.
                entry = {
                    "name": skill_name,
                    "invocations": invocations,
                    "errors": errors,
                    "effective_rate": round(effective_rate, 4),
                    "last_invoked": last_invoked,
                    "status": _health_status(effective_rate),
                }
                if include_history:
                    entry["recent_events"] = [dict(row) for row in event_rows[:10]]
                skills = [entry]
            else:
                # Parse skill names from input_summary and aggregate per skill.
                skill_stats: dict = {}
                for row in event_rows:
                    sname = _skill_name_from_summary(row["input_summary"])
                    if not sname:
                        continue
                    stats = skill_stats.setdefault(
                        sname, {"invocations": 0, "errors": 0, "last_invoked": ""}
                    )
                    stats["invocations"] += 1
                    if row["event_type"] == "error":
                        stats["errors"] += 1
                    # First row seen for a skill is its most recent event.
                    if not stats["last_invoked"]:
                        stats["last_invoked"] = row["created_at"]

                # Report the 50 most-invoked skills.
                busiest = sorted(
                    skill_stats.items(),
                    key=lambda item: item[1]["invocations"],
                    reverse=True,
                )[:50]
                skills = []
                for sname, stats in busiest:
                    inv = stats["invocations"]
                    errs = stats["errors"]
                    eff = ((inv - errs) / inv) if inv > 0 else 0.0
                    skills.append({
                        "name": sname,
                        "invocations": inv,
                        "errors": errs,
                        "effective_rate": round(eff, 4),
                        "last_invoked": stats["last_invoked"],
                        "status": _health_status(eff),
                    })

            # Add assertion insights
            assertion_insights = [
                {
                    "trigger": row["trigger_condition"],
                    "action": row["action"],
                    "confidence": row["confidence"],
                }
                for row in assertion_rows
            ]

            return {
                "skills": skills,
                "skill_count": len(skills),
                "assertion_insights": assertion_insights,
                "profile_id": profile_id,
            }
        except Exception as exc:
            logger.debug("skill_health failed: %s", exc)
            return {"skills": [], "skill_count": 0, "error": str(exc)}

    @server.tool()
    async def skill_lineage(
        skill_name: str = "",
    ) -> dict:
        """Get evolution lineage for a skill.

        Queries the skill_evolution_log table and builds a version tree
        showing how skills evolved from their parents.

        Args:
            skill_name: Specific skill name (empty = all skills)
        """
        try:
            columns = (
                "SELECT id, skill_name, parent_skill_id, evolution_type, "
                "trigger_type, generation, status, mutation_summary, "
                "blind_verified, created_at, completed_at "
                "FROM skill_evolution_log "
            )
            conn = sqlite3.connect(str(MEMORY_DB), timeout=10)
            try:
                conn.row_factory = sqlite3.Row
                if skill_name:
                    rows = conn.execute(
                        columns
                        + "WHERE skill_name = ? OR parent_skill_id = ? "
                        "ORDER BY created_at ASC",
                        (skill_name, skill_name),
                    ).fetchall()
                else:
                    rows = conn.execute(
                        columns + "ORDER BY created_at DESC LIMIT 100",
                    ).fetchall()
            finally:
                # Always release the connection, including when a query fails.
                conn.close()

            lineage = [
                {
                    "id": row["id"],
                    "skill_name": row["skill_name"],
                    "parent_skill_id": row["parent_skill_id"],
                    "evolution_type": row["evolution_type"],
                    "trigger": row["trigger_type"],
                    "generation": row["generation"],
                    "status": row["status"],
                    "mutation_summary": row["mutation_summary"],
                    "blind_verified": bool(row["blind_verified"]),
                    "created_at": row["created_at"],
                    "completed_at": row["completed_at"],
                }
                for row in rows
            ]

            # Build tree structure: group entries under their parent id, or
            # under their own name when they have no parent.
            tree: dict = {}
            for entry in lineage:
                root = entry.get("parent_skill_id") or entry["skill_name"]
                if root not in tree:
                    tree[root] = {"root": root, "evolutions": []}
                tree[root]["evolutions"].append({
                    "id": entry["id"],
                    "skill_name": entry["skill_name"],
                    "evolution_type": entry["evolution_type"],
                    "status": entry["status"],
                    "generation": entry["generation"],
                    "created_at": entry["created_at"],
                })

            return {
                "lineage": lineage,
                "lineage_count": len(lineage),
                "tree": tree,
            }
        except Exception as exc:
            logger.debug("skill_lineage failed: %s", exc)
            return {"lineage": [], "lineage_count": 0, "tree": {}, "error": str(exc)}
|
|
@@ -187,15 +187,43 @@ class RetrievalEngine:
|
|
|
187
187
|
except Exception as exc:
|
|
188
188
|
logger.warning("Scene expansion: %s", exc)
|
|
189
189
|
|
|
190
|
+
# V3.4.11: Entity graph signal enhancement (post-RRF boost)
|
|
191
|
+
# Instead of competing as independent channel, entity_graph SCORES
|
|
192
|
+
# the candidates from other channels by graph proximity to query entities.
|
|
193
|
+
# Research: Microsoft GraphRAG DRIFT, Pistis-RAG cascaded architecture.
|
|
194
|
+
if (self._entity is not None
|
|
195
|
+
and "entity_graph" not in set(self._config.disabled_channels)
|
|
196
|
+
and fused):
|
|
197
|
+
try:
|
|
198
|
+
candidate_ids = [fr.fact_id for fr in fused[:100]]
|
|
199
|
+
eg_scores = self._entity.score_candidates(
|
|
200
|
+
query, candidate_ids, profile_id,
|
|
201
|
+
)
|
|
202
|
+
if eg_scores:
|
|
203
|
+
boosted = []
|
|
204
|
+
for fr in fused:
|
|
205
|
+
eg_sc = eg_scores.get(fr.fact_id, 0.0)
|
|
206
|
+
if eg_sc > 0:
|
|
207
|
+
eg_weight = strat.weights.get("entity_graph", 1.0)
|
|
208
|
+
boost = 1.0 + eg_sc * eg_weight * 0.3
|
|
209
|
+
boosted.append(FusionResult(
|
|
210
|
+
fact_id=fr.fact_id,
|
|
211
|
+
fused_score=fr.fused_score * boost,
|
|
212
|
+
channel_ranks=fr.channel_ranks,
|
|
213
|
+
channel_scores={**fr.channel_scores, "entity_graph": eg_sc},
|
|
214
|
+
))
|
|
215
|
+
else:
|
|
216
|
+
boosted.append(fr)
|
|
217
|
+
fused = sorted(boosted, key=lambda r: r.fused_score, reverse=True)
|
|
218
|
+
except Exception as exc:
|
|
219
|
+
logger.warning("Entity graph signal enhancement: %s", exc)
|
|
220
|
+
|
|
190
221
|
# 4. Load facts for rerank pool
|
|
191
222
|
pool = min(len(fused), max(effective_limit * 3, 30))
|
|
192
223
|
top = fused[:pool]
|
|
193
224
|
facts = self._load_facts(top, profile_id)
|
|
194
225
|
|
|
195
226
|
# V3.3.21: Session diversity for aggregation queries.
|
|
196
|
-
# Cat 1 (single-hop/aggregation) needs facts from MULTIPLE sessions.
|
|
197
|
-
# Without diversity enforcement, top-20 may all come from 1-2 sessions,
|
|
198
|
-
# missing scattered mentions across 19+ sessions.
|
|
199
227
|
if strat.query_type == "aggregation" and facts:
|
|
200
228
|
top = self._enforce_session_diversity(top, facts, min_sessions=3, top_k=20)
|
|
201
229
|
|
|
@@ -212,8 +240,18 @@ class RetrievalEngine:
|
|
|
212
240
|
ce_alpha = 0.5 if strat.query_type in ("multi_hop", "temporal") else 0.75
|
|
213
241
|
top = self._apply_reranker(query, top, facts, alpha=ce_alpha)
|
|
214
242
|
|
|
243
|
+
# V3.4.11: Channel diversity — guarantee entity_graph results appear in
|
|
244
|
+
# the final output. Applied AFTER reranker so results can't be pushed out.
|
|
245
|
+
final_top = top[:effective_limit]
|
|
246
|
+
final_top = self._enforce_channel_diversity(
|
|
247
|
+
final_top, fused, ch_results, effective_limit,
|
|
248
|
+
)
|
|
249
|
+
# Reload facts for any newly injected results
|
|
250
|
+
if len(final_top) > len(top[:effective_limit]):
|
|
251
|
+
facts = self._load_facts(final_top, profile_id)
|
|
252
|
+
|
|
215
253
|
# 6. Build response
|
|
216
|
-
results = self._build_results(
|
|
254
|
+
results = self._build_results(final_top, facts, strat)
|
|
217
255
|
ms = (time.monotonic() - t0) * 1000.0
|
|
218
256
|
return RecallResponse(
|
|
219
257
|
query=query, mode=mode, results=results,
|
|
@@ -334,6 +372,54 @@ class RetrievalEngine:
|
|
|
334
372
|
remaining = [fr for fr in rest if fr.fact_id not in promoted_ids]
|
|
335
373
|
return top + promoted + remaining
|
|
336
374
|
|
|
375
|
+
# -- Channel diversity enforcement ----------------------------------------
|
|
376
|
+
|
|
377
|
+
@staticmethod
def _enforce_channel_diversity(
    top: list,
    fused: list,
    ch_results: dict[str, list[tuple[str, float]]],
    effective_limit: int,
    min_per_channel: int = 2,
) -> list:
    """Ensure structure channels (entity_graph) get representation.

    If fewer than ``min_per_channel`` distinct facts returned by a structure
    channel are present in ``top``, the missing ones are taken from ``fused``
    (in fused order) and APPENDED after ``top``. Nothing is interleaved or
    re-ranked, and the result may therefore be longer than ``top``.

    Args:
        top: Final ranked results; items expose ``fact_id``.
        fused: Full fused ranking used as the source of promoted items.
        ch_results: Per-channel ``(fact_id, score)`` lists keyed by channel name.
        effective_limit: Not used here; kept for interface compatibility.
        min_per_channel: Minimum number of a channel's facts wanted in the output.

    Returns:
        ``top`` itself when nothing had to be promoted, otherwise a new list
        consisting of ``top`` followed by the promoted items.
    """
    structure_channels = ("entity_graph",)
    top_ids = {fr.fact_id for fr in top}

    promoted = []
    for ch_name in structure_channels:
        ch_fids = {fid for fid, _ in (ch_results.get(ch_name) or ())}
        if not ch_fids:
            continue

        # Count DISTINCT fact ids already shown, so a channel that lists the
        # same fact twice is not credited twice.
        needed = min_per_channel - len(ch_fids & top_ids)
        if needed <= 0:
            continue

        for fr in fused:
            if fr.fact_id in ch_fids and fr.fact_id not in top_ids:
                promoted.append(fr)
                top_ids.add(fr.fact_id)
                needed -= 1
                if needed <= 0:
                    break

    if not promoted:
        return top

    # Appended as a safety net: this only fires when the channel's facts did
    # not already rank into the top results on their own.
    return list(top) + promoted
|
|
422
|
+
|
|
337
423
|
# -- Channel execution --------------------------------------------------
|
|
338
424
|
|
|
339
425
|
def _embed_query(self, query: str) -> list[float] | None:
|
|
@@ -369,6 +455,11 @@ class RetrievalEngine:
|
|
|
369
455
|
if needs_embedding:
|
|
370
456
|
try:
|
|
371
457
|
q_emb = self._embed_query(query)
|
|
458
|
+
if q_emb is None:
|
|
459
|
+
logger.warning(
|
|
460
|
+
"Query embedding returned None — semantic, hopfield, "
|
|
461
|
+
"spreading_activation channels will be skipped this recall"
|
|
462
|
+
)
|
|
372
463
|
except Exception as exc:
|
|
373
464
|
logger.warning("Query embedding failed: %s", exc)
|
|
374
465
|
|
|
@@ -388,13 +479,9 @@ class RetrievalEngine:
|
|
|
388
479
|
except Exception as exc:
|
|
389
480
|
logger.warning("BM25 channel: %s", exc)
|
|
390
481
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
if r:
|
|
395
|
-
out["entity_graph"] = r
|
|
396
|
-
except Exception as exc:
|
|
397
|
-
logger.warning("Entity channel: %s", exc)
|
|
482
|
+
# V3.4.12: entity_graph is now a signal enhancer (post-RRF boost),
|
|
483
|
+
# not an independent channel. Removed from channel execution to avoid
|
|
484
|
+
# running spreading activation twice. See score_candidates() in engine.recall().
|
|
398
485
|
|
|
399
486
|
if self._temporal is not None and "temporal" not in disabled:
|
|
400
487
|
try:
|
|
@@ -370,6 +370,124 @@ class EntityGraphChannel:
|
|
|
370
370
|
results.sort(key=lambda x: x[1], reverse=True)
|
|
371
371
|
return results[:top_k]
|
|
372
372
|
|
|
373
|
+
def score_candidates(
    self,
    query: str,
    candidate_fact_ids: list[str],
    profile_id: str,
) -> dict[str, float]:
    """Rate already-retrieved facts by entity-graph activation from the query.

    V3.4.11 "Signal Enhancer": rather than contributing its own result list,
    the entity graph scores the candidates other channels produced. Entities
    are extracted from the query and resolved to canonical ids; facts linked
    to those entities are seeded with activation 1.0, and activation is then
    spread for up to ``_max_hops - 1`` further hops with ``_decay`` per hop.
    When graph metrics are loaded, edge weights, PageRank and a community
    boost are applied as well.

    Args:
        query: The user's query string.
        candidate_fact_ids: Fact IDs produced by the other channels.
        profile_id: User profile.

    Returns:
        Dict with one entry per distinct candidate id. Values are the
        candidates' activations divided by the highest activation among the
        candidates, so the best-connected candidate scores 1.0 and
        unconnected candidates score 0.0. An empty dict is returned when
        there are no candidates, no query entities, or none resolve.
    """
    if not candidate_fact_ids:
        return {}

    query_entities = extract_query_entities(query)
    if not query_entities:
        return {}

    seed_entity_ids = self._resolve_entities(query_entities, profile_id)
    if not seed_entity_ids:
        return {}

    self._ensure_adjacency(profile_id)

    activation: dict[str, float] = defaultdict(float)
    seen_entities: set[str] = set(seed_entity_ids)
    # Cached maps are used when populated; otherwise seeds come from the DB.
    cached = bool(self._entity_to_facts)

    # Seed: every fact attached to a query entity starts at full activation.
    for entity_id in seed_entity_ids:
        if cached:
            seed_facts = self._entity_to_facts.get(entity_id, ())
        else:
            seed_facts = (
                fact.fact_id
                for fact in self._db.get_facts_by_entity(entity_id, profile_id)
            )
        for fact_id in seed_facts:
            activation[fact_id] = max(activation[fact_id], 1.0)

    # Spread activation outwards, one hop at a time.
    frontier = set(activation)
    for hop in range(1, self._max_hops):
        hop_decay = self._decay ** hop
        if hop_decay < self._threshold:
            break

        reached: set[str] = set()
        if cached:
            # Pass 1: follow fact-to-fact edges from the adjacency cache.
            for src in frontier:
                for dst, edge_weight in self._adj.get(src, ()):
                    spread = activation[src] * self._decay
                    if self._graph_metrics:
                        spread *= edge_weight
                        if dst in self._graph_metrics:
                            pagerank = self._graph_metrics[dst].get("pagerank_score", 0.0)
                            spread *= min(1.0 + pagerank * 2.0, 2.0)
                    if spread < self._threshold:
                        continue
                    if spread > activation.get(dst, 0.0):
                        activation[dst] = spread
                        reached.add(dst)

            # Pass 2: reach facts sharing a not-yet-visited entity.
            for src in frontier:
                for entity_id in self._fact_to_entities.get(src, ()):
                    if entity_id in seen_entities:
                        continue
                    seen_entities.add(entity_id)
                    for linked in self._entity_to_facts.get(entity_id, ()):
                        if hop_decay > activation.get(linked, 0.0):
                            activation[linked] = hop_decay
                            reached.add(linked)

        frontier = reached
        if not frontier:
            break

    # Community-aware boosting: favour facts in the seed facts' communities.
    if self._graph_metrics and cached:
        from collections import Counter as _Counter

        seed_communities: _Counter = _Counter()
        for entity_id in seed_entity_ids:
            for fact_id in self._entity_to_facts.get(entity_id, ()):
                community = self._graph_metrics.get(fact_id, {}).get("community_id")
                if community is not None:
                    seed_communities[community] += 1

        if seed_communities:
            seed_total = sum(seed_communities.values())
            for fact_id in list(activation):
                community = self._graph_metrics.get(fact_id, {}).get("community_id")
                if community is not None and community in seed_communities:
                    share = seed_communities[community] / seed_total
                    activation[fact_id] *= min(1.0 + 0.15 * share, 1.3)

    # Keep only the candidates and rescale by the best candidate's activation.
    scored = {fid: activation.get(fid, 0.0) for fid in set(candidate_fact_ids)}
    peak = max(scored.values()) if scored else 0
    if peak > 0:
        scored = {fid: value / peak for fid, value in scored.items()}

    return scored
|
|
490
|
+
|
|
373
491
|
def _suppress_contradictions(
|
|
374
492
|
self, activation: dict[str, float], profile_id: str,
|
|
375
493
|
) -> None:
|
|
@@ -41,9 +41,21 @@ _ZONE_WEIGHTS: dict[str, float] = {
|
|
|
41
41
|
"forgotten": 0.0,
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
#
|
|
44
|
+
# V3.4.11: Deep recall weights — includes cold/archive with reduced scores
|
|
45
|
+
_DEEP_ZONE_WEIGHTS: dict[str, float] = {
|
|
46
|
+
"active": 1.0,
|
|
47
|
+
"warm": 0.7,
|
|
48
|
+
"cold": 0.3,
|
|
49
|
+
"archive": 0.15,
|
|
50
|
+
"forgotten": 0.05,
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
# Zones where facts are excluded from results (default recall)
|
|
45
54
|
_EXCLUDED_ZONES: frozenset[str] = frozenset({"archive", "forgotten"})
|
|
46
55
|
|
|
56
|
+
# Deep recall excludes nothing — every fact is searchable
|
|
57
|
+
_DEEP_EXCLUDED_ZONES: frozenset[str] = frozenset()
|
|
58
|
+
|
|
47
59
|
|
|
48
60
|
class ForgettingFilter:
|
|
49
61
|
"""Post-retrieval filter that applies Ebbinghaus retention weighting.
|
|
@@ -51,11 +63,12 @@ class ForgettingFilter:
|
|
|
51
63
|
Removes archived/forgotten facts and adjusts scores for other zones.
|
|
52
64
|
"""
|
|
53
65
|
|
|
54
|
-
__slots__ = ("_db", "_config")
|
|
66
|
+
__slots__ = ("_db", "_config", "_deep_recall")
|
|
55
67
|
|
|
56
|
-
def __init__(self, db: DatabaseManager, config: ForgettingConfig) -> None:
|
|
68
|
+
def __init__(self, db: DatabaseManager, config: ForgettingConfig, deep_recall: bool = False) -> None:
|
|
57
69
|
self._db = db
|
|
58
70
|
self._config = config
|
|
71
|
+
self._deep_recall = deep_recall
|
|
59
72
|
|
|
60
73
|
def filter(
|
|
61
74
|
self,
|
|
@@ -112,12 +125,14 @@ class ForgettingFilter:
|
|
|
112
125
|
|
|
113
126
|
zone = ret_data.get("lifecycle_zone", "active")
|
|
114
127
|
|
|
115
|
-
|
|
116
|
-
|
|
128
|
+
# V3.4.11: Deep recall mode includes all tiers
|
|
129
|
+
excluded = _DEEP_EXCLUDED_ZONES if self._deep_recall else _EXCLUDED_ZONES
|
|
130
|
+
weights = _DEEP_ZONE_WEIGHTS if self._deep_recall else _ZONE_WEIGHTS
|
|
131
|
+
|
|
132
|
+
if zone in excluded:
|
|
117
133
|
continue
|
|
118
134
|
|
|
119
|
-
|
|
120
|
-
weight = _ZONE_WEIGHTS.get(zone, 1.0)
|
|
135
|
+
weight = weights.get(zone, 1.0)
|
|
121
136
|
new_results.append((fact_id, score * weight))
|
|
122
137
|
|
|
123
138
|
filtered[channel_name] = new_results
|