superlocalmemory 3.3.29 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +1 -1
- package/CHANGELOG.md +3 -0
- package/LICENSE +633 -70
- package/README.md +14 -11
- package/docs/screenshots/01-dashboard-main.png +0 -0
- package/docs/screenshots/02-knowledge-graph.png +0 -0
- package/docs/screenshots/03-patterns-learning.png +0 -0
- package/docs/screenshots/04-learning-dashboard.png +0 -0
- package/docs/screenshots/05-behavioral-analysis.png +0 -0
- package/docs/screenshots/06-graph-communities.png +0 -0
- package/docs/v2-archive/ACCESSIBILITY.md +1 -1
- package/docs/v2-archive/FRAMEWORK-INTEGRATIONS.md +1 -1
- package/docs/v2-archive/MCP-MANUAL-SETUP.md +1 -1
- package/docs/v2-archive/SEARCH-ENGINE-V2.2.0.md +2 -2
- package/docs/v2-archive/SEARCH-INTEGRATION-GUIDE.md +1 -1
- package/docs/v2-archive/UNIVERSAL-INTEGRATION.md +1 -1
- package/docs/v2-archive/V2.2.0-OPTIONAL-SEARCH.md +1 -1
- package/docs/v2-archive/example_graph_usage.py +1 -1
- package/ide/configs/codex-mcp.toml +1 -1
- package/ide/integrations/langchain/README.md +1 -1
- package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +1 -1
- package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +1 -1
- package/ide/integrations/langchain/pyproject.toml +2 -2
- package/ide/integrations/langchain/tests/__init__.py +1 -1
- package/ide/integrations/langchain/tests/test_chat_message_history.py +1 -1
- package/ide/integrations/langchain/tests/test_security.py +1 -1
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +1 -1
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +1 -1
- package/ide/integrations/llamaindex/pyproject.toml +2 -2
- package/ide/integrations/llamaindex/tests/__init__.py +1 -1
- package/ide/integrations/llamaindex/tests/test_chat_store.py +1 -1
- package/ide/integrations/llamaindex/tests/test_security.py +1 -1
- package/ide/skills/slm-build-graph/SKILL.md +3 -3
- package/ide/skills/slm-list-recent/SKILL.md +3 -3
- package/ide/skills/slm-recall/SKILL.md +3 -3
- package/ide/skills/slm-remember/SKILL.md +3 -3
- package/ide/skills/slm-show-patterns/SKILL.md +3 -3
- package/ide/skills/slm-status/SKILL.md +3 -3
- package/ide/skills/slm-switch-profile/SKILL.md +3 -3
- package/package.json +3 -3
- package/pyproject.toml +3 -3
- package/src/superlocalmemory/core/engine_wiring.py +5 -1
- package/src/superlocalmemory/core/graph_analyzer.py +254 -12
- package/src/superlocalmemory/learning/consolidation_worker.py +240 -52
- package/src/superlocalmemory/retrieval/entity_channel.py +135 -4
- package/src/superlocalmemory/retrieval/spreading_activation.py +45 -0
- package/src/superlocalmemory/server/api.py +9 -1
- package/src/superlocalmemory/server/routes/behavioral.py +8 -4
- package/src/superlocalmemory/server/routes/chat.py +320 -0
- package/src/superlocalmemory/server/routes/insights.py +368 -0
- package/src/superlocalmemory/server/routes/learning.py +106 -6
- package/src/superlocalmemory/server/routes/memories.py +20 -9
- package/src/superlocalmemory/server/routes/stats.py +25 -3
- package/src/superlocalmemory/server/routes/timeline.py +252 -0
- package/src/superlocalmemory/server/routes/v3_api.py +161 -0
- package/src/superlocalmemory/server/ui.py +8 -0
- package/src/superlocalmemory/ui/index.html +168 -58
- package/src/superlocalmemory/ui/js/graph-event-bus.js +83 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +1 -1
- package/src/superlocalmemory/ui/js/knowledge-graph.js +942 -0
- package/src/superlocalmemory/ui/js/memory-chat.js +344 -0
- package/src/superlocalmemory/ui/js/memory-timeline.js +265 -0
- package/src/superlocalmemory/ui/js/quick-actions.js +334 -0
- package/src/superlocalmemory.egg-info/PKG-INFO +597 -0
- package/src/superlocalmemory.egg-info/SOURCES.txt +287 -0
- package/src/superlocalmemory.egg-info/dependency_links.txt +1 -0
- package/src/superlocalmemory.egg-info/entry_points.txt +2 -0
- package/src/superlocalmemory.egg-info/requires.txt +47 -0
- package/src/superlocalmemory.egg-info/top_level.txt +1 -0
|
@@ -133,127 +133,315 @@ class ConsolidationWorker:
|
|
|
133
133
|
return 0
|
|
134
134
|
|
|
135
135
|
def _generate_patterns(self, profile_id: str, dry_run: bool) -> int:
    """Mine behavioral patterns for a profile and record them in the pattern store.

    Reads every active fact of ``profile_id`` from the memory database (no
    row cap) and derives, in order:

    1. tech preferences (keyword hits in fact content),
    2. topic interests (word frequency),
    3. time-of-day activity,
    4. entity interests (``canonical_entities_json``),
    5. heavy-session usage,
    6. dominant fact type,
    7. per-channel feedback and co-retrieval clusters (learning database),
    8. graph-community structure (``fact_importance``).

    Args:
        profile_id: Profile whose facts and learning signals are analyzed.
        dry_run: When true nothing is written to the pattern store, so the
            returned count is 0.

    Returns:
        Number of patterns recorded. 0 when the profile has fewer than five
        active facts, or when an unexpected error aborts the run (the error
        is logged as a warning).
    """
    try:
        import json
        import re
        from collections import Counter

        from superlocalmemory.learning.behavioral import BehavioralPatternStore

        conn = sqlite3.connect(self._memory_db, timeout=10)
        try:
            conn.execute("PRAGMA busy_timeout=5000")
            conn.row_factory = sqlite3.Row

            # v3.4.1: No cap — analyze ALL facts. Rows are converted to
            # plain dicts once instead of once per section.
            facts = [
                dict(row)
                for row in conn.execute(
                    "SELECT fact_id, content, fact_type, created_at, session_id, "
                    "confidence, canonical_entities_json "
                    "FROM atomic_facts "
                    "WHERE profile_id = ? AND lifecycle = 'active' "
                    "ORDER BY created_at DESC",
                    (profile_id,),
                )
            ]
            fact_total = len(facts)
            if fact_total < 5:
                return 0

            store = BehavioralPatternStore(self._learning_db)
            generated = 0

            # NULL content would otherwise raise and abort the whole run.
            contents = [(fact["content"] or "").lower() for fact in facts]

            # ── 1. Tech Preferences (expanded keyword list) ───────────
            tech_keywords = {
                "python": "Python", "javascript": "JavaScript",
                "typescript": "TypeScript", "react": "React",
                "vue": "Vue", "angular": "Angular",
                "postgresql": "PostgreSQL", "mysql": "MySQL",
                "sqlite": "SQLite", "docker": "Docker",
                "kubernetes": "Kubernetes", "aws": "AWS",
                "azure": "Azure", "gcp": "GCP",
                "node": "Node.js", "fastapi": "FastAPI",
                "django": "Django", "flask": "Flask",
                "rust": "Rust", "go": "Go", "java": "Java",
                "git": "Git", "npm": "npm", "pip": "pip",
                "langchain": "LangChain", "ollama": "Ollama",
                "pytorch": "PyTorch", "claude": "Claude",
                "openai": "OpenAI", "anthropic": "Anthropic",
                "redis": "Redis", "mongodb": "MongoDB",
                "graphql": "GraphQL", "nextjs": "Next.js",
                "terraform": "Terraform", "nginx": "Nginx",
                "linux": "Linux", "macos": "macOS",
                "vscode": "VS Code", "neovim": "Neovim",
            }
            # A keyword must not be embedded in a longer alphabetic word:
            # plain substring tests counted "rust" in "trust", "go" in
            # "good" and "java" in "javascript". Digits and punctuation may
            # still follow, so "python3" and "node.js" keep matching.
            tech_patterns = [
                (re.compile(r"(?<![a-z])" + re.escape(keyword) + r"(?![a-z])"), label)
                for keyword, label in tech_keywords.items()
            ]

            tech_counts: Counter = Counter()
            for content in contents:
                for pattern, label in tech_patterns:
                    if pattern.search(content):
                        tech_counts[label] += 1

            for tech, count in tech_counts.most_common(20):
                if count >= 2 and not dry_run:
                    confidence = min(1.0, count / max(fact_total * 0.1, 10))
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="tech_preference",
                        data={"topic": tech, "pattern_key": tech,
                              "value": tech, "key": "tech",
                              "evidence": count},
                        success_rate=confidence,
                        confidence=confidence,
                    )
                    generated += 1

            # ── 2. Topic Interests (word frequency) ───────────────────
            stopwords = frozenset({
                "the", "is", "a", "an", "in", "on", "at", "to", "for",
                "of", "and", "or", "not", "with", "that", "this", "was",
                "are", "be", "has", "had", "have", "from", "by", "it",
                "its", "as", "but", "were", "been", "being", "would",
                "could", "should", "will", "may", "might", "can", "do",
                "does", "did", "about", "into", "over", "after", "before",
                "then", "than", "also", "just", "like", "more", "some",
                "only", "other", "such", "each", "every", "both", "most",
            })
            word_re = re.compile(r'\b[a-zA-Z]{4,}\b')
            word_counts: Counter = Counter()
            for content in contents:
                for word in word_re.findall(content):
                    if word not in stopwords:
                        word_counts[word] += 1

            for topic, count in word_counts.most_common(15):
                if count >= 3 and not dry_run:
                    confidence = min(1.0, count / max(fact_total * 0.05, 15))
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="interest",
                        data={"topic": topic, "pattern_key": topic,
                              "count": count, "evidence": count},
                        success_rate=confidence,
                        confidence=confidence,
                    )
                    generated += 1

            # ── 3. Temporal Activity Patterns ─────────────────────────
            hour_counts: Counter = Counter()
            for fact in facts:
                # str(... or "") tolerates NULL / non-text created_at values.
                created = str(fact.get("created_at") or "")
                if "T" in created:
                    separator = "T"
                elif " " in created:
                    separator = " "
                else:
                    continue
                try:
                    hour = int(created.split(separator)[1][:2])
                except (ValueError, IndexError):
                    continue
                period = ("morning" if 6 <= hour < 12 else
                          "afternoon" if 12 <= hour < 18 else
                          "evening" if 18 <= hour < 22 else "night")
                hour_counts[period] += 1

            total_hours = sum(hour_counts.values())
            for period, count in hour_counts.most_common():
                if count >= 2 and total_hours > 0 and not dry_run:
                    pct = round(count / total_hours * 100)
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="temporal",
                        data={"topic": period, "pattern_key": period,
                              "value": f"{period} ({pct}%)",
                              "evidence": count, "key": period,
                              "distribution": dict(hour_counts)},
                        success_rate=pct / 100,
                        confidence=min(1.0, count / max(total_hours * 0.1, 5)),
                    )
                    generated += 1

            # ── 4. Entity Preferences (v3.4.1 NEW) ───────────────────
            entity_counts: Counter = Counter()
            for fact in facts:
                raw = fact.get("canonical_entities_json")
                if not raw:
                    continue
                try:
                    for ent in json.loads(raw):
                        entity_counts[ent] += 1
                except (ValueError, TypeError):
                    # Malformed JSON or an unexpected payload shape: skip.
                    pass

            for entity, count in entity_counts.most_common(15):
                if count >= 3 and not dry_run:
                    confidence = min(1.0, count / max(fact_total * 0.05, 10))
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="interest",
                        data={"topic": entity, "pattern_key": f"entity:{entity}",
                              "value": entity, "evidence": count,
                              "source": "entity_frequency"},
                        success_rate=confidence,
                        confidence=confidence,
                    )
                    generated += 1

            # ── 5. Session Activity Patterns (v3.4.1 NEW) ────────────
            session_counts: Counter = Counter()
            for fact in facts:
                sid = fact.get("session_id")
                if sid:
                    session_counts[sid] += 1

            if session_counts:
                avg_facts_per_session = sum(session_counts.values()) / len(session_counts)
                # "Heavy" = more than twice the average facts per session.
                heavy_sessions = [
                    s for s, c in session_counts.items()
                    if c > avg_facts_per_session * 2
                ]
                if heavy_sessions and not dry_run:
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="workflow",
                        data={"pattern_key": "heavy_session_usage",
                              "value": f"{len(heavy_sessions)} intensive sessions",
                              "evidence": len(heavy_sessions),
                              "avg_facts": round(avg_facts_per_session, 1),
                              "total_sessions": len(session_counts)},
                        success_rate=0.8,
                        confidence=min(1.0, len(heavy_sessions) / 5),
                    )
                    generated += 1

            # ── 6. Fact Type Distribution (v3.4.1 NEW) ────────────────
            # NULL fact_type is counted under the "semantic" default.
            type_counts: Counter = Counter(
                fact.get("fact_type") or "semantic" for fact in facts
            )
            total_ft = sum(type_counts.values())
            if total_ft > 0 and not dry_run:
                dominant_type = type_counts.most_common(1)[0]
                pct = round(dominant_type[1] / total_ft * 100)
                store.record_pattern(
                    profile_id=profile_id,
                    pattern_type="style",
                    data={"pattern_key": "memory_style",
                          "value": f"{dominant_type[0]} dominant ({pct}%)",
                          "evidence": dominant_type[1],
                          "distribution": dict(type_counts)},
                    success_rate=pct / 100,
                    confidence=min(1.0, dominant_type[1] / 20),
                )
                generated += 1

            # ── 7. Channel Performance (v3.4.1 NEW — from signals) ────
            # Best effort: a failure here must not discard sections 1-6.
            try:
                learn_conn = sqlite3.connect(self._learning_db, timeout=10)
                try:
                    learn_conn.row_factory = sqlite3.Row

                    # Retrieval usage patterns from learning_feedback
                    channel_rows = learn_conn.execute(
                        "SELECT channel, COUNT(*) AS cnt, "
                        "AVG(signal_value) AS avg_signal "
                        "FROM learning_feedback "
                        "WHERE profile_id = ? "
                        "GROUP BY channel ORDER BY cnt DESC",
                        (profile_id,),
                    ).fetchall()

                    for row in channel_rows:
                        d = dict(row)
                        ch = d.get("channel", "unknown")
                        cnt = d.get("cnt", 0)
                        avg_sig = round(float(d.get("avg_signal", 0) or 0), 3)
                        if cnt >= 5 and not dry_run:
                            store.record_pattern(
                                profile_id=profile_id,
                                pattern_type="style",
                                data={"pattern_key": f"channel:{ch}",
                                      "value": f"{ch} ({cnt} hits, {avg_sig} avg)",
                                      "evidence": cnt,
                                      "avg_signal": avg_sig},
                                success_rate=avg_sig,
                                confidence=min(1.0, cnt / 50),
                            )
                            generated += 1

                    # Co-retrieval cluster patterns
                    try:
                        coret_rows = learn_conn.execute(
                            "SELECT fact_a, fact_b, co_access_count "
                            "FROM co_retrieval_edges "
                            "WHERE profile_id = ? AND co_access_count >= 3 "
                            "ORDER BY co_access_count DESC LIMIT 20",
                            (profile_id,),
                        ).fetchall()
                        if coret_rows and not dry_run:
                            store.record_pattern(
                                profile_id=profile_id,
                                pattern_type="workflow",
                                data={"pattern_key": "co_retrieval_clusters",
                                      "value": f"{len(coret_rows)} strong fact pairs",
                                      "evidence": len(coret_rows),
                                      "top_pair_count": dict(coret_rows[0]).get("co_access_count", 0)},
                                success_rate=0.7,
                                confidence=min(1.0, len(coret_rows) / 10),
                            )
                            generated += 1
                    except Exception as exc:
                        logger.debug("Co-retrieval pattern mining skipped: %s", exc)
                finally:
                    # Close even when a query above fails.
                    learn_conn.close()
            except Exception as exc:
                logger.debug("Signal pattern mining failed: %s", exc)

            # ── 8. Community Membership (v3.4.1 NEW — from graph) ─────
            try:
                comm_rows = conn.execute(
                    "SELECT community_id, COUNT(*) AS cnt "
                    "FROM fact_importance "
                    "WHERE profile_id = ? AND community_id IS NOT NULL "
                    "GROUP BY community_id ORDER BY cnt DESC",
                    (profile_id,),
                ).fetchall()
                if comm_rows and not dry_run:
                    total_comm = sum(dict(r)["cnt"] for r in comm_rows)
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="style",
                        data={"pattern_key": "knowledge_structure",
                              "value": f"{len(comm_rows)} topic communities, {total_comm} classified facts",
                              "evidence": total_comm,
                              "community_count": len(comm_rows)},
                        success_rate=0.8,
                        confidence=min(1.0, len(comm_rows) / 5),
                    )
                    generated += 1
            except Exception as exc:
                logger.debug("Community pattern mining skipped: %s", exc)

            logger.info(
                "Pattern mining: %d patterns generated for profile %s "
                "from %d facts",
                generated, profile_id, fact_total,
            )
            return generated
        finally:
            # Runs on early return and on error, so the handle never leaks.
            conn.close()
    except Exception as exc:
        logger.warning("Pattern generation error: %s", exc)
        return 0
|
|
258
446
|
|
|
259
447
|
def _retrain_ranker(self, profile_id: str, signal_count: int) -> bool:
|
|
@@ -91,6 +91,7 @@ class EntityGraphChannel:
|
|
|
91
91
|
entity_resolver: EntityResolver | None = None,
|
|
92
92
|
decay: float = 0.7, activation_threshold: float = 0.05,
|
|
93
93
|
max_hops: int = 4,
|
|
94
|
+
graph_metrics: dict[str, dict] | None = None,
|
|
94
95
|
) -> None:
|
|
95
96
|
self._db = db
|
|
96
97
|
self._resolver = entity_resolver
|
|
@@ -101,6 +102,9 @@ class EntityGraphChannel:
|
|
|
101
102
|
self._adj: dict[str, list[tuple[str, float]]] = {}
|
|
102
103
|
self._adj_profile: str = "" # Track which profile is loaded
|
|
103
104
|
self._adj_edge_count: int = 0 # Track edge count for staleness detection
|
|
105
|
+
# v3.4.1: Graph intelligence metrics (loaded from fact_importance)
|
|
106
|
+
self._graph_metrics: dict[str, dict] = graph_metrics or {}
|
|
107
|
+
self._graph_metrics_profile: str = ""
|
|
104
108
|
|
|
105
109
|
def _ensure_adjacency(self, profile_id: str) -> None:
|
|
106
110
|
"""Load graph adjacency into memory for fast spreading activation.
|
|
@@ -133,6 +137,8 @@ class EntityGraphChannel:
|
|
|
133
137
|
self._adj_edge_count = current_count
|
|
134
138
|
# Also load entity maps (same staleness lifecycle)
|
|
135
139
|
self._load_entity_maps(profile_id)
|
|
140
|
+
# v3.4.1: Load graph intelligence metrics (P0)
|
|
141
|
+
self._load_graph_metrics(profile_id)
|
|
136
142
|
|
|
137
143
|
logger.info(
|
|
138
144
|
"Loaded adjacency cache: %d nodes, %d edges, %d entity mappings for profile %s",
|
|
@@ -192,6 +198,37 @@ class EntityGraphChannel:
|
|
|
192
198
|
len(self._entity_to_facts), len(self._fact_to_entities),
|
|
193
199
|
)
|
|
194
200
|
|
|
201
|
+
def _load_graph_metrics(self, profile_id: str) -> None:
    """Cache per-fact graph metrics from ``fact_importance`` for one profile.

    v3.4.1: Enables graph-enhanced retrieval (P0).

    Fills ``self._graph_metrics`` with one entry per fact id holding
    ``pagerank_score``, ``community_id`` and ``degree_centrality``. NULL
    numeric columns become 0.0. Does nothing when a non-empty cache for the
    same profile is already present. On any failure the cache is left empty
    and the error is logged at debug level.
    """
    # NOTE(review): this guard also skips the refresh when the adjacency
    # cache is rebuilt for the same profile — confirm that is intended.
    cache_is_current = (
        bool(self._graph_metrics) and self._graph_metrics_profile == profile_id
    )
    if cache_is_current:
        return

    self._graph_metrics = {}
    self._graph_metrics_profile = profile_id
    query = (
        "SELECT fact_id, pagerank_score, community_id, degree_centrality "
        "FROM fact_importance WHERE profile_id = ?"
    )
    try:
        for row in self._db.execute(query, (profile_id,)):
            record = dict(row)
            pagerank = record.get("pagerank_score", 0) or 0
            degree = record.get("degree_centrality", 0) or 0
            self._graph_metrics[record["fact_id"]] = {
                "pagerank_score": float(pagerank),
                "community_id": record.get("community_id"),
                "degree_centrality": float(degree),
            }
        logger.info(
            "Loaded graph metrics: %d facts for profile %s",
            len(self._graph_metrics), profile_id,
        )
    except Exception as exc:
        logger.debug("Graph metrics load failed (graceful degradation): %s", exc)
        self._graph_metrics = {}
|
|
231
|
+
|
|
195
232
|
def invalidate_cache(self) -> None:
|
|
196
233
|
"""Clear all caches. Call after adding/removing edges or facts."""
|
|
197
234
|
self._adj.clear()
|
|
@@ -199,6 +236,8 @@ class EntityGraphChannel:
|
|
|
199
236
|
self._adj_edge_count = 0
|
|
200
237
|
self._entity_to_facts = defaultdict(list)
|
|
201
238
|
self._fact_to_entities = defaultdict(list)
|
|
239
|
+
self._graph_metrics.clear()
|
|
240
|
+
self._graph_metrics_profile = ""
|
|
202
241
|
|
|
203
242
|
def search(self, query: str, profile_id: str, top_k: int = 50) -> list[tuple[str, float]]:
|
|
204
243
|
"""Search via entity graph with spreading activation.
|
|
@@ -242,12 +281,20 @@ class EntityGraphChannel:
|
|
|
242
281
|
for fid in frontier:
|
|
243
282
|
if use_cache:
|
|
244
283
|
neighbors = self._adj.get(fid, ())
|
|
245
|
-
for neighbor,
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
284
|
+
for neighbor, edge_weight in neighbors:
|
|
285
|
+
# v3.4.1 P1: Weighted propagation + PageRank bias
|
|
286
|
+
weighted = activation[fid] * self._decay * edge_weight
|
|
287
|
+
if self._graph_metrics and neighbor in self._graph_metrics:
|
|
288
|
+
target_pr = self._graph_metrics[neighbor].get("pagerank_score", 0.0)
|
|
289
|
+
pr_boost = min(1.0 + target_pr * 2.0, 2.0)
|
|
290
|
+
weighted *= pr_boost
|
|
291
|
+
if weighted >= self._threshold and weighted > activation.get(neighbor, 0.0):
|
|
292
|
+
activation[neighbor] = weighted
|
|
249
293
|
next_frontier.add(neighbor)
|
|
250
294
|
else:
|
|
295
|
+
# NOTE: SQL fallback path does NOT use graph intelligence (P1/P2/P3).
|
|
296
|
+
# Graph intelligence is only available on the in-memory cache path.
|
|
297
|
+
# This fallback exists for mock/test DBs. See Phase 7 LLD H-01.
|
|
251
298
|
for edge in self._db.get_edges_for_node(fid, profile_id):
|
|
252
299
|
neighbor = edge.target_id if edge.source_id == fid else edge.source_id
|
|
253
300
|
propagated = activation[fid] * self._decay
|
|
@@ -282,10 +329,94 @@ class EntityGraphChannel:
|
|
|
282
329
|
if not frontier:
|
|
283
330
|
break
|
|
284
331
|
|
|
332
|
+
# v3.4.1 P2: Community-aware boosting
|
|
333
|
+
if self._graph_metrics and use_cache:
|
|
334
|
+
from collections import Counter as _Counter
|
|
335
|
+
seed_communities: _Counter = _Counter()
|
|
336
|
+
for eid in canonical_ids:
|
|
337
|
+
for fid in self._entity_to_facts.get(eid, ()):
|
|
338
|
+
m = self._graph_metrics.get(fid, {})
|
|
339
|
+
comm = m.get("community_id")
|
|
340
|
+
if comm is not None:
|
|
341
|
+
seed_communities[comm] += 1
|
|
342
|
+
if seed_communities:
|
|
343
|
+
total_seeds = sum(seed_communities.values())
|
|
344
|
+
for fid in list(activation.keys()):
|
|
345
|
+
m = self._graph_metrics.get(fid, {})
|
|
346
|
+
fact_comm = m.get("community_id")
|
|
347
|
+
if fact_comm is not None and fact_comm in seed_communities:
|
|
348
|
+
boost = min(1.0 + 0.15 * (seed_communities[fact_comm] / total_seeds), 1.3)
|
|
349
|
+
activation[fid] *= boost
|
|
350
|
+
elif fact_comm is not None and fact_comm not in seed_communities:
|
|
351
|
+
activation[fid] *= 0.9 # Mild penalty for unrelated communities
|
|
352
|
+
|
|
353
|
+
# v3.4.1 P3: Contradiction suppression via graph_edges
|
|
354
|
+
if use_cache and activation:
|
|
355
|
+
self._suppress_contradictions(activation, profile_id)
|
|
356
|
+
|
|
357
|
+
# v3.4.1: Score normalization to [0, 1]
|
|
285
358
|
results = [(fid, sc) for fid, sc in activation.items() if sc >= self._threshold]
|
|
359
|
+
if not results:
|
|
360
|
+
return []
|
|
361
|
+
max_score = max(sc for _, sc in results)
|
|
362
|
+
if max_score > 0:
|
|
363
|
+
results = [(fid, sc / max_score) for fid, sc in results]
|
|
286
364
|
results.sort(key=lambda x: x[1], reverse=True)
|
|
287
365
|
return results[:top_k]
|
|
288
366
|
|
|
367
|
+
def _suppress_contradictions(
    self, activation: dict[str, float], profile_id: str,
) -> None:
    """P3: Down-weight activated facts that are superseded or contradicted.

    Looks up ``graph_edges`` rows of type 'contradiction' or 'supersedes'
    that touch any activated fact and rescales ``activation`` in place:

    * 'supersedes': the edge's ``source_id`` is multiplied by 0.3.
    * 'contradiction': when both endpoints are activated and both have a
      ``created_at`` value, the earlier one is multiplied by 0.5.

    Lookups are issued in batches so large activation sets stay below
    SQLite's bound-parameter limit. Each distinct edge is applied once.
    Any failure is logged at debug level and leaves the remaining scores
    untouched.

    Args:
        activation: Fact id -> activation score; mutated in place.
        profile_id: Profile used to scope both lookups.
    """
    candidate_ids = list(activation)
    if not candidate_ids:
        return

    # The edge query binds 2 * batch + 1 parameters; 400 keeps that below
    # 999, the default limit of SQLite builds older than 3.32.
    batch_size = 400
    try:
        # Dict used as an ordered set: an edge whose endpoints fall into
        # different batches is returned twice but must be penalized once.
        edges: dict[tuple[str, str, str], None] = {}
        for start in range(0, len(candidate_ids), batch_size):
            batch = candidate_ids[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            sql = (
                "SELECT source_id, target_id, edge_type FROM graph_edges "
                "WHERE profile_id = ? AND edge_type IN ('contradiction', 'supersedes') "
                "AND (source_id IN (" + placeholders + ") "
                "OR target_id IN (" + placeholders + "))"
            )
            for row in self._db.execute(sql, (profile_id, *batch, *batch)):
                edge = dict(row)
                edges[(edge["source_id"], edge["target_id"], edge["edge_type"])] = None
        if not edges:
            return

        # Timestamps are only needed for contradiction pairs whose two
        # endpoints are both activated.
        needs_ts = list({
            fid
            for src, tgt, etype in edges
            if etype == "contradiction" and src in activation and tgt in activation
            for fid in (src, tgt)
        })
        ts_map: dict[str, str] = {}
        for start in range(0, len(needs_ts), batch_size):
            batch = needs_ts[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            ts_rows = self._db.execute(
                "SELECT fact_id, created_at FROM atomic_facts "
                "WHERE fact_id IN (" + placeholders + ") AND profile_id = ?",
                (*batch, profile_id),
            )
            for row in ts_rows:
                record = dict(row)
                # Both "T" and " " date/time separators occur in created_at;
                # normalize so the string comparison below orders them alike.
                ts_map[record["fact_id"]] = str(record.get("created_at") or "").replace("T", " ")

        for src, tgt, etype in edges:
            if etype == "supersedes" and src in activation:
                # NOTE(review): penalizes the edge's source_id, which assumes
                # 'supersedes' edges point from the replaced fact to its
                # replacement — confirm against the code writing graph_edges.
                activation[src] *= 0.3  # Heavy penalty: this fact was replaced
            elif etype == "contradiction":
                src_ts = ts_map.get(src, "")
                tgt_ts = ts_map.get(tgt, "")
                if src_ts and tgt_ts:
                    older = src if src_ts < tgt_ts else tgt
                    if older in activation:
                        activation[older] *= 0.5
    except Exception as exc:
        logger.debug("Contradiction suppression failed: %s", exc)
|
|
419
|
+
|
|
289
420
|
def _resolve_entities(self, raw: list[str], profile_id: str) -> list[str]:
|
|
290
421
|
"""Resolve raw names to canonical entity IDs."""
|
|
291
422
|
ids: list[str] = []
|
|
@@ -53,6 +53,9 @@ class SpreadingActivationConfig:
|
|
|
53
53
|
max_iterations: int = 3 # T: propagation depth
|
|
54
54
|
tau_gate: float = 0.05 # FOK confidence gate (was 0.12)
|
|
55
55
|
enabled: bool = True # Ships enabled by default
|
|
56
|
+
# v3.4.1: Graph intelligence integration
|
|
57
|
+
use_pagerank_bias: bool = False # Multiply propagation by target PageRank
|
|
58
|
+
community_boost: float = 0.0 # Boost same-community nodes (0.0 = disabled)
|
|
56
59
|
|
|
57
60
|
|
|
58
61
|
# ---------------------------------------------------------------------------
|
|
@@ -82,6 +85,11 @@ class SpreadingActivation:
|
|
|
82
85
|
self._db = db
|
|
83
86
|
self._vector_store = vector_store
|
|
84
87
|
self._config = config or SpreadingActivationConfig()
|
|
88
|
+
# v3.4.1: Graph intelligence caches (loaded lazily per profile)
|
|
89
|
+
self._pr_cache: dict[str, float] = {}
|
|
90
|
+
self._pr_profile: str = ""
|
|
91
|
+
self._comm_cache: dict[str, int | None] = {}
|
|
92
|
+
self._comm_profile: str = ""
|
|
85
93
|
|
|
86
94
|
def search(
|
|
87
95
|
self,
|
|
@@ -311,3 +319,40 @@ class SpreadingActivation:
|
|
|
311
319
|
return len(result) if result else 0
|
|
312
320
|
except Exception:
|
|
313
321
|
return 0
|
|
322
|
+
|
|
323
|
+
# ── v3.4.1: Graph Intelligence Helpers ────────────────────────
|
|
324
|
+
|
|
325
|
+
def _load_graph_metrics_cache(self, profile_id: str) -> None:
    """Load PageRank + community data in a single SQL query.

    Called lazily by ``_get_pagerank()`` and ``_get_community()``.
    Populates both ``_pr_cache`` and ``_comm_cache`` from ``fact_importance``.

    The load runs at most once per profile: ``_pr_profile`` and
    ``_comm_profile`` record which profile the caches describe. A profile
    with no ``fact_importance`` rows, or whose query failed, is therefore
    not re-queried on every lookup. Switching profiles triggers a new load.
    Query errors are logged at debug level and leave whatever rows were
    read so far in the caches.
    """
    # The truthiness check keeps an empty profile id from matching the
    # initial "" markers set in __init__.
    if profile_id and self._pr_profile == profile_id and self._comm_profile == profile_id:
        return  # Already loaded (possibly empty) for this profile

    self._pr_cache = {}
    self._pr_profile = profile_id
    self._comm_cache = {}
    self._comm_profile = profile_id
    try:
        rows = self._db.execute(
            "SELECT fact_id, pagerank_score, community_id "
            "FROM fact_importance WHERE profile_id = ?",
            (profile_id,),
        )
        for r in rows:
            d = dict(r)
            # NULL pagerank_score is stored as 0.0.
            self._pr_cache[d["fact_id"]] = float(d.get("pagerank_score", 0) or 0)
            self._comm_cache[d["fact_id"]] = d.get("community_id")
    except Exception as exc:
        # Graceful degradation: lookups fall back to their defaults.
        import logging
        logging.getLogger(__name__).debug("Graph metrics cache load failed: %s", exc)
|
|
349
|
+
|
|
350
|
+
def _get_pagerank(self, fact_id: str, profile_id: str) -> float:
    """Return the cached PageRank score of ``fact_id``, or 0.0 when unknown.

    Ensures the per-profile metrics cache is loaded before the lookup.
    """
    self._load_graph_metrics_cache(profile_id)
    scores = self._pr_cache
    if fact_id in scores:
        return scores[fact_id]
    return 0.0
|
|
354
|
+
|
|
355
|
+
def _get_community(self, fact_id: str, profile_id: str) -> int | None:
    """Return the cached community id of ``fact_id``, or None when unknown.

    Shares the per-profile cache that ``_load_graph_metrics_cache`` fills.
    """
    self._load_graph_metrics_cache(profile_id)
    communities = self._comm_cache
    return communities[fact_id] if fact_id in communities else None
|
|
@@ -176,8 +176,16 @@ def create_app() -> FastAPI:
|
|
|
176
176
|
application.include_router(ws_router)
|
|
177
177
|
application.include_router(v3_router)
|
|
178
178
|
|
|
179
|
+
# v3.4.1: Chat SSE endpoint
|
|
180
|
+
for _module_name_v341 in ("chat",):
|
|
181
|
+
try:
|
|
182
|
+
_mod_v341 = __import__(f"superlocalmemory.server.routes.{_module_name_v341}", fromlist=["router"])
|
|
183
|
+
application.include_router(_mod_v341.router)
|
|
184
|
+
except (ImportError, Exception):
|
|
185
|
+
pass
|
|
186
|
+
|
|
179
187
|
# Graceful optional routers
|
|
180
|
-
for _module_name in ("learning", "lifecycle", "behavioral", "compliance"):
|
|
188
|
+
for _module_name in ("learning", "lifecycle", "behavioral", "compliance", "insights", "timeline"):
|
|
181
189
|
try:
|
|
182
190
|
_mod = __import__(f"superlocalmemory.server.routes.{_module_name}", fromlist=["router"])
|
|
183
191
|
application.include_router(_mod.router)
|