superlocalmemory 3.4.0 → 3.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -8
- package/docs/screenshots/01-dashboard-main.png +0 -0
- package/docs/screenshots/02-knowledge-graph.png +0 -0
- package/docs/screenshots/03-patterns-learning.png +0 -0
- package/docs/screenshots/04-learning-dashboard.png +0 -0
- package/docs/screenshots/05-behavioral-analysis.png +0 -0
- package/docs/screenshots/06-graph-communities.png +0 -0
- package/package.json +2 -2
- package/pyproject.toml +11 -2
- package/scripts/postinstall.js +26 -7
- package/src/superlocalmemory/cli/commands.py +42 -60
- package/src/superlocalmemory/cli/daemon.py +107 -47
- package/src/superlocalmemory/cli/main.py +10 -0
- package/src/superlocalmemory/cli/setup_wizard.py +137 -9
- package/src/superlocalmemory/core/config.py +28 -0
- package/src/superlocalmemory/core/consolidation_engine.py +38 -1
- package/src/superlocalmemory/core/engine.py +9 -0
- package/src/superlocalmemory/core/engine_wiring.py +5 -1
- package/src/superlocalmemory/core/graph_analyzer.py +254 -12
- package/src/superlocalmemory/core/health_monitor.py +313 -0
- package/src/superlocalmemory/core/reranker_worker.py +19 -5
- package/src/superlocalmemory/ingestion/__init__.py +13 -0
- package/src/superlocalmemory/ingestion/adapter_manager.py +234 -0
- package/src/superlocalmemory/ingestion/base_adapter.py +177 -0
- package/src/superlocalmemory/ingestion/calendar_adapter.py +340 -0
- package/src/superlocalmemory/ingestion/credentials.py +118 -0
- package/src/superlocalmemory/ingestion/gmail_adapter.py +369 -0
- package/src/superlocalmemory/ingestion/parsers.py +100 -0
- package/src/superlocalmemory/ingestion/transcript_adapter.py +156 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +287 -53
- package/src/superlocalmemory/learning/entity_compiler.py +377 -0
- package/src/superlocalmemory/mesh/__init__.py +12 -0
- package/src/superlocalmemory/mesh/broker.py +344 -0
- package/src/superlocalmemory/retrieval/entity_channel.py +141 -4
- package/src/superlocalmemory/retrieval/spreading_activation.py +45 -0
- package/src/superlocalmemory/server/api.py +15 -8
- package/src/superlocalmemory/server/routes/behavioral.py +8 -4
- package/src/superlocalmemory/server/routes/chat.py +320 -0
- package/src/superlocalmemory/server/routes/entity.py +95 -0
- package/src/superlocalmemory/server/routes/ingest.py +110 -0
- package/src/superlocalmemory/server/routes/insights.py +368 -0
- package/src/superlocalmemory/server/routes/learning.py +106 -6
- package/src/superlocalmemory/server/routes/memories.py +20 -9
- package/src/superlocalmemory/server/routes/mesh.py +186 -0
- package/src/superlocalmemory/server/routes/stats.py +25 -3
- package/src/superlocalmemory/server/routes/timeline.py +252 -0
- package/src/superlocalmemory/server/routes/v3_api.py +161 -0
- package/src/superlocalmemory/server/ui.py +8 -0
- package/src/superlocalmemory/server/unified_daemon.py +691 -0
- package/src/superlocalmemory/storage/schema_v343.py +229 -0
- package/src/superlocalmemory/ui/index.html +168 -58
- package/src/superlocalmemory/ui/js/graph-event-bus.js +83 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +1 -1
- package/src/superlocalmemory/ui/js/knowledge-graph.js +942 -0
- package/src/superlocalmemory/ui/js/memory-chat.js +344 -0
- package/src/superlocalmemory/ui/js/memory-timeline.js +265 -0
- package/src/superlocalmemory/ui/js/quick-actions.js +334 -0
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -594
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -279
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -47
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -259,7 +259,7 @@ def run_wizard(auto: bool = False) -> None:
|
|
|
259
259
|
|
|
260
260
|
print()
|
|
261
261
|
print("╔══════════════════════════════════════════════════════════╗")
|
|
262
|
-
print("║ SuperLocalMemory V3 —
|
|
262
|
+
print("║ SuperLocalMemory V3 — The Unified Brain ║")
|
|
263
263
|
print("║ by Varun Pratap Bhardwaj / Qualixar ║")
|
|
264
264
|
print("╚══════════════════════════════════════════════════════════╝")
|
|
265
265
|
print()
|
|
@@ -373,9 +373,9 @@ def run_wizard(auto: bool = False) -> None:
|
|
|
373
373
|
else:
|
|
374
374
|
print(f"\n ✓ CodeGraph disabled (enable later in {cg_config_path})")
|
|
375
375
|
|
|
376
|
-
# -- Step 4: Download
|
|
376
|
+
# -- Step 4: Download models --
|
|
377
377
|
print()
|
|
378
|
-
print("─── Step 4/
|
|
378
|
+
print("─── Step 4/9: Download Embedding Model ───")
|
|
379
379
|
|
|
380
380
|
if not st_ok:
|
|
381
381
|
print(" ⚠ Skipped (sentence-transformers not installed)")
|
|
@@ -385,18 +385,129 @@ def run_wizard(auto: bool = False) -> None:
|
|
|
385
385
|
if not embed_ok:
|
|
386
386
|
print(" ⚠ Model will download on first use (may take a few minutes)")
|
|
387
387
|
|
|
388
|
-
# -- Step 4: Download reranker model --
|
|
389
388
|
print()
|
|
390
|
-
print("─── Step
|
|
389
|
+
print("─── Step 4b/9: Download Reranker Model ───")
|
|
391
390
|
|
|
392
391
|
if not st_ok:
|
|
393
392
|
print(" ⚠ Skipped (sentence-transformers not installed)")
|
|
394
393
|
else:
|
|
395
394
|
_download_reranker(_RERANKER_MODEL)
|
|
396
395
|
|
|
397
|
-
# -- Step 5:
|
|
396
|
+
# -- Step 5: Daemon Configuration (v3.4.3) --
|
|
397
|
+
print()
|
|
398
|
+
print("─── Step 5/9: Daemon Configuration ───")
|
|
399
|
+
print()
|
|
400
|
+
print(" The SLM daemon runs in the background for instant memory access.")
|
|
401
|
+
print()
|
|
402
|
+
print(" [1] 24/7 Always-On (recommended — brain never sleeps)")
|
|
403
|
+
print(" [2] Auto-shutdown after idle (saves RAM when not coding)")
|
|
404
|
+
print()
|
|
405
|
+
|
|
406
|
+
if interactive:
|
|
407
|
+
daemon_choice = _prompt(" Select daemon mode [1/2] (default: 1): ", "1")
|
|
408
|
+
else:
|
|
409
|
+
daemon_choice = "1"
|
|
410
|
+
print(" Auto-selecting 24/7 mode (non-interactive)")
|
|
411
|
+
|
|
412
|
+
if daemon_choice == "2":
|
|
413
|
+
if interactive:
|
|
414
|
+
timeout_choice = _prompt(" Idle timeout [30m/1h/2h] (default: 30m): ", "30m")
|
|
415
|
+
else:
|
|
416
|
+
timeout_choice = "30m"
|
|
417
|
+
timeout_map = {"30m": 1800, "1h": 3600, "2h": 7200}
|
|
418
|
+
config.daemon_idle_timeout = timeout_map.get(timeout_choice, 1800)
|
|
419
|
+
print(f"\n ✓ Auto-shutdown after {timeout_choice} idle")
|
|
420
|
+
else:
|
|
421
|
+
config.daemon_idle_timeout = 0
|
|
422
|
+
print("\n ✓ 24/7 Always-On mode")
|
|
423
|
+
|
|
424
|
+
config.save()
|
|
425
|
+
|
|
426
|
+
# -- Step 6: Mesh Communication (v3.4.3) --
|
|
427
|
+
print()
|
|
428
|
+
print("─── Step 6/9: Mesh Communication ───")
|
|
429
|
+
print()
|
|
430
|
+
print(" SLM Mesh enables agent-to-agent P2P communication.")
|
|
431
|
+
print(" Multiple AI sessions can share knowledge in real-time.")
|
|
432
|
+
print()
|
|
433
|
+
print(" [Y] Enable Mesh (recommended)")
|
|
434
|
+
print(" [N] Disable Mesh")
|
|
435
|
+
print()
|
|
436
|
+
|
|
437
|
+
if interactive:
|
|
438
|
+
mesh_choice = _prompt(" Enable Mesh? [Y/n] (default: Y): ", "y").lower()
|
|
439
|
+
else:
|
|
440
|
+
mesh_choice = "y"
|
|
441
|
+
print(" Auto-enabling Mesh (non-interactive)")
|
|
442
|
+
|
|
443
|
+
config.mesh_enabled = mesh_choice in ("", "y", "yes")
|
|
444
|
+
config.save()
|
|
445
|
+
print(f"\n ✓ Mesh {'enabled' if config.mesh_enabled else 'disabled'}")
|
|
446
|
+
|
|
447
|
+
# -- Step 7: Ingestion Adapters (v3.4.3) --
|
|
448
|
+
print()
|
|
449
|
+
print("─── Step 7/9: Ingestion Adapters ───")
|
|
450
|
+
print()
|
|
451
|
+
print(" These let SLM learn from your email, calendar, and meetings.")
|
|
452
|
+
print(" All adapters are OFF by default. You can enable them later.")
|
|
453
|
+
print()
|
|
454
|
+
print(" Available adapters:")
|
|
455
|
+
print(" • Gmail Ingestion — requires Google OAuth setup")
|
|
456
|
+
print(" • Google Calendar — shares Gmail credentials")
|
|
457
|
+
print(" • Meeting Transcripts — watches a folder for .srt/.vtt files")
|
|
458
|
+
print()
|
|
459
|
+
|
|
460
|
+
if interactive:
|
|
461
|
+
adapter_input = _prompt(" Enable any now? [Enter to skip, or type: gmail,calendar,transcript]: ", "")
|
|
462
|
+
else:
|
|
463
|
+
adapter_input = ""
|
|
464
|
+
|
|
465
|
+
# Save adapter preferences (actual setup happens via `slm adapters enable X`)
|
|
466
|
+
adapters_config = {"gmail": False, "calendar": False, "transcript": False}
|
|
467
|
+
if adapter_input:
|
|
468
|
+
for name in adapter_input.split(","):
|
|
469
|
+
name = name.strip().lower()
|
|
470
|
+
if name in adapters_config:
|
|
471
|
+
adapters_config[name] = True
|
|
472
|
+
|
|
473
|
+
adapters_path = _SLM_HOME / "adapters.json"
|
|
474
|
+
import json as _json
|
|
475
|
+
adapters_path.write_text(_json.dumps(
|
|
476
|
+
{k: {"enabled": v, "tier": "polling"} for k, v in adapters_config.items()},
|
|
477
|
+
indent=2,
|
|
478
|
+
))
|
|
479
|
+
|
|
480
|
+
enabled_adapters = [k for k, v in adapters_config.items() if v]
|
|
481
|
+
if enabled_adapters:
|
|
482
|
+
print(f"\n ✓ Enabled: {', '.join(enabled_adapters)}")
|
|
483
|
+
print(" Run `slm adapters start <name>` to begin ingestion")
|
|
484
|
+
else:
|
|
485
|
+
print("\n ✓ All adapters disabled (enable later: slm adapters enable gmail)")
|
|
486
|
+
|
|
487
|
+
# -- Step 8: Entity Compilation (v3.4.3) --
|
|
488
|
+
print()
|
|
489
|
+
print("─── Step 8/9: Entity Compilation ───")
|
|
490
|
+
print()
|
|
491
|
+
print(" Entity compilation builds knowledge summaries per person,")
|
|
492
|
+
print(" project, and concept. Runs automatically during consolidation.")
|
|
493
|
+
print()
|
|
494
|
+
print(" [Y] Enable entity compilation (recommended)")
|
|
495
|
+
print(" [N] Disable")
|
|
398
496
|
print()
|
|
399
|
-
|
|
497
|
+
|
|
498
|
+
if interactive:
|
|
499
|
+
ec_choice = _prompt(" Enable entity compilation? [Y/n] (default: Y): ", "y").lower()
|
|
500
|
+
else:
|
|
501
|
+
ec_choice = "y"
|
|
502
|
+
print(" Auto-enabling entity compilation (non-interactive)")
|
|
503
|
+
|
|
504
|
+
config.entity_compilation_enabled = ec_choice in ("", "y", "yes")
|
|
505
|
+
config.save()
|
|
506
|
+
print(f"\n ✓ Entity compilation {'enabled' if config.entity_compilation_enabled else 'disabled'}")
|
|
507
|
+
|
|
508
|
+
# -- Step 9: Verification --
|
|
509
|
+
print()
|
|
510
|
+
print("─── Step 9/9: Verification ───")
|
|
400
511
|
|
|
401
512
|
if st_ok:
|
|
402
513
|
verified = _verify_installation()
|
|
@@ -410,16 +521,33 @@ def run_wizard(auto: bool = False) -> None:
|
|
|
410
521
|
print()
|
|
411
522
|
print("╔══════════════════════════════════════════════════════════╗")
|
|
412
523
|
if verified:
|
|
413
|
-
print("║ ✓ Setup Complete —
|
|
524
|
+
print("║ ✓ Setup Complete — The Unified Brain is ready! ║")
|
|
414
525
|
else:
|
|
415
526
|
print("║ ✓ Setup Complete — basic config saved ║")
|
|
416
527
|
print("║ Models will auto-download on first use ║")
|
|
417
528
|
print("╚══════════════════════════════════════════════════════════╝")
|
|
418
529
|
print()
|
|
530
|
+
|
|
531
|
+
# Summary of choices
|
|
532
|
+
daemon_mode = "24/7" if config.daemon_idle_timeout == 0 else f"auto-shutdown ({config.daemon_idle_timeout}s)"
|
|
533
|
+
print(f" Enabled: Mode {choice.upper()}, Daemon ({daemon_mode})", end="")
|
|
534
|
+
if config.mesh_enabled:
|
|
535
|
+
print(", Mesh", end="")
|
|
536
|
+
if config.entity_compilation_enabled:
|
|
537
|
+
print(", Entity Compilation", end="")
|
|
538
|
+
if code_graph_enabled:
|
|
539
|
+
print(", CodeGraph", end="")
|
|
540
|
+
print()
|
|
541
|
+
if enabled_adapters:
|
|
542
|
+
print(f" Adapters: {', '.join(enabled_adapters)}")
|
|
543
|
+
else:
|
|
544
|
+
print(" Adapters: none (enable via: slm adapters enable gmail)")
|
|
545
|
+
print()
|
|
419
546
|
print(" Quick start:")
|
|
420
547
|
print(' slm remember "your first memory"')
|
|
421
548
|
print(' slm recall "search query"')
|
|
422
|
-
print(" slm dashboard")
|
|
549
|
+
print(" slm dashboard → http://localhost:8765")
|
|
550
|
+
print(" slm adapters enable gmail → start Gmail ingestion")
|
|
423
551
|
print()
|
|
424
552
|
print(" Need help?")
|
|
425
553
|
print(" slm doctor — diagnose issues")
|
|
@@ -153,6 +153,10 @@ class RetrievalConfig:
|
|
|
153
153
|
temporal_proximity_days: int = 30
|
|
154
154
|
|
|
155
155
|
# Reranking (V3.3.2: ONNX backend enabled for all modes)
|
|
156
|
+
# V3.4.2: Tested gte-reranker-modernbert-base (8K context) — REGRESSED
|
|
157
|
+
# LoCoMo from 68.4% to 64.1%. Reverted to MiniLM-L-12-v2. The 512-token
|
|
158
|
+
# limit is acceptable because SLM's 6-channel retrieval pre-filters
|
|
159
|
+
# relevant facts before reranking. See bench-v342-locomo.md.
|
|
156
160
|
use_cross_encoder: bool = True
|
|
157
161
|
cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"
|
|
158
162
|
cross_encoder_backend: str = "" # "" = PyTorch (~500MB stable), "onnx" = ONNX (leaks on ARM64 CoreML)
|
|
@@ -577,6 +581,19 @@ class SLMConfig:
|
|
|
577
581
|
default_factory=ParameterizationConfig,
|
|
578
582
|
)
|
|
579
583
|
|
|
584
|
+
# v3.4.3: Daemon configuration
|
|
585
|
+
daemon_idle_timeout: int = 0 # 0 = 24/7 (no auto-kill). >0 = seconds before auto-kill.
|
|
586
|
+
daemon_port: int = 8765 # Primary daemon port
|
|
587
|
+
daemon_legacy_port: int = 8767 # Backward-compat redirect port
|
|
588
|
+
daemon_enable_legacy_port: bool = True # Set False to disable 8767 redirect
|
|
589
|
+
|
|
590
|
+
# v3.4.3: Entity compilation
|
|
591
|
+
entity_compilation_enabled: bool = True
|
|
592
|
+
entity_compilation_retrieval_boost: float = 1.0 # 1.0 = disabled. >1.0 = boost score.
|
|
593
|
+
|
|
594
|
+
# v3.4.3: Mesh
|
|
595
|
+
mesh_enabled: bool = True
|
|
596
|
+
|
|
580
597
|
def __post_init__(self) -> None:
|
|
581
598
|
if self.db_path is None:
|
|
582
599
|
self.db_path = self.base_dir / DEFAULT_DB_NAME
|
|
@@ -629,6 +646,17 @@ class SLMConfig:
|
|
|
629
646
|
if k in RetrievalConfig.__dataclass_fields__
|
|
630
647
|
})
|
|
631
648
|
|
|
649
|
+
# V3.4.3 config fields (additive — missing keys get dataclass defaults)
|
|
650
|
+
config.daemon_idle_timeout = data.get("daemon_idle_timeout", 0)
|
|
651
|
+
config.daemon_port = data.get("daemon_port", 8765)
|
|
652
|
+
config.daemon_legacy_port = data.get("daemon_legacy_port", 8767)
|
|
653
|
+
config.daemon_enable_legacy_port = data.get("daemon_enable_legacy_port", True)
|
|
654
|
+
config.entity_compilation_enabled = data.get("entity_compilation_enabled", True)
|
|
655
|
+
config.entity_compilation_retrieval_boost = data.get(
|
|
656
|
+
"entity_compilation_retrieval_boost", 1.0,
|
|
657
|
+
)
|
|
658
|
+
config.mesh_enabled = data.get("mesh_enabled", True)
|
|
659
|
+
|
|
632
660
|
return config
|
|
633
661
|
|
|
634
662
|
def save(self, config_path: Path | None = None) -> None:
|
|
@@ -27,6 +27,7 @@ from __future__ import annotations
|
|
|
27
27
|
|
|
28
28
|
import json
|
|
29
29
|
import logging
|
|
30
|
+
import threading
|
|
30
31
|
from datetime import datetime, timezone
|
|
31
32
|
from typing import TYPE_CHECKING, Any
|
|
32
33
|
|
|
@@ -141,7 +142,13 @@ class ConsolidationEngine:
|
|
|
141
142
|
"""Called after each store() in store_pipeline.py.
|
|
142
143
|
|
|
143
144
|
Increments internal counter. When counter hits step_count_trigger
|
|
144
|
-
(default 50), runs lightweight consolidation
|
|
145
|
+
(default 50), runs lightweight consolidation AND queues async
|
|
146
|
+
graph analysis.
|
|
147
|
+
|
|
148
|
+
V3.4.2: Graph analysis runs in background thread after every
|
|
149
|
+
lightweight consolidation trigger. This populates fact_importance
|
|
150
|
+
(PageRank, communities, bridge scores) so retrieval channels can
|
|
151
|
+
use graph intelligence without blocking store/recall latency.
|
|
145
152
|
|
|
146
153
|
Returns True if lightweight consolidation was triggered.
|
|
147
154
|
"""
|
|
@@ -152,9 +159,39 @@ class ConsolidationEngine:
|
|
|
152
159
|
if self._store_count >= self._config.step_count_trigger:
|
|
153
160
|
self._store_count = 0
|
|
154
161
|
self.consolidate(profile_id, lightweight=True)
|
|
162
|
+
# V3.4.2: Queue graph analysis in background (non-blocking)
|
|
163
|
+
self._queue_graph_analysis(profile_id)
|
|
155
164
|
return True
|
|
156
165
|
return False
|
|
157
166
|
|
|
167
|
+
def _queue_graph_analysis(self, profile_id: str) -> None:
    """Kick off graph analysis for *profile_id* on a background thread.

    Does nothing when no graph analyzer is attached to this engine.
    Otherwise starts a daemon thread named ``graph-analysis-bg`` that
    calls ``compute_and_store(profile_id)`` on the analyzer and logs the
    node and community counts from the returned summary. Any exception
    raised inside the thread is logged at debug level and swallowed, so
    the caller is never affected by an analysis failure.
    """
    graph_analyzer = self._graph_analyzer
    if graph_analyzer is None:
        return

    def _analyze_in_background() -> None:
        # Best-effort work: failures here must never reach the caller.
        try:
            summary = graph_analyzer.compute_and_store(profile_id)
            logger.info(
                "Background graph analysis complete: %d nodes, %d communities",
                summary.get("node_count", 0),
                summary.get("community_count", 0),
            )
        except Exception as exc:
            logger.debug("Background graph analysis failed (non-fatal): %s", exc)

    threading.Thread(
        target=_analyze_in_background,
        daemon=True,
        name="graph-analysis-bg",
    ).start()
|
|
194
|
+
|
|
158
195
|
def get_core_memory(self, profile_id: str) -> dict[str, str]:
|
|
159
196
|
"""Load all Core Memory blocks for a profile.
|
|
160
197
|
|
|
@@ -116,6 +116,15 @@ class MemoryEngine:
|
|
|
116
116
|
|
|
117
117
|
self._db = DatabaseManager(self._config.db_path)
|
|
118
118
|
self._db.initialize(schema)
|
|
119
|
+
|
|
120
|
+
# V3.4.3: Apply "Unified Brain" schema extensions (mesh, entity compilation, ingestion)
|
|
121
|
+
# Idempotent — safe to call on every init. Skips if already applied.
|
|
122
|
+
try:
|
|
123
|
+
from superlocalmemory.storage.schema_v343 import apply_v343_schema
|
|
124
|
+
apply_v343_schema(str(self._db.db_path))
|
|
125
|
+
except Exception as exc:
|
|
126
|
+
logger.debug("V3.4.3 schema migration: %s", exc)
|
|
127
|
+
|
|
119
128
|
self._embedder = init_embedder(self._config)
|
|
120
129
|
|
|
121
130
|
if self._caps.llm_fact_extraction:
|
|
@@ -364,7 +364,11 @@ def _init_spreading_activation(
|
|
|
364
364
|
SpreadingActivation,
|
|
365
365
|
SpreadingActivationConfig,
|
|
366
366
|
)
|
|
367
|
-
sa_config = SpreadingActivationConfig(
|
|
367
|
+
sa_config = SpreadingActivationConfig(
|
|
368
|
+
enabled=True,
|
|
369
|
+
use_pagerank_bias=True, # v3.4.1: PageRank-weighted propagation
|
|
370
|
+
community_boost=0.15, # v3.4.1: 15% boost for same-community nodes
|
|
371
|
+
)
|
|
368
372
|
return SpreadingActivation(
|
|
369
373
|
db=db, vector_store=vector_store, config=sa_config,
|
|
370
374
|
)
|
|
@@ -8,13 +8,19 @@ Reads BOTH graph_edges and association_edges for the full graph picture.
|
|
|
8
8
|
Stores results in fact_importance table.
|
|
9
9
|
Called during consolidation (Phase 5), not at query time.
|
|
10
10
|
|
|
11
|
+
v3.4.1: Added Leiden community detection (optional), TF-IDF community labels,
|
|
12
|
+
bridge score detection. Frontend uses Louvain; backend uses Leiden/LP.
|
|
13
|
+
|
|
11
14
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
12
|
-
License:
|
|
15
|
+
License: AGPL-3.0-or-later
|
|
13
16
|
"""
|
|
14
17
|
|
|
15
18
|
from __future__ import annotations
|
|
16
19
|
|
|
20
|
+
import json
|
|
17
21
|
import logging
|
|
22
|
+
from collections import Counter, defaultdict
|
|
23
|
+
from math import log
|
|
18
24
|
from typing import Any
|
|
19
25
|
|
|
20
26
|
logger = logging.getLogger(__name__)
|
|
@@ -37,7 +43,11 @@ class GraphAnalyzer:
|
|
|
37
43
|
def compute_and_store(self, profile_id: str) -> dict[str, Any]:
|
|
38
44
|
"""Run all analyses and persist to fact_importance.
|
|
39
45
|
|
|
40
|
-
|
|
46
|
+
v3.4.1: Now uses Leiden (falls back to Label Propagation),
|
|
47
|
+
generates TF-IDF community labels, computes bridge scores.
|
|
48
|
+
|
|
49
|
+
Returns summary dict with node_count, community_count, top_5_nodes,
|
|
50
|
+
bridge_count, top_bridge_nodes, community_labels.
|
|
41
51
|
"""
|
|
42
52
|
try:
|
|
43
53
|
graph = self._build_networkx_graph(profile_id)
|
|
@@ -50,22 +60,64 @@ class GraphAnalyzer:
|
|
|
50
60
|
}
|
|
51
61
|
|
|
52
62
|
pagerank = self.compute_pagerank(graph)
|
|
53
|
-
communities = self.
|
|
63
|
+
communities = self.detect_communities_leiden(graph, profile_id)
|
|
54
64
|
centrality = self._compute_degree_centrality(graph)
|
|
65
|
+
bridge_scores = self.compute_bridge_scores(graph)
|
|
66
|
+
labels = self.compute_community_labels(profile_id, communities)
|
|
55
67
|
|
|
56
|
-
#
|
|
68
|
+
# v3.4.1: Ensure bridge_score column exists (idempotent migration)
|
|
69
|
+
try:
|
|
70
|
+
columns = self._db.execute(
|
|
71
|
+
"PRAGMA table_info(fact_importance)", (),
|
|
72
|
+
)
|
|
73
|
+
has_bridge = any(
|
|
74
|
+
dict(c).get("name") == "bridge_score" for c in columns
|
|
75
|
+
)
|
|
76
|
+
if not has_bridge:
|
|
77
|
+
self._db.execute(
|
|
78
|
+
"ALTER TABLE fact_importance "
|
|
79
|
+
"ADD COLUMN bridge_score REAL DEFAULT 0.0",
|
|
80
|
+
(),
|
|
81
|
+
)
|
|
82
|
+
except Exception:
|
|
83
|
+
pass
|
|
84
|
+
|
|
85
|
+
# Persist to fact_importance (with bridge_score)
|
|
57
86
|
for node_id in graph.nodes():
|
|
58
87
|
pr_score = pagerank.get(node_id, 0.0)
|
|
59
88
|
comm_id = communities.get(node_id)
|
|
60
89
|
deg_cent = centrality.get(node_id, 0.0)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
90
|
+
br_score = bridge_scores.get(node_id, 0.0)
|
|
91
|
+
try:
|
|
92
|
+
self._db.execute(
|
|
93
|
+
"INSERT OR REPLACE INTO fact_importance "
|
|
94
|
+
"(fact_id, profile_id, pagerank_score, community_id, "
|
|
95
|
+
" degree_centrality, bridge_score, computed_at) "
|
|
96
|
+
"VALUES (?, ?, ?, ?, ?, ?, datetime('now'))",
|
|
97
|
+
(node_id, profile_id, round(pr_score, 6),
|
|
98
|
+
comm_id, round(deg_cent, 4),
|
|
99
|
+
round(br_score, 6)),
|
|
100
|
+
)
|
|
101
|
+
except Exception:
|
|
102
|
+
# Fallback without bridge_score if column doesn't exist
|
|
103
|
+
self._db.execute(
|
|
104
|
+
"INSERT OR REPLACE INTO fact_importance "
|
|
105
|
+
"(fact_id, profile_id, pagerank_score, community_id, "
|
|
106
|
+
" degree_centrality, computed_at) "
|
|
107
|
+
"VALUES (?, ?, ?, ?, ?, datetime('now'))",
|
|
108
|
+
(node_id, profile_id, round(pr_score, 6),
|
|
109
|
+
comm_id, round(deg_cent, 4)),
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# v3.4.1: Persist community labels to JSON sidecar
|
|
113
|
+
try:
|
|
114
|
+
from pathlib import Path as _Path
|
|
115
|
+
labels_dir = _Path.home() / ".superlocalmemory"
|
|
116
|
+
labels_dir.mkdir(parents=True, exist_ok=True)
|
|
117
|
+
labels_path = labels_dir / f"{profile_id}_community_labels.json"
|
|
118
|
+
labels_path.write_text(json.dumps(labels, indent=2))
|
|
119
|
+
except Exception:
|
|
120
|
+
pass
|
|
69
121
|
|
|
70
122
|
top_5 = sorted(
|
|
71
123
|
pagerank.items(), key=lambda x: x[1], reverse=True,
|
|
@@ -74,6 +126,20 @@ class GraphAnalyzer:
|
|
|
74
126
|
set(c for c in communities.values() if c is not None),
|
|
75
127
|
)
|
|
76
128
|
|
|
129
|
+
bridge_count = len(
|
|
130
|
+
[s for s in bridge_scores.values() if s > 0.1],
|
|
131
|
+
)
|
|
132
|
+
top_bridges = sorted(
|
|
133
|
+
bridge_scores.items(), key=lambda x: -x[1],
|
|
134
|
+
)[:5]
|
|
135
|
+
|
|
136
|
+
logger.info(
|
|
137
|
+
"GraphAnalyzer: %d nodes, %d communities, %d bridges, "
|
|
138
|
+
"labels=%s",
|
|
139
|
+
graph.number_of_nodes(), unique_communities,
|
|
140
|
+
bridge_count, labels,
|
|
141
|
+
)
|
|
142
|
+
|
|
77
143
|
return {
|
|
78
144
|
"node_count": graph.number_of_nodes(),
|
|
79
145
|
"edge_count": graph.number_of_edges(),
|
|
@@ -81,6 +147,11 @@ class GraphAnalyzer:
|
|
|
81
147
|
"top_5_nodes": [
|
|
82
148
|
(nid, round(score, 4)) for nid, score in top_5
|
|
83
149
|
],
|
|
150
|
+
"bridge_count": bridge_count,
|
|
151
|
+
"top_bridge_nodes": [
|
|
152
|
+
(nid, round(s, 4)) for nid, s in top_bridges
|
|
153
|
+
],
|
|
154
|
+
"community_labels": labels,
|
|
84
155
|
}
|
|
85
156
|
except Exception as exc:
|
|
86
157
|
logger.debug("GraphAnalyzer.compute_and_store failed: %s", exc)
|
|
@@ -140,6 +211,177 @@ class GraphAnalyzer:
|
|
|
140
211
|
result[node] = comm_id
|
|
141
212
|
return result
|
|
142
213
|
|
|
214
|
+
# ── v3.4.1: Leiden Community Detection ────────────────────────
|
|
215
|
+
|
|
216
|
+
def detect_communities_leiden(
    self,
    graph: Any = None,
    profile_id: str = "",
    resolution: float = 1.0,
) -> dict[str, int]:
    """Partition the graph into communities using the Leiden algorithm.

    When *graph* is None it is built via
    ``_build_networkx_graph(profile_id)``. An empty graph yields an empty
    mapping. If ``leidenalg`` or ``igraph`` cannot be imported, the call
    is delegated to ``detect_communities()`` (Label Propagation).

    Returns a dict mapping each node id to its community id.
    """
    if graph is None:
        graph = self._build_networkx_graph(profile_id)
    if graph.number_of_nodes() == 0:
        return {}

    try:
        import leidenalg
        import igraph
    except ImportError:
        logger.info(
            "leidenalg not installed, using Label Propagation fallback",
        )
        return self.detect_communities(graph, profile_id)

    # igraph addresses vertices by position, so give every node an index
    # in the undirected view of the graph.
    sym_graph = graph.to_undirected()
    ordered_nodes = list(sym_graph.nodes())
    position = {node: pos for pos, node in enumerate(ordered_nodes)}

    endpoint_pairs = [
        (src, dst)
        for src, dst in sym_graph.edges()
        if src in position and dst in position
    ]

    ig_graph = igraph.Graph(n=len(ordered_nodes), directed=False)
    ig_graph.add_edges(
        [(position[src], position[dst]) for src, dst in endpoint_pairs],
    )
    # Edges without an explicit weight count as 1.0.
    ig_graph.es["weight"] = [
        sym_graph[src][dst].get("weight", 1.0) for src, dst in endpoint_pairs
    ]
    # Collapse parallel edges, keeping the largest weight of each group.
    ig_graph.simplify(combine_edges={"weight": "max"})

    partition = leidenalg.find_partition(
        ig_graph,
        leidenalg.RBConfigurationVertexPartition,
        resolution_parameter=resolution,
        weights="weight",
    )

    membership: dict[str, int] = dict(
        zip(ordered_nodes, partition.membership),
    )

    logger.info(
        "Leiden detected %d communities (resolution=%.1f)",
        len(set(membership.values())), resolution,
    )
    return membership
|
|
274
|
+
|
|
275
|
+
# ── v3.4.1: TF-IDF Community Labels ─────────────────────────
|
|
276
|
+
|
|
277
|
+
def compute_community_labels(
    self,
    profile_id: str,
    communities: dict[str, int],
) -> dict[int, str]:
    """Generate human-readable community labels via TF-IDF on fact content.

    For every community the content of its facts is read from
    ``atomic_facts``, tokenized (lower-cased, punctuation-stripped,
    stopwords and words of two characters or fewer removed) and the three
    best-scoring terms are joined with ", ". With a single community raw
    term frequency is used; otherwise terms are weighted by an IDF
    computed across communities. A community without usable text is
    labelled ``"Community <id>"``.

    Args:
        profile_id: Profile whose facts are read and whose labels are stored.
        communities: Mapping of fact_id to community_id.

    Returns:
        Dict mapping community_id to its label. The same mapping is
        written, JSON-encoded, to the ``config`` table under the key
        ``community_labels_<profile_id>`` (best effort; a failure there
        is logged and does not affect the return value).
    """
    if not communities:
        return {}

    # Group fact_ids by community
    comm_facts: dict[int, list[str]] = defaultdict(list)
    for fact_id, comm_id in communities.items():
        comm_facts[comm_id].append(fact_id)

    stopwords = frozenset({
        "the", "a", "an", "is", "was", "were", "are", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "shall", "can",
        "to", "of", "in", "for", "on", "with", "at", "by", "from",
        "as", "into", "through", "during", "before", "after", "above",
        "below", "between", "and", "but", "or", "not", "no", "nor",
        "so", "yet", "both", "either", "neither", "this", "that",
        "these", "those", "it", "its", "they", "them", "their",
        "he", "she", "his", "her", "we", "our", "you", "your",
        "i", "my", "me",
    })

    # SQLite caps the number of bound parameters per statement (999 on
    # older builds). Query in batches so large communities still get
    # their content instead of silently falling back to a generic label.
    max_ids_per_query = 500

    # Fetch content for each community
    tf_per_comm: dict[int, Counter] = {}
    for comm_id, fact_ids in comm_facts.items():
        texts: list[str] = []
        try:
            for start in range(0, len(fact_ids), max_ids_per_query):
                batch = fact_ids[start:start + max_ids_per_query]
                placeholders = ",".join("?" * len(batch))
                sql = (
                    "SELECT content FROM atomic_facts WHERE fact_id IN ("
                    + placeholders
                    + ") AND profile_id = ?"
                )
                rows = self._db.execute(sql, (*batch, profile_id))
                # A NULL content column comes back as None; treat as empty.
                texts.extend(dict(r).get("content") or "" for r in rows)
        except Exception as exc:
            # Best effort: a failed read leaves this community text-less.
            logger.debug(
                "Could not fetch content for community %s: %s", comm_id, exc,
            )
            texts = []

        tokens: list[str] = []
        for text in texts:
            for word in text.lower().split():
                w = word.strip(".,;:!?\"'()[]{}")
                if len(w) > 2 and w not in stopwords:
                    tokens.append(w)
        tf_per_comm[comm_id] = Counter(tokens)

    num_communities = len(comm_facts)
    labels: dict[int, str] = {}

    if num_communities == 1:
        # Single community: use raw term frequency
        for comm_id, tf in tf_per_comm.items():
            top = [w for w, _ in tf.most_common(3)]
            labels[comm_id] = ", ".join(top) if top else f"Community {comm_id}"
    else:
        # Document frequency = number of communities containing the term
        doc_freq: Counter = Counter()
        for tf in tf_per_comm.values():
            doc_freq.update(tf.keys())

        for comm_id, tf in tf_per_comm.items():
            scored = [
                (term, count * log(1 + num_communities / (1 + doc_freq[term])))
                for term, count in tf.items()
            ]
            # Stable sort: ties keep first-seen order of the terms.
            scored.sort(key=lambda x: x[1], reverse=True)
            top = [w for w, _ in scored[:3]]
            labels[comm_id] = ", ".join(top) if top else f"Community {comm_id}"

    # Store in config table
    try:
        key = "community_labels_" + profile_id
        value = json.dumps(labels)
        self._db.execute(
            "INSERT OR REPLACE INTO config (key, value, updated_at) "
            "VALUES (?, ?, datetime('now'))",
            (key, value),
        )
    except Exception as exc:
        logger.warning("Failed to store community labels: %s", exc)

    return labels
|
|
368
|
+
|
|
369
|
+
# ── v3.4.1: Bridge Score Detection ───────────────────────────
|
|
370
|
+
|
|
371
|
+
def compute_bridge_scores(self, graph: Any) -> dict[str, float]:
    """Score nodes as bridges via normalized betweenness centrality.

    Args:
        graph: Graph object exposing ``number_of_nodes()`` and accepted
            by ``networkx.betweenness_centrality``.

    Returns:
        Dict mapping node_id to its normalized betweenness centrality.
        Graphs with two or fewer nodes return an empty dict.

    ``compute_and_store()`` writes these scores to the ``bridge_score``
    column of ``fact_importance`` and reports them in its summary.
    """
    # Trivial graphs need no analysis, so return before importing networkx.
    if graph.number_of_nodes() <= 2:
        return {}

    import networkx as nx

    # NOTE(review): networkx interprets ``weight`` as a distance when
    # finding shortest paths. Confirm the edge weights in this graph are
    # not similarity strengths, otherwise strong links are penalized.
    return nx.betweenness_centrality(
        graph, weight="weight", normalized=True,
    )
|
|
384
|
+
|
|
143
385
|
def _compute_degree_centrality(
|
|
144
386
|
self, graph: Any,
|
|
145
387
|
) -> dict[str, float]:
|