superlocalmemory 2.8.5 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/LICENSE +9 -1
- package/NOTICE +63 -0
- package/README.md +165 -480
- package/bin/slm +17 -449
- package/bin/slm-npm +2 -2
- package/bin/slm.bat +4 -2
- package/conftest.py +5 -0
- package/docs/api-reference.md +284 -0
- package/docs/architecture.md +149 -0
- package/docs/auto-memory.md +150 -0
- package/docs/cli-reference.md +276 -0
- package/docs/compliance.md +191 -0
- package/docs/configuration.md +182 -0
- package/docs/getting-started.md +102 -0
- package/docs/ide-setup.md +261 -0
- package/docs/mcp-tools.md +220 -0
- package/docs/migration-from-v2.md +170 -0
- package/docs/profiles.md +173 -0
- package/docs/troubleshooting.md +310 -0
- package/{configs → ide/configs}/antigravity-mcp.json +3 -3
- package/ide/configs/chatgpt-desktop-mcp.json +16 -0
- package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
- package/{configs → ide/configs}/codex-mcp.toml +4 -4
- package/{configs → ide/configs}/continue-mcp.yaml +4 -3
- package/{configs → ide/configs}/continue-skills.yaml +6 -6
- package/ide/configs/cursor-mcp.json +15 -0
- package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
- package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
- package/{configs → ide/configs}/opencode-mcp.json +2 -2
- package/{configs → ide/configs}/perplexity-mcp.json +2 -2
- package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
- package/{configs → ide/configs}/windsurf-mcp.json +3 -3
- package/{configs → ide/configs}/zed-mcp.json +2 -2
- package/{hooks → ide/hooks}/context-hook.js +9 -20
- package/ide/hooks/memory-list-skill.js +70 -0
- package/ide/hooks/memory-profile-skill.js +101 -0
- package/ide/hooks/memory-recall-skill.js +62 -0
- package/ide/hooks/memory-remember-skill.js +68 -0
- package/ide/hooks/memory-reset-skill.js +160 -0
- package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
- package/ide/integrations/langchain/README.md +106 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
- package/ide/integrations/langchain/pyproject.toml +38 -0
- package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
- package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
- package/ide/integrations/langchain/tests/test_security.py +117 -0
- package/ide/integrations/llamaindex/README.md +81 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
- package/ide/integrations/llamaindex/pyproject.toml +43 -0
- package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
- package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
- package/ide/integrations/llamaindex/tests/test_security.py +241 -0
- package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
- package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
- package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
- package/package.json +13 -22
- package/pyproject.toml +85 -0
- package/scripts/build-dmg.sh +417 -0
- package/scripts/install-skills.ps1 +334 -0
- package/{install.ps1 → scripts/install.ps1} +36 -4
- package/{install.sh → scripts/install.sh} +14 -13
- package/scripts/postinstall.js +2 -2
- package/scripts/start-dashboard.ps1 +52 -0
- package/scripts/start-dashboard.sh +41 -0
- package/scripts/sync-wiki.ps1 +127 -0
- package/scripts/sync-wiki.sh +82 -0
- package/scripts/test-dmg.sh +161 -0
- package/scripts/test-npm-package.ps1 +252 -0
- package/scripts/test-npm-package.sh +207 -0
- package/scripts/verify-install.ps1 +294 -0
- package/scripts/verify-install.sh +266 -0
- package/src/superlocalmemory/__init__.py +0 -0
- package/src/superlocalmemory/attribution/__init__.py +9 -0
- package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
- package/src/superlocalmemory/attribution/signer.py +153 -0
- package/src/superlocalmemory/attribution/watermark.py +189 -0
- package/src/superlocalmemory/cli/__init__.py +5 -0
- package/src/superlocalmemory/cli/commands.py +245 -0
- package/src/superlocalmemory/cli/main.py +89 -0
- package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
- package/src/superlocalmemory/cli/post_install.py +99 -0
- package/src/superlocalmemory/cli/setup_wizard.py +129 -0
- package/src/superlocalmemory/compliance/__init__.py +0 -0
- package/src/superlocalmemory/compliance/abac.py +204 -0
- package/src/superlocalmemory/compliance/audit.py +314 -0
- package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
- package/src/superlocalmemory/compliance/gdpr.py +294 -0
- package/src/superlocalmemory/compliance/lifecycle.py +158 -0
- package/src/superlocalmemory/compliance/retention.py +232 -0
- package/src/superlocalmemory/compliance/scheduler.py +148 -0
- package/src/superlocalmemory/core/__init__.py +0 -0
- package/src/superlocalmemory/core/config.py +391 -0
- package/src/superlocalmemory/core/embeddings.py +293 -0
- package/src/superlocalmemory/core/engine.py +701 -0
- package/src/superlocalmemory/core/hooks.py +65 -0
- package/src/superlocalmemory/core/maintenance.py +172 -0
- package/src/superlocalmemory/core/modes.py +140 -0
- package/src/superlocalmemory/core/profiles.py +234 -0
- package/src/superlocalmemory/core/registry.py +117 -0
- package/src/superlocalmemory/dynamics/__init__.py +0 -0
- package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
- package/src/superlocalmemory/encoding/__init__.py +0 -0
- package/src/superlocalmemory/encoding/consolidator.py +485 -0
- package/src/superlocalmemory/encoding/emotional.py +125 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
- package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
- package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
- package/src/superlocalmemory/encoding/foresight.py +91 -0
- package/src/superlocalmemory/encoding/graph_builder.py +302 -0
- package/src/superlocalmemory/encoding/observation_builder.py +160 -0
- package/src/superlocalmemory/encoding/scene_builder.py +183 -0
- package/src/superlocalmemory/encoding/signal_inference.py +90 -0
- package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
- package/src/superlocalmemory/encoding/type_router.py +235 -0
- package/src/superlocalmemory/hooks/__init__.py +3 -0
- package/src/superlocalmemory/hooks/auto_capture.py +111 -0
- package/src/superlocalmemory/hooks/auto_recall.py +93 -0
- package/src/superlocalmemory/hooks/ide_connector.py +204 -0
- package/src/superlocalmemory/hooks/rules_engine.py +99 -0
- package/src/superlocalmemory/infra/__init__.py +3 -0
- package/src/superlocalmemory/infra/auth_middleware.py +82 -0
- package/src/superlocalmemory/infra/backup.py +317 -0
- package/src/superlocalmemory/infra/cache_manager.py +267 -0
- package/src/superlocalmemory/infra/event_bus.py +381 -0
- package/src/superlocalmemory/infra/rate_limiter.py +135 -0
- package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
- package/src/superlocalmemory/learning/__init__.py +0 -0
- package/src/superlocalmemory/learning/adaptive.py +172 -0
- package/src/superlocalmemory/learning/behavioral.py +490 -0
- package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
- package/src/superlocalmemory/learning/bootstrap.py +298 -0
- package/src/superlocalmemory/learning/cross_project.py +399 -0
- package/src/superlocalmemory/learning/database.py +376 -0
- package/src/superlocalmemory/learning/engagement.py +323 -0
- package/src/superlocalmemory/learning/features.py +138 -0
- package/src/superlocalmemory/learning/feedback.py +316 -0
- package/src/superlocalmemory/learning/outcomes.py +255 -0
- package/src/superlocalmemory/learning/project_context.py +366 -0
- package/src/superlocalmemory/learning/ranker.py +155 -0
- package/src/superlocalmemory/learning/source_quality.py +303 -0
- package/src/superlocalmemory/learning/workflows.py +309 -0
- package/src/superlocalmemory/llm/__init__.py +0 -0
- package/src/superlocalmemory/llm/backbone.py +316 -0
- package/src/superlocalmemory/math/__init__.py +0 -0
- package/src/superlocalmemory/math/fisher.py +356 -0
- package/src/superlocalmemory/math/langevin.py +398 -0
- package/src/superlocalmemory/math/sheaf.py +257 -0
- package/src/superlocalmemory/mcp/__init__.py +0 -0
- package/src/superlocalmemory/mcp/resources.py +245 -0
- package/src/superlocalmemory/mcp/server.py +61 -0
- package/src/superlocalmemory/mcp/tools.py +18 -0
- package/src/superlocalmemory/mcp/tools_core.py +305 -0
- package/src/superlocalmemory/mcp/tools_v28.py +223 -0
- package/src/superlocalmemory/mcp/tools_v3.py +286 -0
- package/src/superlocalmemory/retrieval/__init__.py +0 -0
- package/src/superlocalmemory/retrieval/agentic.py +295 -0
- package/src/superlocalmemory/retrieval/ann_index.py +223 -0
- package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
- package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
- package/src/superlocalmemory/retrieval/engine.py +390 -0
- package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
- package/src/superlocalmemory/retrieval/fusion.py +78 -0
- package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
- package/src/superlocalmemory/retrieval/reranker.py +154 -0
- package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
- package/src/superlocalmemory/retrieval/strategy.py +96 -0
- package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
- package/src/superlocalmemory/server/__init__.py +1 -0
- package/src/superlocalmemory/server/api.py +248 -0
- package/src/superlocalmemory/server/routes/__init__.py +4 -0
- package/src/superlocalmemory/server/routes/agents.py +107 -0
- package/src/superlocalmemory/server/routes/backup.py +91 -0
- package/src/superlocalmemory/server/routes/behavioral.py +127 -0
- package/src/superlocalmemory/server/routes/compliance.py +160 -0
- package/src/superlocalmemory/server/routes/data_io.py +188 -0
- package/src/superlocalmemory/server/routes/events.py +183 -0
- package/src/superlocalmemory/server/routes/helpers.py +85 -0
- package/src/superlocalmemory/server/routes/learning.py +273 -0
- package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
- package/src/superlocalmemory/server/routes/memories.py +399 -0
- package/src/superlocalmemory/server/routes/profiles.py +219 -0
- package/src/superlocalmemory/server/routes/stats.py +346 -0
- package/src/superlocalmemory/server/routes/v3_api.py +365 -0
- package/src/superlocalmemory/server/routes/ws.py +82 -0
- package/src/superlocalmemory/server/security_middleware.py +57 -0
- package/src/superlocalmemory/server/ui.py +245 -0
- package/src/superlocalmemory/storage/__init__.py +0 -0
- package/src/superlocalmemory/storage/access_control.py +182 -0
- package/src/superlocalmemory/storage/database.py +594 -0
- package/src/superlocalmemory/storage/migrations.py +303 -0
- package/src/superlocalmemory/storage/models.py +406 -0
- package/src/superlocalmemory/storage/schema.py +726 -0
- package/src/superlocalmemory/storage/v2_migrator.py +317 -0
- package/src/superlocalmemory/trust/__init__.py +0 -0
- package/src/superlocalmemory/trust/gate.py +130 -0
- package/src/superlocalmemory/trust/provenance.py +124 -0
- package/src/superlocalmemory/trust/scorer.py +347 -0
- package/src/superlocalmemory/trust/signals.py +153 -0
- package/ui/index.html +278 -5
- package/ui/js/auto-settings.js +70 -0
- package/ui/js/dashboard.js +90 -0
- package/ui/js/fact-detail.js +92 -0
- package/ui/js/feedback.js +2 -2
- package/ui/js/ide-status.js +102 -0
- package/ui/js/math-health.js +98 -0
- package/ui/js/recall-lab.js +127 -0
- package/ui/js/settings.js +2 -2
- package/ui/js/trust-dashboard.js +73 -0
- package/api_server.py +0 -724
- package/bin/aider-smart +0 -72
- package/bin/superlocalmemoryv2-learning +0 -4
- package/bin/superlocalmemoryv2-list +0 -3
- package/bin/superlocalmemoryv2-patterns +0 -4
- package/bin/superlocalmemoryv2-profile +0 -3
- package/bin/superlocalmemoryv2-recall +0 -3
- package/bin/superlocalmemoryv2-remember +0 -3
- package/bin/superlocalmemoryv2-reset +0 -3
- package/bin/superlocalmemoryv2-status +0 -3
- package/configs/chatgpt-desktop-mcp.json +0 -16
- package/configs/cursor-mcp.json +0 -15
- package/docs/SECURITY-QUICK-REFERENCE.md +0 -214
- package/hooks/memory-list-skill.js +0 -139
- package/hooks/memory-profile-skill.js +0 -273
- package/hooks/memory-recall-skill.js +0 -114
- package/hooks/memory-remember-skill.js +0 -127
- package/hooks/memory-reset-skill.js +0 -274
- package/mcp_server.py +0 -1800
- package/requirements-core.txt +0 -22
- package/requirements-learning.txt +0 -12
- package/requirements.txt +0 -12
- package/src/agent_registry.py +0 -411
- package/src/auth_middleware.py +0 -61
- package/src/auto_backup.py +0 -459
- package/src/behavioral/__init__.py +0 -49
- package/src/behavioral/behavioral_listener.py +0 -203
- package/src/behavioral/behavioral_patterns.py +0 -275
- package/src/behavioral/cross_project_transfer.py +0 -206
- package/src/behavioral/outcome_inference.py +0 -194
- package/src/behavioral/outcome_tracker.py +0 -193
- package/src/behavioral/tests/__init__.py +0 -4
- package/src/behavioral/tests/test_behavioral_integration.py +0 -108
- package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
- package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
- package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
- package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
- package/src/behavioral/tests/test_outcome_inference.py +0 -107
- package/src/behavioral/tests/test_outcome_tracker.py +0 -96
- package/src/cache_manager.py +0 -518
- package/src/compliance/__init__.py +0 -48
- package/src/compliance/abac_engine.py +0 -149
- package/src/compliance/abac_middleware.py +0 -116
- package/src/compliance/audit_db.py +0 -215
- package/src/compliance/audit_logger.py +0 -148
- package/src/compliance/retention_manager.py +0 -289
- package/src/compliance/retention_scheduler.py +0 -186
- package/src/compliance/tests/__init__.py +0 -4
- package/src/compliance/tests/test_abac_enforcement.py +0 -95
- package/src/compliance/tests/test_abac_engine.py +0 -124
- package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
- package/src/compliance/tests/test_audit_db.py +0 -123
- package/src/compliance/tests/test_audit_logger.py +0 -98
- package/src/compliance/tests/test_mcp_audit.py +0 -128
- package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
- package/src/compliance/tests/test_retention_manager.py +0 -131
- package/src/compliance/tests/test_retention_scheduler.py +0 -99
- package/src/compression/__init__.py +0 -25
- package/src/compression/cli.py +0 -150
- package/src/compression/cold_storage.py +0 -217
- package/src/compression/config.py +0 -72
- package/src/compression/orchestrator.py +0 -133
- package/src/compression/tier2_compressor.py +0 -228
- package/src/compression/tier3_compressor.py +0 -153
- package/src/compression/tier_classifier.py +0 -148
- package/src/db_connection_manager.py +0 -536
- package/src/embedding_engine.py +0 -63
- package/src/embeddings/__init__.py +0 -47
- package/src/embeddings/cache.py +0 -70
- package/src/embeddings/cli.py +0 -113
- package/src/embeddings/constants.py +0 -47
- package/src/embeddings/database.py +0 -91
- package/src/embeddings/engine.py +0 -247
- package/src/embeddings/model_loader.py +0 -145
- package/src/event_bus.py +0 -562
- package/src/graph/__init__.py +0 -36
- package/src/graph/build_helpers.py +0 -74
- package/src/graph/cli.py +0 -87
- package/src/graph/cluster_builder.py +0 -188
- package/src/graph/cluster_summary.py +0 -148
- package/src/graph/constants.py +0 -47
- package/src/graph/edge_builder.py +0 -162
- package/src/graph/entity_extractor.py +0 -95
- package/src/graph/graph_core.py +0 -226
- package/src/graph/graph_search.py +0 -231
- package/src/graph/hierarchical.py +0 -207
- package/src/graph/schema.py +0 -99
- package/src/graph_engine.py +0 -52
- package/src/hnsw_index.py +0 -628
- package/src/hybrid_search.py +0 -46
- package/src/learning/__init__.py +0 -217
- package/src/learning/adaptive_ranker.py +0 -682
- package/src/learning/bootstrap/__init__.py +0 -69
- package/src/learning/bootstrap/constants.py +0 -93
- package/src/learning/bootstrap/db_queries.py +0 -316
- package/src/learning/bootstrap/sampling.py +0 -82
- package/src/learning/bootstrap/text_utils.py +0 -71
- package/src/learning/cross_project_aggregator.py +0 -857
- package/src/learning/db/__init__.py +0 -40
- package/src/learning/db/constants.py +0 -44
- package/src/learning/db/schema.py +0 -279
- package/src/learning/engagement_tracker.py +0 -628
- package/src/learning/feature_extractor.py +0 -708
- package/src/learning/feedback_collector.py +0 -806
- package/src/learning/learning_db.py +0 -915
- package/src/learning/project_context_manager.py +0 -572
- package/src/learning/ranking/__init__.py +0 -33
- package/src/learning/ranking/constants.py +0 -84
- package/src/learning/ranking/helpers.py +0 -278
- package/src/learning/source_quality_scorer.py +0 -676
- package/src/learning/synthetic_bootstrap.py +0 -755
- package/src/learning/tests/test_adaptive_ranker.py +0 -325
- package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
- package/src/learning/tests/test_aggregator.py +0 -306
- package/src/learning/tests/test_auto_retrain_v28.py +0 -35
- package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
- package/src/learning/tests/test_feature_extractor_v28.py +0 -93
- package/src/learning/tests/test_feedback_collector.py +0 -294
- package/src/learning/tests/test_learning_db.py +0 -602
- package/src/learning/tests/test_learning_db_v28.py +0 -110
- package/src/learning/tests/test_learning_init_v28.py +0 -48
- package/src/learning/tests/test_outcome_signals.py +0 -48
- package/src/learning/tests/test_project_context.py +0 -292
- package/src/learning/tests/test_schema_migration.py +0 -319
- package/src/learning/tests/test_signal_inference.py +0 -397
- package/src/learning/tests/test_source_quality.py +0 -351
- package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
- package/src/learning/tests/test_workflow_miner.py +0 -318
- package/src/learning/workflow_pattern_miner.py +0 -655
- package/src/lifecycle/__init__.py +0 -54
- package/src/lifecycle/bounded_growth.py +0 -239
- package/src/lifecycle/compaction_engine.py +0 -226
- package/src/lifecycle/lifecycle_engine.py +0 -355
- package/src/lifecycle/lifecycle_evaluator.py +0 -257
- package/src/lifecycle/lifecycle_scheduler.py +0 -130
- package/src/lifecycle/retention_policy.py +0 -285
- package/src/lifecycle/tests/test_bounded_growth.py +0 -193
- package/src/lifecycle/tests/test_compaction.py +0 -179
- package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
- package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
- package/src/lifecycle/tests/test_mcp_compact.py +0 -149
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
- package/src/lifecycle/tests/test_retention_policy.py +0 -162
- package/src/mcp_tools_v28.py +0 -281
- package/src/memory/__init__.py +0 -36
- package/src/memory/cli.py +0 -205
- package/src/memory/constants.py +0 -39
- package/src/memory/helpers.py +0 -28
- package/src/memory/schema.py +0 -166
- package/src/memory-profiles.py +0 -595
- package/src/memory-reset.py +0 -491
- package/src/memory_compression.py +0 -989
- package/src/memory_store_v2.py +0 -1155
- package/src/migrate_v1_to_v2.py +0 -629
- package/src/pattern_learner.py +0 -34
- package/src/patterns/__init__.py +0 -24
- package/src/patterns/analyzers.py +0 -251
- package/src/patterns/learner.py +0 -271
- package/src/patterns/scoring.py +0 -171
- package/src/patterns/store.py +0 -225
- package/src/patterns/terminology.py +0 -140
- package/src/provenance_tracker.py +0 -312
- package/src/qualixar_attribution.py +0 -139
- package/src/qualixar_watermark.py +0 -78
- package/src/query_optimizer.py +0 -511
- package/src/rate_limiter.py +0 -83
- package/src/search/__init__.py +0 -20
- package/src/search/cli.py +0 -77
- package/src/search/constants.py +0 -26
- package/src/search/engine.py +0 -241
- package/src/search/fusion.py +0 -122
- package/src/search/index_loader.py +0 -114
- package/src/search/methods.py +0 -162
- package/src/search_engine_v2.py +0 -401
- package/src/setup_validator.py +0 -482
- package/src/subscription_manager.py +0 -391
- package/src/tree/__init__.py +0 -59
- package/src/tree/builder.py +0 -185
- package/src/tree/nodes.py +0 -202
- package/src/tree/queries.py +0 -257
- package/src/tree/schema.py +0 -80
- package/src/tree_manager.py +0 -19
- package/src/trust/__init__.py +0 -45
- package/src/trust/constants.py +0 -66
- package/src/trust/queries.py +0 -157
- package/src/trust/schema.py +0 -95
- package/src/trust/scorer.py +0 -299
- package/src/trust/signals.py +0 -95
- package/src/trust_scorer.py +0 -44
- package/ui/app.js +0 -1588
- package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
- package/ui/js/graph-cytoscape.js +0 -1168
- package/ui/js/graph-d3-backup.js +0 -32
- package/ui/js/graph.js +0 -32
- package/ui_server.py +0 -266
- /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
- /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
- /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
- /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
- /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
- /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
- /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
- /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
- /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
- /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
- /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
- /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
- /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
- /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
- /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
- /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
- /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
- /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
- /package/{completions → ide/completions}/slm.bash +0 -0
- /package/{completions → ide/completions}/slm.zsh +0 -0
- /package/{configs → ide/configs}/cody-commands.json +0 -0
- /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3 — Entity Resolution.
|
|
6
|
+
|
|
7
|
+
Maps variant mentions ("Alice", "Ms. Smith", "she") to canonical entities.
|
|
8
|
+
Persists ALL resolution results to DB — the #1 fix from the V1 audit where
|
|
9
|
+
entity resolution ran but results were silently discarded.
|
|
10
|
+
|
|
11
|
+
4-tier strategy:
|
|
12
|
+
a) Exact match in canonical_entities (case-insensitive)
|
|
13
|
+
b) Alias match in entity_aliases (case-insensitive)
|
|
14
|
+
c) Fuzzy match via Jaro-Winkler similarity (threshold 0.85)
|
|
15
|
+
d) LLM disambiguation (Mode B/C only)
|
|
16
|
+
|
|
17
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
18
|
+
License: MIT
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
import logging
|
|
25
|
+
import re
|
|
26
|
+
from typing import TYPE_CHECKING
|
|
27
|
+
|
|
28
|
+
from superlocalmemory.storage.models import CanonicalEntity, EntityAlias, _new_id, _now
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from superlocalmemory.llm.backbone import LLMBackbone
|
|
32
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Constants
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
# Fuzzy-match score at or above which a mention is merged automatically.
JARO_WINKLER_AUTO_MERGE: float = 0.85
# Fuzzy-match score below which a mention is never merged.
JARO_WINKLER_LLM_FLOOR: float = 0.70

# Lower-cased pronouns, grouped by person for readability. Membership is
# tested against the lower-cased mention.
PRONOUNS: frozenset[str] = frozenset((
    # third person
    "he", "him", "his", "himself",
    "she", "her", "hers", "herself",
    "they", "them", "their", "themselves",
    "it", "its",
    # first person
    "i", "me", "my",
    "we", "us", "our",
    # second person
    "you", "your",
))

# Heuristic entity type patterns: case-sensitive marker words that hint at
# the kind of entity a name refers to.
_ORG_MARKERS = (
    "Inc",
    "Corp",
    "LLC",
    "Ltd",
    "University",
    "Hospital",
    "Bank",
    "Foundation",
    "Institute",
    "Company",
    "Group",
    "Agency",
)
_PLACE_MARKERS = (
    "City",
    "State",
    "County",
    "Island",
    "River",
    "Mountain",
    "Lake",
    "Park",
    "Street",
    "Avenue",
    "Road",
    "District",
)
_EVENT_MARKERS = (
    "Festival",
    "Conference",
    "Summit",
    "Workshop",
    "Meeting",
    "Election",
    "War",
    "Match",
    "Game",
    "Concert",
    "Wedding",
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Jaro-Winkler similarity — pure Python fallback
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
def jaro_winkler(s1: str, s2: str, prefix_weight: float = 0.1) -> float:
    """Return the Jaro-Winkler similarity of two strings (1.0 = identical).

    Pure Python implementation. Both inputs are truncated to 200 characters
    so the quadratic matching loop stays bounded.

    The Winkler bonus is based on the length of the *common prefix* (at most
    four characters): counting stops at the first differing character.

    ``prefix_weight`` scales that bonus. The result stays within [0, 1] as
    long as ``prefix_weight`` does not exceed 0.25 (four prefix characters
    times the weight must not exceed 1).

    Returns 0.0 when either string is empty or when no characters match.
    """
    s1, s2 = s1[:200], s2[:200]
    if not s1 or not s2:
        return 0.0
    if s1 == s2:
        return 1.0

    len1, len2 = len(s1), len(s2)
    # Characters only count as matching when they are at most this far apart.
    match_dist = max(max(len1, len2) // 2 - 1, 0)

    s1_matched = [False] * len1
    s2_matched = [False] * len2
    matches = 0

    for i, ch in enumerate(s1):
        lo = max(0, i - match_dist)
        hi = min(i + match_dist + 1, len2)
        for j in range(lo, hi):
            if not s2_matched[j] and ch == s2[j]:
                # Claim the first free equal character inside the window.
                s1_matched[i] = s2_matched[j] = True
                matches += 1
                break

    if matches == 0:
        return 0.0

    # Walk the matched characters of both strings in order; every position
    # where they disagree is half of a transposition.
    matched_1 = [c for c, hit in zip(s1, s1_matched) if hit]
    matched_2 = [c for c, hit in zip(s2, s2_matched) if hit]
    transpositions = sum(a != b for a, b in zip(matched_1, matched_2))

    jaro = (
        matches / len1 + matches / len2
        + (matches - transpositions / 2) / matches
    ) / 3.0

    # Length of the shared prefix, capped at four characters. Stop at the
    # first mismatch: later equal positions are not part of the prefix.
    prefix = 0
    for a, b in zip(s1[:4], s2[:4]):
        if a != b:
            break
        prefix += 1

    return jaro + prefix * prefix_weight * (1.0 - jaro)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _has_marker(name: str, markers: tuple[str, ...]) -> bool:
    """Return True when any marker occurs in *name* as a whole word.

    A trailing plural "s" is accepted ("Games", "Parks"). Markers embedded
    in a longer word ("War" in "Warren", "Park" in "Parker") do not count.
    Matching is case-sensitive.
    """
    return any(
        re.search(rf"\b{re.escape(marker)}s?\b", name) is not None
        for marker in markers
    )


def _guess_entity_type(name: str) -> str:
    """Heuristic entity type classification from name string.

    Checks, in order: organization markers, place markers, event markers,
    then capitalization shape. Returns one of "organization", "place",
    "event", "person" or "concept" (the fallback).

    Markers are matched as whole words rather than raw substrings, so
    personal names such as "Warren" or "Parker" are not misclassified as an
    event or a place.
    """
    if _has_marker(name, _ORG_MARKERS):
        return "organization"
    if _has_marker(name, _PLACE_MARKERS):
        return "place"
    if _has_marker(name, _EVENT_MARKERS):
        return "event"
    # Two capitalized words = likely a person name
    if re.match(r"^[A-Z][a-z]+ [A-Z][a-z]+$", name):
        return "person"
    # Single capitalized word = likely a person first name
    if re.match(r"^[A-Z][a-z]+$", name):
        return "person"
    return "concept"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
# Entity Resolver
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
class EntityResolver:
|
|
137
|
+
"""Resolves raw entity mentions to persisted canonical entities.
|
|
138
|
+
|
|
139
|
+
Every resolution is persisted — canonical entities and aliases are stored
|
|
140
|
+
immediately, ensuring downstream graph building and retrieval use the
|
|
141
|
+
resolved identities.
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
def __init__(
|
|
145
|
+
self,
|
|
146
|
+
db: DatabaseManager,
|
|
147
|
+
llm: LLMBackbone | None = None,
|
|
148
|
+
) -> None:
|
|
149
|
+
self._db = db
|
|
150
|
+
self._llm = llm
|
|
151
|
+
|
|
152
|
+
# -- Public API ---------------------------------------------------------
|
|
153
|
+
|
|
154
|
+
def resolve(
|
|
155
|
+
self,
|
|
156
|
+
raw_entities: list[str],
|
|
157
|
+
profile_id: str,
|
|
158
|
+
) -> dict[str, str]:
|
|
159
|
+
"""Resolve raw mentions to canonical entity IDs.
|
|
160
|
+
|
|
161
|
+
Returns mapping: raw_name -> canonical entity_id.
|
|
162
|
+
All new entities and aliases are persisted before returning.
|
|
163
|
+
"""
|
|
164
|
+
if not raw_entities:
|
|
165
|
+
return {}
|
|
166
|
+
|
|
167
|
+
resolution: dict[str, str] = {}
|
|
168
|
+
candidates_for_llm: list[str] = []
|
|
169
|
+
|
|
170
|
+
for raw in raw_entities:
|
|
171
|
+
name = raw.strip()
|
|
172
|
+
if not name or name.lower() in PRONOUNS:
|
|
173
|
+
continue
|
|
174
|
+
|
|
175
|
+
# Tier a: exact match on canonical_name
|
|
176
|
+
entity = self._db.get_entity_by_name(name, profile_id)
|
|
177
|
+
if entity is not None:
|
|
178
|
+
resolution[raw] = entity.entity_id
|
|
179
|
+
self._touch_last_seen(entity.entity_id)
|
|
180
|
+
continue
|
|
181
|
+
|
|
182
|
+
# Tier b: alias match (case-insensitive, indexed)
|
|
183
|
+
entity_id = self._alias_lookup(name, profile_id)
|
|
184
|
+
if entity_id is not None:
|
|
185
|
+
resolution[raw] = entity_id
|
|
186
|
+
self._touch_last_seen(entity_id)
|
|
187
|
+
continue
|
|
188
|
+
|
|
189
|
+
# Tier c: fuzzy match via Jaro-Winkler
|
|
190
|
+
match_id, score = self._fuzzy_match(name, profile_id)
|
|
191
|
+
if match_id is not None and score >= JARO_WINKLER_AUTO_MERGE:
|
|
192
|
+
resolution[raw] = match_id
|
|
193
|
+
self._persist_alias(match_id, name, score, "jaro_winkler")
|
|
194
|
+
self._touch_last_seen(match_id)
|
|
195
|
+
continue
|
|
196
|
+
|
|
197
|
+
# Candidate zone (0.70–0.85): queue for LLM in Mode B/C
|
|
198
|
+
if match_id is not None and score >= JARO_WINKLER_LLM_FLOOR:
|
|
199
|
+
candidates_for_llm.append(raw)
|
|
200
|
+
continue
|
|
201
|
+
|
|
202
|
+
# No match at all — create new entity
|
|
203
|
+
new_id = self._create_entity(name, profile_id)
|
|
204
|
+
resolution[raw] = new_id
|
|
205
|
+
|
|
206
|
+
# Tier d: LLM disambiguation for fuzzy candidates (Mode B/C)
|
|
207
|
+
if candidates_for_llm and self._llm is not None:
|
|
208
|
+
llm_resolved = self._llm_disambiguate(
|
|
209
|
+
candidates_for_llm, profile_id,
|
|
210
|
+
)
|
|
211
|
+
for raw_name, entity_id in llm_resolved.items():
|
|
212
|
+
resolution[raw_name] = entity_id
|
|
213
|
+
|
|
214
|
+
# Create new entities for candidates LLM couldn't resolve
|
|
215
|
+
for raw_name in candidates_for_llm:
|
|
216
|
+
if raw_name not in resolution:
|
|
217
|
+
new_id = self._create_entity(raw_name.strip(), profile_id)
|
|
218
|
+
resolution[raw_name] = new_id
|
|
219
|
+
elif candidates_for_llm:
|
|
220
|
+
# No LLM available — create new entities for all candidates
|
|
221
|
+
for raw_name in candidates_for_llm:
|
|
222
|
+
new_id = self._create_entity(raw_name.strip(), profile_id)
|
|
223
|
+
resolution[raw_name] = new_id
|
|
224
|
+
|
|
225
|
+
return resolution
|
|
226
|
+
|
|
227
|
+
def create_speaker_entities(
    self,
    speaker_a: str,
    speaker_b: str,
    profile_id: str,
) -> None:
    """Pre-create canonical entities for the two conversation speakers.

    Intended to run at session start so that speaker names are already
    resolvable during fact extraction. Fixes B03 (speaker entities
    not available during first-turn encoding).

    Each name is whitespace-trimmed. Blank names and pronouns are skipped,
    and a "person" entity is created only when no entity with that name
    exists yet for the profile.

    Args:
        speaker_a: Name of the first speaker.
        speaker_b: Name of the second speaker.
        profile_id: Profile the entities are looked up and created under.
    """
    # Lazy generator: each name is trimmed right before it is processed.
    trimmed_names = (raw.strip() for raw in (speaker_a, speaker_b))
    for speaker_name in trimmed_names:
        blank_or_pronoun = not speaker_name or speaker_name.lower() in PRONOUNS
        if blank_or_pronoun:
            continue
        already_known = self._db.get_entity_by_name(speaker_name, profile_id)
        if already_known is None:
            self._create_entity(speaker_name, profile_id, entity_type="person")
|
|
246
|
+
|
|
247
|
+
def get_canonical_name(self, raw_name: str, profile_id: str) -> str:
    """Return the canonical name for a mention, or the mention unchanged.

    Lookup order: exact entity-name match, then the alias table. When
    neither yields a canonical name (or the mention is blank after
    trimming), the original ``raw_name`` is returned untouched.

    Args:
        raw_name: Mention text as it appeared; trimmed only for lookup.
        profile_id: Profile whose entities are searched.

    Returns:
        The canonical name if one is found, otherwise ``raw_name``.
    """
    lookup_key = raw_name.strip()
    if lookup_key:
        direct_hit = self._db.get_entity_by_name(lookup_key, profile_id)
        if direct_hit is not None:
            return direct_hit.canonical_name

        aliased_id = self._alias_lookup(lookup_key, profile_id)
        if aliased_id is not None:
            found = self._db.execute(
                "SELECT canonical_name FROM canonical_entities WHERE entity_id = ?",
                (aliased_id,),
            )
            if found:
                first_row = dict(found[0])
                return str(first_row["canonical_name"])

    # Nothing matched: hand back exactly what the caller gave us.
    return raw_name
|
|
267
|
+
|
|
268
|
+
def merge_entities(
    self,
    entity_id_keep: str,
    entity_id_merge: str,
    profile_id: str,
) -> None:
    """Merge two entities: move all aliases and facts to keep, delete merge.

    Reassigns aliases, updates canonical_entities_json in atomic_facts,
    and removes the merged entity record.

    Args:
        entity_id_keep: Entity that survives and receives aliases/facts.
        entity_id_merge: Entity that is folded into ``entity_id_keep`` and
            then deleted.
        profile_id: Profile whose atomic_facts rows are rewritten.

    Merging an entity into itself is refused (logged, no changes made).
    """
    if entity_id_keep == entity_id_merge:
        # Without this guard the DELETE statements below would wipe the
        # very entity the caller asked us to keep.
        logger.warning(
            "Refusing to merge entity %s into itself (profile=%s)",
            entity_id_keep, profile_id,
        )
        return

    # Move aliases from merge -> keep. Going through _persist_alias means
    # aliases that keep already owns are skipped instead of duplicated.
    merge_source = f"merge_from:{entity_id_merge}"
    for alias in self._db.get_aliases_for_entity(entity_id_merge):
        self._persist_alias(
            entity_id_keep, alias.alias, alias.confidence, merge_source,
        )

    # Also add the merged entity's canonical name as an alias of keep
    rows = self._db.execute(
        "SELECT canonical_name FROM canonical_entities WHERE entity_id = ?",
        (entity_id_merge,),
    )
    if rows:
        merged_name = str(dict(rows[0])["canonical_name"])
        self._persist_alias(entity_id_keep, merged_name, 1.0, "merge")

    # Update atomic_facts: replace entity_id_merge with entity_id_keep
    # in canonical_entities_json column
    fact_rows = self._db.execute(
        "SELECT fact_id, canonical_entities_json FROM atomic_facts "
        "WHERE profile_id = ? AND canonical_entities_json LIKE ?",
        (profile_id, f'%"{entity_id_merge}"%'),
    )
    for row in fact_rows:
        d = dict(row)
        try:
            entities = json.loads(d["canonical_entities_json"])
            remapped = [
                entity_id_keep if eid == entity_id_merge else eid
                for eid in entities
            ]
            # Deduplicate while preserving order (dicts keep insertion order)
            deduped = list(dict.fromkeys(remapped))
        except (json.JSONDecodeError, TypeError):
            # Malformed or non-list JSON: leave this fact row untouched.
            continue
        else:
            self._db.execute(
                "UPDATE atomic_facts SET canonical_entities_json = ? "
                "WHERE fact_id = ?",
                (json.dumps(deduped), d["fact_id"]),
            )

    # Delete the merged entity
    self._db.execute(
        "DELETE FROM entity_aliases WHERE entity_id = ?",
        (entity_id_merge,),
    )
    self._db.execute(
        "DELETE FROM canonical_entities WHERE entity_id = ?",
        (entity_id_merge,),
    )
    logger.info(
        "Merged entity %s into %s (profile=%s)",
        entity_id_merge, entity_id_keep, profile_id,
    )
|
|
344
|
+
|
|
345
|
+
# -- Internal: lookups --------------------------------------------------
|
|
346
|
+
|
|
347
|
+
def _alias_lookup(self, name: str, profile_id: str) -> str | None:
    """Resolve a name to an entity_id through the alias table.

    The comparison is case-insensitive (both sides go through SQL LOWER)
    and restricted to entities of the given profile.

    Returns:
        The entity_id of the first matching row as a string, or None when
        no alias matches.
    """
    query = (
        "SELECT ea.entity_id FROM entity_aliases ea "
        "JOIN canonical_entities ce ON ce.entity_id = ea.entity_id "
        "WHERE LOWER(ea.alias) = LOWER(?) AND ce.profile_id = ?"
    )
    matches = self._db.execute(query, (name, profile_id))
    if not matches:
        return None
    first_match = dict(matches[0])
    return str(first_match["entity_id"])
|
|
358
|
+
|
|
359
|
+
def _fuzzy_match(
    self, name: str, profile_id: str,
) -> tuple[str | None, float]:
    """Find the best Jaro-Winkler match among canonical names and aliases.

    Canonical names of the profile are scanned first, then its aliases;
    both sides are lower-cased before scoring. A later candidate replaces
    the current best only when its score is strictly higher.

    Returns (entity_id, score) or (None, 0.0).
    """
    needle = name.lower()

    # (query, column holding the text to compare) — scanned in this order.
    sources = (
        (
            "SELECT entity_id, canonical_name FROM canonical_entities "
            "WHERE profile_id = ?",
            "canonical_name",
        ),
        (
            "SELECT ea.entity_id, ea.alias FROM entity_aliases ea "
            "JOIN canonical_entities ce ON ce.entity_id = ea.entity_id "
            "WHERE ce.profile_id = ?",
            "alias",
        ),
    )

    winner_id: str | None = None
    winner_score: float = 0.0
    for sql, text_column in sources:
        for record in map(dict, self._db.execute(sql, (profile_id,))):
            similarity = jaro_winkler(needle, record[text_column].lower())
            if similarity > winner_score:
                winner_id, winner_score = record["entity_id"], similarity

    return (winner_id, winner_score)
|
|
398
|
+
|
|
399
|
+
# -- Internal: persistence ----------------------------------------------
|
|
400
|
+
|
|
401
|
+
def _create_entity(
    self,
    name: str,
    profile_id: str,
    entity_type: str | None = None,
) -> str:
    """Store a new canonical entity plus its self-alias; return its id.

    Args:
        name: Canonical name for the new entity.
        profile_id: Profile the entity belongs to.
        entity_type: Explicit type; when falsy the type is guessed from
            the name via ``_guess_entity_type``.

    Returns:
        The entity_id of the newly stored entity.
    """
    if entity_type:
        resolved_type = entity_type
    else:
        resolved_type = _guess_entity_type(name)

    # One timestamp for both fields: a fresh entity is first and last
    # seen at the same instant.
    created_at = _now()
    record = CanonicalEntity(
        entity_id=_new_id(),
        profile_id=profile_id,
        canonical_name=name,
        entity_type=resolved_type,
        first_seen=created_at,
        last_seen=created_at,
        fact_count=0,
    )
    self._db.store_entity(record)
    new_entity_id = record.entity_id

    # Store name as its own alias for uniform lookup
    self._persist_alias(new_entity_id, name, 1.0, "canonical")

    logger.debug(
        "Created entity '%s' [%s] (type=%s, profile=%s)",
        name, new_entity_id, resolved_type, profile_id,
    )
    return new_entity_id
|
|
429
|
+
|
|
430
|
+
def _persist_alias(
    self,
    entity_id: str,
    alias_text: str,
    confidence: float,
    source: str,
) -> None:
    """Store an alias for an entity unless it already has that alias.

    The duplicate check is case-insensitive and scoped to ``entity_id``.

    Args:
        entity_id: Entity the alias points to.
        alias_text: Alias text to store.
        confidence: Confidence recorded on the alias row.
        source: Label describing where the alias came from.
    """
    already_there = self._db.execute(
        "SELECT alias_id FROM entity_aliases "
        "WHERE entity_id = ? AND LOWER(alias) = LOWER(?)",
        (entity_id, alias_text),
    )
    if not already_there:
        self._db.store_alias(
            EntityAlias(
                alias_id=_new_id(),
                entity_id=entity_id,
                alias=alias_text,
                confidence=confidence,
                source=source,
            )
        )
|
|
454
|
+
|
|
455
|
+
def _touch_last_seen(self, entity_id: str) -> None:
    """Stamp the canonical entity's ``last_seen`` column with the current time."""
    statement = "UPDATE canonical_entities SET last_seen = ? WHERE entity_id = ?"
    seen_at = _now()
    self._db.execute(statement, (seen_at, entity_id))
|
|
461
|
+
|
|
462
|
+
# -- Internal: LLM disambiguation (Mode B/C) ---------------------------
|
|
463
|
+
|
|
464
|
+
def _llm_disambiguate(
    self,
    raw_names: list[str],
    profile_id: str,
) -> dict[str, str]:
    """Ask LLM whether fuzzy candidates match existing entities.

    Up to 50 canonical names of the profile are offered to the model as
    context. For every mention the model maps onto one of those names, an
    "llm" alias (confidence 0.9) is persisted and the entity's last_seen
    is refreshed.

    Args:
        raw_names: Mentions exactly as the caller keys them (they may
            carry surrounding whitespace).
        profile_id: Profile whose entities are offered as candidates.

    Returns:
        Mapping of raw mention -> entity_id for the mentions the model
        matched to a known entity. Mentions judged new, answers naming an
        unknown entity, and keys the caller never asked about produce no
        entry. Any failure yields an empty dict, leaving entity creation
        to the caller.
    """
    if not self._llm or not raw_names:
        return {}

    # Gather known entity names for context
    rows = self._db.execute(
        "SELECT entity_id, canonical_name FROM canonical_entities "
        "WHERE profile_id = ? LIMIT 50",
        (profile_id,),
    )
    known: dict[str, str] = {}
    for row in rows:
        record = dict(row)
        known[str(record["canonical_name"])] = str(record["entity_id"])

    # Model output is untrusted: index the requested mentions by their
    # trimmed text so reply keys can be mapped back to the caller's exact
    # raw keys, and anything the model invented can be discarded.
    requested: dict[str, list[str]] = {}
    for raw in raw_names:
        requested.setdefault(raw.strip(), []).append(raw)

    prompt = (
        "Entity resolution task. For each mention, decide if it refers to "
        "one of the known entities or is a new entity.\n\n"
        f"Mentions to resolve: {raw_names}\n"
        f"Known entities: {list(known.keys())}\n\n"
        "Respond with ONLY a JSON object mapping each mention to a "
        "known entity name if they match, or to itself if it is new.\n"
        'Example: {"Ms. Smith": "Alice Smith", "Bob": "Bob"}'
    )

    try:
        response = self._llm.generate(
            prompt=prompt,
            system="You are a precise entity resolution system.",
            max_tokens=256,
            temperature=0.0,
        )
        # Tolerate chatter around the JSON: take the outermost {...} span.
        match = re.search(r"\{.*\}", response, re.DOTALL)
        if not match:
            return {}

        result = json.loads(match.group())
        if not isinstance(result, dict):
            return {}

        resolved: dict[str, str] = {}
        for mention, canonical_name in result.items():
            mention_text = str(mention).strip()
            raw_keys = requested.get(mention_text)
            if not mention_text or not raw_keys:
                # Blank key, or a mention we never asked about.
                continue
            entity_id = known.get(str(canonical_name))
            if entity_id is None:
                # If LLM says it's itself (or names an unknown entity),
                # leave for caller to create
                continue
            for raw in raw_keys:
                resolved[raw] = entity_id
            self._persist_alias(
                entity_id, mention_text, 0.9, "llm",
            )
            self._touch_last_seen(entity_id)
        return resolved

    except Exception as exc:
        # Deliberately broad: disambiguation is best-effort and must not
        # break the surrounding resolution pass.
        logger.warning("LLM entity disambiguation failed: %s", exc)
        return {}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3 — Entropy Gate (Deduplication).
|
|
6
|
+
|
|
7
|
+
Filters low-information and duplicate content before expensive encoding.
|
|
8
|
+
AriadneMem pattern: block near-duplicates within a time window.
|
|
9
|
+
|
|
10
|
+
Ported from V1 with fixed threshold (0.95 from S13 fix).
|
|
11
|
+
|
|
12
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from superlocalmemory.core.embeddings import EmbeddingService
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)

# Content whose stripped length is below this is treated as low-information.
_MIN_CONTENT_LENGTH = 10

# Stock acknowledgements and fillers that carry no information on their own.
_LOW_INFO_PATTERNS = frozenset((
    # agreement / refusal
    "ok", "okay", "yes", "no", "yeah", "sure", "right", "alright", "fine",
    # thanks and acknowledgement
    "thanks", "thank you", "got it", "i see",
    # fillers and reactions
    "hmm", "hm", "ah", "cool", "nice", "great",
))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class EntropyGate:
    """Filter low-information and duplicate content before encoding.

    Two-stage filter:
    1. Content-based: reject very short or formulaic responses
    2. Similarity-based: reject near-duplicates of recent memories

    Stage 2 only runs when an embedder was supplied. It compares the new
    content's embedding against a sliding window of embeddings of content
    that previously passed the gate.
    """

    def __init__(
        self,
        embedder: EmbeddingService | None = None,
        similarity_threshold: float = 0.95,
        window_size: int = 50,
    ) -> None:
        """Configure the gate.

        Args:
            embedder: Object exposing ``embed(text)``; ``None`` disables
                the similarity stage.
            similarity_threshold: Cosine similarity strictly above which
                content counts as a near-duplicate.
            window_size: Maximum number of recent embeddings retained for
                comparison. Zero or negative retains none, which disables
                duplicate detection.
        """
        self._embedder = embedder
        self._threshold = similarity_threshold
        self._window_size = window_size
        # Oldest-first embeddings of content that passed the gate.
        self._recent_embeddings: list[list[float]] = []

    def should_pass(self, content: str) -> bool:
        """Return True if content has enough information to store.

        Returns False for low-info or near-duplicate content. Content that
        passes is remembered (when an embedder is configured) so later
        near-duplicates can be blocked.
        """
        # Stage 1: Content-based filtering
        stripped = content.strip()
        if len(stripped) < _MIN_CONTENT_LENGTH:
            logger.debug("Entropy gate: blocked (too short: %d chars)", len(stripped))
            return False

        normalized = stripped.lower().strip(".,!?;:")
        if normalized in _LOW_INFO_PATTERNS:
            logger.debug("Entropy gate: blocked (low-info pattern: '%s')", normalized)
            return False

        # Stage 2: Similarity-based deduplication
        if self._embedder is None:
            return True

        emb = self._embedder.embed(content)
        for recent in self._recent_embeddings:
            sim = _cosine(emb, recent)
            if sim > self._threshold:
                logger.debug(
                    "Entropy gate: blocked (near-duplicate, sim=%.3f)", sim
                )
                return False

        self._remember(emb)
        return True

    def reset(self) -> None:
        """Clear the recent embeddings window."""
        self._recent_embeddings.clear()

    def _remember(self, emb: list[float]) -> None:
        """Append an embedding and trim the window to its configured size."""
        self._recent_embeddings.append(emb)
        # Always enforce the cap, including for the very first entry, so a
        # non-positive window_size really keeps no history.
        overflow = len(self._recent_embeddings) - max(self._window_size, 0)
        if overflow > 0:
            del self._recent_embeddings[:overflow]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _cosine(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of two vectors.

    Yields 0.0 when either vector has zero magnitude (including empty
    vectors), avoiding a division by zero.
    """
    magnitude_a = sum(component * component for component in a) ** 0.5
    magnitude_b = sum(component * component for component in b) ** 0.5
    if not (magnitude_a and magnitude_b):
        return 0.0
    inner_product = sum(left * right for left, right in zip(a, b))
    return inner_product / (magnitude_a * magnitude_b)
|