superlocalmemory 2.8.6 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +9 -1
- package/NOTICE +63 -0
- package/README.md +165 -480
- package/bin/slm +17 -449
- package/bin/slm-npm +1 -1
- package/conftest.py +5 -0
- package/docs/api-reference.md +284 -0
- package/docs/architecture.md +149 -0
- package/docs/auto-memory.md +150 -0
- package/docs/cli-reference.md +276 -0
- package/docs/compliance.md +191 -0
- package/docs/configuration.md +182 -0
- package/docs/getting-started.md +102 -0
- package/docs/ide-setup.md +261 -0
- package/docs/mcp-tools.md +220 -0
- package/docs/migration-from-v2.md +170 -0
- package/docs/profiles.md +173 -0
- package/docs/troubleshooting.md +310 -0
- package/{configs → ide/configs}/antigravity-mcp.json +3 -3
- package/ide/configs/chatgpt-desktop-mcp.json +16 -0
- package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
- package/{configs → ide/configs}/codex-mcp.toml +4 -4
- package/{configs → ide/configs}/continue-mcp.yaml +4 -3
- package/{configs → ide/configs}/continue-skills.yaml +6 -6
- package/ide/configs/cursor-mcp.json +15 -0
- package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
- package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
- package/{configs → ide/configs}/opencode-mcp.json +2 -2
- package/{configs → ide/configs}/perplexity-mcp.json +2 -2
- package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
- package/{configs → ide/configs}/windsurf-mcp.json +3 -3
- package/{configs → ide/configs}/zed-mcp.json +2 -2
- package/{hooks → ide/hooks}/context-hook.js +9 -20
- package/ide/hooks/memory-list-skill.js +70 -0
- package/ide/hooks/memory-profile-skill.js +101 -0
- package/ide/hooks/memory-recall-skill.js +62 -0
- package/ide/hooks/memory-remember-skill.js +68 -0
- package/ide/hooks/memory-reset-skill.js +160 -0
- package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
- package/ide/integrations/langchain/README.md +106 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
- package/ide/integrations/langchain/pyproject.toml +38 -0
- package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
- package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
- package/ide/integrations/langchain/tests/test_security.py +117 -0
- package/ide/integrations/llamaindex/README.md +81 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
- package/ide/integrations/llamaindex/pyproject.toml +43 -0
- package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
- package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
- package/ide/integrations/llamaindex/tests/test_security.py +241 -0
- package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
- package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
- package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
- package/package.json +13 -22
- package/pyproject.toml +85 -0
- package/scripts/build-dmg.sh +417 -0
- package/scripts/install-skills.ps1 +334 -0
- package/scripts/postinstall.js +2 -2
- package/scripts/start-dashboard.ps1 +52 -0
- package/scripts/start-dashboard.sh +41 -0
- package/scripts/sync-wiki.ps1 +127 -0
- package/scripts/sync-wiki.sh +82 -0
- package/scripts/test-dmg.sh +161 -0
- package/scripts/test-npm-package.ps1 +252 -0
- package/scripts/test-npm-package.sh +207 -0
- package/scripts/verify-install.ps1 +294 -0
- package/scripts/verify-install.sh +266 -0
- package/src/superlocalmemory/__init__.py +0 -0
- package/src/superlocalmemory/attribution/__init__.py +9 -0
- package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
- package/src/superlocalmemory/attribution/signer.py +153 -0
- package/src/superlocalmemory/attribution/watermark.py +189 -0
- package/src/superlocalmemory/cli/__init__.py +5 -0
- package/src/superlocalmemory/cli/commands.py +245 -0
- package/src/superlocalmemory/cli/main.py +89 -0
- package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
- package/src/superlocalmemory/cli/post_install.py +99 -0
- package/src/superlocalmemory/cli/setup_wizard.py +129 -0
- package/src/superlocalmemory/compliance/__init__.py +0 -0
- package/src/superlocalmemory/compliance/abac.py +204 -0
- package/src/superlocalmemory/compliance/audit.py +314 -0
- package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
- package/src/superlocalmemory/compliance/gdpr.py +294 -0
- package/src/superlocalmemory/compliance/lifecycle.py +158 -0
- package/src/superlocalmemory/compliance/retention.py +232 -0
- package/src/superlocalmemory/compliance/scheduler.py +148 -0
- package/src/superlocalmemory/core/__init__.py +0 -0
- package/src/superlocalmemory/core/config.py +391 -0
- package/src/superlocalmemory/core/embeddings.py +293 -0
- package/src/superlocalmemory/core/engine.py +701 -0
- package/src/superlocalmemory/core/hooks.py +65 -0
- package/src/superlocalmemory/core/maintenance.py +172 -0
- package/src/superlocalmemory/core/modes.py +140 -0
- package/src/superlocalmemory/core/profiles.py +234 -0
- package/src/superlocalmemory/core/registry.py +117 -0
- package/src/superlocalmemory/dynamics/__init__.py +0 -0
- package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
- package/src/superlocalmemory/encoding/__init__.py +0 -0
- package/src/superlocalmemory/encoding/consolidator.py +485 -0
- package/src/superlocalmemory/encoding/emotional.py +125 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
- package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
- package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
- package/src/superlocalmemory/encoding/foresight.py +91 -0
- package/src/superlocalmemory/encoding/graph_builder.py +302 -0
- package/src/superlocalmemory/encoding/observation_builder.py +160 -0
- package/src/superlocalmemory/encoding/scene_builder.py +183 -0
- package/src/superlocalmemory/encoding/signal_inference.py +90 -0
- package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
- package/src/superlocalmemory/encoding/type_router.py +235 -0
- package/src/superlocalmemory/hooks/__init__.py +3 -0
- package/src/superlocalmemory/hooks/auto_capture.py +111 -0
- package/src/superlocalmemory/hooks/auto_recall.py +93 -0
- package/src/superlocalmemory/hooks/ide_connector.py +204 -0
- package/src/superlocalmemory/hooks/rules_engine.py +99 -0
- package/src/superlocalmemory/infra/__init__.py +3 -0
- package/src/superlocalmemory/infra/auth_middleware.py +82 -0
- package/src/superlocalmemory/infra/backup.py +317 -0
- package/src/superlocalmemory/infra/cache_manager.py +267 -0
- package/src/superlocalmemory/infra/event_bus.py +381 -0
- package/src/superlocalmemory/infra/rate_limiter.py +135 -0
- package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
- package/src/superlocalmemory/learning/__init__.py +0 -0
- package/src/superlocalmemory/learning/adaptive.py +172 -0
- package/src/superlocalmemory/learning/behavioral.py +490 -0
- package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
- package/src/superlocalmemory/learning/bootstrap.py +298 -0
- package/src/superlocalmemory/learning/cross_project.py +399 -0
- package/src/superlocalmemory/learning/database.py +376 -0
- package/src/superlocalmemory/learning/engagement.py +323 -0
- package/src/superlocalmemory/learning/features.py +138 -0
- package/src/superlocalmemory/learning/feedback.py +316 -0
- package/src/superlocalmemory/learning/outcomes.py +255 -0
- package/src/superlocalmemory/learning/project_context.py +366 -0
- package/src/superlocalmemory/learning/ranker.py +155 -0
- package/src/superlocalmemory/learning/source_quality.py +303 -0
- package/src/superlocalmemory/learning/workflows.py +309 -0
- package/src/superlocalmemory/llm/__init__.py +0 -0
- package/src/superlocalmemory/llm/backbone.py +316 -0
- package/src/superlocalmemory/math/__init__.py +0 -0
- package/src/superlocalmemory/math/fisher.py +356 -0
- package/src/superlocalmemory/math/langevin.py +398 -0
- package/src/superlocalmemory/math/sheaf.py +257 -0
- package/src/superlocalmemory/mcp/__init__.py +0 -0
- package/src/superlocalmemory/mcp/resources.py +245 -0
- package/src/superlocalmemory/mcp/server.py +61 -0
- package/src/superlocalmemory/mcp/tools.py +18 -0
- package/src/superlocalmemory/mcp/tools_core.py +305 -0
- package/src/superlocalmemory/mcp/tools_v28.py +223 -0
- package/src/superlocalmemory/mcp/tools_v3.py +286 -0
- package/src/superlocalmemory/retrieval/__init__.py +0 -0
- package/src/superlocalmemory/retrieval/agentic.py +295 -0
- package/src/superlocalmemory/retrieval/ann_index.py +223 -0
- package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
- package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
- package/src/superlocalmemory/retrieval/engine.py +390 -0
- package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
- package/src/superlocalmemory/retrieval/fusion.py +78 -0
- package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
- package/src/superlocalmemory/retrieval/reranker.py +154 -0
- package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
- package/src/superlocalmemory/retrieval/strategy.py +96 -0
- package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
- package/src/superlocalmemory/server/__init__.py +1 -0
- package/src/superlocalmemory/server/api.py +248 -0
- package/src/superlocalmemory/server/routes/__init__.py +4 -0
- package/src/superlocalmemory/server/routes/agents.py +107 -0
- package/src/superlocalmemory/server/routes/backup.py +91 -0
- package/src/superlocalmemory/server/routes/behavioral.py +127 -0
- package/src/superlocalmemory/server/routes/compliance.py +160 -0
- package/src/superlocalmemory/server/routes/data_io.py +188 -0
- package/src/superlocalmemory/server/routes/events.py +183 -0
- package/src/superlocalmemory/server/routes/helpers.py +85 -0
- package/src/superlocalmemory/server/routes/learning.py +273 -0
- package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
- package/src/superlocalmemory/server/routes/memories.py +399 -0
- package/src/superlocalmemory/server/routes/profiles.py +219 -0
- package/src/superlocalmemory/server/routes/stats.py +346 -0
- package/src/superlocalmemory/server/routes/v3_api.py +365 -0
- package/src/superlocalmemory/server/routes/ws.py +82 -0
- package/src/superlocalmemory/server/security_middleware.py +57 -0
- package/src/superlocalmemory/server/ui.py +245 -0
- package/src/superlocalmemory/storage/__init__.py +0 -0
- package/src/superlocalmemory/storage/access_control.py +182 -0
- package/src/superlocalmemory/storage/database.py +594 -0
- package/src/superlocalmemory/storage/migrations.py +303 -0
- package/src/superlocalmemory/storage/models.py +406 -0
- package/src/superlocalmemory/storage/schema.py +726 -0
- package/src/superlocalmemory/storage/v2_migrator.py +317 -0
- package/src/superlocalmemory/trust/__init__.py +0 -0
- package/src/superlocalmemory/trust/gate.py +130 -0
- package/src/superlocalmemory/trust/provenance.py +124 -0
- package/src/superlocalmemory/trust/scorer.py +347 -0
- package/src/superlocalmemory/trust/signals.py +153 -0
- package/ui/index.html +278 -5
- package/ui/js/auto-settings.js +70 -0
- package/ui/js/dashboard.js +90 -0
- package/ui/js/fact-detail.js +92 -0
- package/ui/js/feedback.js +2 -2
- package/ui/js/ide-status.js +102 -0
- package/ui/js/math-health.js +98 -0
- package/ui/js/recall-lab.js +127 -0
- package/ui/js/settings.js +2 -2
- package/ui/js/trust-dashboard.js +73 -0
- package/api_server.py +0 -724
- package/bin/aider-smart +0 -72
- package/bin/superlocalmemoryv2-learning +0 -4
- package/bin/superlocalmemoryv2-list +0 -3
- package/bin/superlocalmemoryv2-patterns +0 -4
- package/bin/superlocalmemoryv2-profile +0 -3
- package/bin/superlocalmemoryv2-recall +0 -3
- package/bin/superlocalmemoryv2-remember +0 -3
- package/bin/superlocalmemoryv2-reset +0 -3
- package/bin/superlocalmemoryv2-status +0 -3
- package/configs/chatgpt-desktop-mcp.json +0 -16
- package/configs/cursor-mcp.json +0 -15
- package/hooks/memory-list-skill.js +0 -139
- package/hooks/memory-profile-skill.js +0 -273
- package/hooks/memory-recall-skill.js +0 -114
- package/hooks/memory-remember-skill.js +0 -127
- package/hooks/memory-reset-skill.js +0 -274
- package/mcp_server.py +0 -1808
- package/requirements-core.txt +0 -22
- package/requirements-learning.txt +0 -12
- package/requirements.txt +0 -12
- package/src/agent_registry.py +0 -411
- package/src/auth_middleware.py +0 -61
- package/src/auto_backup.py +0 -459
- package/src/behavioral/__init__.py +0 -49
- package/src/behavioral/behavioral_listener.py +0 -203
- package/src/behavioral/behavioral_patterns.py +0 -275
- package/src/behavioral/cross_project_transfer.py +0 -206
- package/src/behavioral/outcome_inference.py +0 -194
- package/src/behavioral/outcome_tracker.py +0 -193
- package/src/behavioral/tests/__init__.py +0 -4
- package/src/behavioral/tests/test_behavioral_integration.py +0 -108
- package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
- package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
- package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
- package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
- package/src/behavioral/tests/test_outcome_inference.py +0 -107
- package/src/behavioral/tests/test_outcome_tracker.py +0 -96
- package/src/cache_manager.py +0 -518
- package/src/compliance/__init__.py +0 -48
- package/src/compliance/abac_engine.py +0 -149
- package/src/compliance/abac_middleware.py +0 -116
- package/src/compliance/audit_db.py +0 -215
- package/src/compliance/audit_logger.py +0 -148
- package/src/compliance/retention_manager.py +0 -289
- package/src/compliance/retention_scheduler.py +0 -186
- package/src/compliance/tests/__init__.py +0 -4
- package/src/compliance/tests/test_abac_enforcement.py +0 -95
- package/src/compliance/tests/test_abac_engine.py +0 -124
- package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
- package/src/compliance/tests/test_audit_db.py +0 -123
- package/src/compliance/tests/test_audit_logger.py +0 -98
- package/src/compliance/tests/test_mcp_audit.py +0 -128
- package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
- package/src/compliance/tests/test_retention_manager.py +0 -131
- package/src/compliance/tests/test_retention_scheduler.py +0 -99
- package/src/compression/__init__.py +0 -25
- package/src/compression/cli.py +0 -150
- package/src/compression/cold_storage.py +0 -217
- package/src/compression/config.py +0 -72
- package/src/compression/orchestrator.py +0 -133
- package/src/compression/tier2_compressor.py +0 -228
- package/src/compression/tier3_compressor.py +0 -153
- package/src/compression/tier_classifier.py +0 -148
- package/src/db_connection_manager.py +0 -536
- package/src/embedding_engine.py +0 -63
- package/src/embeddings/__init__.py +0 -47
- package/src/embeddings/cache.py +0 -70
- package/src/embeddings/cli.py +0 -113
- package/src/embeddings/constants.py +0 -47
- package/src/embeddings/database.py +0 -91
- package/src/embeddings/engine.py +0 -247
- package/src/embeddings/model_loader.py +0 -145
- package/src/event_bus.py +0 -562
- package/src/graph/__init__.py +0 -36
- package/src/graph/build_helpers.py +0 -74
- package/src/graph/cli.py +0 -87
- package/src/graph/cluster_builder.py +0 -188
- package/src/graph/cluster_summary.py +0 -148
- package/src/graph/constants.py +0 -47
- package/src/graph/edge_builder.py +0 -162
- package/src/graph/entity_extractor.py +0 -95
- package/src/graph/graph_core.py +0 -226
- package/src/graph/graph_search.py +0 -231
- package/src/graph/hierarchical.py +0 -207
- package/src/graph/schema.py +0 -99
- package/src/graph_engine.py +0 -52
- package/src/hnsw_index.py +0 -628
- package/src/hybrid_search.py +0 -46
- package/src/learning/__init__.py +0 -217
- package/src/learning/adaptive_ranker.py +0 -682
- package/src/learning/bootstrap/__init__.py +0 -69
- package/src/learning/bootstrap/constants.py +0 -93
- package/src/learning/bootstrap/db_queries.py +0 -316
- package/src/learning/bootstrap/sampling.py +0 -82
- package/src/learning/bootstrap/text_utils.py +0 -71
- package/src/learning/cross_project_aggregator.py +0 -857
- package/src/learning/db/__init__.py +0 -40
- package/src/learning/db/constants.py +0 -44
- package/src/learning/db/schema.py +0 -279
- package/src/learning/engagement_tracker.py +0 -628
- package/src/learning/feature_extractor.py +0 -708
- package/src/learning/feedback_collector.py +0 -806
- package/src/learning/learning_db.py +0 -915
- package/src/learning/project_context_manager.py +0 -572
- package/src/learning/ranking/__init__.py +0 -33
- package/src/learning/ranking/constants.py +0 -84
- package/src/learning/ranking/helpers.py +0 -278
- package/src/learning/source_quality_scorer.py +0 -676
- package/src/learning/synthetic_bootstrap.py +0 -755
- package/src/learning/tests/test_adaptive_ranker.py +0 -325
- package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
- package/src/learning/tests/test_aggregator.py +0 -306
- package/src/learning/tests/test_auto_retrain_v28.py +0 -35
- package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
- package/src/learning/tests/test_feature_extractor_v28.py +0 -93
- package/src/learning/tests/test_feedback_collector.py +0 -294
- package/src/learning/tests/test_learning_db.py +0 -602
- package/src/learning/tests/test_learning_db_v28.py +0 -110
- package/src/learning/tests/test_learning_init_v28.py +0 -48
- package/src/learning/tests/test_outcome_signals.py +0 -48
- package/src/learning/tests/test_project_context.py +0 -292
- package/src/learning/tests/test_schema_migration.py +0 -319
- package/src/learning/tests/test_signal_inference.py +0 -397
- package/src/learning/tests/test_source_quality.py +0 -351
- package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
- package/src/learning/tests/test_workflow_miner.py +0 -318
- package/src/learning/workflow_pattern_miner.py +0 -655
- package/src/lifecycle/__init__.py +0 -54
- package/src/lifecycle/bounded_growth.py +0 -239
- package/src/lifecycle/compaction_engine.py +0 -226
- package/src/lifecycle/lifecycle_engine.py +0 -355
- package/src/lifecycle/lifecycle_evaluator.py +0 -257
- package/src/lifecycle/lifecycle_scheduler.py +0 -130
- package/src/lifecycle/retention_policy.py +0 -285
- package/src/lifecycle/tests/test_bounded_growth.py +0 -193
- package/src/lifecycle/tests/test_compaction.py +0 -179
- package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
- package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
- package/src/lifecycle/tests/test_mcp_compact.py +0 -149
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
- package/src/lifecycle/tests/test_retention_policy.py +0 -162
- package/src/mcp_tools_v28.py +0 -281
- package/src/memory/__init__.py +0 -36
- package/src/memory/cli.py +0 -205
- package/src/memory/constants.py +0 -39
- package/src/memory/helpers.py +0 -28
- package/src/memory/schema.py +0 -166
- package/src/memory-profiles.py +0 -595
- package/src/memory-reset.py +0 -491
- package/src/memory_compression.py +0 -989
- package/src/memory_store_v2.py +0 -1155
- package/src/migrate_v1_to_v2.py +0 -629
- package/src/pattern_learner.py +0 -34
- package/src/patterns/__init__.py +0 -24
- package/src/patterns/analyzers.py +0 -251
- package/src/patterns/learner.py +0 -271
- package/src/patterns/scoring.py +0 -171
- package/src/patterns/store.py +0 -225
- package/src/patterns/terminology.py +0 -140
- package/src/provenance_tracker.py +0 -312
- package/src/qualixar_attribution.py +0 -139
- package/src/qualixar_watermark.py +0 -78
- package/src/query_optimizer.py +0 -511
- package/src/rate_limiter.py +0 -83
- package/src/search/__init__.py +0 -20
- package/src/search/cli.py +0 -77
- package/src/search/constants.py +0 -26
- package/src/search/engine.py +0 -241
- package/src/search/fusion.py +0 -122
- package/src/search/index_loader.py +0 -114
- package/src/search/methods.py +0 -162
- package/src/search_engine_v2.py +0 -401
- package/src/setup_validator.py +0 -482
- package/src/subscription_manager.py +0 -391
- package/src/tree/__init__.py +0 -59
- package/src/tree/builder.py +0 -185
- package/src/tree/nodes.py +0 -202
- package/src/tree/queries.py +0 -257
- package/src/tree/schema.py +0 -80
- package/src/tree_manager.py +0 -19
- package/src/trust/__init__.py +0 -45
- package/src/trust/constants.py +0 -66
- package/src/trust/queries.py +0 -157
- package/src/trust/schema.py +0 -95
- package/src/trust/scorer.py +0 -299
- package/src/trust/signals.py +0 -95
- package/src/trust_scorer.py +0 -44
- package/ui/app.js +0 -1588
- package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
- package/ui/js/graph-cytoscape.js +0 -1168
- package/ui/js/graph-d3-backup.js +0 -32
- package/ui/js/graph.js +0 -32
- package/ui_server.py +0 -286
- /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
- /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
- /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
- /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
- /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
- /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
- /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
- /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
- /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
- /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
- /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
- /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
- /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
- /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
- /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
- /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
- /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
- /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
- /package/{completions → ide/completions}/slm.bash +0 -0
- /package/{completions → ide/completions}/slm.zsh +0 -0
- /package/{configs → ide/configs}/cody-commands.json +0 -0
- /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
- /package/{install.ps1 → scripts/install.ps1} +0 -0
- /package/{install.sh → scripts/install.sh} +0 -0
package/src/search/constants.py
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""SuperLocalMemory V2 - Hybrid Search System
|
|
5
|
-
|
|
6
|
-
Solution Architect & Original Creator
|
|
7
|
-
|
|
8
|
-
(see LICENSE file)
|
|
9
|
-
|
|
10
|
-
ATTRIBUTION REQUIRED: This notice must be preserved in all copies.
|
|
11
|
-
"""
|
|
12
|
-
"""
|
|
13
|
-
Shared imports and constants for the hybrid search package.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
import time
|
|
17
|
-
import math
|
|
18
|
-
import json
|
|
19
|
-
import sqlite3
|
|
20
|
-
from collections import defaultdict
|
|
21
|
-
from typing import List, Dict, Tuple, Optional, Any, Set
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
|
-
from search_engine_v2 import BM25SearchEngine
|
|
25
|
-
from query_optimizer import QueryOptimizer
|
|
26
|
-
from cache_manager import CacheManager
|
package/src/search/engine.py
DELETED
|
@@ -1,241 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""HybridSearchEngine - Main orchestrator for multi-method retrieval fusion.
|
|
5
|
-
"""
|
|
6
|
-
import time
|
|
7
|
-
import json
|
|
8
|
-
import sqlite3
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import List, Dict, Tuple, Optional, Any
|
|
11
|
-
|
|
12
|
-
from search_engine_v2 import BM25SearchEngine
|
|
13
|
-
from query_optimizer import QueryOptimizer
|
|
14
|
-
from cache_manager import CacheManager
|
|
15
|
-
|
|
16
|
-
from search.index_loader import IndexLoaderMixin
|
|
17
|
-
from search.methods import SearchMethodsMixin
|
|
18
|
-
from search.fusion import FusionMixin
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class HybridSearchEngine(IndexLoaderMixin, SearchMethodsMixin, FusionMixin):
|
|
22
|
-
"""
|
|
23
|
-
Hybrid search combining BM25, graph traversal, and semantic search.
|
|
24
|
-
|
|
25
|
-
Provides flexible retrieval strategies based on query type and
|
|
26
|
-
available resources.
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
def __init__(
|
|
30
|
-
self,
|
|
31
|
-
db_path: Path,
|
|
32
|
-
bm25_engine: Optional[BM25SearchEngine] = None,
|
|
33
|
-
query_optimizer: Optional[QueryOptimizer] = None,
|
|
34
|
-
cache_manager: Optional[CacheManager] = None,
|
|
35
|
-
enable_cache: bool = True
|
|
36
|
-
):
|
|
37
|
-
"""
|
|
38
|
-
Initialize hybrid search engine.
|
|
39
|
-
|
|
40
|
-
Args:
|
|
41
|
-
db_path: Path to memory database
|
|
42
|
-
bm25_engine: Pre-configured BM25 engine (will create if None)
|
|
43
|
-
query_optimizer: Query optimizer instance (will create if None)
|
|
44
|
-
cache_manager: Cache manager instance (will create if None)
|
|
45
|
-
enable_cache: Enable result caching
|
|
46
|
-
"""
|
|
47
|
-
self.db_path = db_path
|
|
48
|
-
|
|
49
|
-
# Initialize components
|
|
50
|
-
self.bm25 = bm25_engine or BM25SearchEngine()
|
|
51
|
-
self.optimizer = query_optimizer or QueryOptimizer()
|
|
52
|
-
self.cache = cache_manager if enable_cache else None
|
|
53
|
-
|
|
54
|
-
# Graph engine (lazy load to avoid circular dependencies)
|
|
55
|
-
self._graph_engine = None
|
|
56
|
-
|
|
57
|
-
# TF-IDF fallback (from memory_store_v2)
|
|
58
|
-
self._tfidf_vectorizer = None
|
|
59
|
-
self._tfidf_vectors = None
|
|
60
|
-
self._memory_ids = []
|
|
61
|
-
|
|
62
|
-
# Performance tracking
|
|
63
|
-
self.last_search_time = 0.0
|
|
64
|
-
self.last_fusion_time = 0.0
|
|
65
|
-
|
|
66
|
-
# Load index
|
|
67
|
-
self._load_index()
|
|
68
|
-
|
|
69
|
-
def search(
|
|
70
|
-
self,
|
|
71
|
-
query: str,
|
|
72
|
-
limit: int = 10,
|
|
73
|
-
method: str = "hybrid",
|
|
74
|
-
weights: Optional[Dict[str, float]] = None,
|
|
75
|
-
use_cache: bool = True
|
|
76
|
-
) -> List[Dict[str, Any]]:
|
|
77
|
-
"""
|
|
78
|
-
Hybrid search with multiple retrieval methods.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
query: Search query
|
|
82
|
-
limit: Maximum results
|
|
83
|
-
method: Fusion method ("hybrid", "weighted", "rrf", "bm25", "semantic", "graph")
|
|
84
|
-
weights: Custom weights for weighted fusion (default: balanced)
|
|
85
|
-
use_cache: Use cache for results
|
|
86
|
-
|
|
87
|
-
Returns:
|
|
88
|
-
List of memory dictionaries with scores and match details
|
|
89
|
-
"""
|
|
90
|
-
start_time = time.time()
|
|
91
|
-
|
|
92
|
-
# Check cache
|
|
93
|
-
if use_cache and self.cache:
|
|
94
|
-
cached = self.cache.get(query, limit=limit, method=method)
|
|
95
|
-
if cached is not None:
|
|
96
|
-
self.last_search_time = time.time() - start_time
|
|
97
|
-
return cached
|
|
98
|
-
|
|
99
|
-
# Default weights
|
|
100
|
-
if weights is None:
|
|
101
|
-
weights = {
|
|
102
|
-
'bm25': 0.4,
|
|
103
|
-
'semantic': 0.3,
|
|
104
|
-
'graph': 0.3
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
# Single method search
|
|
108
|
-
if method == "bm25":
|
|
109
|
-
raw_results = self.search_bm25(query, limit)
|
|
110
|
-
elif method == "semantic":
|
|
111
|
-
raw_results = self.search_semantic(query, limit)
|
|
112
|
-
elif method == "graph":
|
|
113
|
-
raw_results = self.search_graph(query, limit)
|
|
114
|
-
|
|
115
|
-
# Multi-method fusion
|
|
116
|
-
else:
|
|
117
|
-
fusion_start = time.time()
|
|
118
|
-
|
|
119
|
-
# Get results from all methods
|
|
120
|
-
results_dict = {}
|
|
121
|
-
|
|
122
|
-
if weights.get('bm25', 0) > 0:
|
|
123
|
-
results_dict['bm25'] = self.search_bm25(query, limit=limit*2)
|
|
124
|
-
|
|
125
|
-
if weights.get('semantic', 0) > 0:
|
|
126
|
-
results_dict['semantic'] = self.search_semantic(query, limit=limit*2)
|
|
127
|
-
|
|
128
|
-
if weights.get('graph', 0) > 0:
|
|
129
|
-
results_dict['graph'] = self.search_graph(query, limit=limit*2)
|
|
130
|
-
|
|
131
|
-
# Fusion
|
|
132
|
-
if method == "rrf":
|
|
133
|
-
raw_results = self._reciprocal_rank_fusion(list(results_dict.values()))
|
|
134
|
-
else: # weighted or hybrid
|
|
135
|
-
raw_results = self._weighted_fusion(results_dict, weights)
|
|
136
|
-
|
|
137
|
-
self.last_fusion_time = time.time() - fusion_start
|
|
138
|
-
|
|
139
|
-
# Limit results
|
|
140
|
-
raw_results = raw_results[:limit]
|
|
141
|
-
|
|
142
|
-
# Fetch full memory details
|
|
143
|
-
results = self._fetch_memory_details(raw_results, query)
|
|
144
|
-
|
|
145
|
-
# Cache results
|
|
146
|
-
if use_cache and self.cache:
|
|
147
|
-
self.cache.put(query, results, limit=limit, method=method)
|
|
148
|
-
|
|
149
|
-
self.last_search_time = time.time() - start_time
|
|
150
|
-
|
|
151
|
-
return results
|
|
152
|
-
|
|
153
|
-
def _fetch_memory_details(
|
|
154
|
-
self,
|
|
155
|
-
raw_results: List[Tuple[int, float]],
|
|
156
|
-
query: str
|
|
157
|
-
) -> List[Dict[str, Any]]:
|
|
158
|
-
"""
|
|
159
|
-
Fetch full memory details for result IDs.
|
|
160
|
-
|
|
161
|
-
Args:
|
|
162
|
-
raw_results: List of (memory_id, score) tuples
|
|
163
|
-
query: Original query (for context)
|
|
164
|
-
|
|
165
|
-
Returns:
|
|
166
|
-
List of memory dictionaries with full details
|
|
167
|
-
"""
|
|
168
|
-
if not raw_results:
|
|
169
|
-
return []
|
|
170
|
-
|
|
171
|
-
memory_ids = [mem_id for mem_id, _ in raw_results]
|
|
172
|
-
id_to_score = {mem_id: score for mem_id, score in raw_results}
|
|
173
|
-
|
|
174
|
-
conn = sqlite3.connect(self.db_path)
|
|
175
|
-
try:
|
|
176
|
-
cursor = conn.cursor()
|
|
177
|
-
|
|
178
|
-
# Fetch memories
|
|
179
|
-
placeholders = ','.join(['?'] * len(memory_ids))
|
|
180
|
-
cursor.execute(f'''
|
|
181
|
-
SELECT id, content, summary, project_path, project_name, tags,
|
|
182
|
-
category, parent_id, tree_path, depth, memory_type,
|
|
183
|
-
importance, created_at, cluster_id, last_accessed, access_count
|
|
184
|
-
FROM memories
|
|
185
|
-
WHERE id IN ({placeholders})
|
|
186
|
-
''', memory_ids)
|
|
187
|
-
|
|
188
|
-
rows = cursor.fetchall()
|
|
189
|
-
finally:
|
|
190
|
-
conn.close()
|
|
191
|
-
|
|
192
|
-
# Build result dictionaries
|
|
193
|
-
results = []
|
|
194
|
-
for row in rows:
|
|
195
|
-
mem_id = row[0]
|
|
196
|
-
results.append({
|
|
197
|
-
'id': mem_id,
|
|
198
|
-
'content': row[1],
|
|
199
|
-
'summary': row[2],
|
|
200
|
-
'project_path': row[3],
|
|
201
|
-
'project_name': row[4],
|
|
202
|
-
'tags': json.loads(row[5]) if row[5] else [],
|
|
203
|
-
'category': row[6],
|
|
204
|
-
'parent_id': row[7],
|
|
205
|
-
'tree_path': row[8],
|
|
206
|
-
'depth': row[9],
|
|
207
|
-
'memory_type': row[10],
|
|
208
|
-
'importance': row[11],
|
|
209
|
-
'created_at': row[12],
|
|
210
|
-
'cluster_id': row[13],
|
|
211
|
-
'last_accessed': row[14],
|
|
212
|
-
'access_count': row[15],
|
|
213
|
-
'score': id_to_score.get(mem_id, 0.0),
|
|
214
|
-
'match_type': 'hybrid'
|
|
215
|
-
})
|
|
216
|
-
|
|
217
|
-
# Sort by score
|
|
218
|
-
results.sort(key=lambda x: x['score'], reverse=True)
|
|
219
|
-
|
|
220
|
-
return results
|
|
221
|
-
|
|
222
|
-
def get_stats(self) -> Dict[str, Any]:
    """
    Collect a snapshot of hybrid search statistics.

    Returns:
        Dictionary holding the BM25 and optimizer statistics, the last
        search and fusion timings scaled by 1000 (reported under the
        ``*_ms`` keys), and flags telling whether the TF-IDF vectorizer
        and the graph engine are currently set. A ``cache`` entry is
        included only when ``self.cache`` is truthy.
    """
    report: Dict[str, Any] = {}

    # Sub-component statistics first, in the same order callers already see.
    report['bm25'] = self.bm25.get_stats()
    report['optimizer'] = self.optimizer.get_stats()

    # Timings are multiplied by 1000 for the millisecond keys.
    report['last_search_time_ms'] = self.last_search_time * 1000
    report['last_fusion_time_ms'] = self.last_fusion_time * 1000

    # Optional back-ends count as available once they are no longer None.
    report['tfidf_available'] = self._tfidf_vectorizer is not None
    report['graph_available'] = self._graph_engine is not None

    if not self.cache:
        return report

    report['cache'] = self.cache.get_stats()
    return report
|
package/src/search/fusion.py
DELETED
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""Score fusion strategies for combining multi-method search results.
|
|
5
|
-
"""
|
|
6
|
-
from collections import defaultdict
|
|
7
|
-
from typing import List, Dict, Tuple
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class FusionMixin:
    """
    Score normalization and result-fusion helpers for hybrid search.

    Every method works on plain (id, score) tuples and reads no instance
    state, so the mixin needs no external dependencies.
    """

    def _normalize_scores(
        self,
        results: List[Tuple[int, float]]
    ) -> List[Tuple[int, float]]:
        """
        Rescale scores into the [0, 1] range with min-max normalization.

        Args:
            results: List of (id, score) tuples

        Returns:
            New list of (id, normalized_score) tuples in input order.
            Empty input gives an empty list; when all scores are equal
            every entry receives 1.0.
        """
        if not results:
            return []

        lowest = min(score for _, score in results)
        highest = max(score for _, score in results)

        if lowest == highest:
            # No spread to scale by - treat every hit as equally strong.
            return [(mem_id, 1.0) for mem_id, _ in results]

        return [
            (mem_id, (score - lowest) / (highest - lowest))
            for mem_id, score in results
        ]

    def _reciprocal_rank_fusion(
        self,
        results_list: List[List[Tuple[int, float]]],
        k: int = 60
    ) -> List[Tuple[int, float]]:
        """
        Merge several ranked lists with Reciprocal Rank Fusion (RRF).

        Each list contributes 1 / (k + rank) for every id it contains,
        with ranks starting at 1. Only the position inside each list
        matters; the original scores are ignored.

        Args:
            results_list: List of result lists from different methods
            k: RRF constant added to every rank (default: 60)

        Returns:
            (id, rrf_score) tuples sorted by descending RRF score
        """
        totals: Dict[int, float] = {}

        for ranking in results_list:
            for position, (mem_id, _) in enumerate(ranking, start=1):
                totals[mem_id] = totals.get(mem_id, 0.0) + 1.0 / (k + position)

        # sorted() is stable, so ties keep first-seen order.
        return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)

    def _weighted_fusion(
        self,
        results_dict: Dict[str, List[Tuple[int, float]]],
        weights: Dict[str, float]
    ) -> List[Tuple[int, float]]:
        """
        Merge per-method results using a weighted average of scores.

        Scores of each method are min-max normalized first. An id's fused
        score is its weighted score sum divided by the total weight of the
        methods that returned it. Methods missing from ``weights`` count
        with weight 0.0.

        Args:
            results_dict: Dictionary mapping method name to results
            weights: Dictionary mapping method name to weight

        Returns:
            (id, fused_score) tuples sorted by descending fused score
        """
        weighted_totals = defaultdict(float)
        weight_seen = defaultdict(float)  # total weight of methods that hit each id

        for method, raw_results in results_dict.items():
            weight = weights.get(method, 0.0)
            for mem_id, score in self._normalize_scores(raw_results):
                weighted_totals[mem_id] += weight * score
                weight_seen[mem_id] += weight

        fused = []
        for mem_id, total in weighted_totals.items():
            denominator = weight_seen[mem_id]
            # Ids reached only through zero-weight methods fall back to 0.
            fused.append((mem_id, total / denominator if denominator > 0 else 0))

        fused.sort(key=lambda pair: pair[1], reverse=True)
        return fused
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""Index loading and graph engine lazy-loading for hybrid search.
|
|
5
|
-
"""
|
|
6
|
-
import json
|
|
7
|
-
import sqlite3
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Optional
|
|
10
|
-
|
|
11
|
-
from search_engine_v2 import BM25SearchEngine
|
|
12
|
-
from query_optimizer import QueryOptimizer
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class IndexLoaderMixin:
    """
    Mixin that provides index loading and graph engine lazy-loading.

    Expects the host class to have:
        - self.db_path: Path
        - self.bm25: BM25SearchEngine
        - self.optimizer: QueryOptimizer
        - self._graph_engine: Optional[GraphEngine]
        - self._tfidf_vectorizer
        - self._tfidf_vectors
        - self._memory_ids: list
    """

    @staticmethod
    def _compose_document_text(content, summary, tags_json) -> str:
        """
        Join content, summary and tags of one memory row into indexable text.

        Args:
            content: Value of the ``content`` column (None is treated as '')
            summary: Value of the ``summary`` column; skipped when falsy
            tags_json: JSON-encoded tags; skipped when falsy or not decodable

        Returns:
            The parts joined with single spaces
        """
        parts = [content if content is not None else '']

        if summary:
            parts.append(summary)

        if tags_json:
            try:
                tags = json.loads(tags_json)
            except (TypeError, ValueError, RecursionError):
                # Tags are best-effort: an undecodable payload is ignored.
                tags = None

            if isinstance(tags, str):
                # A bare JSON string is one tag, not a sequence of characters.
                parts.append(tags)
            elif isinstance(tags, (list, dict)):
                # Coerce to str so numeric tags cannot break the join below.
                parts.extend(str(tag) for tag in tags if tag is not None)

        return ' '.join(parts)

    def _load_index(self):
        """
        Load documents from database and build search indexes.

        Reads every row of the ``memories`` table, indexes the combined
        text with ``self.bm25``, stores the row ids in ``self._memory_ids``,
        hands the vocabulary and tokenized documents to ``self.optimizer``
        and, when scikit-learn can be imported, fits a TF-IDF model.
        Returns without touching any index when the table is empty.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, tags
                FROM memories
                ORDER BY id
            ''')
            rows = cursor.fetchall()
        finally:
            conn.close()

        if not rows:
            return

        doc_ids = [row[0] for row in rows]
        documents = [
            self._compose_document_text(row[1], row[2], row[3])
            for row in rows
        ]

        # Tokenize each document once; the tokens feed both the vocabulary
        # (spell correction) and the co-occurrence matrix (query expansion).
        tokenized_docs = [self.bm25._tokenize(doc) for doc in documents]
        vocabulary = set()
        for tokens in tokenized_docs:
            vocabulary.update(tokens)

        # Index with BM25
        self.bm25.index_documents(documents, doc_ids)
        self._memory_ids = doc_ids

        # Initialize optimizer with vocabulary and co-occurrence data
        self.optimizer.vocabulary = vocabulary
        self.optimizer.build_cooccurrence_matrix(tokenized_docs)

        # TF-IDF is optional: skip it when scikit-learn is not installed.
        try:
            from sklearn.feature_extraction.text import TfidfVectorizer
        except ImportError:
            return

        vectorizer = TfidfVectorizer(
            max_features=5000,
            stop_words='english',
            ngram_range=(1, 2)
        )
        try:
            vectors = vectorizer.fit_transform(documents)
        except ValueError:
            # Fitting fails when no term survives (e.g. only stop words).
            # Disable semantic search instead of aborting the index load,
            # and never leave a vectorizer around without its vectors.
            self._tfidf_vectorizer = None
            self._tfidf_vectors = None
            return

        self._tfidf_vectorizer = vectorizer
        self._tfidf_vectors = vectors

    def _load_graph_engine(self):
        """
        Lazy load graph engine to avoid circular imports.

        Returns:
            The cached or newly created graph engine, or None when the
            ``graph_engine`` module cannot be imported.
        """
        if self._graph_engine is None:
            try:
                from graph_engine import GraphEngine
                self._graph_engine = GraphEngine(self.db_path)
            except ImportError:
                # Graph engine not available
                pass
        return self._graph_engine
|
package/src/search/methods.py
DELETED
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""Individual search methods (BM25, semantic, graph) for hybrid search.
|
|
5
|
-
"""
|
|
6
|
-
from typing import List, Tuple
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class SearchMethodsMixin:
    """
    Mixin providing individual search method implementations.

    Expects the host class to have:
        - self.bm25: BM25SearchEngine
        - self.optimizer: QueryOptimizer
        - self._tfidf_vectorizer
        - self._tfidf_vectors
        - self._memory_ids: list
        - self._load_graph_engine() method
        - self.search_bm25() method (for graph seed)
    """

    def search_bm25(
        self,
        query: str,
        limit: int = 10,
        score_threshold: float = 0.0
    ) -> List[Tuple[int, float]]:
        """
        Search using BM25 keyword matching.

        The query is first passed through ``self.optimizer.optimize`` with
        spell correction enabled and expansion disabled.

        Args:
            query: Search query
            limit: Maximum results
            score_threshold: Minimum score threshold

        Returns:
            List of (memory_id, score) tuples as returned by ``self.bm25.search``
        """
        optimized = self.optimizer.optimize(
            query,
            enable_spell_correction=True,
            enable_expansion=False  # Expansion can hurt precision
        )

        return self.bm25.search(optimized, limit, score_threshold)

    def search_semantic(
        self,
        query: str,
        limit: int = 10,
        score_threshold: float = 0.05
    ) -> List[Tuple[int, float]]:
        """
        Search using TF-IDF cosine similarity.

        Args:
            query: Search query
            limit: Maximum results
            score_threshold: Minimum similarity threshold

        Returns:
            List of (memory_id, score) tuples sorted by descending score.
            Empty when the TF-IDF model is not loaded or when any step of
            the similarity computation fails.
        """
        if self._tfidf_vectorizer is None or self._tfidf_vectors is None:
            return []

        try:
            from sklearn.metrics.pairwise import cosine_similarity

            # Vectorize query and compare it against every indexed document
            query_vec = self._tfidf_vectorizer.transform([query])
            similarities = cosine_similarity(query_vec, self._tfidf_vectors).flatten()

            # Row idx of the similarity vector is looked up in _memory_ids
            results = [
                (self._memory_ids[idx], float(score))
                for idx, score in enumerate(similarities)
                if score >= score_threshold
            ]

            results.sort(key=lambda item: item[1], reverse=True)
            return results[:limit]

        except Exception:
            # Semantic search is optional - fall back to "no results"
            return []

    def search_graph(
        self,
        query: str,
        limit: int = 10,
        max_depth: int = 2
    ) -> List[Tuple[int, float]]:
        """
        Search using graph traversal from initial matches.

        Strategy:
        1. Get up to 5 seed memories from BM25 (each starts with score 1.0)
        2. Traverse the graph breadth-first to find related memories
        3. Score a neighbour as parent_score * similarity * 0.7 ** parent_depth

        Args:
            query: Search query
            limit: Maximum results
            max_depth: Maximum graph traversal depth

        Returns:
            List of (memory_id, score) tuples in traversal order (seeds
            first). Empty when no graph engine is available or BM25 finds
            no seeds.
        """
        from collections import deque

        graph = self._load_graph_engine()
        if graph is None:
            return []

        # Get seed memories from BM25
        seed_results = self.search_bm25(query, limit=5)
        if not seed_results:
            return []

        seed_ids = [mem_id for mem_id, _ in seed_results]

        visited = set(seed_ids)   # ids already queued for expansion
        emitted = set()           # ids already present in results
        results = []

        # deque gives O(1) pops from the front of the BFS queue
        queue = deque((mem_id, 1.0, 0) for mem_id in seed_ids)  # (id, score, depth)

        while queue and len(results) < limit:
            current_id, current_score, depth = queue.popleft()

            if depth > max_depth:
                continue

            if current_id not in emitted:
                emitted.add(current_id)
                results.append((current_id, current_score))

            # Get related memories from graph
            try:
                related = graph.get_related_memories(current_id, limit=5)

                for rel_id, similarity in related:
                    if rel_id not in visited:
                        visited.add(rel_id)
                        # Decay score by depth
                        new_score = current_score * similarity * (0.7 ** depth)
                        queue.append((rel_id, new_score, depth + 1))

            except Exception:
                # Graph operation failed - skip this node's neighbours
                continue

        return results[:limit]
|