superlocalmemory 2.8.6 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +9 -1
- package/NOTICE +63 -0
- package/README.md +165 -480
- package/bin/slm +17 -449
- package/bin/slm-npm +62 -48
- package/conftest.py +5 -0
- package/docs/api-reference.md +284 -0
- package/docs/architecture.md +149 -0
- package/docs/auto-memory.md +150 -0
- package/docs/cli-reference.md +276 -0
- package/docs/compliance.md +191 -0
- package/docs/configuration.md +182 -0
- package/docs/getting-started.md +102 -0
- package/docs/ide-setup.md +261 -0
- package/docs/mcp-tools.md +220 -0
- package/docs/migration-from-v2.md +170 -0
- package/docs/profiles.md +173 -0
- package/docs/troubleshooting.md +310 -0
- package/{configs → ide/configs}/antigravity-mcp.json +3 -3
- package/ide/configs/chatgpt-desktop-mcp.json +16 -0
- package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
- package/{configs → ide/configs}/codex-mcp.toml +4 -4
- package/{configs → ide/configs}/continue-mcp.yaml +4 -3
- package/{configs → ide/configs}/continue-skills.yaml +6 -6
- package/ide/configs/cursor-mcp.json +15 -0
- package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
- package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
- package/{configs → ide/configs}/opencode-mcp.json +2 -2
- package/{configs → ide/configs}/perplexity-mcp.json +2 -2
- package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
- package/{configs → ide/configs}/windsurf-mcp.json +3 -3
- package/{configs → ide/configs}/zed-mcp.json +2 -2
- package/{hooks → ide/hooks}/context-hook.js +9 -20
- package/ide/hooks/memory-list-skill.js +70 -0
- package/ide/hooks/memory-profile-skill.js +101 -0
- package/ide/hooks/memory-recall-skill.js +62 -0
- package/ide/hooks/memory-remember-skill.js +68 -0
- package/ide/hooks/memory-reset-skill.js +160 -0
- package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
- package/ide/integrations/langchain/README.md +106 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
- package/ide/integrations/langchain/pyproject.toml +38 -0
- package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
- package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
- package/ide/integrations/langchain/tests/test_security.py +117 -0
- package/ide/integrations/llamaindex/README.md +81 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
- package/ide/integrations/llamaindex/pyproject.toml +43 -0
- package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
- package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
- package/ide/integrations/llamaindex/tests/test_security.py +241 -0
- package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
- package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
- package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
- package/package.json +13 -22
- package/pyproject.toml +85 -0
- package/scripts/build-dmg.sh +417 -0
- package/scripts/install-skills.ps1 +334 -0
- package/scripts/postinstall.js +2 -2
- package/scripts/start-dashboard.ps1 +52 -0
- package/scripts/start-dashboard.sh +41 -0
- package/scripts/sync-wiki.ps1 +127 -0
- package/scripts/sync-wiki.sh +82 -0
- package/scripts/test-dmg.sh +161 -0
- package/scripts/test-npm-package.ps1 +252 -0
- package/scripts/test-npm-package.sh +207 -0
- package/scripts/verify-install.ps1 +294 -0
- package/scripts/verify-install.sh +266 -0
- package/src/superlocalmemory/__init__.py +0 -0
- package/src/superlocalmemory/attribution/__init__.py +9 -0
- package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
- package/src/superlocalmemory/attribution/signer.py +153 -0
- package/src/superlocalmemory/attribution/watermark.py +189 -0
- package/src/superlocalmemory/cli/__init__.py +5 -0
- package/src/superlocalmemory/cli/commands.py +245 -0
- package/src/superlocalmemory/cli/main.py +89 -0
- package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
- package/src/superlocalmemory/cli/post_install.py +99 -0
- package/src/superlocalmemory/cli/setup_wizard.py +129 -0
- package/src/superlocalmemory/compliance/__init__.py +0 -0
- package/src/superlocalmemory/compliance/abac.py +204 -0
- package/src/superlocalmemory/compliance/audit.py +314 -0
- package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
- package/src/superlocalmemory/compliance/gdpr.py +294 -0
- package/src/superlocalmemory/compliance/lifecycle.py +158 -0
- package/src/superlocalmemory/compliance/retention.py +232 -0
- package/src/superlocalmemory/compliance/scheduler.py +148 -0
- package/src/superlocalmemory/core/__init__.py +0 -0
- package/src/superlocalmemory/core/config.py +391 -0
- package/src/superlocalmemory/core/embeddings.py +293 -0
- package/src/superlocalmemory/core/engine.py +701 -0
- package/src/superlocalmemory/core/hooks.py +65 -0
- package/src/superlocalmemory/core/maintenance.py +172 -0
- package/src/superlocalmemory/core/modes.py +140 -0
- package/src/superlocalmemory/core/profiles.py +234 -0
- package/src/superlocalmemory/core/registry.py +117 -0
- package/src/superlocalmemory/dynamics/__init__.py +0 -0
- package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
- package/src/superlocalmemory/encoding/__init__.py +0 -0
- package/src/superlocalmemory/encoding/consolidator.py +485 -0
- package/src/superlocalmemory/encoding/emotional.py +125 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
- package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
- package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
- package/src/superlocalmemory/encoding/foresight.py +91 -0
- package/src/superlocalmemory/encoding/graph_builder.py +302 -0
- package/src/superlocalmemory/encoding/observation_builder.py +160 -0
- package/src/superlocalmemory/encoding/scene_builder.py +183 -0
- package/src/superlocalmemory/encoding/signal_inference.py +90 -0
- package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
- package/src/superlocalmemory/encoding/type_router.py +235 -0
- package/src/superlocalmemory/hooks/__init__.py +3 -0
- package/src/superlocalmemory/hooks/auto_capture.py +111 -0
- package/src/superlocalmemory/hooks/auto_recall.py +93 -0
- package/src/superlocalmemory/hooks/ide_connector.py +204 -0
- package/src/superlocalmemory/hooks/rules_engine.py +99 -0
- package/src/superlocalmemory/infra/__init__.py +3 -0
- package/src/superlocalmemory/infra/auth_middleware.py +82 -0
- package/src/superlocalmemory/infra/backup.py +317 -0
- package/src/superlocalmemory/infra/cache_manager.py +267 -0
- package/src/superlocalmemory/infra/event_bus.py +381 -0
- package/src/superlocalmemory/infra/rate_limiter.py +135 -0
- package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
- package/src/superlocalmemory/learning/__init__.py +0 -0
- package/src/superlocalmemory/learning/adaptive.py +172 -0
- package/src/superlocalmemory/learning/behavioral.py +490 -0
- package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
- package/src/superlocalmemory/learning/bootstrap.py +298 -0
- package/src/superlocalmemory/learning/cross_project.py +399 -0
- package/src/superlocalmemory/learning/database.py +376 -0
- package/src/superlocalmemory/learning/engagement.py +323 -0
- package/src/superlocalmemory/learning/features.py +138 -0
- package/src/superlocalmemory/learning/feedback.py +316 -0
- package/src/superlocalmemory/learning/outcomes.py +255 -0
- package/src/superlocalmemory/learning/project_context.py +366 -0
- package/src/superlocalmemory/learning/ranker.py +155 -0
- package/src/superlocalmemory/learning/source_quality.py +303 -0
- package/src/superlocalmemory/learning/workflows.py +309 -0
- package/src/superlocalmemory/llm/__init__.py +0 -0
- package/src/superlocalmemory/llm/backbone.py +316 -0
- package/src/superlocalmemory/math/__init__.py +0 -0
- package/src/superlocalmemory/math/fisher.py +356 -0
- package/src/superlocalmemory/math/langevin.py +398 -0
- package/src/superlocalmemory/math/sheaf.py +257 -0
- package/src/superlocalmemory/mcp/__init__.py +0 -0
- package/src/superlocalmemory/mcp/resources.py +245 -0
- package/src/superlocalmemory/mcp/server.py +61 -0
- package/src/superlocalmemory/mcp/tools.py +18 -0
- package/src/superlocalmemory/mcp/tools_core.py +305 -0
- package/src/superlocalmemory/mcp/tools_v28.py +223 -0
- package/src/superlocalmemory/mcp/tools_v3.py +286 -0
- package/src/superlocalmemory/retrieval/__init__.py +0 -0
- package/src/superlocalmemory/retrieval/agentic.py +295 -0
- package/src/superlocalmemory/retrieval/ann_index.py +223 -0
- package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
- package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
- package/src/superlocalmemory/retrieval/engine.py +390 -0
- package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
- package/src/superlocalmemory/retrieval/fusion.py +78 -0
- package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
- package/src/superlocalmemory/retrieval/reranker.py +154 -0
- package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
- package/src/superlocalmemory/retrieval/strategy.py +96 -0
- package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
- package/src/superlocalmemory/server/__init__.py +1 -0
- package/src/superlocalmemory/server/api.py +248 -0
- package/src/superlocalmemory/server/routes/__init__.py +4 -0
- package/src/superlocalmemory/server/routes/agents.py +107 -0
- package/src/superlocalmemory/server/routes/backup.py +91 -0
- package/src/superlocalmemory/server/routes/behavioral.py +127 -0
- package/src/superlocalmemory/server/routes/compliance.py +160 -0
- package/src/superlocalmemory/server/routes/data_io.py +188 -0
- package/src/superlocalmemory/server/routes/events.py +183 -0
- package/src/superlocalmemory/server/routes/helpers.py +85 -0
- package/src/superlocalmemory/server/routes/learning.py +273 -0
- package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
- package/src/superlocalmemory/server/routes/memories.py +399 -0
- package/src/superlocalmemory/server/routes/profiles.py +219 -0
- package/src/superlocalmemory/server/routes/stats.py +346 -0
- package/src/superlocalmemory/server/routes/v3_api.py +365 -0
- package/src/superlocalmemory/server/routes/ws.py +82 -0
- package/src/superlocalmemory/server/security_middleware.py +57 -0
- package/src/superlocalmemory/server/ui.py +245 -0
- package/src/superlocalmemory/storage/__init__.py +0 -0
- package/src/superlocalmemory/storage/access_control.py +182 -0
- package/src/superlocalmemory/storage/database.py +594 -0
- package/src/superlocalmemory/storage/migrations.py +303 -0
- package/src/superlocalmemory/storage/models.py +406 -0
- package/src/superlocalmemory/storage/schema.py +726 -0
- package/src/superlocalmemory/storage/v2_migrator.py +317 -0
- package/src/superlocalmemory/trust/__init__.py +0 -0
- package/src/superlocalmemory/trust/gate.py +130 -0
- package/src/superlocalmemory/trust/provenance.py +124 -0
- package/src/superlocalmemory/trust/scorer.py +347 -0
- package/src/superlocalmemory/trust/signals.py +153 -0
- package/ui/index.html +278 -5
- package/ui/js/auto-settings.js +70 -0
- package/ui/js/dashboard.js +90 -0
- package/ui/js/fact-detail.js +92 -0
- package/ui/js/feedback.js +2 -2
- package/ui/js/ide-status.js +102 -0
- package/ui/js/math-health.js +98 -0
- package/ui/js/recall-lab.js +127 -0
- package/ui/js/settings.js +2 -2
- package/ui/js/trust-dashboard.js +73 -0
- package/api_server.py +0 -724
- package/bin/aider-smart +0 -72
- package/bin/superlocalmemoryv2-learning +0 -4
- package/bin/superlocalmemoryv2-list +0 -3
- package/bin/superlocalmemoryv2-patterns +0 -4
- package/bin/superlocalmemoryv2-profile +0 -3
- package/bin/superlocalmemoryv2-recall +0 -3
- package/bin/superlocalmemoryv2-remember +0 -3
- package/bin/superlocalmemoryv2-reset +0 -3
- package/bin/superlocalmemoryv2-status +0 -3
- package/configs/chatgpt-desktop-mcp.json +0 -16
- package/configs/cursor-mcp.json +0 -15
- package/hooks/memory-list-skill.js +0 -139
- package/hooks/memory-profile-skill.js +0 -273
- package/hooks/memory-recall-skill.js +0 -114
- package/hooks/memory-remember-skill.js +0 -127
- package/hooks/memory-reset-skill.js +0 -274
- package/mcp_server.py +0 -1808
- package/requirements-core.txt +0 -22
- package/requirements-learning.txt +0 -12
- package/requirements.txt +0 -12
- package/src/agent_registry.py +0 -411
- package/src/auth_middleware.py +0 -61
- package/src/auto_backup.py +0 -459
- package/src/behavioral/__init__.py +0 -49
- package/src/behavioral/behavioral_listener.py +0 -203
- package/src/behavioral/behavioral_patterns.py +0 -275
- package/src/behavioral/cross_project_transfer.py +0 -206
- package/src/behavioral/outcome_inference.py +0 -194
- package/src/behavioral/outcome_tracker.py +0 -193
- package/src/behavioral/tests/__init__.py +0 -4
- package/src/behavioral/tests/test_behavioral_integration.py +0 -108
- package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
- package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
- package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
- package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
- package/src/behavioral/tests/test_outcome_inference.py +0 -107
- package/src/behavioral/tests/test_outcome_tracker.py +0 -96
- package/src/cache_manager.py +0 -518
- package/src/compliance/__init__.py +0 -48
- package/src/compliance/abac_engine.py +0 -149
- package/src/compliance/abac_middleware.py +0 -116
- package/src/compliance/audit_db.py +0 -215
- package/src/compliance/audit_logger.py +0 -148
- package/src/compliance/retention_manager.py +0 -289
- package/src/compliance/retention_scheduler.py +0 -186
- package/src/compliance/tests/__init__.py +0 -4
- package/src/compliance/tests/test_abac_enforcement.py +0 -95
- package/src/compliance/tests/test_abac_engine.py +0 -124
- package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
- package/src/compliance/tests/test_audit_db.py +0 -123
- package/src/compliance/tests/test_audit_logger.py +0 -98
- package/src/compliance/tests/test_mcp_audit.py +0 -128
- package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
- package/src/compliance/tests/test_retention_manager.py +0 -131
- package/src/compliance/tests/test_retention_scheduler.py +0 -99
- package/src/compression/__init__.py +0 -25
- package/src/compression/cli.py +0 -150
- package/src/compression/cold_storage.py +0 -217
- package/src/compression/config.py +0 -72
- package/src/compression/orchestrator.py +0 -133
- package/src/compression/tier2_compressor.py +0 -228
- package/src/compression/tier3_compressor.py +0 -153
- package/src/compression/tier_classifier.py +0 -148
- package/src/db_connection_manager.py +0 -536
- package/src/embedding_engine.py +0 -63
- package/src/embeddings/__init__.py +0 -47
- package/src/embeddings/cache.py +0 -70
- package/src/embeddings/cli.py +0 -113
- package/src/embeddings/constants.py +0 -47
- package/src/embeddings/database.py +0 -91
- package/src/embeddings/engine.py +0 -247
- package/src/embeddings/model_loader.py +0 -145
- package/src/event_bus.py +0 -562
- package/src/graph/__init__.py +0 -36
- package/src/graph/build_helpers.py +0 -74
- package/src/graph/cli.py +0 -87
- package/src/graph/cluster_builder.py +0 -188
- package/src/graph/cluster_summary.py +0 -148
- package/src/graph/constants.py +0 -47
- package/src/graph/edge_builder.py +0 -162
- package/src/graph/entity_extractor.py +0 -95
- package/src/graph/graph_core.py +0 -226
- package/src/graph/graph_search.py +0 -231
- package/src/graph/hierarchical.py +0 -207
- package/src/graph/schema.py +0 -99
- package/src/graph_engine.py +0 -52
- package/src/hnsw_index.py +0 -628
- package/src/hybrid_search.py +0 -46
- package/src/learning/__init__.py +0 -217
- package/src/learning/adaptive_ranker.py +0 -682
- package/src/learning/bootstrap/__init__.py +0 -69
- package/src/learning/bootstrap/constants.py +0 -93
- package/src/learning/bootstrap/db_queries.py +0 -316
- package/src/learning/bootstrap/sampling.py +0 -82
- package/src/learning/bootstrap/text_utils.py +0 -71
- package/src/learning/cross_project_aggregator.py +0 -857
- package/src/learning/db/__init__.py +0 -40
- package/src/learning/db/constants.py +0 -44
- package/src/learning/db/schema.py +0 -279
- package/src/learning/engagement_tracker.py +0 -628
- package/src/learning/feature_extractor.py +0 -708
- package/src/learning/feedback_collector.py +0 -806
- package/src/learning/learning_db.py +0 -915
- package/src/learning/project_context_manager.py +0 -572
- package/src/learning/ranking/__init__.py +0 -33
- package/src/learning/ranking/constants.py +0 -84
- package/src/learning/ranking/helpers.py +0 -278
- package/src/learning/source_quality_scorer.py +0 -676
- package/src/learning/synthetic_bootstrap.py +0 -755
- package/src/learning/tests/test_adaptive_ranker.py +0 -325
- package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
- package/src/learning/tests/test_aggregator.py +0 -306
- package/src/learning/tests/test_auto_retrain_v28.py +0 -35
- package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
- package/src/learning/tests/test_feature_extractor_v28.py +0 -93
- package/src/learning/tests/test_feedback_collector.py +0 -294
- package/src/learning/tests/test_learning_db.py +0 -602
- package/src/learning/tests/test_learning_db_v28.py +0 -110
- package/src/learning/tests/test_learning_init_v28.py +0 -48
- package/src/learning/tests/test_outcome_signals.py +0 -48
- package/src/learning/tests/test_project_context.py +0 -292
- package/src/learning/tests/test_schema_migration.py +0 -319
- package/src/learning/tests/test_signal_inference.py +0 -397
- package/src/learning/tests/test_source_quality.py +0 -351
- package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
- package/src/learning/tests/test_workflow_miner.py +0 -318
- package/src/learning/workflow_pattern_miner.py +0 -655
- package/src/lifecycle/__init__.py +0 -54
- package/src/lifecycle/bounded_growth.py +0 -239
- package/src/lifecycle/compaction_engine.py +0 -226
- package/src/lifecycle/lifecycle_engine.py +0 -355
- package/src/lifecycle/lifecycle_evaluator.py +0 -257
- package/src/lifecycle/lifecycle_scheduler.py +0 -130
- package/src/lifecycle/retention_policy.py +0 -285
- package/src/lifecycle/tests/test_bounded_growth.py +0 -193
- package/src/lifecycle/tests/test_compaction.py +0 -179
- package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
- package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
- package/src/lifecycle/tests/test_mcp_compact.py +0 -149
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
- package/src/lifecycle/tests/test_retention_policy.py +0 -162
- package/src/mcp_tools_v28.py +0 -281
- package/src/memory/__init__.py +0 -36
- package/src/memory/cli.py +0 -205
- package/src/memory/constants.py +0 -39
- package/src/memory/helpers.py +0 -28
- package/src/memory/schema.py +0 -166
- package/src/memory-profiles.py +0 -595
- package/src/memory-reset.py +0 -491
- package/src/memory_compression.py +0 -989
- package/src/memory_store_v2.py +0 -1155
- package/src/migrate_v1_to_v2.py +0 -629
- package/src/pattern_learner.py +0 -34
- package/src/patterns/__init__.py +0 -24
- package/src/patterns/analyzers.py +0 -251
- package/src/patterns/learner.py +0 -271
- package/src/patterns/scoring.py +0 -171
- package/src/patterns/store.py +0 -225
- package/src/patterns/terminology.py +0 -140
- package/src/provenance_tracker.py +0 -312
- package/src/qualixar_attribution.py +0 -139
- package/src/qualixar_watermark.py +0 -78
- package/src/query_optimizer.py +0 -511
- package/src/rate_limiter.py +0 -83
- package/src/search/__init__.py +0 -20
- package/src/search/cli.py +0 -77
- package/src/search/constants.py +0 -26
- package/src/search/engine.py +0 -241
- package/src/search/fusion.py +0 -122
- package/src/search/index_loader.py +0 -114
- package/src/search/methods.py +0 -162
- package/src/search_engine_v2.py +0 -401
- package/src/setup_validator.py +0 -482
- package/src/subscription_manager.py +0 -391
- package/src/tree/__init__.py +0 -59
- package/src/tree/builder.py +0 -185
- package/src/tree/nodes.py +0 -202
- package/src/tree/queries.py +0 -257
- package/src/tree/schema.py +0 -80
- package/src/tree_manager.py +0 -19
- package/src/trust/__init__.py +0 -45
- package/src/trust/constants.py +0 -66
- package/src/trust/queries.py +0 -157
- package/src/trust/schema.py +0 -95
- package/src/trust/scorer.py +0 -299
- package/src/trust/signals.py +0 -95
- package/src/trust_scorer.py +0 -44
- package/ui/app.js +0 -1588
- package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
- package/ui/js/graph-cytoscape.js +0 -1168
- package/ui/js/graph-d3-backup.js +0 -32
- package/ui/js/graph.js +0 -32
- package/ui_server.py +0 -286
- /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
- /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
- /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
- /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
- /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
- /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
- /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
- /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
- /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
- /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
- /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
- /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
- /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
- /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
- /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
- /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
- /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
- /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
- /package/{completions → ide/completions}/slm.bash +0 -0
- /package/{completions → ide/completions}/slm.zsh +0 -0
- /package/{configs → ide/configs}/cody-commands.json +0 -0
- /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
- /package/{install.ps1 → scripts/install.ps1} +0 -0
- /package/{install.sh → scripts/install.sh} +0 -0
|
@@ -1,682 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""
|
|
5
|
-
AdaptiveRanker — Three-phase adaptive re-ranking engine.
|
|
6
|
-
|
|
7
|
-
This is the core ranking engine for v2.7 "Your AI Learns You". It sits
|
|
8
|
-
between the existing search methods (FTS5 + TF-IDF + HNSW) and the final
|
|
9
|
-
result list, re-ordering candidates based on learned user preferences.
|
|
10
|
-
|
|
11
|
-
Three Phases (progressive adaptation):
|
|
12
|
-
|
|
13
|
-
Phase 0 — Baseline (< 20 feedback signals):
|
|
14
|
-
Pure v2.6 behavior. No re-ranking applied. Results returned as-is
|
|
15
|
-
from the existing search pipeline. Zero risk of degradation.
|
|
16
|
-
|
|
17
|
-
Phase 1 — Rule-Based (20-199 signals):
|
|
18
|
-
Applies learned-pattern boosting to search results. Uses feature
|
|
19
|
-
extraction to compute boost multipliers for tech match, project
|
|
20
|
-
match, recency, and source quality. Deterministic and interpretable.
|
|
21
|
-
|
|
22
|
-
Phase 2 — ML Model (200+ signals across 50+ unique queries):
|
|
23
|
-
LightGBM LambdaRank re-ranker. Trained on real feedback data
|
|
24
|
-
(and optionally bootstrapped from synthetic data). Produces ML
|
|
25
|
-
scores that replace the original ranking order.
|
|
26
|
-
|
|
27
|
-
Design Principles:
|
|
28
|
-
- LightGBM is OPTIONAL. If not installed, falls back to rule-based.
|
|
29
|
-
- Any exception in re-ranking falls back to original v2.6 results.
|
|
30
|
-
- Model is loaded lazily and cached in memory.
|
|
31
|
-
- Training is explicit (called by user or scheduled), never implicit.
|
|
32
|
-
- Original scores are preserved as 'base_score' for diagnostics.
|
|
33
|
-
|
|
34
|
-
Research Backing:
|
|
35
|
-
- eKNOW 2025: BM25 -> re-ranker pipeline for personal collections
|
|
36
|
-
- MACLA (arXiv:2512.18950): Bayesian confidence scoring
|
|
37
|
-
- FCS LREC 2024: Cold-start mitigation via synthetic bootstrap
|
|
38
|
-
"""
|
|
39
|
-
|
|
40
|
-
import logging
|
|
41
|
-
import threading
|
|
42
|
-
from datetime import datetime
|
|
43
|
-
from pathlib import Path
|
|
44
|
-
from typing import Any, Dict, List, Optional
|
|
45
|
-
|
|
46
|
-
# LightGBM is OPTIONAL — graceful fallback to rule-based ranking
|
|
47
|
-
try:
|
|
48
|
-
import lightgbm as lgb
|
|
49
|
-
HAS_LIGHTGBM = True
|
|
50
|
-
except ImportError:
|
|
51
|
-
lgb = None
|
|
52
|
-
HAS_LIGHTGBM = False
|
|
53
|
-
|
|
54
|
-
# NumPy is used for feature matrix construction (comes with sklearn)
|
|
55
|
-
try:
|
|
56
|
-
import numpy as np
|
|
57
|
-
HAS_NUMPY = True
|
|
58
|
-
except ImportError:
|
|
59
|
-
np = None
|
|
60
|
-
HAS_NUMPY = False
|
|
61
|
-
|
|
62
|
-
from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
|
|
63
|
-
|
|
64
|
-
logger = logging.getLogger("superlocalmemory.learning.adaptive_ranker")
|
|
65
|
-
|
|
66
|
-
# Import constants and helpers from ranking subpackage
|
|
67
|
-
from .ranking import (
|
|
68
|
-
MODELS_DIR,
|
|
69
|
-
MODEL_PATH,
|
|
70
|
-
PHASE_THRESHOLDS,
|
|
71
|
-
MIN_UNIQUE_QUERIES_FOR_ML,
|
|
72
|
-
RULE_BOOST,
|
|
73
|
-
TRAINING_PARAMS,
|
|
74
|
-
calculate_rule_boost,
|
|
75
|
-
prepare_training_data_internal,
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class AdaptiveRanker:
|
|
80
|
-
"""
|
|
81
|
-
Three-phase adaptive re-ranking engine.
|
|
82
|
-
|
|
83
|
-
Usage (called by memory_store_v2.search or mcp_server recall):
|
|
84
|
-
ranker = AdaptiveRanker()
|
|
85
|
-
results = ranker.rerank(search_results, query, context={
|
|
86
|
-
'tech_preferences': {...},
|
|
87
|
-
'current_project': 'MyProject',
|
|
88
|
-
'source_scores': {...},
|
|
89
|
-
'workflow_phase': 'testing',
|
|
90
|
-
})
|
|
91
|
-
|
|
92
|
-
The caller wraps this in try/except — any exception here causes
|
|
93
|
-
fallback to original v2.6 results. Zero risk of degradation.
|
|
94
|
-
"""
|
|
95
|
-
|
|
96
|
-
PHASE_THRESHOLDS = PHASE_THRESHOLDS
|
|
97
|
-
MODEL_PATH = MODEL_PATH
|
|
98
|
-
|
|
99
|
-
def __init__(self, learning_db=None):
|
|
100
|
-
"""
|
|
101
|
-
Initialize AdaptiveRanker.
|
|
102
|
-
|
|
103
|
-
Args:
|
|
104
|
-
learning_db: Optional LearningDB instance. If None, imports
|
|
105
|
-
and creates one lazily.
|
|
106
|
-
"""
|
|
107
|
-
self._learning_db = learning_db
|
|
108
|
-
self._feature_extractor = FeatureExtractor()
|
|
109
|
-
self._model = None # Loaded lazily on first ML rerank
|
|
110
|
-
self._model_load_attempted = False
|
|
111
|
-
self._lock = threading.Lock()
|
|
112
|
-
|
|
113
|
-
# ========================================================================
|
|
114
|
-
# LearningDB Access
|
|
115
|
-
# ========================================================================
|
|
116
|
-
|
|
117
|
-
def _get_learning_db(self):
|
|
118
|
-
"""Get or create the LearningDB instance."""
|
|
119
|
-
if self._learning_db is None:
|
|
120
|
-
try:
|
|
121
|
-
from .learning_db import LearningDB
|
|
122
|
-
self._learning_db = LearningDB()
|
|
123
|
-
except Exception as e:
|
|
124
|
-
logger.warning("Cannot access LearningDB: %s", e)
|
|
125
|
-
return None
|
|
126
|
-
return self._learning_db
|
|
127
|
-
|
|
128
|
-
# ========================================================================
|
|
129
|
-
# Phase Detection
|
|
130
|
-
# ========================================================================
|
|
131
|
-
|
|
132
|
-
def get_phase(self) -> str:
|
|
133
|
-
"""
|
|
134
|
-
Determine the current ranking phase based on feedback data.
|
|
135
|
-
|
|
136
|
-
Returns:
|
|
137
|
-
'baseline' — Not enough data for personalization
|
|
138
|
-
'rule_based' — Enough data for rule-based boosting
|
|
139
|
-
'ml_model' — Enough data for ML ranking (if LightGBM available)
|
|
140
|
-
"""
|
|
141
|
-
ldb = self._get_learning_db()
|
|
142
|
-
if ldb is None:
|
|
143
|
-
return 'baseline'
|
|
144
|
-
|
|
145
|
-
try:
|
|
146
|
-
feedback_count = ldb.get_feedback_count()
|
|
147
|
-
unique_queries = ldb.get_unique_query_count()
|
|
148
|
-
except Exception as e:
|
|
149
|
-
logger.warning("Failed to check feedback counts: %s", e)
|
|
150
|
-
return 'baseline'
|
|
151
|
-
|
|
152
|
-
# Phase 2: ML model — requires enough data AND LightGBM AND numpy
|
|
153
|
-
if (
|
|
154
|
-
feedback_count >= PHASE_THRESHOLDS['ml_model']
|
|
155
|
-
and unique_queries >= MIN_UNIQUE_QUERIES_FOR_ML
|
|
156
|
-
and HAS_LIGHTGBM
|
|
157
|
-
and HAS_NUMPY
|
|
158
|
-
):
|
|
159
|
-
return 'ml_model'
|
|
160
|
-
|
|
161
|
-
# Phase 1: Rule-based — just needs minimum feedback
|
|
162
|
-
if feedback_count >= PHASE_THRESHOLDS['rule_based']:
|
|
163
|
-
return 'rule_based'
|
|
164
|
-
|
|
165
|
-
# Phase 0: Not enough data yet
|
|
166
|
-
return 'baseline'
|
|
167
|
-
|
|
168
|
-
def get_phase_info(self) -> Dict[str, Any]:
|
|
169
|
-
"""
|
|
170
|
-
Return detailed phase information for diagnostics.
|
|
171
|
-
|
|
172
|
-
Returns:
|
|
173
|
-
Dict with phase, feedback_count, unique_queries, thresholds,
|
|
174
|
-
model_loaded, lightgbm_available.
|
|
175
|
-
"""
|
|
176
|
-
ldb = self._get_learning_db()
|
|
177
|
-
feedback_count = 0
|
|
178
|
-
unique_queries = 0
|
|
179
|
-
|
|
180
|
-
if ldb is not None:
|
|
181
|
-
try:
|
|
182
|
-
feedback_count = ldb.get_feedback_count()
|
|
183
|
-
unique_queries = ldb.get_unique_query_count()
|
|
184
|
-
except Exception:
|
|
185
|
-
pass
|
|
186
|
-
|
|
187
|
-
phase = self.get_phase()
|
|
188
|
-
|
|
189
|
-
return {
|
|
190
|
-
'phase': phase,
|
|
191
|
-
'feedback_count': feedback_count,
|
|
192
|
-
'unique_queries': unique_queries,
|
|
193
|
-
'thresholds': dict(PHASE_THRESHOLDS),
|
|
194
|
-
'min_unique_queries_for_ml': MIN_UNIQUE_QUERIES_FOR_ML,
|
|
195
|
-
'model_loaded': self._model is not None,
|
|
196
|
-
'model_path_exists': MODEL_PATH.exists(),
|
|
197
|
-
'lightgbm_available': HAS_LIGHTGBM,
|
|
198
|
-
'numpy_available': HAS_NUMPY,
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
# ========================================================================
|
|
202
|
-
# Main Re-ranking Entry Point
|
|
203
|
-
# ========================================================================
|
|
204
|
-
|
|
205
|
-
def rerank(
    self,
    results: List[dict],
    query: str,
    context: Optional[dict] = None,
) -> List[dict]:
    """
    Re-order search results according to the current learning phase.

    Entry point of the ranker: it looks up the active phase via
    ``self.get_phase()`` and dispatches to the matching strategy. The
    ``results`` list and its dicts are modified in place and the same
    list object is returned.

    Args:
        results: Memory dicts from the search pipeline; the 'score' key
            is read (missing scores count as 0.0).
        query: The recall query string.
        context: Optional dict; the keys 'source_scores',
            'tech_preferences', 'current_project' and 'workflow_phase'
            are forwarded to the feature extractor.

    Returns:
        The same list, where every dict has gained:
            - 'base_score': the score it had on entry
            - 'ranking_phase': the strategy that handled it
        and, for the rule-based / ML strategies, an updated 'score'.

    Note:
        The original contract states that the caller wraps this call in
        try/except and falls back to the unranked results on any error.
    """
    def _passthrough(label):
        # Record the incoming score and tag the phase; order is untouched.
        for item in results:
            item['base_score'] = item.get('score', 0.0)
            item['ranking_phase'] = label
        return results

    if not results:
        return results

    # A single hit cannot be reordered, so skip the whole pipeline.
    if len(results) <= 1:
        return _passthrough('baseline')

    context = context or {}

    # Per-memory signal stats are handed to the feature extractor below.
    signal_stats = {}
    ldb = self._get_learning_db()
    if ldb:
        try:
            memory_ids = [r.get('id') for r in results if r.get('id')]
            if memory_ids:
                signal_stats = ldb.get_signal_stats_for_memories(memory_ids)
        except Exception:
            pass  # Signal stats failure is not critical

    # Configure the extractor once for this query.
    self._feature_extractor.set_context(
        source_scores=context.get('source_scores'),
        tech_preferences=context.get('tech_preferences'),
        current_project=context.get('current_project'),
        workflow_phase=context.get('workflow_phase'),
        signal_stats=signal_stats,
    )

    phase = self.get_phase()

    if phase == 'baseline':
        # Phase 0: keep the search pipeline's order.
        return _passthrough('baseline')

    if phase == 'rule_based':
        return self._rerank_rule_based(results, query, context)

    if phase == 'ml_model':
        # Prefer the trained model; any failure drops back to the rules.
        try:
            return self._rerank_ml(results, query, context)
        except Exception as e:
            logger.warning(
                "ML re-ranking failed, falling back to rule-based: %s", e
            )
            return self._rerank_rule_based(results, query, context)

    # Unrecognised phase name: leave the order alone.
    return _passthrough('unknown')
|
|
297
|
-
|
|
298
|
-
# ========================================================================
|
|
299
|
-
# Phase 1: Rule-Based Re-ranking
|
|
300
|
-
# ========================================================================
|
|
301
|
-
|
|
302
|
-
def _rerank_rule_based(
    self,
    results: List[dict],
    query: str,
    context: dict,
) -> List[dict]:
    """
    Phase 1 strategy: rescale each score by a rule-derived boost factor.

    A feature vector is extracted for every result and passed to
    ``calculate_rule_boost``; the result's score is multiplied by the
    returned factor. Results without a matching feature vector keep
    their score. The list is then sorted in place, highest score first.

    Args:
        results: Memory dicts to re-rank (mutated in place).
        query: The recall query string, forwarded to feature extraction.
        context: Accepted for signature parity with the other
            strategies; not read here.

    Returns:
        The same list, sorted by the boosted 'score', each dict carrying
        'base_score' and 'ranking_phase' == 'rule_based'.
    """
    vectors = self._feature_extractor.extract_batch(results, query)
    available = len(vectors)

    for position, memory in enumerate(results):
        original = memory.get('score', 0.0)
        memory.update(base_score=original, ranking_phase='rule_based')

        # Only boost entries that actually received a feature vector.
        if position < available:
            memory['score'] = original * calculate_rule_boost(vectors[position])

    # Highest boosted score first.
    results.sort(key=lambda memory: memory.get('score', 0.0), reverse=True)
    return results
|
|
334
|
-
|
|
335
|
-
# ========================================================================
|
|
336
|
-
# Phase 2: ML Re-ranking (LightGBM)
|
|
337
|
-
# ========================================================================
|
|
338
|
-
|
|
339
|
-
def _rerank_ml(
    self,
    results: List[dict],
    query: str,
    context: dict,
) -> List[dict]:
    """
    Phase 2 strategy: score results with the trained LightGBM model.

    Features are extracted for all results, stacked into a float64
    matrix and passed to the model's ``predict``. Each prediction
    replaces the result's 'score' and is also stored as 'ml_score';
    the previous score is kept as 'base_score'.

    Args:
        results: Memory dicts to re-rank (mutated in place).
        query: The recall query string, forwarded to feature extraction.
        context: Accepted for signature parity; not read here.

    Returns:
        The same list sorted by 'score', highest first.

    Raises:
        RuntimeError: LightGBM/NumPy are missing or no model is loaded.
        ValueError: Feature extraction returned nothing, or the feature
            width differs from NUM_FEATURES.
    """
    if not (HAS_LIGHTGBM and HAS_NUMPY):
        raise RuntimeError("LightGBM or NumPy not available for ML ranking")

    # Lazily loaded and cached by _load_model.
    booster = self._load_model()
    if booster is None:
        raise RuntimeError("No trained ranking model available")

    vectors = self._feature_extractor.extract_batch(results, query)
    if not vectors:
        raise ValueError("Feature extraction returned empty results")

    X = np.array(vectors, dtype=np.float64)

    # Refuse to predict with a feature width the code does not expect.
    if X.shape[1] != NUM_FEATURES:
        raise ValueError(
            f"Feature dimension mismatch: expected {NUM_FEATURES}, "
            f"got {X.shape[1]}"
        )

    predictions = booster.predict(X)
    scored = len(predictions)

    for position, memory in enumerate(results):
        memory['base_score'] = memory.get('score', 0.0)
        memory['ranking_phase'] = 'ml_model'
        if position >= scored:
            # No prediction for this entry: its 'score' is left as it was.
            memory['ml_score'] = 0.0
            continue
        predicted = float(predictions[position])
        memory['ml_score'] = predicted
        memory['score'] = predicted

    # Highest model score first.
    results.sort(key=lambda memory: memory.get('score', 0.0), reverse=True)
    return results
|
|
390
|
-
|
|
391
|
-
# ========================================================================
|
|
392
|
-
# Model Management
|
|
393
|
-
# ========================================================================
|
|
394
|
-
|
|
395
|
-
def _load_model(self):
    """
    Return the ranking model, loading it from MODEL_PATH on first use.

    The loaded booster is cached on ``self._model``. A load is attempted
    at most once: after an attempt (successful or not)
    ``self._model_load_attempted`` stays True until something resets it,
    so failed loads are not retried on every call.

    If the model on disk has a feature count different from
    NUM_FEATURES, the file is deleted and a background re-bootstrap is
    started; None is returned for this call.

    Returns:
        lgb.Booster instance, or None if no usable model is available.
    """
    # Fast path without taking the lock.
    if self._model is not None:
        return self._model
    if self._model_load_attempted:
        return None

    with self._lock:
        # Re-check: another thread may have finished while we waited.
        if self._model is not None:
            return self._model
        if self._model_load_attempted:
            return None

        # Mark the attempt up front so every exit below counts as "tried".
        self._model_load_attempted = True

        if not HAS_LIGHTGBM:
            logger.info("LightGBM not installed — ML ranking unavailable")
            return None

        if not MODEL_PATH.exists():
            logger.info(
                "No ranking model at %s — ML ranking unavailable",
                MODEL_PATH
            )
            return None

        try:
            candidate = lgb.Booster(model_file=str(MODEL_PATH))
            found = candidate.num_feature()

            if found == NUM_FEATURES:
                self._model = candidate
                logger.info("Loaded ranking model from %s", MODEL_PATH)
                return self._model

            # Stale model from an older feature layout: discard it and
            # rebuild in the background.
            logger.info(
                "Feature mismatch: model has %d features, expected %d. "
                "Triggering auto-retrain in background.",
                found, NUM_FEATURES,
            )
            MODEL_PATH.unlink(missing_ok=True)
            self._trigger_retrain_background()
            return None
        except Exception as e:
            logger.warning("Failed to load ranking model: %s", e)
            return None
|
|
452
|
-
|
|
453
|
-
def _trigger_retrain_background(self):
    """
    Start a daemon thread that re-bootstraps the ranking model.

    The worker asks ``SyntheticBootstrapper`` whether a bootstrap is due
    and, if the bootstrap reports success, clears the cached model and
    the load-attempted flag so the next ``_load_model`` call reads the
    new file. Failures inside the worker are logged as warnings;
    failures while starting the thread are ignored.
    """
    try:
        import threading

        def _retrain():
            try:
                from .synthetic_bootstrap import SyntheticBootstrapper
                bootstrapper = SyntheticBootstrapper()
                if not bootstrapper.should_bootstrap():
                    return
                if not bootstrapper.bootstrap_model():
                    return
                logger.info(
                    "Auto-retrain complete with %d-feature model",
                    NUM_FEATURES,
                )
                # Drop the cache so the fresh model is picked up lazily.
                with self._lock:
                    self._model = None
                    self._model_load_attempted = False
            except Exception as e:
                logger.warning("Auto-retrain failed: %s", e)

        threading.Thread(target=_retrain, daemon=True).start()
    except Exception:
        pass
|
|
480
|
-
|
|
481
|
-
def reload_model(self):
    """
    Discard the cached ranking model and load it again from disk.

    Intended to be called after a new model has been trained so the
    updated file is picked up.

    Returns:
        Whatever ``_load_model`` returns for the fresh load.
    """
    # Reset both cache fields under the lock, then load outside it.
    with self._lock:
        self._model, self._model_load_attempted = None, False
    return self._load_model()
|
|
492
|
-
|
|
493
|
-
# ========================================================================
|
|
494
|
-
# Model Training
|
|
495
|
-
# ========================================================================
|
|
496
|
-
|
|
497
|
-
def train(self, force: bool = False) -> Optional[Dict[str, Any]]:
    """
    Train or retrain the LightGBM ranking model.

    Uses continued training (init_model) if a compatible model already
    exists at MODEL_PATH, incorporating new feedback data incrementally.
    An existing model that cannot be read, or whose feature count differs
    from the training matrix, is ignored and training starts fresh.

    Args:
        force: If True, skip the feedback/unique-query threshold check
            and train on whatever data is available.

    Returns:
        Training metadata dict, or None if training was not possible
        (missing dependencies, no learning DB, insufficient data,
        training or save failure). The dict contains: model_version,
        training_samples, query_groups, n_estimators, ndcg_at_10,
        model_path, continued_from, created_at.
    """
    if not HAS_LIGHTGBM or not HAS_NUMPY:
        logger.warning(
            "Cannot train: LightGBM=%s, NumPy=%s",
            HAS_LIGHTGBM, HAS_NUMPY
        )
        return None

    ldb = self._get_learning_db()
    if ldb is None:
        logger.warning("Cannot train: LearningDB unavailable")
        return None

    # Check if we have enough data (unless forced)
    if not force:
        feedback_count = ldb.get_feedback_count()
        unique_queries = ldb.get_unique_query_count()
        if (
            feedback_count < PHASE_THRESHOLDS['ml_model']
            or unique_queries < MIN_UNIQUE_QUERIES_FOR_ML
        ):
            logger.info(
                "Insufficient data for training: %d feedback / %d queries "
                "(need %d / %d)",
                feedback_count, unique_queries,
                PHASE_THRESHOLDS['ml_model'], MIN_UNIQUE_QUERIES_FOR_ML,
            )
            return None

    # Prepare training data
    training_data = self._prepare_training_data()
    if training_data is None:
        logger.warning("No usable training data available")
        return None

    X, y, groups = training_data
    total_samples = X.shape[0]

    if total_samples < 10:
        logger.warning("Too few training samples: %d", total_samples)
        return None

    logger.info(
        "Training ranking model: %d samples, %d groups",
        total_samples, len(groups)
    )

    # Create LightGBM dataset
    train_dataset = lgb.Dataset(
        X, label=y, group=groups,
        feature_name=list(FEATURE_NAMES),
        free_raw_data=False,
    )

    # Training parameters: n_estimators is passed to lgb.train separately
    # as num_boost_round, so it is removed from the params dict.
    params = dict(TRAINING_PARAMS)
    n_estimators = params.pop('n_estimators', 50)

    # Check for existing model (continued training)
    init_model = None
    if MODEL_PATH.exists():
        try:
            init_model = lgb.Booster(model_file=str(MODEL_PATH))
            existing_features = init_model.num_feature()
            if existing_features != X.shape[1]:
                # Continuing from a model with a different feature layout
                # would make lgb.train fail on every retrain; start over.
                logger.info(
                    "Starting fresh training (existing model has %d "
                    "features, training data has %d)",
                    existing_features, X.shape[1],
                )
                init_model = None
            else:
                logger.info("Continuing training from existing model")
        except Exception:
            logger.info("Starting fresh training (existing model unreadable)")
            init_model = None

    # Train
    try:
        booster = lgb.train(
            params,
            train_dataset,
            num_boost_round=n_estimators,
            init_model=init_model,
            valid_sets=[train_dataset],
            valid_names=['train'],
            callbacks=[lgb.log_evaluation(period=0)],  # Silent training
        )
    except Exception as e:
        logger.error("LightGBM training failed: %s", e)
        return None

    # Save model
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    try:
        booster.save_model(str(MODEL_PATH))
        logger.info("Ranking model saved to %s", MODEL_PATH)
    except Exception as e:
        logger.error("Failed to save ranking model: %s", e)
        return None

    # Extract NDCG@10 from the training evaluation (if available).
    # lgb.train records the last evaluation of every valid set in
    # booster.best_score, keyed by the names given in valid_names.
    # Booster.eval_train() only accepts a custom metric callable, so it
    # cannot be used to evaluate a Dataset passed as its argument.
    ndcg_at_10 = None
    try:
        train_scores = booster.best_score.get('train', {})
        for metric_name, value in train_scores.items():
            if 'ndcg@10' in metric_name:
                ndcg_at_10 = value
                break
    except Exception as e:
        # The metric is informational only; never fail training over it.
        logger.debug("Could not read NDCG@10 from training evaluation: %s", e)

    # Record metadata in learning_db
    model_version = datetime.now().strftime("v%Y%m%d_%H%M%S")
    try:
        ldb.record_model_training(
            model_version=model_version,
            training_samples=total_samples,
            real_samples=total_samples,
            synthetic_samples=0,
            ndcg_at_10=ndcg_at_10,
            model_path=str(MODEL_PATH),
        )
    except Exception as e:
        logger.warning("Failed to record training metadata: %s", e)

    # Reload model into cache
    self.reload_model()

    metadata = {
        'model_version': model_version,
        'training_samples': total_samples,
        'query_groups': len(groups),
        'n_estimators': n_estimators,
        'ndcg_at_10': ndcg_at_10,
        'model_path': str(MODEL_PATH),
        'continued_from': init_model is not None,
        'created_at': datetime.now().isoformat(),
    }
    logger.info("Training complete: %s", metadata)
    return metadata
|
|
644
|
-
|
|
645
|
-
def _prepare_training_data(self) -> Optional[tuple]:
    """
    Build the training set from stored feedback records.

    Fetches the feedback rows from the learning database and hands them,
    together with this ranker's feature extractor, to
    ``prepare_training_data_internal``, which does the actual assembly.
    Per the original documentation that helper groups feedback by query
    and uses the feedback signal value as the relevance label; confirm
    against its implementation.

    Returns:
        The helper's result, documented as a tuple (X, y, groups):
            X: numpy array (n_samples, NUM_FEATURES)
            y: numpy array (n_samples,) of relevance labels
            groups: list of ints, samples per query group
        or None when the learning database is unavailable or holds no
        feedback.
    """
    ldb = self._get_learning_db()
    feedback = ldb.get_feedback_for_training() if ldb is not None else None
    if not feedback:
        return None
    return prepare_training_data_internal(feedback, self._feature_extractor)
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
# ============================================================================
|
|
673
|
-
# Module-level convenience
|
|
674
|
-
# ============================================================================
|
|
675
|
-
|
|
676
|
-
def get_phase() -> str:
    """
    Report the current ranking phase using a throwaway AdaptiveRanker.

    Any error while creating the ranker or querying it is swallowed and
    reported as 'baseline'.
    """
    try:
        return AdaptiveRanker().get_phase()
    except Exception:
        return 'baseline'
|