superlocalmemory 2.8.6 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +9 -1
- package/NOTICE +63 -0
- package/README.md +165 -480
- package/bin/slm +17 -449
- package/bin/slm-npm +62 -48
- package/conftest.py +5 -0
- package/docs/api-reference.md +284 -0
- package/docs/architecture.md +149 -0
- package/docs/auto-memory.md +150 -0
- package/docs/cli-reference.md +276 -0
- package/docs/compliance.md +191 -0
- package/docs/configuration.md +182 -0
- package/docs/getting-started.md +102 -0
- package/docs/ide-setup.md +261 -0
- package/docs/mcp-tools.md +220 -0
- package/docs/migration-from-v2.md +170 -0
- package/docs/profiles.md +173 -0
- package/docs/troubleshooting.md +310 -0
- package/{configs → ide/configs}/antigravity-mcp.json +3 -3
- package/ide/configs/chatgpt-desktop-mcp.json +16 -0
- package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
- package/{configs → ide/configs}/codex-mcp.toml +4 -4
- package/{configs → ide/configs}/continue-mcp.yaml +4 -3
- package/{configs → ide/configs}/continue-skills.yaml +6 -6
- package/ide/configs/cursor-mcp.json +15 -0
- package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
- package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
- package/{configs → ide/configs}/opencode-mcp.json +2 -2
- package/{configs → ide/configs}/perplexity-mcp.json +2 -2
- package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
- package/{configs → ide/configs}/windsurf-mcp.json +3 -3
- package/{configs → ide/configs}/zed-mcp.json +2 -2
- package/{hooks → ide/hooks}/context-hook.js +9 -20
- package/ide/hooks/memory-list-skill.js +70 -0
- package/ide/hooks/memory-profile-skill.js +101 -0
- package/ide/hooks/memory-recall-skill.js +62 -0
- package/ide/hooks/memory-remember-skill.js +68 -0
- package/ide/hooks/memory-reset-skill.js +160 -0
- package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
- package/ide/integrations/langchain/README.md +106 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
- package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
- package/ide/integrations/langchain/pyproject.toml +38 -0
- package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
- package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
- package/ide/integrations/langchain/tests/test_security.py +117 -0
- package/ide/integrations/llamaindex/README.md +81 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
- package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
- package/ide/integrations/llamaindex/pyproject.toml +43 -0
- package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
- package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
- package/ide/integrations/llamaindex/tests/test_security.py +241 -0
- package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
- package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
- package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
- package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
- package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
- package/package.json +13 -22
- package/pyproject.toml +85 -0
- package/scripts/build-dmg.sh +417 -0
- package/scripts/install-skills.ps1 +334 -0
- package/scripts/postinstall.js +2 -2
- package/scripts/start-dashboard.ps1 +52 -0
- package/scripts/start-dashboard.sh +41 -0
- package/scripts/sync-wiki.ps1 +127 -0
- package/scripts/sync-wiki.sh +82 -0
- package/scripts/test-dmg.sh +161 -0
- package/scripts/test-npm-package.ps1 +252 -0
- package/scripts/test-npm-package.sh +207 -0
- package/scripts/verify-install.ps1 +294 -0
- package/scripts/verify-install.sh +266 -0
- package/src/superlocalmemory/__init__.py +0 -0
- package/src/superlocalmemory/attribution/__init__.py +9 -0
- package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
- package/src/superlocalmemory/attribution/signer.py +153 -0
- package/src/superlocalmemory/attribution/watermark.py +189 -0
- package/src/superlocalmemory/cli/__init__.py +5 -0
- package/src/superlocalmemory/cli/commands.py +245 -0
- package/src/superlocalmemory/cli/main.py +89 -0
- package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
- package/src/superlocalmemory/cli/post_install.py +99 -0
- package/src/superlocalmemory/cli/setup_wizard.py +129 -0
- package/src/superlocalmemory/compliance/__init__.py +0 -0
- package/src/superlocalmemory/compliance/abac.py +204 -0
- package/src/superlocalmemory/compliance/audit.py +314 -0
- package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
- package/src/superlocalmemory/compliance/gdpr.py +294 -0
- package/src/superlocalmemory/compliance/lifecycle.py +158 -0
- package/src/superlocalmemory/compliance/retention.py +232 -0
- package/src/superlocalmemory/compliance/scheduler.py +148 -0
- package/src/superlocalmemory/core/__init__.py +0 -0
- package/src/superlocalmemory/core/config.py +391 -0
- package/src/superlocalmemory/core/embeddings.py +293 -0
- package/src/superlocalmemory/core/engine.py +701 -0
- package/src/superlocalmemory/core/hooks.py +65 -0
- package/src/superlocalmemory/core/maintenance.py +172 -0
- package/src/superlocalmemory/core/modes.py +140 -0
- package/src/superlocalmemory/core/profiles.py +234 -0
- package/src/superlocalmemory/core/registry.py +117 -0
- package/src/superlocalmemory/dynamics/__init__.py +0 -0
- package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
- package/src/superlocalmemory/encoding/__init__.py +0 -0
- package/src/superlocalmemory/encoding/consolidator.py +485 -0
- package/src/superlocalmemory/encoding/emotional.py +125 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
- package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
- package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
- package/src/superlocalmemory/encoding/foresight.py +91 -0
- package/src/superlocalmemory/encoding/graph_builder.py +302 -0
- package/src/superlocalmemory/encoding/observation_builder.py +160 -0
- package/src/superlocalmemory/encoding/scene_builder.py +183 -0
- package/src/superlocalmemory/encoding/signal_inference.py +90 -0
- package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
- package/src/superlocalmemory/encoding/type_router.py +235 -0
- package/src/superlocalmemory/hooks/__init__.py +3 -0
- package/src/superlocalmemory/hooks/auto_capture.py +111 -0
- package/src/superlocalmemory/hooks/auto_recall.py +93 -0
- package/src/superlocalmemory/hooks/ide_connector.py +204 -0
- package/src/superlocalmemory/hooks/rules_engine.py +99 -0
- package/src/superlocalmemory/infra/__init__.py +3 -0
- package/src/superlocalmemory/infra/auth_middleware.py +82 -0
- package/src/superlocalmemory/infra/backup.py +317 -0
- package/src/superlocalmemory/infra/cache_manager.py +267 -0
- package/src/superlocalmemory/infra/event_bus.py +381 -0
- package/src/superlocalmemory/infra/rate_limiter.py +135 -0
- package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
- package/src/superlocalmemory/learning/__init__.py +0 -0
- package/src/superlocalmemory/learning/adaptive.py +172 -0
- package/src/superlocalmemory/learning/behavioral.py +490 -0
- package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
- package/src/superlocalmemory/learning/bootstrap.py +298 -0
- package/src/superlocalmemory/learning/cross_project.py +399 -0
- package/src/superlocalmemory/learning/database.py +376 -0
- package/src/superlocalmemory/learning/engagement.py +323 -0
- package/src/superlocalmemory/learning/features.py +138 -0
- package/src/superlocalmemory/learning/feedback.py +316 -0
- package/src/superlocalmemory/learning/outcomes.py +255 -0
- package/src/superlocalmemory/learning/project_context.py +366 -0
- package/src/superlocalmemory/learning/ranker.py +155 -0
- package/src/superlocalmemory/learning/source_quality.py +303 -0
- package/src/superlocalmemory/learning/workflows.py +309 -0
- package/src/superlocalmemory/llm/__init__.py +0 -0
- package/src/superlocalmemory/llm/backbone.py +316 -0
- package/src/superlocalmemory/math/__init__.py +0 -0
- package/src/superlocalmemory/math/fisher.py +356 -0
- package/src/superlocalmemory/math/langevin.py +398 -0
- package/src/superlocalmemory/math/sheaf.py +257 -0
- package/src/superlocalmemory/mcp/__init__.py +0 -0
- package/src/superlocalmemory/mcp/resources.py +245 -0
- package/src/superlocalmemory/mcp/server.py +61 -0
- package/src/superlocalmemory/mcp/tools.py +18 -0
- package/src/superlocalmemory/mcp/tools_core.py +305 -0
- package/src/superlocalmemory/mcp/tools_v28.py +223 -0
- package/src/superlocalmemory/mcp/tools_v3.py +286 -0
- package/src/superlocalmemory/retrieval/__init__.py +0 -0
- package/src/superlocalmemory/retrieval/agentic.py +295 -0
- package/src/superlocalmemory/retrieval/ann_index.py +223 -0
- package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
- package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
- package/src/superlocalmemory/retrieval/engine.py +390 -0
- package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
- package/src/superlocalmemory/retrieval/fusion.py +78 -0
- package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
- package/src/superlocalmemory/retrieval/reranker.py +154 -0
- package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
- package/src/superlocalmemory/retrieval/strategy.py +96 -0
- package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
- package/src/superlocalmemory/server/__init__.py +1 -0
- package/src/superlocalmemory/server/api.py +248 -0
- package/src/superlocalmemory/server/routes/__init__.py +4 -0
- package/src/superlocalmemory/server/routes/agents.py +107 -0
- package/src/superlocalmemory/server/routes/backup.py +91 -0
- package/src/superlocalmemory/server/routes/behavioral.py +127 -0
- package/src/superlocalmemory/server/routes/compliance.py +160 -0
- package/src/superlocalmemory/server/routes/data_io.py +188 -0
- package/src/superlocalmemory/server/routes/events.py +183 -0
- package/src/superlocalmemory/server/routes/helpers.py +85 -0
- package/src/superlocalmemory/server/routes/learning.py +273 -0
- package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
- package/src/superlocalmemory/server/routes/memories.py +399 -0
- package/src/superlocalmemory/server/routes/profiles.py +219 -0
- package/src/superlocalmemory/server/routes/stats.py +346 -0
- package/src/superlocalmemory/server/routes/v3_api.py +365 -0
- package/src/superlocalmemory/server/routes/ws.py +82 -0
- package/src/superlocalmemory/server/security_middleware.py +57 -0
- package/src/superlocalmemory/server/ui.py +245 -0
- package/src/superlocalmemory/storage/__init__.py +0 -0
- package/src/superlocalmemory/storage/access_control.py +182 -0
- package/src/superlocalmemory/storage/database.py +594 -0
- package/src/superlocalmemory/storage/migrations.py +303 -0
- package/src/superlocalmemory/storage/models.py +406 -0
- package/src/superlocalmemory/storage/schema.py +726 -0
- package/src/superlocalmemory/storage/v2_migrator.py +317 -0
- package/src/superlocalmemory/trust/__init__.py +0 -0
- package/src/superlocalmemory/trust/gate.py +130 -0
- package/src/superlocalmemory/trust/provenance.py +124 -0
- package/src/superlocalmemory/trust/scorer.py +347 -0
- package/src/superlocalmemory/trust/signals.py +153 -0
- package/ui/index.html +278 -5
- package/ui/js/auto-settings.js +70 -0
- package/ui/js/dashboard.js +90 -0
- package/ui/js/fact-detail.js +92 -0
- package/ui/js/feedback.js +2 -2
- package/ui/js/ide-status.js +102 -0
- package/ui/js/math-health.js +98 -0
- package/ui/js/recall-lab.js +127 -0
- package/ui/js/settings.js +2 -2
- package/ui/js/trust-dashboard.js +73 -0
- package/api_server.py +0 -724
- package/bin/aider-smart +0 -72
- package/bin/superlocalmemoryv2-learning +0 -4
- package/bin/superlocalmemoryv2-list +0 -3
- package/bin/superlocalmemoryv2-patterns +0 -4
- package/bin/superlocalmemoryv2-profile +0 -3
- package/bin/superlocalmemoryv2-recall +0 -3
- package/bin/superlocalmemoryv2-remember +0 -3
- package/bin/superlocalmemoryv2-reset +0 -3
- package/bin/superlocalmemoryv2-status +0 -3
- package/configs/chatgpt-desktop-mcp.json +0 -16
- package/configs/cursor-mcp.json +0 -15
- package/hooks/memory-list-skill.js +0 -139
- package/hooks/memory-profile-skill.js +0 -273
- package/hooks/memory-recall-skill.js +0 -114
- package/hooks/memory-remember-skill.js +0 -127
- package/hooks/memory-reset-skill.js +0 -274
- package/mcp_server.py +0 -1808
- package/requirements-core.txt +0 -22
- package/requirements-learning.txt +0 -12
- package/requirements.txt +0 -12
- package/src/agent_registry.py +0 -411
- package/src/auth_middleware.py +0 -61
- package/src/auto_backup.py +0 -459
- package/src/behavioral/__init__.py +0 -49
- package/src/behavioral/behavioral_listener.py +0 -203
- package/src/behavioral/behavioral_patterns.py +0 -275
- package/src/behavioral/cross_project_transfer.py +0 -206
- package/src/behavioral/outcome_inference.py +0 -194
- package/src/behavioral/outcome_tracker.py +0 -193
- package/src/behavioral/tests/__init__.py +0 -4
- package/src/behavioral/tests/test_behavioral_integration.py +0 -108
- package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
- package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
- package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
- package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
- package/src/behavioral/tests/test_outcome_inference.py +0 -107
- package/src/behavioral/tests/test_outcome_tracker.py +0 -96
- package/src/cache_manager.py +0 -518
- package/src/compliance/__init__.py +0 -48
- package/src/compliance/abac_engine.py +0 -149
- package/src/compliance/abac_middleware.py +0 -116
- package/src/compliance/audit_db.py +0 -215
- package/src/compliance/audit_logger.py +0 -148
- package/src/compliance/retention_manager.py +0 -289
- package/src/compliance/retention_scheduler.py +0 -186
- package/src/compliance/tests/__init__.py +0 -4
- package/src/compliance/tests/test_abac_enforcement.py +0 -95
- package/src/compliance/tests/test_abac_engine.py +0 -124
- package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
- package/src/compliance/tests/test_audit_db.py +0 -123
- package/src/compliance/tests/test_audit_logger.py +0 -98
- package/src/compliance/tests/test_mcp_audit.py +0 -128
- package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
- package/src/compliance/tests/test_retention_manager.py +0 -131
- package/src/compliance/tests/test_retention_scheduler.py +0 -99
- package/src/compression/__init__.py +0 -25
- package/src/compression/cli.py +0 -150
- package/src/compression/cold_storage.py +0 -217
- package/src/compression/config.py +0 -72
- package/src/compression/orchestrator.py +0 -133
- package/src/compression/tier2_compressor.py +0 -228
- package/src/compression/tier3_compressor.py +0 -153
- package/src/compression/tier_classifier.py +0 -148
- package/src/db_connection_manager.py +0 -536
- package/src/embedding_engine.py +0 -63
- package/src/embeddings/__init__.py +0 -47
- package/src/embeddings/cache.py +0 -70
- package/src/embeddings/cli.py +0 -113
- package/src/embeddings/constants.py +0 -47
- package/src/embeddings/database.py +0 -91
- package/src/embeddings/engine.py +0 -247
- package/src/embeddings/model_loader.py +0 -145
- package/src/event_bus.py +0 -562
- package/src/graph/__init__.py +0 -36
- package/src/graph/build_helpers.py +0 -74
- package/src/graph/cli.py +0 -87
- package/src/graph/cluster_builder.py +0 -188
- package/src/graph/cluster_summary.py +0 -148
- package/src/graph/constants.py +0 -47
- package/src/graph/edge_builder.py +0 -162
- package/src/graph/entity_extractor.py +0 -95
- package/src/graph/graph_core.py +0 -226
- package/src/graph/graph_search.py +0 -231
- package/src/graph/hierarchical.py +0 -207
- package/src/graph/schema.py +0 -99
- package/src/graph_engine.py +0 -52
- package/src/hnsw_index.py +0 -628
- package/src/hybrid_search.py +0 -46
- package/src/learning/__init__.py +0 -217
- package/src/learning/adaptive_ranker.py +0 -682
- package/src/learning/bootstrap/__init__.py +0 -69
- package/src/learning/bootstrap/constants.py +0 -93
- package/src/learning/bootstrap/db_queries.py +0 -316
- package/src/learning/bootstrap/sampling.py +0 -82
- package/src/learning/bootstrap/text_utils.py +0 -71
- package/src/learning/cross_project_aggregator.py +0 -857
- package/src/learning/db/__init__.py +0 -40
- package/src/learning/db/constants.py +0 -44
- package/src/learning/db/schema.py +0 -279
- package/src/learning/engagement_tracker.py +0 -628
- package/src/learning/feature_extractor.py +0 -708
- package/src/learning/feedback_collector.py +0 -806
- package/src/learning/learning_db.py +0 -915
- package/src/learning/project_context_manager.py +0 -572
- package/src/learning/ranking/__init__.py +0 -33
- package/src/learning/ranking/constants.py +0 -84
- package/src/learning/ranking/helpers.py +0 -278
- package/src/learning/source_quality_scorer.py +0 -676
- package/src/learning/synthetic_bootstrap.py +0 -755
- package/src/learning/tests/test_adaptive_ranker.py +0 -325
- package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
- package/src/learning/tests/test_aggregator.py +0 -306
- package/src/learning/tests/test_auto_retrain_v28.py +0 -35
- package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
- package/src/learning/tests/test_feature_extractor_v28.py +0 -93
- package/src/learning/tests/test_feedback_collector.py +0 -294
- package/src/learning/tests/test_learning_db.py +0 -602
- package/src/learning/tests/test_learning_db_v28.py +0 -110
- package/src/learning/tests/test_learning_init_v28.py +0 -48
- package/src/learning/tests/test_outcome_signals.py +0 -48
- package/src/learning/tests/test_project_context.py +0 -292
- package/src/learning/tests/test_schema_migration.py +0 -319
- package/src/learning/tests/test_signal_inference.py +0 -397
- package/src/learning/tests/test_source_quality.py +0 -351
- package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
- package/src/learning/tests/test_workflow_miner.py +0 -318
- package/src/learning/workflow_pattern_miner.py +0 -655
- package/src/lifecycle/__init__.py +0 -54
- package/src/lifecycle/bounded_growth.py +0 -239
- package/src/lifecycle/compaction_engine.py +0 -226
- package/src/lifecycle/lifecycle_engine.py +0 -355
- package/src/lifecycle/lifecycle_evaluator.py +0 -257
- package/src/lifecycle/lifecycle_scheduler.py +0 -130
- package/src/lifecycle/retention_policy.py +0 -285
- package/src/lifecycle/tests/test_bounded_growth.py +0 -193
- package/src/lifecycle/tests/test_compaction.py +0 -179
- package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
- package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
- package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
- package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
- package/src/lifecycle/tests/test_mcp_compact.py +0 -149
- package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
- package/src/lifecycle/tests/test_retention_policy.py +0 -162
- package/src/mcp_tools_v28.py +0 -281
- package/src/memory/__init__.py +0 -36
- package/src/memory/cli.py +0 -205
- package/src/memory/constants.py +0 -39
- package/src/memory/helpers.py +0 -28
- package/src/memory/schema.py +0 -166
- package/src/memory-profiles.py +0 -595
- package/src/memory-reset.py +0 -491
- package/src/memory_compression.py +0 -989
- package/src/memory_store_v2.py +0 -1155
- package/src/migrate_v1_to_v2.py +0 -629
- package/src/pattern_learner.py +0 -34
- package/src/patterns/__init__.py +0 -24
- package/src/patterns/analyzers.py +0 -251
- package/src/patterns/learner.py +0 -271
- package/src/patterns/scoring.py +0 -171
- package/src/patterns/store.py +0 -225
- package/src/patterns/terminology.py +0 -140
- package/src/provenance_tracker.py +0 -312
- package/src/qualixar_attribution.py +0 -139
- package/src/qualixar_watermark.py +0 -78
- package/src/query_optimizer.py +0 -511
- package/src/rate_limiter.py +0 -83
- package/src/search/__init__.py +0 -20
- package/src/search/cli.py +0 -77
- package/src/search/constants.py +0 -26
- package/src/search/engine.py +0 -241
- package/src/search/fusion.py +0 -122
- package/src/search/index_loader.py +0 -114
- package/src/search/methods.py +0 -162
- package/src/search_engine_v2.py +0 -401
- package/src/setup_validator.py +0 -482
- package/src/subscription_manager.py +0 -391
- package/src/tree/__init__.py +0 -59
- package/src/tree/builder.py +0 -185
- package/src/tree/nodes.py +0 -202
- package/src/tree/queries.py +0 -257
- package/src/tree/schema.py +0 -80
- package/src/tree_manager.py +0 -19
- package/src/trust/__init__.py +0 -45
- package/src/trust/constants.py +0 -66
- package/src/trust/queries.py +0 -157
- package/src/trust/schema.py +0 -95
- package/src/trust/scorer.py +0 -299
- package/src/trust/signals.py +0 -95
- package/src/trust_scorer.py +0 -44
- package/ui/app.js +0 -1588
- package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
- package/ui/js/graph-cytoscape.js +0 -1168
- package/ui/js/graph-d3-backup.js +0 -32
- package/ui/js/graph.js +0 -32
- package/ui_server.py +0 -286
- /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
- /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
- /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
- /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
- /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
- /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
- /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
- /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
- /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
- /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
- /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
- /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
- /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
- /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
- /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
- /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
- /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
- /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
- /package/{completions → ide/completions}/slm.bash +0 -0
- /package/{completions → ide/completions}/slm.zsh +0 -0
- /package/{configs → ide/configs}/cody-commands.json +0 -0
- /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
- /package/{install.ps1 → scripts/install.ps1} +0 -0
- /package/{install.sh → scripts/install.sh} +0 -0
|
@@ -1,708 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
# Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
|
|
4
|
-
"""
|
|
5
|
-
FeatureExtractor — Extracts 20-dimensional feature vectors for candidate memories.
|
|
6
|
-
|
|
7
|
-
Each memory retrieved during recall gets a feature vector that feeds into
|
|
8
|
-
the AdaptiveRanker. In Phase 1 (rule-based), features drive boosting weights.
|
|
9
|
-
In Phase 2 (ML), features become LightGBM input columns.
|
|
10
|
-
|
|
11
|
-
Feature Vector (20 dimensions):
|
|
12
|
-
[0] bm25_score — Existing retrieval score from search results
|
|
13
|
-
[1] tfidf_score — TF-IDF cosine similarity from search results
|
|
14
|
-
[2] tech_match — Does memory match user's tech preferences?
|
|
15
|
-
[3] project_match — Is memory from the current project?
|
|
16
|
-
[4] workflow_fit — Does memory fit current workflow phase?
|
|
17
|
-
[5] source_quality — Quality score of the source that created this memory
|
|
18
|
-
[6] importance_norm — Normalized importance (importance / 10.0)
|
|
19
|
-
[7] recency_score — Exponential decay based on age (180-day half-life)
|
|
20
|
-
[8] access_frequency — How often this memory was accessed (capped at 1.0)
|
|
21
|
-
[9] pattern_confidence — Max Beta-Binomial confidence from learned patterns
|
|
22
|
-
[10] signal_count — Number of feedback signals for this memory (v2.7.4)
|
|
23
|
-
[11] avg_signal_value — Average signal value for this memory (v2.7.4)
|
|
24
|
-
[12] lifecycle_state — Encoded lifecycle state (v2.8)
|
|
25
|
-
[13] outcome_success_rate — Success rate from behavioral outcomes (v2.8)
|
|
26
|
-
[14] outcome_count — Number of outcomes recorded (v2.8)
|
|
27
|
-
[15] behavioral_match — Match against known success patterns (v2.8)
|
|
28
|
-
[16] cross_project_score — Cross-project transfer confidence (v2.8)
|
|
29
|
-
[17] retention_priority — Retention policy priority (v2.8)
|
|
30
|
-
[18] trust_at_creation — Trust score of creator agent (v2.8)
|
|
31
|
-
[19] lifecycle_aware_decay — Modified recency decay with lifecycle (v2.8)
|
|
32
|
-
|
|
33
|
-
Design Principles:
|
|
34
|
-
- All features normalized to [0.0, 1.0] range for ML compatibility
|
|
35
|
-
- Graceful defaults when data is missing (0.5 = "unknown/neutral")
|
|
36
|
-
- No external API calls — everything computed locally
|
|
37
|
-
- Context (tech preferences, current project) set once per recall batch
|
|
38
|
-
- Thread-safe: no shared mutable state after set_context()
|
|
39
|
-
|
|
40
|
-
v2.7.4: Expanded from 10 to 12 features. Auto-retrain triggered on mismatch.
|
|
41
|
-
v2.8.0: Expanded from 12 to 20 features. Lifecycle, behavioral, compliance dimensions.
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
|
-
import logging
|
|
45
|
-
import math
|
|
46
|
-
import re
|
|
47
|
-
from datetime import datetime, timezone
|
|
48
|
-
from typing import Any, Dict, List, Optional
|
|
49
|
-
|
|
50
|
-
logger = logging.getLogger("superlocalmemory.learning.feature_extractor")
|
|
51
|
-
|
|
52
|
-
# ============================================================================
|
|
53
|
-
# Feature Name Registry
|
|
54
|
-
# ============================================================================
|
|
55
|
-
|
|
56
|
-
FEATURE_NAMES = [
|
|
57
|
-
'bm25_score', # 0: Existing retrieval score (from search results)
|
|
58
|
-
'tfidf_score', # 1: TF-IDF cosine similarity (from search results)
|
|
59
|
-
'tech_match', # 2: Does memory match user's tech preferences?
|
|
60
|
-
'project_match', # 3: Is memory from the current project?
|
|
61
|
-
'workflow_fit', # 4: Does memory fit current workflow phase?
|
|
62
|
-
'source_quality', # 5: Quality score of the source that created this memory
|
|
63
|
-
'importance_norm', # 6: Normalized importance (importance / 10.0)
|
|
64
|
-
'recency_score', # 7: Exponential decay based on age
|
|
65
|
-
'access_frequency', # 8: How often this memory was accessed (capped at 1.0)
|
|
66
|
-
'pattern_confidence', # 9: Max Beta-Binomial confidence from learned patterns
|
|
67
|
-
'signal_count', # 10: Number of feedback signals for this memory (v2.7.4)
|
|
68
|
-
'avg_signal_value', # 11: Average signal value for this memory (v2.7.4)
|
|
69
|
-
'lifecycle_state', # 12: Encoded lifecycle state (v2.8)
|
|
70
|
-
'outcome_success_rate', # 13: Success rate from behavioral outcomes (v2.8)
|
|
71
|
-
'outcome_count', # 14: Number of outcomes recorded (v2.8)
|
|
72
|
-
'behavioral_match', # 15: Match against known success patterns (v2.8)
|
|
73
|
-
'cross_project_score', # 16: Cross-project transfer confidence (v2.8)
|
|
74
|
-
'retention_priority', # 17: Retention policy priority (v2.8)
|
|
75
|
-
'trust_at_creation', # 18: Trust score of creator agent (v2.8)
|
|
76
|
-
'lifecycle_aware_decay', # 19: Modified recency decay with lifecycle (v2.8)
|
|
77
|
-
]
|
|
78
|
-
|
|
79
|
-
NUM_FEATURES = len(FEATURE_NAMES)
|
|
80
|
-
|
|
81
|
-
# Workflow phase keywords — maps workflow phase to content signals
|
|
82
|
-
_WORKFLOW_PHASE_KEYWORDS = {
|
|
83
|
-
'planning': [
|
|
84
|
-
'architecture', 'design', 'plan', 'roadmap', 'decision',
|
|
85
|
-
'approach', 'strategy', 'requirement', 'spec', 'rfc',
|
|
86
|
-
],
|
|
87
|
-
'coding': [
|
|
88
|
-
'implement', 'function', 'class', 'method', 'api',
|
|
89
|
-
'code', 'module', 'refactor', 'pattern', 'library',
|
|
90
|
-
],
|
|
91
|
-
'testing': [
|
|
92
|
-
'test', 'assert', 'mock', 'fixture', 'coverage',
|
|
93
|
-
'pytest', 'jest', 'spec', 'validation', 'regression',
|
|
94
|
-
],
|
|
95
|
-
'debugging': [
|
|
96
|
-
'bug', 'error', 'fix', 'issue', 'traceback',
|
|
97
|
-
'debug', 'crash', 'exception', 'stack', 'log',
|
|
98
|
-
],
|
|
99
|
-
'deployment': [
|
|
100
|
-
'deploy', 'docker', 'kubernetes', 'ci/cd', 'pipeline',
|
|
101
|
-
'release', 'production', 'staging', 'env', 'config',
|
|
102
|
-
],
|
|
103
|
-
'review': [
|
|
104
|
-
'review', 'pr', 'merge', 'feedback', 'comment',
|
|
105
|
-
'approve', 'change', 'diff', 'suggestion', 'lint',
|
|
106
|
-
],
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
# Half-life for recency decay (in days)
|
|
110
|
-
_RECENCY_HALF_LIFE_DAYS = 180.0
|
|
111
|
-
|
|
112
|
-
# Maximum access count before capping to 1.0
|
|
113
|
-
_MAX_ACCESS_COUNT = 10
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
class FeatureExtractor:
|
|
117
|
-
"""
|
|
118
|
-
Extracts 20-dimensional feature vectors for candidate memories.
|
|
119
|
-
|
|
120
|
-
Usage:
|
|
121
|
-
extractor = FeatureExtractor()
|
|
122
|
-
extractor.set_context(
|
|
123
|
-
source_scores={'claude-desktop': 0.8, 'cursor': 0.6},
|
|
124
|
-
tech_preferences={'python': {'confidence': 0.9}, 'react': {'confidence': 0.7}},
|
|
125
|
-
current_project='SuperLocalMemoryV2',
|
|
126
|
-
workflow_phase='testing',
|
|
127
|
-
signal_stats={'42': {'count': 5, 'avg_value': 0.8}},
|
|
128
|
-
)
|
|
129
|
-
features = extractor.extract_batch(memories, query="search optimization")
|
|
130
|
-
# features is List[List[float]], shape (n_memories, 20)
|
|
131
|
-
"""
|
|
132
|
-
|
|
133
|
-
FEATURE_NAMES = FEATURE_NAMES
|
|
134
|
-
|
|
135
|
-
def __init__(self):
|
|
136
|
-
"""Initialize FeatureExtractor with empty context."""
|
|
137
|
-
self._source_scores: Dict[str, float] = {}
|
|
138
|
-
self._tech_preferences: Dict[str, dict] = {}
|
|
139
|
-
self._tech_keywords_lower: List[str] = []
|
|
140
|
-
self._current_project: Optional[str] = None
|
|
141
|
-
self._current_project_lower: Optional[str] = None
|
|
142
|
-
self._workflow_phase: Optional[str] = None
|
|
143
|
-
self._workflow_keywords: List[str] = []
|
|
144
|
-
# Pattern confidence cache: maps lowercased pattern value -> confidence
|
|
145
|
-
self._pattern_cache: Dict[str, float] = {}
|
|
146
|
-
# Signal stats cache: maps str(memory_id) -> {count, avg_value} (v2.7.4)
|
|
147
|
-
self._signal_stats: Dict[str, Dict[str, float]] = {}
|
|
148
|
-
|
|
149
|
-
def set_context(
    self,
    source_scores: Optional[Dict[str, float]] = None,
    tech_preferences: Optional[Dict[str, dict]] = None,
    current_project: Optional[str] = None,
    workflow_phase: Optional[str] = None,
    pattern_confidences: Optional[Dict[str, float]] = None,
    signal_stats: Optional[Dict[str, Dict[str, float]]] = None,
):
    """
    Install the per-query context used by the feature computations.

    Intended to be called once per recall query so that the same cached
    values are reused for every candidate memory in the batch. Any
    argument left as None (or empty) resets that piece of context.

    Args:
        source_scores: Map of source_id -> quality score (0.0-1.0).
        tech_preferences: Map of tech_name -> preference details; only the
            keys are used here, lowercased, for keyword matching.
        current_project: Name of the currently active project, if any.
        workflow_phase: Current workflow phase; used to look up the
            phase's keyword list in _WORKFLOW_PHASE_KEYWORDS.
        pattern_confidences: Map of lowercased pattern value -> confidence
            (0.0-1.0). Used for feature [9] pattern_confidence.
        signal_stats: Map of str(memory_id) -> {count, avg_value}.
            Used for features [10-11].
    """
    self._source_scores = source_scores if source_scores else {}
    self._tech_preferences = tech_preferences if tech_preferences else {}

    # Lowercase the preference names once so matching is cheap per memory.
    self._tech_keywords_lower = [name.lower() for name in self._tech_preferences]

    self._current_project = current_project
    if current_project:
        self._current_project_lower = current_project.lower()
    else:
        self._current_project_lower = None

    self._workflow_phase = workflow_phase
    if workflow_phase:
        # Unknown phases simply get an empty keyword list.
        self._workflow_keywords = _WORKFLOW_PHASE_KEYWORDS.get(workflow_phase, [])
    else:
        self._workflow_keywords = []

    # Feature [9]
    self._pattern_cache = pattern_confidences if pattern_confidences else {}

    # Features [10-11] (v2.7.4)
    self._signal_stats = signal_stats if signal_stats else {}
|
|
201
|
-
|
|
202
|
-
def extract_features(self, memory: dict, query: str) -> List[float]:
    """
    Build the 20-element feature vector for one memory.

    Each element is produced by the matching _compute_* method, in the
    fixed order listed below (the position is the feature index).

    Args:
        memory: Memory dict from search results. Keys read by the
            computations include id, content, score, match_type,
            importance, created_at, access_count, project_name, tags,
            created_by, and the optional v2.8 keys lifecycle_state,
            outcome_success_rate, outcome_count, behavioral_match,
            cross_project_score, retention_priority, trust_at_creation.
        query: The recall query string. It is accepted for interface
            compatibility; none of the computations called here receive it.

    Returns:
        List of 20 floats, one per feature.
    """
    computations = (
        self._compute_bm25_score,             # 0
        self._compute_tfidf_score,            # 1
        self._compute_tech_match,             # 2
        self._compute_project_match,          # 3
        self._compute_workflow_fit,           # 4
        self._compute_source_quality,         # 5
        self._compute_importance_norm,        # 6
        self._compute_recency_score,          # 7
        self._compute_access_frequency,       # 8
        self._compute_pattern_confidence,     # 9
        self._compute_signal_count,           # 10
        self._compute_avg_signal_value,       # 11
        # v2.8 features: lifecycle, behavioral, compliance
        self._compute_lifecycle_state,        # 12
        self._compute_outcome_success_rate,   # 13
        self._compute_outcome_count,          # 14
        self._compute_behavioral_match,       # 15
        self._compute_cross_project_score,    # 16
        self._compute_retention_priority,     # 17
        self._compute_trust_at_creation,      # 18
        self._compute_lifecycle_aware_decay,  # 19
    )
    return [compute(memory) for compute in computations]
|
|
241
|
-
|
|
242
|
-
def extract_batch(
    self,
    memories: List[dict],
    query: str,
) -> List[List[float]]:
    """
    Build feature vectors for every candidate memory.

    Args:
        memories: Memory dicts from search results.
        query: The recall query string, forwarded to extract_features().

    Returns:
        One feature vector per memory, in input order. An empty (or
        otherwise falsy) ``memories`` yields an empty list.
    """
    # ``or ()`` makes a falsy input iterate zero times, i.e. return [].
    return [self.extract_features(candidate, query) for candidate in memories or ()]
|
|
262
|
-
|
|
263
|
-
# ========================================================================
|
|
264
|
-
# Individual Feature Computations
|
|
265
|
-
# ========================================================================
|
|
266
|
-
|
|
267
|
-
def _compute_bm25_score(self, memory: dict) -> float:
|
|
268
|
-
"""
|
|
269
|
-
Use 'score' field from search results for keyword-based retrieval.
|
|
270
|
-
|
|
271
|
-
BM25/FTS5 rank scores are not naturally bounded to [0,1], so we
|
|
272
|
-
apply a simple normalization. For keyword matches, score is
|
|
273
|
-
typically set to 0.5 by MemoryStoreV2._row_to_dict(). For semantic
|
|
274
|
-
matches, score is already in [0,1] from cosine similarity.
|
|
275
|
-
|
|
276
|
-
We use match_type to distinguish: 'keyword' -> treat as BM25 signal,
|
|
277
|
-
'semantic'/'hnsw' -> set to 0.0 (not a BM25 signal).
|
|
278
|
-
"""
|
|
279
|
-
match_type = memory.get('match_type', '')
|
|
280
|
-
if match_type == 'keyword':
|
|
281
|
-
# FTS5 keyword match — normalize the rank score
|
|
282
|
-
score = memory.get('score', 0.0)
|
|
283
|
-
# FTS5 rank is negative (lower = better), score field is already
|
|
284
|
-
# mapped to 0.5 by _row_to_dict, so use it directly
|
|
285
|
-
return max(0.0, min(float(score), 1.0))
|
|
286
|
-
# Not a keyword match — no BM25 signal
|
|
287
|
-
return 0.0
|
|
288
|
-
|
|
289
|
-
def _compute_tfidf_score(self, memory: dict) -> float:
|
|
290
|
-
"""
|
|
291
|
-
Use cosine similarity score from TF-IDF semantic search.
|
|
292
|
-
|
|
293
|
-
For semantic matches, the score field contains the cosine
|
|
294
|
-
similarity (already in [0,1]). For keyword-only matches,
|
|
295
|
-
this returns 0.0.
|
|
296
|
-
"""
|
|
297
|
-
match_type = memory.get('match_type', '')
|
|
298
|
-
if match_type in ('semantic', 'hnsw'):
|
|
299
|
-
score = memory.get('score', 0.0)
|
|
300
|
-
return max(0.0, min(float(score), 1.0))
|
|
301
|
-
return 0.0
|
|
302
|
-
|
|
303
|
-
def _compute_tech_match(self, memory: dict) -> float:
|
|
304
|
-
"""
|
|
305
|
-
Check if memory content mentions user's preferred technologies.
|
|
306
|
-
|
|
307
|
-
Returns:
|
|
308
|
-
1.0 if strong match (2+ tech keywords found)
|
|
309
|
-
0.5 if weak match (1 tech keyword found)
|
|
310
|
-
0.0 if no match or no tech preferences set
|
|
311
|
-
"""
|
|
312
|
-
if not self._tech_keywords_lower:
|
|
313
|
-
return 0.5 # No preferences known — neutral
|
|
314
|
-
|
|
315
|
-
content = memory.get('content', '')
|
|
316
|
-
if not content:
|
|
317
|
-
return 0.0
|
|
318
|
-
|
|
319
|
-
content_lower = content.lower()
|
|
320
|
-
tags_str = ''
|
|
321
|
-
tags = memory.get('tags', [])
|
|
322
|
-
if isinstance(tags, list):
|
|
323
|
-
tags_str = ' '.join(t.lower() for t in tags)
|
|
324
|
-
elif isinstance(tags, str):
|
|
325
|
-
tags_str = tags.lower()
|
|
326
|
-
|
|
327
|
-
searchable = content_lower + ' ' + tags_str
|
|
328
|
-
match_count = 0
|
|
329
|
-
|
|
330
|
-
for tech_kw in self._tech_keywords_lower:
|
|
331
|
-
# Word-boundary check for short keywords to avoid false positives
|
|
332
|
-
# e.g., "go" matching "google" — require word boundary
|
|
333
|
-
if len(tech_kw) <= 3:
|
|
334
|
-
if re.search(r'\b' + re.escape(tech_kw) + r'\b', searchable):
|
|
335
|
-
match_count += 1
|
|
336
|
-
else:
|
|
337
|
-
if tech_kw in searchable:
|
|
338
|
-
match_count += 1
|
|
339
|
-
|
|
340
|
-
if match_count >= 2:
|
|
341
|
-
return 1.0
|
|
342
|
-
elif match_count == 1:
|
|
343
|
-
return 0.5
|
|
344
|
-
return 0.0
|
|
345
|
-
|
|
346
|
-
def _compute_project_match(self, memory: dict) -> float:
|
|
347
|
-
"""
|
|
348
|
-
Check if memory belongs to the currently active project.
|
|
349
|
-
|
|
350
|
-
Returns:
|
|
351
|
-
1.0 if memory's project_name matches current_project
|
|
352
|
-
0.6 if no current project detected (neutral — don't penalize)
|
|
353
|
-
0.3 if memory is from a different project
|
|
354
|
-
0.5 if memory has no project_name (unknown)
|
|
355
|
-
"""
|
|
356
|
-
if self._current_project_lower is None:
|
|
357
|
-
# No current project context — neutral for all
|
|
358
|
-
return 0.6
|
|
359
|
-
|
|
360
|
-
memory_project = memory.get('project_name', '')
|
|
361
|
-
if not memory_project:
|
|
362
|
-
return 0.5 # Memory has no project — slightly neutral
|
|
363
|
-
|
|
364
|
-
if memory_project.lower() == self._current_project_lower:
|
|
365
|
-
return 1.0
|
|
366
|
-
return 0.3
|
|
367
|
-
|
|
368
|
-
def _compute_workflow_fit(self, memory: dict) -> float:
|
|
369
|
-
"""
|
|
370
|
-
Check if memory content aligns with the current workflow phase.
|
|
371
|
-
|
|
372
|
-
Returns:
|
|
373
|
-
0.8 if strong fit (3+ keywords match)
|
|
374
|
-
0.6 if moderate fit (1-2 keywords match)
|
|
375
|
-
0.5 if unknown workflow phase (neutral)
|
|
376
|
-
0.3 if no fit at all
|
|
377
|
-
"""
|
|
378
|
-
if not self._workflow_keywords:
|
|
379
|
-
return 0.5 # No workflow phase known — neutral
|
|
380
|
-
|
|
381
|
-
content = memory.get('content', '')
|
|
382
|
-
if not content:
|
|
383
|
-
return 0.3
|
|
384
|
-
|
|
385
|
-
content_lower = content.lower()
|
|
386
|
-
match_count = sum(
|
|
387
|
-
1 for kw in self._workflow_keywords
|
|
388
|
-
if kw in content_lower
|
|
389
|
-
)
|
|
390
|
-
|
|
391
|
-
if match_count >= 3:
|
|
392
|
-
return 0.8
|
|
393
|
-
elif match_count >= 1:
|
|
394
|
-
return 0.6
|
|
395
|
-
return 0.3
|
|
396
|
-
|
|
397
|
-
def _compute_source_quality(self, memory: dict) -> float:
|
|
398
|
-
"""
|
|
399
|
-
Look up source quality from cached scores.
|
|
400
|
-
|
|
401
|
-
Returns:
|
|
402
|
-
The source's quality score if known (0.0-1.0)
|
|
403
|
-
0.5 for unknown sources (neutral default)
|
|
404
|
-
"""
|
|
405
|
-
# Try created_by first (v2.5+ provenance), then source_tool
|
|
406
|
-
source_id = memory.get('created_by') or memory.get('source_tool', '')
|
|
407
|
-
if not source_id:
|
|
408
|
-
return 0.5 # Unknown source — neutral
|
|
409
|
-
|
|
410
|
-
return self._source_scores.get(source_id, 0.5)
|
|
411
|
-
|
|
412
|
-
def _compute_importance_norm(self, memory: dict) -> float:
|
|
413
|
-
"""
|
|
414
|
-
Normalize importance to [0.0, 1.0].
|
|
415
|
-
|
|
416
|
-
importance is stored as 1-10 integer in memory.db.
|
|
417
|
-
Dividing by 10.0 gives clean normalization.
|
|
418
|
-
"""
|
|
419
|
-
importance = memory.get('importance', 5)
|
|
420
|
-
if importance is None:
|
|
421
|
-
importance = 5
|
|
422
|
-
try:
|
|
423
|
-
importance = int(importance)
|
|
424
|
-
except (ValueError, TypeError):
|
|
425
|
-
importance = 5
|
|
426
|
-
# Clamp to valid range before normalizing
|
|
427
|
-
importance = max(1, min(importance, 10))
|
|
428
|
-
return importance / 10.0
|
|
429
|
-
|
|
430
|
-
def _compute_recency_score(self, memory: dict) -> float:
|
|
431
|
-
"""
|
|
432
|
-
Exponential decay based on memory age.
|
|
433
|
-
|
|
434
|
-
Formula: exp(-age_days / half_life)
|
|
435
|
-
With 180-day half-life:
|
|
436
|
-
- 0 days old -> 1.0
|
|
437
|
-
- 30 days old -> ~0.85
|
|
438
|
-
- 90 days old -> ~0.61
|
|
439
|
-
- 180 days old -> ~0.37
|
|
440
|
-
- 365 days old -> ~0.13
|
|
441
|
-
|
|
442
|
-
Handles missing, None, or malformed created_at gracefully.
|
|
443
|
-
"""
|
|
444
|
-
created_at = memory.get('created_at')
|
|
445
|
-
if not created_at:
|
|
446
|
-
return 0.5 # Unknown age — neutral
|
|
447
|
-
|
|
448
|
-
try:
|
|
449
|
-
# Parse the timestamp — handle multiple formats
|
|
450
|
-
if isinstance(created_at, str):
|
|
451
|
-
# Try ISO format first (most common in SQLite)
|
|
452
|
-
created_at = created_at.replace('Z', '+00:00')
|
|
453
|
-
try:
|
|
454
|
-
created_dt = datetime.fromisoformat(created_at)
|
|
455
|
-
except ValueError:
|
|
456
|
-
# Fallback: try common SQLite format
|
|
457
|
-
created_dt = datetime.strptime(
|
|
458
|
-
created_at, '%Y-%m-%d %H:%M:%S'
|
|
459
|
-
)
|
|
460
|
-
elif isinstance(created_at, (int, float)):
|
|
461
|
-
created_dt = datetime.fromtimestamp(created_at)
|
|
462
|
-
else:
|
|
463
|
-
return 0.5
|
|
464
|
-
|
|
465
|
-
# Make timezone-naive for comparison
|
|
466
|
-
if created_dt.tzinfo is not None:
|
|
467
|
-
created_dt = created_dt.replace(tzinfo=None)
|
|
468
|
-
|
|
469
|
-
now = datetime.now()
|
|
470
|
-
age_days = max(0, (now - created_dt).total_seconds() / 86400.0)
|
|
471
|
-
|
|
472
|
-
# Exponential decay: e^(-age / half_life)
|
|
473
|
-
score = math.exp(-age_days / _RECENCY_HALF_LIFE_DAYS)
|
|
474
|
-
return max(0.0, min(score, 1.0))
|
|
475
|
-
|
|
476
|
-
except (ValueError, TypeError, OverflowError, OSError) as e:
|
|
477
|
-
logger.debug("Failed to parse created_at for recency: %s", e)
|
|
478
|
-
return 0.5 # Parse failure — neutral
|
|
479
|
-
|
|
480
|
-
def _compute_access_frequency(self, memory: dict) -> float:
    """
    Normalize access_count to [0.0, 1.0], saturating at _MAX_ACCESS_COUNT.

    A missing, None, or unconvertible access_count is treated as 0.
    Negative counts are clamped to 0.0 so the feature never leaves the
    [0, 1] range. Capping keeps heavily accessed memories from dominating.
    """
    access_count = memory.get('access_count', 0)
    if access_count is None:
        access_count = 0
    try:
        access_count = int(access_count)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float('inf'))
        access_count = 0

    return max(0.0, min(access_count / float(_MAX_ACCESS_COUNT), 1.0))
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
def _compute_signal_count(self, memory: dict) -> float:
|
|
499
|
-
"""
|
|
500
|
-
Number of feedback signals for this memory, normalized to [0, 1].
|
|
501
|
-
|
|
502
|
-
Uses cached signal_stats from learning.db. Capped at 10 signals.
|
|
503
|
-
Memories with more feedback signals are more "known" to the system.
|
|
504
|
-
|
|
505
|
-
Returns:
|
|
506
|
-
min(count / 10.0, 1.0) — 0.0 if no signals, 1.0 if 10+ signals
|
|
507
|
-
0.0 if no signal stats available (v2.7.3 or earlier)
|
|
508
|
-
"""
|
|
509
|
-
memory_id = str(memory.get('id', ''))
|
|
510
|
-
if not memory_id or not self._signal_stats:
|
|
511
|
-
return 0.0
|
|
512
|
-
|
|
513
|
-
stats = self._signal_stats.get(memory_id, {})
|
|
514
|
-
count = stats.get('count', 0)
|
|
515
|
-
return min(count / 10.0, 1.0)
|
|
516
|
-
|
|
517
|
-
def _compute_avg_signal_value(self, memory: dict) -> float:
|
|
518
|
-
"""
|
|
519
|
-
Average signal value for this memory.
|
|
520
|
-
|
|
521
|
-
Uses cached signal_stats from learning.db. Gives the ranker a direct
|
|
522
|
-
view of whether this memory's feedback is positive (>0.5) or negative (<0.5).
|
|
523
|
-
|
|
524
|
-
Returns:
|
|
525
|
-
Average signal value (0.0-1.0), or 0.5 (neutral) if no data.
|
|
526
|
-
"""
|
|
527
|
-
memory_id = str(memory.get('id', ''))
|
|
528
|
-
if not memory_id or not self._signal_stats:
|
|
529
|
-
return 0.5 # Neutral default
|
|
530
|
-
|
|
531
|
-
stats = self._signal_stats.get(memory_id, {})
|
|
532
|
-
avg = stats.get('avg_value', 0.5)
|
|
533
|
-
return max(0.0, min(float(avg), 1.0))
|
|
534
|
-
|
|
535
|
-
def _compute_pattern_confidence(self, memory: dict) -> float:
|
|
536
|
-
"""
|
|
537
|
-
Compute max Beta-Binomial confidence from learned patterns matching this memory.
|
|
538
|
-
|
|
539
|
-
Looks up the cached pattern_confidences (set via set_context) and checks
|
|
540
|
-
if any pattern value appears in the memory's content or tags. Returns the
|
|
541
|
-
maximum confidence among all matching patterns.
|
|
542
|
-
|
|
543
|
-
Returns:
|
|
544
|
-
Max confidence (0.0-1.0) from matching patterns
|
|
545
|
-
0.5 if no patterns loaded (neutral — unknown)
|
|
546
|
-
0.0 if patterns loaded but none match
|
|
547
|
-
"""
|
|
548
|
-
if not self._pattern_cache:
|
|
549
|
-
return 0.5 # No patterns available — neutral
|
|
550
|
-
|
|
551
|
-
content = memory.get('content', '')
|
|
552
|
-
if not content:
|
|
553
|
-
return 0.0
|
|
554
|
-
|
|
555
|
-
content_lower = content.lower()
|
|
556
|
-
|
|
557
|
-
# Also check tags
|
|
558
|
-
tags_str = ''
|
|
559
|
-
tags = memory.get('tags', [])
|
|
560
|
-
if isinstance(tags, list):
|
|
561
|
-
tags_str = ' '.join(t.lower() for t in tags)
|
|
562
|
-
elif isinstance(tags, str):
|
|
563
|
-
tags_str = tags.lower()
|
|
564
|
-
|
|
565
|
-
searchable = content_lower + ' ' + tags_str
|
|
566
|
-
|
|
567
|
-
max_confidence = 0.0
|
|
568
|
-
for pattern_value, confidence in self._pattern_cache.items():
|
|
569
|
-
# Pattern values are already lowercased in the cache
|
|
570
|
-
pattern_lower = pattern_value.lower() if pattern_value else ''
|
|
571
|
-
if not pattern_lower:
|
|
572
|
-
continue
|
|
573
|
-
# Word-boundary check for short patterns to avoid false positives
|
|
574
|
-
if len(pattern_lower) <= 3:
|
|
575
|
-
if re.search(r'\b' + re.escape(pattern_lower) + r'\b', searchable):
|
|
576
|
-
max_confidence = max(max_confidence, confidence)
|
|
577
|
-
else:
|
|
578
|
-
if pattern_lower in searchable:
|
|
579
|
-
max_confidence = max(max_confidence, confidence)
|
|
580
|
-
|
|
581
|
-
return max(0.0, min(max_confidence, 1.0))
|
|
582
|
-
|
|
583
|
-
# ========================================================================
|
|
584
|
-
# v2.8 Feature Computations: Lifecycle, Behavioral, Compliance
|
|
585
|
-
# ========================================================================
|
|
586
|
-
|
|
587
|
-
def _compute_lifecycle_state(self, memory: dict) -> float:
|
|
588
|
-
"""
|
|
589
|
-
Encode lifecycle state as numeric value.
|
|
590
|
-
|
|
591
|
-
State mapping:
|
|
592
|
-
active = 1.0 (fully available, highest priority)
|
|
593
|
-
warm = 0.7 (less frequently accessed, still relevant)
|
|
594
|
-
cold = 0.4 (rarely accessed, compressed)
|
|
595
|
-
archived = 0.1 (long-term storage, minimal priority)
|
|
596
|
-
tombstoned = 0.0 (pending deletion)
|
|
597
|
-
|
|
598
|
-
Default: 'active' (1.0) — backward compatible with pre-v2.8 memories
|
|
599
|
-
that have no lifecycle_state field.
|
|
600
|
-
"""
|
|
601
|
-
state_map = {
|
|
602
|
-
'active': 1.0,
|
|
603
|
-
'warm': 0.7,
|
|
604
|
-
'cold': 0.4,
|
|
605
|
-
'archived': 0.1,
|
|
606
|
-
'tombstoned': 0.0,
|
|
607
|
-
}
|
|
608
|
-
return state_map.get(memory.get('lifecycle_state', 'active'), 1.0)
|
|
609
|
-
|
|
610
|
-
def _compute_outcome_success_rate(self, memory: dict) -> float:
|
|
611
|
-
"""
|
|
612
|
-
Success rate from behavioral outcomes.
|
|
613
|
-
|
|
614
|
-
Reads directly from the memory dict — the behavioral engine
|
|
615
|
-
enriches memories with this field during recall.
|
|
616
|
-
|
|
617
|
-
Default: 0.5 (neutral — no outcome data available).
|
|
618
|
-
"""
|
|
619
|
-
return float(memory.get('outcome_success_rate', 0.5))
|
|
620
|
-
|
|
621
|
-
def _compute_outcome_count(self, memory: dict) -> float:
|
|
622
|
-
"""
|
|
623
|
-
Number of outcomes recorded, normalized to [0.0, 1.0].
|
|
624
|
-
|
|
625
|
-
Capped at 20 outcomes (count / 20.0). Memories with more
|
|
626
|
-
behavioral observations are better calibrated.
|
|
627
|
-
|
|
628
|
-
Default: 0 outcomes (returns 0.0).
|
|
629
|
-
"""
|
|
630
|
-
count = memory.get('outcome_count', 0)
|
|
631
|
-
try:
|
|
632
|
-
count = int(count)
|
|
633
|
-
except (ValueError, TypeError):
|
|
634
|
-
count = 0
|
|
635
|
-
return min(count / 20.0, 1.0)
|
|
636
|
-
|
|
637
|
-
def _compute_behavioral_match(self, memory: dict) -> float:
|
|
638
|
-
"""
|
|
639
|
-
How well this memory matches known success patterns.
|
|
640
|
-
|
|
641
|
-
The behavioral engine computes this by comparing the memory's
|
|
642
|
-
characteristics against patterns extracted from successful outcomes.
|
|
643
|
-
|
|
644
|
-
Default: 0.0 (no behavioral match data available).
|
|
645
|
-
"""
|
|
646
|
-
return float(memory.get('behavioral_match', 0.0))
|
|
647
|
-
|
|
648
|
-
def _compute_cross_project_score(self, memory: dict) -> float:
|
|
649
|
-
"""
|
|
650
|
-
Cross-project transfer confidence.
|
|
651
|
-
|
|
652
|
-
Measures how likely this memory's knowledge transfers successfully
|
|
653
|
-
across project boundaries (e.g., a Python pattern useful in any project).
|
|
654
|
-
|
|
655
|
-
Default: 0.0 (no cross-project data available).
|
|
656
|
-
"""
|
|
657
|
-
return float(memory.get('cross_project_score', 0.0))
|
|
658
|
-
|
|
659
|
-
def _compute_retention_priority(self, memory: dict) -> float:
|
|
660
|
-
"""
|
|
661
|
-
Retention policy priority.
|
|
662
|
-
|
|
663
|
-
1.0 = protected by retention policy (must not be deleted)
|
|
664
|
-
0.5 = default (no special retention rules)
|
|
665
|
-
0.0 = eligible for aggressive cleanup
|
|
666
|
-
|
|
667
|
-
Default: 0.5 (standard retention).
|
|
668
|
-
"""
|
|
669
|
-
return float(memory.get('retention_priority', 0.5))
|
|
670
|
-
|
|
671
|
-
def _compute_trust_at_creation(self, memory: dict) -> float:
|
|
672
|
-
"""
|
|
673
|
-
Trust score of the agent that created this memory.
|
|
674
|
-
|
|
675
|
-
Human-created memories default to 0.8 (high trust).
|
|
676
|
-
Agent-created memories can have lower trust until validated.
|
|
677
|
-
|
|
678
|
-
Default: 0.8 (assumes human creator for backward compatibility).
|
|
679
|
-
"""
|
|
680
|
-
return float(memory.get('trust_at_creation', 0.8))
|
|
681
|
-
|
|
682
|
-
def _compute_lifecycle_aware_decay(self, memory: dict) -> float:
|
|
683
|
-
"""
|
|
684
|
-
Modified recency decay that factors in lifecycle state.
|
|
685
|
-
|
|
686
|
-
Combines the standard exponential recency decay with the lifecycle
|
|
687
|
-
state encoding. This means archived memories decay faster than
|
|
688
|
-
active memories of the same age, creating a compound signal.
|
|
689
|
-
|
|
690
|
-
Formula: recency_score * lifecycle_state_value
|
|
691
|
-
"""
|
|
692
|
-
recency = self._compute_recency_score(memory)
|
|
693
|
-
lifecycle = self._compute_lifecycle_state(memory)
|
|
694
|
-
return recency * lifecycle
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
# ============================================================================
|
|
698
|
-
# Module-level convenience functions
|
|
699
|
-
# ============================================================================
|
|
700
|
-
|
|
701
|
-
def get_feature_names() -> List[str]:
    """Return a fresh list of feature names, ordered to match vector indices."""
    # Copy so callers cannot mutate the module-level FEATURE_NAMES.
    return [*FEATURE_NAMES]
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
def get_num_features() -> int:
    """Return the length of the feature vector (the module's NUM_FEATURES)."""
    feature_total = NUM_FEATURES
    return feature_total
|