superlocalmemory 2.8.6 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. package/LICENSE +9 -1
  2. package/NOTICE +63 -0
  3. package/README.md +165 -480
  4. package/bin/slm +17 -449
  5. package/bin/slm-npm +62 -48
  6. package/conftest.py +5 -0
  7. package/docs/api-reference.md +284 -0
  8. package/docs/architecture.md +149 -0
  9. package/docs/auto-memory.md +150 -0
  10. package/docs/cli-reference.md +276 -0
  11. package/docs/compliance.md +191 -0
  12. package/docs/configuration.md +182 -0
  13. package/docs/getting-started.md +102 -0
  14. package/docs/ide-setup.md +261 -0
  15. package/docs/mcp-tools.md +220 -0
  16. package/docs/migration-from-v2.md +170 -0
  17. package/docs/profiles.md +173 -0
  18. package/docs/troubleshooting.md +310 -0
  19. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  20. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  21. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  22. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  23. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  24. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  25. package/ide/configs/cursor-mcp.json +15 -0
  26. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  27. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  28. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  29. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  30. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  31. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  32. package/{configs → ide/configs}/zed-mcp.json +2 -2
  33. package/{hooks → ide/hooks}/context-hook.js +9 -20
  34. package/ide/hooks/memory-list-skill.js +70 -0
  35. package/ide/hooks/memory-profile-skill.js +101 -0
  36. package/ide/hooks/memory-recall-skill.js +62 -0
  37. package/ide/hooks/memory-remember-skill.js +68 -0
  38. package/ide/hooks/memory-reset-skill.js +160 -0
  39. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  40. package/ide/integrations/langchain/README.md +106 -0
  41. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  42. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  43. package/ide/integrations/langchain/pyproject.toml +38 -0
  44. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  45. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  46. package/ide/integrations/langchain/tests/test_security.py +117 -0
  47. package/ide/integrations/llamaindex/README.md +81 -0
  48. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  49. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  50. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  51. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  52. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  53. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  54. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  55. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  56. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  57. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  58. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  59. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  60. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  61. package/package.json +13 -22
  62. package/pyproject.toml +85 -0
  63. package/scripts/build-dmg.sh +417 -0
  64. package/scripts/install-skills.ps1 +334 -0
  65. package/scripts/postinstall.js +2 -2
  66. package/scripts/start-dashboard.ps1 +52 -0
  67. package/scripts/start-dashboard.sh +41 -0
  68. package/scripts/sync-wiki.ps1 +127 -0
  69. package/scripts/sync-wiki.sh +82 -0
  70. package/scripts/test-dmg.sh +161 -0
  71. package/scripts/test-npm-package.ps1 +252 -0
  72. package/scripts/test-npm-package.sh +207 -0
  73. package/scripts/verify-install.ps1 +294 -0
  74. package/scripts/verify-install.sh +266 -0
  75. package/src/superlocalmemory/__init__.py +0 -0
  76. package/src/superlocalmemory/attribution/__init__.py +9 -0
  77. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  78. package/src/superlocalmemory/attribution/signer.py +153 -0
  79. package/src/superlocalmemory/attribution/watermark.py +189 -0
  80. package/src/superlocalmemory/cli/__init__.py +5 -0
  81. package/src/superlocalmemory/cli/commands.py +245 -0
  82. package/src/superlocalmemory/cli/main.py +89 -0
  83. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  84. package/src/superlocalmemory/cli/post_install.py +99 -0
  85. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  86. package/src/superlocalmemory/compliance/__init__.py +0 -0
  87. package/src/superlocalmemory/compliance/abac.py +204 -0
  88. package/src/superlocalmemory/compliance/audit.py +314 -0
  89. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  90. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  91. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  92. package/src/superlocalmemory/compliance/retention.py +232 -0
  93. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  94. package/src/superlocalmemory/core/__init__.py +0 -0
  95. package/src/superlocalmemory/core/config.py +391 -0
  96. package/src/superlocalmemory/core/embeddings.py +293 -0
  97. package/src/superlocalmemory/core/engine.py +701 -0
  98. package/src/superlocalmemory/core/hooks.py +65 -0
  99. package/src/superlocalmemory/core/maintenance.py +172 -0
  100. package/src/superlocalmemory/core/modes.py +140 -0
  101. package/src/superlocalmemory/core/profiles.py +234 -0
  102. package/src/superlocalmemory/core/registry.py +117 -0
  103. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  104. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  105. package/src/superlocalmemory/encoding/__init__.py +0 -0
  106. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  107. package/src/superlocalmemory/encoding/emotional.py +125 -0
  108. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  109. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  110. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  111. package/src/superlocalmemory/encoding/foresight.py +91 -0
  112. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  113. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  114. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  115. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  116. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  117. package/src/superlocalmemory/encoding/type_router.py +235 -0
  118. package/src/superlocalmemory/hooks/__init__.py +3 -0
  119. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  120. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  121. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  122. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  123. package/src/superlocalmemory/infra/__init__.py +3 -0
  124. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  125. package/src/superlocalmemory/infra/backup.py +317 -0
  126. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  127. package/src/superlocalmemory/infra/event_bus.py +381 -0
  128. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  129. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  130. package/src/superlocalmemory/learning/__init__.py +0 -0
  131. package/src/superlocalmemory/learning/adaptive.py +172 -0
  132. package/src/superlocalmemory/learning/behavioral.py +490 -0
  133. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  134. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  135. package/src/superlocalmemory/learning/cross_project.py +399 -0
  136. package/src/superlocalmemory/learning/database.py +376 -0
  137. package/src/superlocalmemory/learning/engagement.py +323 -0
  138. package/src/superlocalmemory/learning/features.py +138 -0
  139. package/src/superlocalmemory/learning/feedback.py +316 -0
  140. package/src/superlocalmemory/learning/outcomes.py +255 -0
  141. package/src/superlocalmemory/learning/project_context.py +366 -0
  142. package/src/superlocalmemory/learning/ranker.py +155 -0
  143. package/src/superlocalmemory/learning/source_quality.py +303 -0
  144. package/src/superlocalmemory/learning/workflows.py +309 -0
  145. package/src/superlocalmemory/llm/__init__.py +0 -0
  146. package/src/superlocalmemory/llm/backbone.py +316 -0
  147. package/src/superlocalmemory/math/__init__.py +0 -0
  148. package/src/superlocalmemory/math/fisher.py +356 -0
  149. package/src/superlocalmemory/math/langevin.py +398 -0
  150. package/src/superlocalmemory/math/sheaf.py +257 -0
  151. package/src/superlocalmemory/mcp/__init__.py +0 -0
  152. package/src/superlocalmemory/mcp/resources.py +245 -0
  153. package/src/superlocalmemory/mcp/server.py +61 -0
  154. package/src/superlocalmemory/mcp/tools.py +18 -0
  155. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  156. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  157. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  158. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  159. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  160. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  161. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  162. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  163. package/src/superlocalmemory/retrieval/engine.py +390 -0
  164. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  165. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  166. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  167. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  168. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  169. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  170. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  171. package/src/superlocalmemory/server/__init__.py +1 -0
  172. package/src/superlocalmemory/server/api.py +248 -0
  173. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  174. package/src/superlocalmemory/server/routes/agents.py +107 -0
  175. package/src/superlocalmemory/server/routes/backup.py +91 -0
  176. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  177. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  178. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  179. package/src/superlocalmemory/server/routes/events.py +183 -0
  180. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  181. package/src/superlocalmemory/server/routes/learning.py +273 -0
  182. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  183. package/src/superlocalmemory/server/routes/memories.py +399 -0
  184. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  185. package/src/superlocalmemory/server/routes/stats.py +346 -0
  186. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  187. package/src/superlocalmemory/server/routes/ws.py +82 -0
  188. package/src/superlocalmemory/server/security_middleware.py +57 -0
  189. package/src/superlocalmemory/server/ui.py +245 -0
  190. package/src/superlocalmemory/storage/__init__.py +0 -0
  191. package/src/superlocalmemory/storage/access_control.py +182 -0
  192. package/src/superlocalmemory/storage/database.py +594 -0
  193. package/src/superlocalmemory/storage/migrations.py +303 -0
  194. package/src/superlocalmemory/storage/models.py +406 -0
  195. package/src/superlocalmemory/storage/schema.py +726 -0
  196. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  197. package/src/superlocalmemory/trust/__init__.py +0 -0
  198. package/src/superlocalmemory/trust/gate.py +130 -0
  199. package/src/superlocalmemory/trust/provenance.py +124 -0
  200. package/src/superlocalmemory/trust/scorer.py +347 -0
  201. package/src/superlocalmemory/trust/signals.py +153 -0
  202. package/ui/index.html +278 -5
  203. package/ui/js/auto-settings.js +70 -0
  204. package/ui/js/dashboard.js +90 -0
  205. package/ui/js/fact-detail.js +92 -0
  206. package/ui/js/feedback.js +2 -2
  207. package/ui/js/ide-status.js +102 -0
  208. package/ui/js/math-health.js +98 -0
  209. package/ui/js/recall-lab.js +127 -0
  210. package/ui/js/settings.js +2 -2
  211. package/ui/js/trust-dashboard.js +73 -0
  212. package/api_server.py +0 -724
  213. package/bin/aider-smart +0 -72
  214. package/bin/superlocalmemoryv2-learning +0 -4
  215. package/bin/superlocalmemoryv2-list +0 -3
  216. package/bin/superlocalmemoryv2-patterns +0 -4
  217. package/bin/superlocalmemoryv2-profile +0 -3
  218. package/bin/superlocalmemoryv2-recall +0 -3
  219. package/bin/superlocalmemoryv2-remember +0 -3
  220. package/bin/superlocalmemoryv2-reset +0 -3
  221. package/bin/superlocalmemoryv2-status +0 -3
  222. package/configs/chatgpt-desktop-mcp.json +0 -16
  223. package/configs/cursor-mcp.json +0 -15
  224. package/hooks/memory-list-skill.js +0 -139
  225. package/hooks/memory-profile-skill.js +0 -273
  226. package/hooks/memory-recall-skill.js +0 -114
  227. package/hooks/memory-remember-skill.js +0 -127
  228. package/hooks/memory-reset-skill.js +0 -274
  229. package/mcp_server.py +0 -1808
  230. package/requirements-core.txt +0 -22
  231. package/requirements-learning.txt +0 -12
  232. package/requirements.txt +0 -12
  233. package/src/agent_registry.py +0 -411
  234. package/src/auth_middleware.py +0 -61
  235. package/src/auto_backup.py +0 -459
  236. package/src/behavioral/__init__.py +0 -49
  237. package/src/behavioral/behavioral_listener.py +0 -203
  238. package/src/behavioral/behavioral_patterns.py +0 -275
  239. package/src/behavioral/cross_project_transfer.py +0 -206
  240. package/src/behavioral/outcome_inference.py +0 -194
  241. package/src/behavioral/outcome_tracker.py +0 -193
  242. package/src/behavioral/tests/__init__.py +0 -4
  243. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  244. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  245. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  246. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  247. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  248. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  249. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  250. package/src/cache_manager.py +0 -518
  251. package/src/compliance/__init__.py +0 -48
  252. package/src/compliance/abac_engine.py +0 -149
  253. package/src/compliance/abac_middleware.py +0 -116
  254. package/src/compliance/audit_db.py +0 -215
  255. package/src/compliance/audit_logger.py +0 -148
  256. package/src/compliance/retention_manager.py +0 -289
  257. package/src/compliance/retention_scheduler.py +0 -186
  258. package/src/compliance/tests/__init__.py +0 -4
  259. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  260. package/src/compliance/tests/test_abac_engine.py +0 -124
  261. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  262. package/src/compliance/tests/test_audit_db.py +0 -123
  263. package/src/compliance/tests/test_audit_logger.py +0 -98
  264. package/src/compliance/tests/test_mcp_audit.py +0 -128
  265. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  266. package/src/compliance/tests/test_retention_manager.py +0 -131
  267. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  268. package/src/compression/__init__.py +0 -25
  269. package/src/compression/cli.py +0 -150
  270. package/src/compression/cold_storage.py +0 -217
  271. package/src/compression/config.py +0 -72
  272. package/src/compression/orchestrator.py +0 -133
  273. package/src/compression/tier2_compressor.py +0 -228
  274. package/src/compression/tier3_compressor.py +0 -153
  275. package/src/compression/tier_classifier.py +0 -148
  276. package/src/db_connection_manager.py +0 -536
  277. package/src/embedding_engine.py +0 -63
  278. package/src/embeddings/__init__.py +0 -47
  279. package/src/embeddings/cache.py +0 -70
  280. package/src/embeddings/cli.py +0 -113
  281. package/src/embeddings/constants.py +0 -47
  282. package/src/embeddings/database.py +0 -91
  283. package/src/embeddings/engine.py +0 -247
  284. package/src/embeddings/model_loader.py +0 -145
  285. package/src/event_bus.py +0 -562
  286. package/src/graph/__init__.py +0 -36
  287. package/src/graph/build_helpers.py +0 -74
  288. package/src/graph/cli.py +0 -87
  289. package/src/graph/cluster_builder.py +0 -188
  290. package/src/graph/cluster_summary.py +0 -148
  291. package/src/graph/constants.py +0 -47
  292. package/src/graph/edge_builder.py +0 -162
  293. package/src/graph/entity_extractor.py +0 -95
  294. package/src/graph/graph_core.py +0 -226
  295. package/src/graph/graph_search.py +0 -231
  296. package/src/graph/hierarchical.py +0 -207
  297. package/src/graph/schema.py +0 -99
  298. package/src/graph_engine.py +0 -52
  299. package/src/hnsw_index.py +0 -628
  300. package/src/hybrid_search.py +0 -46
  301. package/src/learning/__init__.py +0 -217
  302. package/src/learning/adaptive_ranker.py +0 -682
  303. package/src/learning/bootstrap/__init__.py +0 -69
  304. package/src/learning/bootstrap/constants.py +0 -93
  305. package/src/learning/bootstrap/db_queries.py +0 -316
  306. package/src/learning/bootstrap/sampling.py +0 -82
  307. package/src/learning/bootstrap/text_utils.py +0 -71
  308. package/src/learning/cross_project_aggregator.py +0 -857
  309. package/src/learning/db/__init__.py +0 -40
  310. package/src/learning/db/constants.py +0 -44
  311. package/src/learning/db/schema.py +0 -279
  312. package/src/learning/engagement_tracker.py +0 -628
  313. package/src/learning/feature_extractor.py +0 -708
  314. package/src/learning/feedback_collector.py +0 -806
  315. package/src/learning/learning_db.py +0 -915
  316. package/src/learning/project_context_manager.py +0 -572
  317. package/src/learning/ranking/__init__.py +0 -33
  318. package/src/learning/ranking/constants.py +0 -84
  319. package/src/learning/ranking/helpers.py +0 -278
  320. package/src/learning/source_quality_scorer.py +0 -676
  321. package/src/learning/synthetic_bootstrap.py +0 -755
  322. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  323. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  324. package/src/learning/tests/test_aggregator.py +0 -306
  325. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  326. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  327. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  328. package/src/learning/tests/test_feedback_collector.py +0 -294
  329. package/src/learning/tests/test_learning_db.py +0 -602
  330. package/src/learning/tests/test_learning_db_v28.py +0 -110
  331. package/src/learning/tests/test_learning_init_v28.py +0 -48
  332. package/src/learning/tests/test_outcome_signals.py +0 -48
  333. package/src/learning/tests/test_project_context.py +0 -292
  334. package/src/learning/tests/test_schema_migration.py +0 -319
  335. package/src/learning/tests/test_signal_inference.py +0 -397
  336. package/src/learning/tests/test_source_quality.py +0 -351
  337. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  338. package/src/learning/tests/test_workflow_miner.py +0 -318
  339. package/src/learning/workflow_pattern_miner.py +0 -655
  340. package/src/lifecycle/__init__.py +0 -54
  341. package/src/lifecycle/bounded_growth.py +0 -239
  342. package/src/lifecycle/compaction_engine.py +0 -226
  343. package/src/lifecycle/lifecycle_engine.py +0 -355
  344. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  345. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  346. package/src/lifecycle/retention_policy.py +0 -285
  347. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  348. package/src/lifecycle/tests/test_compaction.py +0 -179
  349. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  350. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  351. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  352. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  353. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  354. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  355. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  356. package/src/mcp_tools_v28.py +0 -281
  357. package/src/memory/__init__.py +0 -36
  358. package/src/memory/cli.py +0 -205
  359. package/src/memory/constants.py +0 -39
  360. package/src/memory/helpers.py +0 -28
  361. package/src/memory/schema.py +0 -166
  362. package/src/memory-profiles.py +0 -595
  363. package/src/memory-reset.py +0 -491
  364. package/src/memory_compression.py +0 -989
  365. package/src/memory_store_v2.py +0 -1155
  366. package/src/migrate_v1_to_v2.py +0 -629
  367. package/src/pattern_learner.py +0 -34
  368. package/src/patterns/__init__.py +0 -24
  369. package/src/patterns/analyzers.py +0 -251
  370. package/src/patterns/learner.py +0 -271
  371. package/src/patterns/scoring.py +0 -171
  372. package/src/patterns/store.py +0 -225
  373. package/src/patterns/terminology.py +0 -140
  374. package/src/provenance_tracker.py +0 -312
  375. package/src/qualixar_attribution.py +0 -139
  376. package/src/qualixar_watermark.py +0 -78
  377. package/src/query_optimizer.py +0 -511
  378. package/src/rate_limiter.py +0 -83
  379. package/src/search/__init__.py +0 -20
  380. package/src/search/cli.py +0 -77
  381. package/src/search/constants.py +0 -26
  382. package/src/search/engine.py +0 -241
  383. package/src/search/fusion.py +0 -122
  384. package/src/search/index_loader.py +0 -114
  385. package/src/search/methods.py +0 -162
  386. package/src/search_engine_v2.py +0 -401
  387. package/src/setup_validator.py +0 -482
  388. package/src/subscription_manager.py +0 -391
  389. package/src/tree/__init__.py +0 -59
  390. package/src/tree/builder.py +0 -185
  391. package/src/tree/nodes.py +0 -202
  392. package/src/tree/queries.py +0 -257
  393. package/src/tree/schema.py +0 -80
  394. package/src/tree_manager.py +0 -19
  395. package/src/trust/__init__.py +0 -45
  396. package/src/trust/constants.py +0 -66
  397. package/src/trust/queries.py +0 -157
  398. package/src/trust/schema.py +0 -95
  399. package/src/trust/scorer.py +0 -299
  400. package/src/trust/signals.py +0 -95
  401. package/src/trust_scorer.py +0 -44
  402. package/ui/app.js +0 -1588
  403. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  404. package/ui/js/graph-cytoscape.js +0 -1168
  405. package/ui/js/graph-d3-backup.js +0 -32
  406. package/ui/js/graph.js +0 -32
  407. package/ui_server.py +0 -286
  408. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  409. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  410. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  411. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  412. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  413. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  414. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  415. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  416. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  417. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  418. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  419. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  420. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  421. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  422. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  423. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  424. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  425. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  426. /package/{completions → ide/completions}/slm.bash +0 -0
  427. /package/{completions → ide/completions}/slm.zsh +0 -0
  428. /package/{configs → ide/configs}/cody-commands.json +0 -0
  429. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
  430. /package/{install.ps1 → scripts/install.ps1} +0 -0
  431. /package/{install.sh → scripts/install.sh} +0 -0
@@ -1,682 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- AdaptiveRanker — Three-phase adaptive re-ranking engine.
6
-
7
- This is the core ranking engine for v2.7 "Your AI Learns You". It sits
8
- between the existing search methods (FTS5 + TF-IDF + HNSW) and the final
9
- result list, re-ordering candidates based on learned user preferences.
10
-
11
- Three Phases (progressive adaptation):
12
-
13
- Phase 0 — Baseline (< 20 feedback signals):
14
- Pure v2.6 behavior. No re-ranking applied. Results returned as-is
15
- from the existing search pipeline. Zero risk of degradation.
16
-
17
- Phase 1 — Rule-Based (20-199 signals):
18
- Applies learned-pattern boosting to search results. Uses feature
19
- extraction to compute boost multipliers for tech match, project
20
- match, recency, and source quality. Deterministic and interpretable.
21
-
22
- Phase 2 — ML Model (200+ signals across 50+ unique queries):
23
- LightGBM LambdaRank re-ranker. Trained on real feedback data
24
- (and optionally bootstrapped from synthetic data). Produces ML
25
- scores that replace the original ranking order.
26
-
27
- Design Principles:
28
- - LightGBM is OPTIONAL. If not installed, falls back to rule-based.
29
- - Any exception in re-ranking falls back to original v2.6 results.
30
- - Model is loaded lazily and cached in memory.
31
- - Training is explicit (called by user or scheduled), never implicit.
32
- - Original scores are preserved as 'base_score' for diagnostics.
33
-
34
- Research Backing:
35
- - eKNOW 2025: BM25 -> re-ranker pipeline for personal collections
36
- - MACLA (arXiv:2512.18950): Bayesian confidence scoring
37
- - FCS LREC 2024: Cold-start mitigation via synthetic bootstrap
38
- """
39
-
40
- import logging
41
- import threading
42
- from datetime import datetime
43
- from pathlib import Path
44
- from typing import Any, Dict, List, Optional
45
-
46
- # LightGBM is OPTIONAL — graceful fallback to rule-based ranking
47
- try:
48
- import lightgbm as lgb
49
- HAS_LIGHTGBM = True
50
- except ImportError:
51
- lgb = None
52
- HAS_LIGHTGBM = False
53
-
54
- # NumPy is used for feature matrix construction (comes with sklearn)
55
- try:
56
- import numpy as np
57
- HAS_NUMPY = True
58
- except ImportError:
59
- np = None
60
- HAS_NUMPY = False
61
-
62
- from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
63
-
64
- logger = logging.getLogger("superlocalmemory.learning.adaptive_ranker")
65
-
66
- # Import constants and helpers from ranking subpackage
67
- from .ranking import (
68
- MODELS_DIR,
69
- MODEL_PATH,
70
- PHASE_THRESHOLDS,
71
- MIN_UNIQUE_QUERIES_FOR_ML,
72
- RULE_BOOST,
73
- TRAINING_PARAMS,
74
- calculate_rule_boost,
75
- prepare_training_data_internal,
76
- )
77
-
78
-
79
- class AdaptiveRanker:
80
- """
81
- Three-phase adaptive re-ranking engine.
82
-
83
- Usage (called by memory_store_v2.search or mcp_server recall):
84
- ranker = AdaptiveRanker()
85
- results = ranker.rerank(search_results, query, context={
86
- 'tech_preferences': {...},
87
- 'current_project': 'MyProject',
88
- 'source_scores': {...},
89
- 'workflow_phase': 'testing',
90
- })
91
-
92
- The caller wraps this in try/except — any exception here causes
93
- fallback to original v2.6 results. Zero risk of degradation.
94
- """
95
-
96
- PHASE_THRESHOLDS = PHASE_THRESHOLDS
97
- MODEL_PATH = MODEL_PATH
98
-
99
- def __init__(self, learning_db=None):
100
- """
101
- Initialize AdaptiveRanker.
102
-
103
- Args:
104
- learning_db: Optional LearningDB instance. If None, imports
105
- and creates one lazily.
106
- """
107
- self._learning_db = learning_db
108
- self._feature_extractor = FeatureExtractor()
109
- self._model = None # Loaded lazily on first ML rerank
110
- self._model_load_attempted = False
111
- self._lock = threading.Lock()
112
-
113
- # ========================================================================
114
- # LearningDB Access
115
- # ========================================================================
116
-
117
- def _get_learning_db(self):
118
- """Get or create the LearningDB instance."""
119
- if self._learning_db is None:
120
- try:
121
- from .learning_db import LearningDB
122
- self._learning_db = LearningDB()
123
- except Exception as e:
124
- logger.warning("Cannot access LearningDB: %s", e)
125
- return None
126
- return self._learning_db
127
-
128
- # ========================================================================
129
- # Phase Detection
130
- # ========================================================================
131
-
132
- def get_phase(self) -> str:
133
- """
134
- Determine the current ranking phase based on feedback data.
135
-
136
- Returns:
137
- 'baseline' — Not enough data for personalization
138
- 'rule_based' — Enough data for rule-based boosting
139
- 'ml_model' — Enough data for ML ranking (if LightGBM available)
140
- """
141
- ldb = self._get_learning_db()
142
- if ldb is None:
143
- return 'baseline'
144
-
145
- try:
146
- feedback_count = ldb.get_feedback_count()
147
- unique_queries = ldb.get_unique_query_count()
148
- except Exception as e:
149
- logger.warning("Failed to check feedback counts: %s", e)
150
- return 'baseline'
151
-
152
- # Phase 2: ML model — requires enough data AND LightGBM AND numpy
153
- if (
154
- feedback_count >= PHASE_THRESHOLDS['ml_model']
155
- and unique_queries >= MIN_UNIQUE_QUERIES_FOR_ML
156
- and HAS_LIGHTGBM
157
- and HAS_NUMPY
158
- ):
159
- return 'ml_model'
160
-
161
- # Phase 1: Rule-based — just needs minimum feedback
162
- if feedback_count >= PHASE_THRESHOLDS['rule_based']:
163
- return 'rule_based'
164
-
165
- # Phase 0: Not enough data yet
166
- return 'baseline'
167
-
168
def get_phase_info(self) -> Dict[str, Any]:
    """
    Return detailed phase information for diagnostics.

    Returns:
        Dict with keys: phase, feedback_count, unique_queries, thresholds,
        min_unique_queries_for_ml, model_loaded, model_path_exists,
        lightgbm_available, numpy_available.

        The counts stay at 0 when LearningDB is unavailable; if a count
        lookup raises, whatever was read before the failure is kept and
        the failure is logged.
    """
    feedback_count = 0
    unique_queries = 0

    ldb = self._get_learning_db()
    if ldb is not None:
        try:
            feedback_count = ldb.get_feedback_count()
            unique_queries = ldb.get_unique_query_count()
        except Exception as e:
            # Diagnostics must never raise, but the failure should still be
            # visible. Debug level: get_phase() below warns about the same
            # lookup if it fails again.
            logger.debug("Failed to read feedback counts for phase info: %s", e)

    phase = self.get_phase()

    return {
        'phase': phase,
        'feedback_count': feedback_count,
        'unique_queries': unique_queries,
        # Copy so callers cannot mutate the module-level thresholds
        'thresholds': dict(PHASE_THRESHOLDS),
        'min_unique_queries_for_ml': MIN_UNIQUE_QUERIES_FOR_ML,
        'model_loaded': self._model is not None,
        'model_path_exists': MODEL_PATH.exists(),
        'lightgbm_available': HAS_LIGHTGBM,
        'numpy_available': HAS_NUMPY,
    }
200
-
201
- # ========================================================================
202
- # Main Re-ranking Entry Point
203
- # ========================================================================
204
-
205
def rerank(
    self,
    results: List[dict],
    query: str,
    context: Optional[dict] = None,
) -> List[dict]:
    """
    Re-rank search results based on learned user preferences.

    This is the main entry point, called after the search pipeline
    produces initial results. It determines the current phase and
    routes to the appropriate ranking strategy.

    Args:
        results: List of memory dicts from search (with 'score' field).
            The dicts are annotated in place.
        query: The recall query string.
        context: Optional context dict; the keys read here are
            'source_scores', 'tech_preferences', 'current_project'
            and 'workflow_phase'. Missing keys are passed on as None.

    Returns:
        The same list object, possibly re-ordered. Each memory gets:
            - 'base_score': Original score from search pipeline
            - 'ranking_phase': Which phase was used
            - 'score': Updated score (may differ from base_score)

    CRITICAL: The caller wraps this in try/except. Any exception
    causes fallback to original v2.6 results. This method must
    never corrupt the results list.
    """
    def _passthrough(phase_label: str) -> List[dict]:
        # Keep order and scores untouched; only record provenance.
        for r in results:
            r['base_score'] = r.get('score', 0.0)
            r['ranking_phase'] = phase_label
        return results

    if not results:
        return results

    # Short-circuit: a single result cannot be re-ordered
    if len(results) == 1:
        return _passthrough('baseline')

    context = context or {}

    # Fetch signal stats for features [10-11] (v2.7.4)
    signal_stats = {}
    ldb = self._get_learning_db()
    if ldb is not None:
        try:
            memory_ids = [r.get('id') for r in results if r.get('id')]
            if memory_ids:
                signal_stats = ldb.get_signal_stats_for_memories(memory_ids)
        except Exception as e:
            # Not critical: ranking proceeds without signal features
            logger.debug("Signal stats unavailable for re-ranking: %s", e)

    # Set up feature extraction context (once per query)
    self._feature_extractor.set_context(
        source_scores=context.get('source_scores'),
        tech_preferences=context.get('tech_preferences'),
        current_project=context.get('current_project'),
        workflow_phase=context.get('workflow_phase'),
        signal_stats=signal_stats,
    )

    phase = self.get_phase()

    if phase == 'baseline':
        # Phase 0: No re-ranking — preserve original order
        return _passthrough('baseline')

    if phase == 'rule_based':
        return self._rerank_rule_based(results, query, context)

    if phase == 'ml_model':
        # Try ML first, fall back to rule-based if model fails
        try:
            return self._rerank_ml(results, query, context)
        except Exception as e:
            logger.warning(
                "ML re-ranking failed, falling back to rule-based: %s", e
            )
            return self._rerank_rule_based(results, query, context)

    # Defensive: unknown phase -> no re-ranking
    return _passthrough('unknown')
297
-
298
- # ========================================================================
299
- # Phase 1: Rule-Based Re-ranking
300
- # ========================================================================
301
-
302
- def _rerank_rule_based(
303
- self,
304
- results: List[dict],
305
- query: str,
306
- context: dict,
307
- ) -> List[dict]:
308
- """
309
- Phase 1: Apply rule-based boosting using extracted features.
310
-
311
- Each result's score is multiplied by boost factors derived from
312
- feature values. The boosts are conservative — they nudge the
313
- ranking order without dramatically flipping results.
314
- """
315
- feature_vectors = self._feature_extractor.extract_batch(results, query)
316
-
317
- for i, result in enumerate(results):
318
- base_score = result.get('score', 0.0)
319
- result['base_score'] = base_score
320
- result['ranking_phase'] = 'rule_based'
321
-
322
- if i >= len(feature_vectors):
323
- continue
324
-
325
- features = feature_vectors[i]
326
- boost = calculate_rule_boost(features)
327
-
328
- # Apply boost to score
329
- result['score'] = base_score * boost
330
-
331
- # Re-sort by boosted score (highest first)
332
- results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
333
- return results
334
-
335
- # ========================================================================
336
- # Phase 2: ML Re-ranking (LightGBM)
337
- # ========================================================================
338
-
339
def _rerank_ml(
    self,
    results: List[dict],
    query: str,
    context: dict,
) -> List[dict]:
    """
    Phase 2: LightGBM LambdaRank re-ranking.

    Extracts features, runs the trained model, and sorts by ML scores.
    Preserves original score as 'base_score' and adds 'ml_score'.

    All validation happens before any result is modified, so a raised
    exception leaves `results` untouched for the caller's fallback.

    Raises:
        RuntimeError: LightGBM/NumPy missing, or no trained model.
        ValueError: feature extraction returned nothing, the feature
            matrix has the wrong shape, or the number of feature rows or
            predictions does not match the number of results.
    """
    if not HAS_LIGHTGBM or not HAS_NUMPY:
        raise RuntimeError("LightGBM or NumPy not available for ML ranking")

    # Load model if not cached
    model = self._load_model()
    if model is None:
        raise RuntimeError("No trained ranking model available")

    # Extract features
    feature_vectors = self._feature_extractor.extract_batch(results, query)
    if not feature_vectors:
        raise ValueError("Feature extraction returned empty results")

    # Build feature matrix
    X = np.array(feature_vectors, dtype=np.float64)

    # Validate shape (a non-2-D matrix would otherwise fail on X.shape[1])
    if X.ndim != 2:
        raise ValueError(
            f"Feature matrix must be 2-dimensional, got shape {X.shape}"
        )
    if X.shape[1] != NUM_FEATURES:
        raise ValueError(
            f"Feature dimension mismatch: expected {NUM_FEATURES}, "
            f"got {X.shape[1]}"
        )
    if X.shape[0] != len(results):
        raise ValueError(
            f"Feature row mismatch: expected {len(results)}, "
            f"got {X.shape[0]}"
        )

    # Predict scores
    ml_scores = model.predict(X)

    # A partial prediction would leave some results on the base-score scale
    # and sort them against ML-scale scores; refuse instead of mixing.
    if len(ml_scores) != len(results):
        raise ValueError(
            f"Prediction count mismatch: expected {len(results)}, "
            f"got {len(ml_scores)}"
        )

    # Annotate results with ML scores
    for result, raw_score in zip(results, ml_scores):
        ml_score = float(raw_score)
        result['base_score'] = result.get('score', 0.0)
        result['ranking_phase'] = 'ml_model'
        result['ml_score'] = ml_score
        result['score'] = ml_score

    # Re-sort by ML score (highest first)
    results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
    return results
390
-
391
- # ========================================================================
392
- # Model Management
393
- # ========================================================================
394
-
395
- def _load_model(self):
396
- """
397
- Load LightGBM model from disk (lazy, cached).
398
-
399
- Returns:
400
- lgb.Booster instance or None if unavailable.
401
- """
402
- # Return cached model if already loaded
403
- if self._model is not None:
404
- return self._model
405
-
406
- # Avoid repeated failed load attempts
407
- if self._model_load_attempted:
408
- return None
409
-
410
- with self._lock:
411
- # Double-check after acquiring lock
412
- if self._model is not None:
413
- return self._model
414
- if self._model_load_attempted:
415
- return None
416
-
417
- self._model_load_attempted = True
418
-
419
- if not HAS_LIGHTGBM:
420
- logger.info("LightGBM not installed — ML ranking unavailable")
421
- return None
422
-
423
- if not MODEL_PATH.exists():
424
- logger.info(
425
- "No ranking model at %s — ML ranking unavailable",
426
- MODEL_PATH
427
- )
428
- return None
429
-
430
- try:
431
- model = lgb.Booster(model_file=str(MODEL_PATH))
432
-
433
- # v2.7.4: Check for feature dimension mismatch (10→12 upgrade)
434
- model_num_features = model.num_feature()
435
- if model_num_features != NUM_FEATURES:
436
- logger.info(
437
- "Feature mismatch: model has %d features, expected %d. "
438
- "Triggering auto-retrain in background.",
439
- model_num_features, NUM_FEATURES,
440
- )
441
- # Delete old model and trigger re-bootstrap
442
- MODEL_PATH.unlink(missing_ok=True)
443
- self._trigger_retrain_background()
444
- return None
445
-
446
- self._model = model
447
- logger.info("Loaded ranking model from %s", MODEL_PATH)
448
- return self._model
449
- except Exception as e:
450
- logger.warning("Failed to load ranking model: %s", e)
451
- return None
452
-
453
def _trigger_retrain_background(self):
    """
    Trigger model re-bootstrap in a background thread (v2.7.4).

    The worker asks SyntheticBootstrapper whether a bootstrap is due and,
    if it produces a model, clears the cached model and the
    "load attempted" flag under self._lock so the next _load_model() call
    reads the new file. This method returns immediately and never raises;
    failures are logged.
    """
    import threading

    def _retrain():
        try:
            from .synthetic_bootstrap import SyntheticBootstrapper
            bootstrapper = SyntheticBootstrapper()
            if not bootstrapper.should_bootstrap():
                logger.info("Auto-retrain skipped: bootstrap not applicable")
                return
            if not bootstrapper.bootstrap_model():
                logger.info("Auto-retrain produced no model")
                return
            logger.info(
                "Auto-retrain complete with %d-feature model",
                NUM_FEATURES,
            )
            # Reload the new model
            with self._lock:
                self._model = None
                self._model_load_attempted = False
        except Exception as e:
            logger.warning("Auto-retrain failed: %s", e)

    try:
        # Daemon thread: must not keep the process alive on shutdown
        thread = threading.Thread(target=_retrain, daemon=True)
        thread.start()
    except Exception as e:
        # Best-effort: ranking continues without ML, but say why
        logger.warning("Could not start auto-retrain thread: %s", e)
480
-
481
def reload_model(self):
    """
    Drop the cached ranking model and load it again from disk.

    Intended to be called after a new model has been trained so the
    updated weights are picked up. Returns whatever _load_model() returns.
    """
    with self._lock:
        # Clear both the cache and the "already tried" marker
        self._model, self._model_load_attempted = None, False
    # Load outside the lock; _load_model() acquires it itself
    return self._load_model()
492
-
493
- # ========================================================================
494
- # Model Training
495
- # ========================================================================
496
-
497
def train(self, force: bool = False) -> Optional[Dict[str, Any]]:
    """
    Train or retrain the LightGBM ranking model.

    Uses continued training (init_model) if a compatible model already
    exists on disk, incorporating new feedback data incrementally. An
    existing model that is unreadable or has a different feature count
    is ignored and training starts fresh.

    Args:
        force: If True, train even if below ML threshold.
            Useful for synthetic bootstrap training.

    Returns:
        Training metadata dict, or None if training not possible.
        Metadata keys: model_version, training_samples, query_groups,
        n_estimators, ndcg_at_10, model_path, continued_from, created_at.
        ndcg_at_10 is the last recorded training-set value of a metric
        whose name contains 'ndcg@10', or None if no such metric was
        evaluated.
    """
    if not HAS_LIGHTGBM or not HAS_NUMPY:
        logger.warning(
            "Cannot train: LightGBM=%s, NumPy=%s",
            HAS_LIGHTGBM, HAS_NUMPY
        )
        return None

    ldb = self._get_learning_db()
    if ldb is None:
        logger.warning("Cannot train: LearningDB unavailable")
        return None

    # Check if we have enough data (unless forced)
    if not force:
        try:
            feedback_count = ldb.get_feedback_count()
            unique_queries = ldb.get_unique_query_count()
        except Exception as e:
            logger.warning("Cannot train: failed to check feedback counts: %s", e)
            return None
        if (
            feedback_count < PHASE_THRESHOLDS['ml_model']
            or unique_queries < MIN_UNIQUE_QUERIES_FOR_ML
        ):
            logger.info(
                "Insufficient data for training: %d feedback / %d queries "
                "(need %d / %d)",
                feedback_count, unique_queries,
                PHASE_THRESHOLDS['ml_model'], MIN_UNIQUE_QUERIES_FOR_ML,
            )
            return None

    # Prepare training data
    training_data = self._prepare_training_data()
    if training_data is None:
        logger.warning("No usable training data available")
        return None

    X, y, groups = training_data
    total_samples = X.shape[0]

    if total_samples < 10:
        logger.warning("Too few training samples: %d", total_samples)
        return None

    logger.info(
        "Training ranking model: %d samples, %d groups",
        total_samples, len(groups)
    )

    # Create LightGBM dataset
    train_dataset = lgb.Dataset(
        X, label=y, group=groups,
        feature_name=list(FEATURE_NAMES),
        free_raw_data=False,
    )

    # Training parameters (copy so the module-level dict is not mutated)
    params = dict(TRAINING_PARAMS)
    n_estimators = params.pop('n_estimators', 50)

    # Check for existing model (continued training)
    init_model = None
    if MODEL_PATH.exists():
        try:
            existing = lgb.Booster(model_file=str(MODEL_PATH))
            if existing.num_feature() == NUM_FEATURES:
                init_model = existing
                logger.info("Continuing training from existing model")
            else:
                # Continuing from a model with another feature count would
                # make every retrain fail until the file is removed.
                logger.info(
                    "Starting fresh training (existing model has %d "
                    "features, expected %d)",
                    existing.num_feature(), NUM_FEATURES,
                )
        except Exception:
            logger.info("Starting fresh training (existing model unreadable)")
            init_model = None

    # Train; record_evaluation captures the per-iteration training metrics
    # because the returned booster no longer has the dataset attached.
    evals_result = {}
    try:
        booster = lgb.train(
            params,
            train_dataset,
            num_boost_round=n_estimators,
            init_model=init_model,
            valid_sets=[train_dataset],
            valid_names=['train'],
            callbacks=[
                lgb.log_evaluation(period=0),  # Silent training
                lgb.record_evaluation(evals_result),
            ],
        )
    except Exception as e:
        logger.error("LightGBM training failed: %s", e)
        return None

    # Save model (directory creation failures are handled like save failures)
    try:
        MODELS_DIR.mkdir(parents=True, exist_ok=True)
        booster.save_model(str(MODEL_PATH))
        logger.info("Ranking model saved to %s", MODEL_PATH)
    except Exception as e:
        logger.error("Failed to save ranking model: %s", e)
        return None

    # Extract NDCG@10 from the recorded training evaluation (if available)
    ndcg_at_10 = None
    try:
        for metric_name, history in evals_result.get('train', {}).items():
            if 'ndcg@10' in metric_name and history:
                ndcg_at_10 = float(history[-1])
                break
    except Exception as e:
        logger.debug("Could not read NDCG@10 from training history: %s", e)

    # One timestamp for both the version tag and created_at
    trained_at = datetime.now()
    model_version = trained_at.strftime("v%Y%m%d_%H%M%S")

    # Record metadata in learning_db
    try:
        ldb.record_model_training(
            model_version=model_version,
            training_samples=total_samples,
            real_samples=total_samples,
            synthetic_samples=0,
            ndcg_at_10=ndcg_at_10,
            model_path=str(MODEL_PATH),
        )
    except Exception as e:
        logger.warning("Failed to record training metadata: %s", e)

    # Reload model into cache
    self.reload_model()

    metadata = {
        'model_version': model_version,
        'training_samples': total_samples,
        'query_groups': len(groups),
        'n_estimators': n_estimators,
        'ndcg_at_10': ndcg_at_10,
        'model_path': str(MODEL_PATH),
        'continued_from': init_model is not None,
        'created_at': trained_at.isoformat(),
    }
    logger.info("Training complete: %s", metadata)
    return metadata
644
-
645
- def _prepare_training_data(self) -> Optional[tuple]:
646
- """
647
- Prepare training data from feedback records.
648
-
649
- For each unique query (grouped by query_hash):
650
- - Fetch all feedback entries for that query
651
- - Look up the corresponding memory from memory.db
652
- - Extract features for each memory
653
- - Use signal_value as the relevance label
654
-
655
- Returns:
656
- Tuple of (X, y, groups) for LGBMRanker, or None if insufficient.
657
- X: numpy array (n_samples, NUM_FEATURES)
658
- y: numpy array (n_samples,) — relevance labels
659
- groups: list of ints — samples per query group
660
- """
661
- ldb = self._get_learning_db()
662
- if ldb is None:
663
- return None
664
-
665
- feedback = ldb.get_feedback_for_training()
666
- if not feedback:
667
- return None
668
-
669
- return prepare_training_data_internal(feedback, self._feature_extractor)
670
-
671
-
672
- # ============================================================================
673
- # Module-level convenience
674
- # ============================================================================
675
-
676
def get_phase() -> str:
    """Report the current ranking phase using a throwaway AdaptiveRanker.

    Any failure while creating or querying the ranker yields 'baseline'.
    """
    try:
        return AdaptiveRanker().get_phase()
    except Exception:
        return 'baseline'