superlocalmemory 2.8.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434)
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +9 -1
  3. package/NOTICE +63 -0
  4. package/README.md +165 -480
  5. package/bin/slm +17 -449
  6. package/bin/slm-npm +2 -2
  7. package/bin/slm.bat +4 -2
  8. package/conftest.py +5 -0
  9. package/docs/api-reference.md +284 -0
  10. package/docs/architecture.md +149 -0
  11. package/docs/auto-memory.md +150 -0
  12. package/docs/cli-reference.md +276 -0
  13. package/docs/compliance.md +191 -0
  14. package/docs/configuration.md +182 -0
  15. package/docs/getting-started.md +102 -0
  16. package/docs/ide-setup.md +261 -0
  17. package/docs/mcp-tools.md +220 -0
  18. package/docs/migration-from-v2.md +170 -0
  19. package/docs/profiles.md +173 -0
  20. package/docs/troubleshooting.md +310 -0
  21. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  22. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  23. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  24. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  25. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  26. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  27. package/ide/configs/cursor-mcp.json +15 -0
  28. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  29. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  30. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  31. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  32. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  33. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  34. package/{configs → ide/configs}/zed-mcp.json +2 -2
  35. package/{hooks → ide/hooks}/context-hook.js +9 -20
  36. package/ide/hooks/memory-list-skill.js +70 -0
  37. package/ide/hooks/memory-profile-skill.js +101 -0
  38. package/ide/hooks/memory-recall-skill.js +62 -0
  39. package/ide/hooks/memory-remember-skill.js +68 -0
  40. package/ide/hooks/memory-reset-skill.js +160 -0
  41. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  42. package/ide/integrations/langchain/README.md +106 -0
  43. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  44. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  45. package/ide/integrations/langchain/pyproject.toml +38 -0
  46. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  47. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  48. package/ide/integrations/langchain/tests/test_security.py +117 -0
  49. package/ide/integrations/llamaindex/README.md +81 -0
  50. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  51. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  52. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  53. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  54. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  55. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  56. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  57. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  58. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  59. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  60. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  61. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  62. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  63. package/package.json +13 -22
  64. package/pyproject.toml +85 -0
  65. package/scripts/build-dmg.sh +417 -0
  66. package/scripts/install-skills.ps1 +334 -0
  67. package/{install.ps1 → scripts/install.ps1} +36 -4
  68. package/{install.sh → scripts/install.sh} +14 -13
  69. package/scripts/postinstall.js +2 -2
  70. package/scripts/start-dashboard.ps1 +52 -0
  71. package/scripts/start-dashboard.sh +41 -0
  72. package/scripts/sync-wiki.ps1 +127 -0
  73. package/scripts/sync-wiki.sh +82 -0
  74. package/scripts/test-dmg.sh +161 -0
  75. package/scripts/test-npm-package.ps1 +252 -0
  76. package/scripts/test-npm-package.sh +207 -0
  77. package/scripts/verify-install.ps1 +294 -0
  78. package/scripts/verify-install.sh +266 -0
  79. package/src/superlocalmemory/__init__.py +0 -0
  80. package/src/superlocalmemory/attribution/__init__.py +9 -0
  81. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  82. package/src/superlocalmemory/attribution/signer.py +153 -0
  83. package/src/superlocalmemory/attribution/watermark.py +189 -0
  84. package/src/superlocalmemory/cli/__init__.py +5 -0
  85. package/src/superlocalmemory/cli/commands.py +245 -0
  86. package/src/superlocalmemory/cli/main.py +89 -0
  87. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  88. package/src/superlocalmemory/cli/post_install.py +99 -0
  89. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  90. package/src/superlocalmemory/compliance/__init__.py +0 -0
  91. package/src/superlocalmemory/compliance/abac.py +204 -0
  92. package/src/superlocalmemory/compliance/audit.py +314 -0
  93. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  94. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  95. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  96. package/src/superlocalmemory/compliance/retention.py +232 -0
  97. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  98. package/src/superlocalmemory/core/__init__.py +0 -0
  99. package/src/superlocalmemory/core/config.py +391 -0
  100. package/src/superlocalmemory/core/embeddings.py +293 -0
  101. package/src/superlocalmemory/core/engine.py +701 -0
  102. package/src/superlocalmemory/core/hooks.py +65 -0
  103. package/src/superlocalmemory/core/maintenance.py +172 -0
  104. package/src/superlocalmemory/core/modes.py +140 -0
  105. package/src/superlocalmemory/core/profiles.py +234 -0
  106. package/src/superlocalmemory/core/registry.py +117 -0
  107. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  108. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  109. package/src/superlocalmemory/encoding/__init__.py +0 -0
  110. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  111. package/src/superlocalmemory/encoding/emotional.py +125 -0
  112. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  113. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  114. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  115. package/src/superlocalmemory/encoding/foresight.py +91 -0
  116. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  117. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  118. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  119. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  120. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  121. package/src/superlocalmemory/encoding/type_router.py +235 -0
  122. package/src/superlocalmemory/hooks/__init__.py +3 -0
  123. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  124. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  125. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  126. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  127. package/src/superlocalmemory/infra/__init__.py +3 -0
  128. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  129. package/src/superlocalmemory/infra/backup.py +317 -0
  130. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  131. package/src/superlocalmemory/infra/event_bus.py +381 -0
  132. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  133. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  134. package/src/superlocalmemory/learning/__init__.py +0 -0
  135. package/src/superlocalmemory/learning/adaptive.py +172 -0
  136. package/src/superlocalmemory/learning/behavioral.py +490 -0
  137. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  138. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  139. package/src/superlocalmemory/learning/cross_project.py +399 -0
  140. package/src/superlocalmemory/learning/database.py +376 -0
  141. package/src/superlocalmemory/learning/engagement.py +323 -0
  142. package/src/superlocalmemory/learning/features.py +138 -0
  143. package/src/superlocalmemory/learning/feedback.py +316 -0
  144. package/src/superlocalmemory/learning/outcomes.py +255 -0
  145. package/src/superlocalmemory/learning/project_context.py +366 -0
  146. package/src/superlocalmemory/learning/ranker.py +155 -0
  147. package/src/superlocalmemory/learning/source_quality.py +303 -0
  148. package/src/superlocalmemory/learning/workflows.py +309 -0
  149. package/src/superlocalmemory/llm/__init__.py +0 -0
  150. package/src/superlocalmemory/llm/backbone.py +316 -0
  151. package/src/superlocalmemory/math/__init__.py +0 -0
  152. package/src/superlocalmemory/math/fisher.py +356 -0
  153. package/src/superlocalmemory/math/langevin.py +398 -0
  154. package/src/superlocalmemory/math/sheaf.py +257 -0
  155. package/src/superlocalmemory/mcp/__init__.py +0 -0
  156. package/src/superlocalmemory/mcp/resources.py +245 -0
  157. package/src/superlocalmemory/mcp/server.py +61 -0
  158. package/src/superlocalmemory/mcp/tools.py +18 -0
  159. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  160. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  161. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  162. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  163. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  164. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  165. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  166. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  167. package/src/superlocalmemory/retrieval/engine.py +390 -0
  168. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  169. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  170. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  171. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  172. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  173. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  174. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  175. package/src/superlocalmemory/server/__init__.py +1 -0
  176. package/src/superlocalmemory/server/api.py +248 -0
  177. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  178. package/src/superlocalmemory/server/routes/agents.py +107 -0
  179. package/src/superlocalmemory/server/routes/backup.py +91 -0
  180. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  181. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  182. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  183. package/src/superlocalmemory/server/routes/events.py +183 -0
  184. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  185. package/src/superlocalmemory/server/routes/learning.py +273 -0
  186. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  187. package/src/superlocalmemory/server/routes/memories.py +399 -0
  188. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  189. package/src/superlocalmemory/server/routes/stats.py +346 -0
  190. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  191. package/src/superlocalmemory/server/routes/ws.py +82 -0
  192. package/src/superlocalmemory/server/security_middleware.py +57 -0
  193. package/src/superlocalmemory/server/ui.py +245 -0
  194. package/src/superlocalmemory/storage/__init__.py +0 -0
  195. package/src/superlocalmemory/storage/access_control.py +182 -0
  196. package/src/superlocalmemory/storage/database.py +594 -0
  197. package/src/superlocalmemory/storage/migrations.py +303 -0
  198. package/src/superlocalmemory/storage/models.py +406 -0
  199. package/src/superlocalmemory/storage/schema.py +726 -0
  200. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  201. package/src/superlocalmemory/trust/__init__.py +0 -0
  202. package/src/superlocalmemory/trust/gate.py +130 -0
  203. package/src/superlocalmemory/trust/provenance.py +124 -0
  204. package/src/superlocalmemory/trust/scorer.py +347 -0
  205. package/src/superlocalmemory/trust/signals.py +153 -0
  206. package/ui/index.html +278 -5
  207. package/ui/js/auto-settings.js +70 -0
  208. package/ui/js/dashboard.js +90 -0
  209. package/ui/js/fact-detail.js +92 -0
  210. package/ui/js/feedback.js +2 -2
  211. package/ui/js/ide-status.js +102 -0
  212. package/ui/js/math-health.js +98 -0
  213. package/ui/js/recall-lab.js +127 -0
  214. package/ui/js/settings.js +2 -2
  215. package/ui/js/trust-dashboard.js +73 -0
  216. package/api_server.py +0 -724
  217. package/bin/aider-smart +0 -72
  218. package/bin/superlocalmemoryv2-learning +0 -4
  219. package/bin/superlocalmemoryv2-list +0 -3
  220. package/bin/superlocalmemoryv2-patterns +0 -4
  221. package/bin/superlocalmemoryv2-profile +0 -3
  222. package/bin/superlocalmemoryv2-recall +0 -3
  223. package/bin/superlocalmemoryv2-remember +0 -3
  224. package/bin/superlocalmemoryv2-reset +0 -3
  225. package/bin/superlocalmemoryv2-status +0 -3
  226. package/configs/chatgpt-desktop-mcp.json +0 -16
  227. package/configs/cursor-mcp.json +0 -15
  228. package/docs/SECURITY-QUICK-REFERENCE.md +0 -214
  229. package/hooks/memory-list-skill.js +0 -139
  230. package/hooks/memory-profile-skill.js +0 -273
  231. package/hooks/memory-recall-skill.js +0 -114
  232. package/hooks/memory-remember-skill.js +0 -127
  233. package/hooks/memory-reset-skill.js +0 -274
  234. package/mcp_server.py +0 -1800
  235. package/requirements-core.txt +0 -22
  236. package/requirements-learning.txt +0 -12
  237. package/requirements.txt +0 -12
  238. package/src/agent_registry.py +0 -411
  239. package/src/auth_middleware.py +0 -61
  240. package/src/auto_backup.py +0 -459
  241. package/src/behavioral/__init__.py +0 -49
  242. package/src/behavioral/behavioral_listener.py +0 -203
  243. package/src/behavioral/behavioral_patterns.py +0 -275
  244. package/src/behavioral/cross_project_transfer.py +0 -206
  245. package/src/behavioral/outcome_inference.py +0 -194
  246. package/src/behavioral/outcome_tracker.py +0 -193
  247. package/src/behavioral/tests/__init__.py +0 -4
  248. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  249. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  250. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  251. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  252. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  253. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  254. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  255. package/src/cache_manager.py +0 -518
  256. package/src/compliance/__init__.py +0 -48
  257. package/src/compliance/abac_engine.py +0 -149
  258. package/src/compliance/abac_middleware.py +0 -116
  259. package/src/compliance/audit_db.py +0 -215
  260. package/src/compliance/audit_logger.py +0 -148
  261. package/src/compliance/retention_manager.py +0 -289
  262. package/src/compliance/retention_scheduler.py +0 -186
  263. package/src/compliance/tests/__init__.py +0 -4
  264. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  265. package/src/compliance/tests/test_abac_engine.py +0 -124
  266. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  267. package/src/compliance/tests/test_audit_db.py +0 -123
  268. package/src/compliance/tests/test_audit_logger.py +0 -98
  269. package/src/compliance/tests/test_mcp_audit.py +0 -128
  270. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  271. package/src/compliance/tests/test_retention_manager.py +0 -131
  272. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  273. package/src/compression/__init__.py +0 -25
  274. package/src/compression/cli.py +0 -150
  275. package/src/compression/cold_storage.py +0 -217
  276. package/src/compression/config.py +0 -72
  277. package/src/compression/orchestrator.py +0 -133
  278. package/src/compression/tier2_compressor.py +0 -228
  279. package/src/compression/tier3_compressor.py +0 -153
  280. package/src/compression/tier_classifier.py +0 -148
  281. package/src/db_connection_manager.py +0 -536
  282. package/src/embedding_engine.py +0 -63
  283. package/src/embeddings/__init__.py +0 -47
  284. package/src/embeddings/cache.py +0 -70
  285. package/src/embeddings/cli.py +0 -113
  286. package/src/embeddings/constants.py +0 -47
  287. package/src/embeddings/database.py +0 -91
  288. package/src/embeddings/engine.py +0 -247
  289. package/src/embeddings/model_loader.py +0 -145
  290. package/src/event_bus.py +0 -562
  291. package/src/graph/__init__.py +0 -36
  292. package/src/graph/build_helpers.py +0 -74
  293. package/src/graph/cli.py +0 -87
  294. package/src/graph/cluster_builder.py +0 -188
  295. package/src/graph/cluster_summary.py +0 -148
  296. package/src/graph/constants.py +0 -47
  297. package/src/graph/edge_builder.py +0 -162
  298. package/src/graph/entity_extractor.py +0 -95
  299. package/src/graph/graph_core.py +0 -226
  300. package/src/graph/graph_search.py +0 -231
  301. package/src/graph/hierarchical.py +0 -207
  302. package/src/graph/schema.py +0 -99
  303. package/src/graph_engine.py +0 -52
  304. package/src/hnsw_index.py +0 -628
  305. package/src/hybrid_search.py +0 -46
  306. package/src/learning/__init__.py +0 -217
  307. package/src/learning/adaptive_ranker.py +0 -682
  308. package/src/learning/bootstrap/__init__.py +0 -69
  309. package/src/learning/bootstrap/constants.py +0 -93
  310. package/src/learning/bootstrap/db_queries.py +0 -316
  311. package/src/learning/bootstrap/sampling.py +0 -82
  312. package/src/learning/bootstrap/text_utils.py +0 -71
  313. package/src/learning/cross_project_aggregator.py +0 -857
  314. package/src/learning/db/__init__.py +0 -40
  315. package/src/learning/db/constants.py +0 -44
  316. package/src/learning/db/schema.py +0 -279
  317. package/src/learning/engagement_tracker.py +0 -628
  318. package/src/learning/feature_extractor.py +0 -708
  319. package/src/learning/feedback_collector.py +0 -806
  320. package/src/learning/learning_db.py +0 -915
  321. package/src/learning/project_context_manager.py +0 -572
  322. package/src/learning/ranking/__init__.py +0 -33
  323. package/src/learning/ranking/constants.py +0 -84
  324. package/src/learning/ranking/helpers.py +0 -278
  325. package/src/learning/source_quality_scorer.py +0 -676
  326. package/src/learning/synthetic_bootstrap.py +0 -755
  327. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  328. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  329. package/src/learning/tests/test_aggregator.py +0 -306
  330. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  331. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  332. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  333. package/src/learning/tests/test_feedback_collector.py +0 -294
  334. package/src/learning/tests/test_learning_db.py +0 -602
  335. package/src/learning/tests/test_learning_db_v28.py +0 -110
  336. package/src/learning/tests/test_learning_init_v28.py +0 -48
  337. package/src/learning/tests/test_outcome_signals.py +0 -48
  338. package/src/learning/tests/test_project_context.py +0 -292
  339. package/src/learning/tests/test_schema_migration.py +0 -319
  340. package/src/learning/tests/test_signal_inference.py +0 -397
  341. package/src/learning/tests/test_source_quality.py +0 -351
  342. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  343. package/src/learning/tests/test_workflow_miner.py +0 -318
  344. package/src/learning/workflow_pattern_miner.py +0 -655
  345. package/src/lifecycle/__init__.py +0 -54
  346. package/src/lifecycle/bounded_growth.py +0 -239
  347. package/src/lifecycle/compaction_engine.py +0 -226
  348. package/src/lifecycle/lifecycle_engine.py +0 -355
  349. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  350. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  351. package/src/lifecycle/retention_policy.py +0 -285
  352. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  353. package/src/lifecycle/tests/test_compaction.py +0 -179
  354. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  355. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  356. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  357. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  358. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  359. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  360. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  361. package/src/mcp_tools_v28.py +0 -281
  362. package/src/memory/__init__.py +0 -36
  363. package/src/memory/cli.py +0 -205
  364. package/src/memory/constants.py +0 -39
  365. package/src/memory/helpers.py +0 -28
  366. package/src/memory/schema.py +0 -166
  367. package/src/memory-profiles.py +0 -595
  368. package/src/memory-reset.py +0 -491
  369. package/src/memory_compression.py +0 -989
  370. package/src/memory_store_v2.py +0 -1155
  371. package/src/migrate_v1_to_v2.py +0 -629
  372. package/src/pattern_learner.py +0 -34
  373. package/src/patterns/__init__.py +0 -24
  374. package/src/patterns/analyzers.py +0 -251
  375. package/src/patterns/learner.py +0 -271
  376. package/src/patterns/scoring.py +0 -171
  377. package/src/patterns/store.py +0 -225
  378. package/src/patterns/terminology.py +0 -140
  379. package/src/provenance_tracker.py +0 -312
  380. package/src/qualixar_attribution.py +0 -139
  381. package/src/qualixar_watermark.py +0 -78
  382. package/src/query_optimizer.py +0 -511
  383. package/src/rate_limiter.py +0 -83
  384. package/src/search/__init__.py +0 -20
  385. package/src/search/cli.py +0 -77
  386. package/src/search/constants.py +0 -26
  387. package/src/search/engine.py +0 -241
  388. package/src/search/fusion.py +0 -122
  389. package/src/search/index_loader.py +0 -114
  390. package/src/search/methods.py +0 -162
  391. package/src/search_engine_v2.py +0 -401
  392. package/src/setup_validator.py +0 -482
  393. package/src/subscription_manager.py +0 -391
  394. package/src/tree/__init__.py +0 -59
  395. package/src/tree/builder.py +0 -185
  396. package/src/tree/nodes.py +0 -202
  397. package/src/tree/queries.py +0 -257
  398. package/src/tree/schema.py +0 -80
  399. package/src/tree_manager.py +0 -19
  400. package/src/trust/__init__.py +0 -45
  401. package/src/trust/constants.py +0 -66
  402. package/src/trust/queries.py +0 -157
  403. package/src/trust/schema.py +0 -95
  404. package/src/trust/scorer.py +0 -299
  405. package/src/trust/signals.py +0 -95
  406. package/src/trust_scorer.py +0 -44
  407. package/ui/app.js +0 -1588
  408. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  409. package/ui/js/graph-cytoscape.js +0 -1168
  410. package/ui/js/graph-d3-backup.js +0 -32
  411. package/ui/js/graph.js +0 -32
  412. package/ui_server.py +0 -266
  413. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  414. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  415. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  416. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  417. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  418. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  419. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  420. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  421. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  422. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  423. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  424. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  425. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  426. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  427. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  428. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  429. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  430. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  431. /package/{completions → ide/completions}/slm.bash +0 -0
  432. /package/{completions → ide/completions}/slm.zsh +0 -0
  433. /package/{configs → ide/configs}/cody-commands.json +0 -0
  434. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
package/src/learning/adaptive_ranker.py +0 -682 (file removed in 3.0.0)
@@ -1,682 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- AdaptiveRanker — Three-phase adaptive re-ranking engine.
6
-
7
- This is the core ranking engine for v2.7 "Your AI Learns You". It sits
8
- between the existing search methods (FTS5 + TF-IDF + HNSW) and the final
9
- result list, re-ordering candidates based on learned user preferences.
10
-
11
- Three Phases (progressive adaptation):
12
-
13
- Phase 0 — Baseline (< 20 feedback signals):
14
- Pure v2.6 behavior. No re-ranking applied. Results returned as-is
15
- from the existing search pipeline. Zero risk of degradation.
16
-
17
- Phase 1 — Rule-Based (20-199 signals):
18
- Applies learned-pattern boosting to search results. Uses feature
19
- extraction to compute boost multipliers for tech match, project
20
- match, recency, and source quality. Deterministic and interpretable.
21
-
22
- Phase 2 — ML Model (200+ signals across 50+ unique queries):
23
- LightGBM LambdaRank re-ranker. Trained on real feedback data
24
- (and optionally bootstrapped from synthetic data). Produces ML
25
- scores that replace the original ranking order.
26
-
27
- Design Principles:
28
- - LightGBM is OPTIONAL. If not installed, falls back to rule-based.
29
- - Any exception in re-ranking falls back to original v2.6 results.
30
- - Model is loaded lazily and cached in memory.
31
- - Training is explicit (called by user or scheduled), never implicit.
32
- - Original scores are preserved as 'base_score' for diagnostics.
33
-
34
- Research Backing:
35
- - eKNOW 2025: BM25 -> re-ranker pipeline for personal collections
36
- - MACLA (arXiv:2512.18950): Bayesian confidence scoring
37
- - FCS LREC 2024: Cold-start mitigation via synthetic bootstrap
38
- """
39
-
40
- import logging
41
- import threading
42
- from datetime import datetime
43
- from pathlib import Path
44
- from typing import Any, Dict, List, Optional
45
-
46
- # LightGBM is OPTIONAL — graceful fallback to rule-based ranking
47
- try:
48
- import lightgbm as lgb
49
- HAS_LIGHTGBM = True
50
- except ImportError:
51
- lgb = None
52
- HAS_LIGHTGBM = False
53
-
54
- # NumPy is used for feature matrix construction (comes with sklearn)
55
- try:
56
- import numpy as np
57
- HAS_NUMPY = True
58
- except ImportError:
59
- np = None
60
- HAS_NUMPY = False
61
-
62
- from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
63
-
64
- logger = logging.getLogger("superlocalmemory.learning.adaptive_ranker")
65
-
66
- # Import constants and helpers from ranking subpackage
67
- from .ranking import (
68
- MODELS_DIR,
69
- MODEL_PATH,
70
- PHASE_THRESHOLDS,
71
- MIN_UNIQUE_QUERIES_FOR_ML,
72
- RULE_BOOST,
73
- TRAINING_PARAMS,
74
- calculate_rule_boost,
75
- prepare_training_data_internal,
76
- )
77
-
78
-
79
- class AdaptiveRanker:
80
- """
81
- Three-phase adaptive re-ranking engine.
82
-
83
- Usage (called by memory_store_v2.search or mcp_server recall):
84
- ranker = AdaptiveRanker()
85
- results = ranker.rerank(search_results, query, context={
86
- 'tech_preferences': {...},
87
- 'current_project': 'MyProject',
88
- 'source_scores': {...},
89
- 'workflow_phase': 'testing',
90
- })
91
-
92
- The caller wraps this in try/except — any exception here causes
93
- fallback to original v2.6 results. Zero risk of degradation.
94
- """
95
-
96
- PHASE_THRESHOLDS = PHASE_THRESHOLDS
97
- MODEL_PATH = MODEL_PATH
98
-
99
- def __init__(self, learning_db=None):
100
- """
101
- Initialize AdaptiveRanker.
102
-
103
- Args:
104
- learning_db: Optional LearningDB instance. If None, imports
105
- and creates one lazily.
106
- """
107
- self._learning_db = learning_db
108
- self._feature_extractor = FeatureExtractor()
109
- self._model = None # Loaded lazily on first ML rerank
110
- self._model_load_attempted = False
111
- self._lock = threading.Lock()
112
-
113
- # ========================================================================
114
- # LearningDB Access
115
- # ========================================================================
116
-
117
- def _get_learning_db(self):
118
- """Get or create the LearningDB instance."""
119
- if self._learning_db is None:
120
- try:
121
- from .learning_db import LearningDB
122
- self._learning_db = LearningDB()
123
- except Exception as e:
124
- logger.warning("Cannot access LearningDB: %s", e)
125
- return None
126
- return self._learning_db
127
-
128
- # ========================================================================
129
- # Phase Detection
130
- # ========================================================================
131
-
132
- def get_phase(self) -> str:
133
- """
134
- Determine the current ranking phase based on feedback data.
135
-
136
- Returns:
137
- 'baseline' — Not enough data for personalization
138
- 'rule_based' — Enough data for rule-based boosting
139
- 'ml_model' — Enough data for ML ranking (if LightGBM available)
140
- """
141
- ldb = self._get_learning_db()
142
- if ldb is None:
143
- return 'baseline'
144
-
145
- try:
146
- feedback_count = ldb.get_feedback_count()
147
- unique_queries = ldb.get_unique_query_count()
148
- except Exception as e:
149
- logger.warning("Failed to check feedback counts: %s", e)
150
- return 'baseline'
151
-
152
- # Phase 2: ML model — requires enough data AND LightGBM AND numpy
153
- if (
154
- feedback_count >= PHASE_THRESHOLDS['ml_model']
155
- and unique_queries >= MIN_UNIQUE_QUERIES_FOR_ML
156
- and HAS_LIGHTGBM
157
- and HAS_NUMPY
158
- ):
159
- return 'ml_model'
160
-
161
- # Phase 1: Rule-based — just needs minimum feedback
162
- if feedback_count >= PHASE_THRESHOLDS['rule_based']:
163
- return 'rule_based'
164
-
165
- # Phase 0: Not enough data yet
166
- return 'baseline'
167
-
168
- def get_phase_info(self) -> Dict[str, Any]:
169
- """
170
- Return detailed phase information for diagnostics.
171
-
172
- Returns:
173
- Dict with phase, feedback_count, unique_queries, thresholds,
174
- model_loaded, lightgbm_available.
175
- """
176
- ldb = self._get_learning_db()
177
- feedback_count = 0
178
- unique_queries = 0
179
-
180
- if ldb is not None:
181
- try:
182
- feedback_count = ldb.get_feedback_count()
183
- unique_queries = ldb.get_unique_query_count()
184
- except Exception:
185
- pass
186
-
187
- phase = self.get_phase()
188
-
189
- return {
190
- 'phase': phase,
191
- 'feedback_count': feedback_count,
192
- 'unique_queries': unique_queries,
193
- 'thresholds': dict(PHASE_THRESHOLDS),
194
- 'min_unique_queries_for_ml': MIN_UNIQUE_QUERIES_FOR_ML,
195
- 'model_loaded': self._model is not None,
196
- 'model_path_exists': MODEL_PATH.exists(),
197
- 'lightgbm_available': HAS_LIGHTGBM,
198
- 'numpy_available': HAS_NUMPY,
199
- }
200
-
201
- # ========================================================================
202
- # Main Re-ranking Entry Point
203
- # ========================================================================
204
-
205
- def rerank(
206
- self,
207
- results: List[dict],
208
- query: str,
209
- context: Optional[dict] = None,
210
- ) -> List[dict]:
211
- """
212
- Re-rank search results based on learned user preferences.
213
-
214
- This is the main entry point, called after the search pipeline
215
- produces initial results. It determines the current phase and
216
- routes to the appropriate ranking strategy.
217
-
218
- Args:
219
- results: List of memory dicts from search (with 'score' field).
220
- query: The recall query string.
221
- context: Optional context dict with:
222
- - tech_preferences: Dict[str, dict] — user's tech prefs
223
- - current_project: str — active project name
224
- - source_scores: Dict[str, float] — source quality scores
225
- - workflow_phase: str — current workflow phase
226
-
227
- Returns:
228
- Re-ranked list of memory dicts. Each memory gets:
229
- - 'base_score': Original score from search pipeline
230
- - 'ranking_phase': Which phase was used
231
- - 'score': Updated score (may differ from base_score)
232
-
233
- CRITICAL: The caller wraps this in try/except. Any exception
234
- causes fallback to original v2.6 results. This method must
235
- never corrupt the results list.
236
- """
237
- if not results:
238
- return results
239
-
240
- # Short-circuit: don't re-rank trivially small result sets
241
- if len(results) <= 1:
242
- for r in results:
243
- r['base_score'] = r.get('score', 0.0)
244
- r['ranking_phase'] = 'baseline'
245
- return results
246
-
247
- context = context or {}
248
-
249
- # Fetch signal stats for features [10-11] (v2.7.4)
250
- signal_stats = {}
251
- ldb = self._get_learning_db()
252
- if ldb:
253
- try:
254
- memory_ids = [r.get('id') for r in results if r.get('id')]
255
- if memory_ids:
256
- signal_stats = ldb.get_signal_stats_for_memories(memory_ids)
257
- except Exception:
258
- pass # Signal stats failure is not critical
259
-
260
- # Set up feature extraction context (once per query)
261
- self._feature_extractor.set_context(
262
- source_scores=context.get('source_scores'),
263
- tech_preferences=context.get('tech_preferences'),
264
- current_project=context.get('current_project'),
265
- workflow_phase=context.get('workflow_phase'),
266
- signal_stats=signal_stats,
267
- )
268
-
269
- # Determine phase and route
270
- phase = self.get_phase()
271
-
272
- if phase == 'baseline':
273
- # Phase 0: No re-ranking — preserve original order
274
- for r in results:
275
- r['base_score'] = r.get('score', 0.0)
276
- r['ranking_phase'] = 'baseline'
277
- return results
278
-
279
- elif phase == 'rule_based':
280
- return self._rerank_rule_based(results, query, context)
281
-
282
- elif phase == 'ml_model':
283
- # Try ML first, fall back to rule-based if model fails
284
- try:
285
- return self._rerank_ml(results, query, context)
286
- except Exception as e:
287
- logger.warning(
288
- "ML re-ranking failed, falling back to rule-based: %s", e
289
- )
290
- return self._rerank_rule_based(results, query, context)
291
-
292
- # Defensive: unknown phase -> no re-ranking
293
- for r in results:
294
- r['base_score'] = r.get('score', 0.0)
295
- r['ranking_phase'] = 'unknown'
296
- return results
297
-
298
- # ========================================================================
299
- # Phase 1: Rule-Based Re-ranking
300
- # ========================================================================
301
-
302
- def _rerank_rule_based(
303
- self,
304
- results: List[dict],
305
- query: str,
306
- context: dict,
307
- ) -> List[dict]:
308
- """
309
- Phase 1: Apply rule-based boosting using extracted features.
310
-
311
- Each result's score is multiplied by boost factors derived from
312
- feature values. The boosts are conservative — they nudge the
313
- ranking order without dramatically flipping results.
314
- """
315
- feature_vectors = self._feature_extractor.extract_batch(results, query)
316
-
317
- for i, result in enumerate(results):
318
- base_score = result.get('score', 0.0)
319
- result['base_score'] = base_score
320
- result['ranking_phase'] = 'rule_based'
321
-
322
- if i >= len(feature_vectors):
323
- continue
324
-
325
- features = feature_vectors[i]
326
- boost = calculate_rule_boost(features)
327
-
328
- # Apply boost to score
329
- result['score'] = base_score * boost
330
-
331
- # Re-sort by boosted score (highest first)
332
- results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
333
- return results
334
-
335
- # ========================================================================
336
- # Phase 2: ML Re-ranking (LightGBM)
337
- # ========================================================================
338
-
339
- def _rerank_ml(
340
- self,
341
- results: List[dict],
342
- query: str,
343
- context: dict,
344
- ) -> List[dict]:
345
- """
346
- Phase 2: LightGBM LambdaRank re-ranking.
347
-
348
- Extracts features, runs the trained model, and sorts by ML scores.
349
- Preserves original score as 'base_score' and adds 'ml_score'.
350
- """
351
- if not HAS_LIGHTGBM or not HAS_NUMPY:
352
- raise RuntimeError("LightGBM or NumPy not available for ML ranking")
353
-
354
- # Load model if not cached
355
- model = self._load_model()
356
- if model is None:
357
- raise RuntimeError("No trained ranking model available")
358
-
359
- # Extract features
360
- feature_vectors = self._feature_extractor.extract_batch(results, query)
361
- if not feature_vectors:
362
- raise ValueError("Feature extraction returned empty results")
363
-
364
- # Build feature matrix
365
- X = np.array(feature_vectors, dtype=np.float64)
366
-
367
- # Validate shape
368
- if X.shape[1] != NUM_FEATURES:
369
- raise ValueError(
370
- f"Feature dimension mismatch: expected {NUM_FEATURES}, "
371
- f"got {X.shape[1]}"
372
- )
373
-
374
- # Predict scores
375
- ml_scores = model.predict(X)
376
-
377
- # Annotate results with ML scores
378
- for i, result in enumerate(results):
379
- result['base_score'] = result.get('score', 0.0)
380
- result['ranking_phase'] = 'ml_model'
381
- if i < len(ml_scores):
382
- result['ml_score'] = float(ml_scores[i])
383
- result['score'] = float(ml_scores[i])
384
- else:
385
- result['ml_score'] = 0.0
386
-
387
- # Re-sort by ML score (highest first)
388
- results.sort(key=lambda x: x.get('score', 0.0), reverse=True)
389
- return results
390
-
391
- # ========================================================================
392
- # Model Management
393
- # ========================================================================
394
-
395
- def _load_model(self):
396
- """
397
- Load LightGBM model from disk (lazy, cached).
398
-
399
- Returns:
400
- lgb.Booster instance or None if unavailable.
401
- """
402
- # Return cached model if already loaded
403
- if self._model is not None:
404
- return self._model
405
-
406
- # Avoid repeated failed load attempts
407
- if self._model_load_attempted:
408
- return None
409
-
410
- with self._lock:
411
- # Double-check after acquiring lock
412
- if self._model is not None:
413
- return self._model
414
- if self._model_load_attempted:
415
- return None
416
-
417
- self._model_load_attempted = True
418
-
419
- if not HAS_LIGHTGBM:
420
- logger.info("LightGBM not installed — ML ranking unavailable")
421
- return None
422
-
423
- if not MODEL_PATH.exists():
424
- logger.info(
425
- "No ranking model at %s — ML ranking unavailable",
426
- MODEL_PATH
427
- )
428
- return None
429
-
430
- try:
431
- model = lgb.Booster(model_file=str(MODEL_PATH))
432
-
433
- # v2.7.4: Check for feature dimension mismatch (10→12 upgrade)
434
- model_num_features = model.num_feature()
435
- if model_num_features != NUM_FEATURES:
436
- logger.info(
437
- "Feature mismatch: model has %d features, expected %d. "
438
- "Triggering auto-retrain in background.",
439
- model_num_features, NUM_FEATURES,
440
- )
441
- # Delete old model and trigger re-bootstrap
442
- MODEL_PATH.unlink(missing_ok=True)
443
- self._trigger_retrain_background()
444
- return None
445
-
446
- self._model = model
447
- logger.info("Loaded ranking model from %s", MODEL_PATH)
448
- return self._model
449
- except Exception as e:
450
- logger.warning("Failed to load ranking model: %s", e)
451
- return None
452
-
453
- def _trigger_retrain_background(self):
454
- """Trigger model re-bootstrap in a background thread (v2.7.4)."""
455
- try:
456
- import threading
457
-
458
- def _retrain():
459
- try:
460
- from .synthetic_bootstrap import SyntheticBootstrapper
461
- bootstrapper = SyntheticBootstrapper()
462
- if bootstrapper.should_bootstrap():
463
- result = bootstrapper.bootstrap_model()
464
- if result:
465
- logger.info(
466
- "Auto-retrain complete with %d-feature model",
467
- NUM_FEATURES,
468
- )
469
- # Reload the new model
470
- with self._lock:
471
- self._model = None
472
- self._model_load_attempted = False
473
- except Exception as e:
474
- logger.warning("Auto-retrain failed: %s", e)
475
-
476
- thread = threading.Thread(target=_retrain, daemon=True)
477
- thread.start()
478
- except Exception:
479
- pass
480
-
481
- def reload_model(self):
482
- """
483
- Force reload of the ranking model from disk.
484
-
485
- Call this after training a new model to pick up the updated weights.
486
- """
487
- with self._lock:
488
- self._model = None
489
- self._model_load_attempted = False
490
- # Trigger fresh load
491
- return self._load_model()
492
-
493
- # ========================================================================
494
- # Model Training
495
- # ========================================================================
496
-
497
- def train(self, force: bool = False) -> Optional[Dict[str, Any]]:
498
- """
499
- Train or retrain the LightGBM ranking model.
500
-
501
- Uses continued training (init_model) if a model already exists,
502
- incorporating new feedback data incrementally.
503
-
504
- Args:
505
- force: If True, train even if below ML threshold.
506
- Useful for synthetic bootstrap training.
507
-
508
- Returns:
509
- Training metadata dict, or None if training not possible.
510
- Metadata includes: model_version, training_samples, ndcg_at_10,
511
- model_path, created_at.
512
- """
513
- if not HAS_LIGHTGBM or not HAS_NUMPY:
514
- logger.warning(
515
- "Cannot train: LightGBM=%s, NumPy=%s",
516
- HAS_LIGHTGBM, HAS_NUMPY
517
- )
518
- return None
519
-
520
- ldb = self._get_learning_db()
521
- if ldb is None:
522
- logger.warning("Cannot train: LearningDB unavailable")
523
- return None
524
-
525
- # Check if we have enough data (unless forced)
526
- if not force:
527
- feedback_count = ldb.get_feedback_count()
528
- unique_queries = ldb.get_unique_query_count()
529
- if (
530
- feedback_count < PHASE_THRESHOLDS['ml_model']
531
- or unique_queries < MIN_UNIQUE_QUERIES_FOR_ML
532
- ):
533
- logger.info(
534
- "Insufficient data for training: %d feedback / %d queries "
535
- "(need %d / %d)",
536
- feedback_count, unique_queries,
537
- PHASE_THRESHOLDS['ml_model'], MIN_UNIQUE_QUERIES_FOR_ML,
538
- )
539
- return None
540
-
541
- # Prepare training data
542
- training_data = self._prepare_training_data()
543
- if training_data is None:
544
- logger.warning("No usable training data available")
545
- return None
546
-
547
- X, y, groups = training_data
548
- total_samples = X.shape[0]
549
-
550
- if total_samples < 10:
551
- logger.warning("Too few training samples: %d", total_samples)
552
- return None
553
-
554
- logger.info(
555
- "Training ranking model: %d samples, %d groups",
556
- total_samples, len(groups)
557
- )
558
-
559
- # Create LightGBM dataset
560
- train_dataset = lgb.Dataset(
561
- X, label=y, group=groups,
562
- feature_name=list(FEATURE_NAMES),
563
- free_raw_data=False,
564
- )
565
-
566
- # Training parameters
567
- params = dict(TRAINING_PARAMS)
568
- n_estimators = params.pop('n_estimators', 50)
569
-
570
- # Check for existing model (continued training)
571
- init_model = None
572
- if MODEL_PATH.exists():
573
- try:
574
- init_model = lgb.Booster(model_file=str(MODEL_PATH))
575
- logger.info("Continuing training from existing model")
576
- except Exception:
577
- logger.info("Starting fresh training (existing model unreadable)")
578
- init_model = None
579
-
580
- # Train
581
- try:
582
- booster = lgb.train(
583
- params,
584
- train_dataset,
585
- num_boost_round=n_estimators,
586
- init_model=init_model,
587
- valid_sets=[train_dataset],
588
- valid_names=['train'],
589
- callbacks=[lgb.log_evaluation(period=0)], # Silent training
590
- )
591
- except Exception as e:
592
- logger.error("LightGBM training failed: %s", e)
593
- return None
594
-
595
- # Save model
596
- MODELS_DIR.mkdir(parents=True, exist_ok=True)
597
- try:
598
- booster.save_model(str(MODEL_PATH))
599
- logger.info("Ranking model saved to %s", MODEL_PATH)
600
- except Exception as e:
601
- logger.error("Failed to save ranking model: %s", e)
602
- return None
603
-
604
- # Extract NDCG@10 from training evaluation (if available)
605
- ndcg_at_10 = None
606
- try:
607
- eval_results = booster.eval_train(lgb.Dataset(X, label=y, group=groups))
608
- for name, _dataset_name, value, _is_higher_better in eval_results:
609
- if 'ndcg@10' in name:
610
- ndcg_at_10 = value
611
- break
612
- except Exception:
613
- pass
614
-
615
- # Record metadata in learning_db
616
- model_version = datetime.now().strftime("v%Y%m%d_%H%M%S")
617
- try:
618
- ldb.record_model_training(
619
- model_version=model_version,
620
- training_samples=total_samples,
621
- real_samples=total_samples,
622
- synthetic_samples=0,
623
- ndcg_at_10=ndcg_at_10,
624
- model_path=str(MODEL_PATH),
625
- )
626
- except Exception as e:
627
- logger.warning("Failed to record training metadata: %s", e)
628
-
629
- # Reload model into cache
630
- self.reload_model()
631
-
632
- metadata = {
633
- 'model_version': model_version,
634
- 'training_samples': total_samples,
635
- 'query_groups': len(groups),
636
- 'n_estimators': n_estimators,
637
- 'ndcg_at_10': ndcg_at_10,
638
- 'model_path': str(MODEL_PATH),
639
- 'continued_from': init_model is not None,
640
- 'created_at': datetime.now().isoformat(),
641
- }
642
- logger.info("Training complete: %s", metadata)
643
- return metadata
644
-
645
- def _prepare_training_data(self) -> Optional[tuple]:
646
- """
647
- Prepare training data from feedback records.
648
-
649
- For each unique query (grouped by query_hash):
650
- - Fetch all feedback entries for that query
651
- - Look up the corresponding memory from memory.db
652
- - Extract features for each memory
653
- - Use signal_value as the relevance label
654
-
655
- Returns:
656
- Tuple of (X, y, groups) for LGBMRanker, or None if insufficient.
657
- X: numpy array (n_samples, NUM_FEATURES)
658
- y: numpy array (n_samples,) — relevance labels
659
- groups: list of ints — samples per query group
660
- """
661
- ldb = self._get_learning_db()
662
- if ldb is None:
663
- return None
664
-
665
- feedback = ldb.get_feedback_for_training()
666
- if not feedback:
667
- return None
668
-
669
- return prepare_training_data_internal(feedback, self._feature_extractor)
670
-
671
-
672
- # ============================================================================
673
- # Module-level convenience
674
- # ============================================================================
675
-
676
- def get_phase() -> str:
677
- """Quick check of current ranking phase (creates temporary ranker)."""
678
- try:
679
- ranker = AdaptiveRanker()
680
- return ranker.get_phase()
681
- except Exception:
682
- return 'baseline'