superlocalmemory 2.8.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434)
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +9 -1
  3. package/NOTICE +63 -0
  4. package/README.md +165 -480
  5. package/bin/slm +17 -449
  6. package/bin/slm-npm +2 -2
  7. package/bin/slm.bat +4 -2
  8. package/conftest.py +5 -0
  9. package/docs/api-reference.md +284 -0
  10. package/docs/architecture.md +149 -0
  11. package/docs/auto-memory.md +150 -0
  12. package/docs/cli-reference.md +276 -0
  13. package/docs/compliance.md +191 -0
  14. package/docs/configuration.md +182 -0
  15. package/docs/getting-started.md +102 -0
  16. package/docs/ide-setup.md +261 -0
  17. package/docs/mcp-tools.md +220 -0
  18. package/docs/migration-from-v2.md +170 -0
  19. package/docs/profiles.md +173 -0
  20. package/docs/troubleshooting.md +310 -0
  21. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  22. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  23. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  24. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  25. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  26. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  27. package/ide/configs/cursor-mcp.json +15 -0
  28. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  29. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  30. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  31. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  32. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  33. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  34. package/{configs → ide/configs}/zed-mcp.json +2 -2
  35. package/{hooks → ide/hooks}/context-hook.js +9 -20
  36. package/ide/hooks/memory-list-skill.js +70 -0
  37. package/ide/hooks/memory-profile-skill.js +101 -0
  38. package/ide/hooks/memory-recall-skill.js +62 -0
  39. package/ide/hooks/memory-remember-skill.js +68 -0
  40. package/ide/hooks/memory-reset-skill.js +160 -0
  41. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  42. package/ide/integrations/langchain/README.md +106 -0
  43. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  44. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  45. package/ide/integrations/langchain/pyproject.toml +38 -0
  46. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  47. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  48. package/ide/integrations/langchain/tests/test_security.py +117 -0
  49. package/ide/integrations/llamaindex/README.md +81 -0
  50. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  51. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  52. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  53. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  54. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  55. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  56. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  57. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  58. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  59. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  60. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  61. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  62. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  63. package/package.json +13 -22
  64. package/pyproject.toml +85 -0
  65. package/scripts/build-dmg.sh +417 -0
  66. package/scripts/install-skills.ps1 +334 -0
  67. package/{install.ps1 → scripts/install.ps1} +36 -4
  68. package/{install.sh → scripts/install.sh} +14 -13
  69. package/scripts/postinstall.js +2 -2
  70. package/scripts/start-dashboard.ps1 +52 -0
  71. package/scripts/start-dashboard.sh +41 -0
  72. package/scripts/sync-wiki.ps1 +127 -0
  73. package/scripts/sync-wiki.sh +82 -0
  74. package/scripts/test-dmg.sh +161 -0
  75. package/scripts/test-npm-package.ps1 +252 -0
  76. package/scripts/test-npm-package.sh +207 -0
  77. package/scripts/verify-install.ps1 +294 -0
  78. package/scripts/verify-install.sh +266 -0
  79. package/src/superlocalmemory/__init__.py +0 -0
  80. package/src/superlocalmemory/attribution/__init__.py +9 -0
  81. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  82. package/src/superlocalmemory/attribution/signer.py +153 -0
  83. package/src/superlocalmemory/attribution/watermark.py +189 -0
  84. package/src/superlocalmemory/cli/__init__.py +5 -0
  85. package/src/superlocalmemory/cli/commands.py +245 -0
  86. package/src/superlocalmemory/cli/main.py +89 -0
  87. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  88. package/src/superlocalmemory/cli/post_install.py +99 -0
  89. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  90. package/src/superlocalmemory/compliance/__init__.py +0 -0
  91. package/src/superlocalmemory/compliance/abac.py +204 -0
  92. package/src/superlocalmemory/compliance/audit.py +314 -0
  93. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  94. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  95. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  96. package/src/superlocalmemory/compliance/retention.py +232 -0
  97. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  98. package/src/superlocalmemory/core/__init__.py +0 -0
  99. package/src/superlocalmemory/core/config.py +391 -0
  100. package/src/superlocalmemory/core/embeddings.py +293 -0
  101. package/src/superlocalmemory/core/engine.py +701 -0
  102. package/src/superlocalmemory/core/hooks.py +65 -0
  103. package/src/superlocalmemory/core/maintenance.py +172 -0
  104. package/src/superlocalmemory/core/modes.py +140 -0
  105. package/src/superlocalmemory/core/profiles.py +234 -0
  106. package/src/superlocalmemory/core/registry.py +117 -0
  107. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  108. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  109. package/src/superlocalmemory/encoding/__init__.py +0 -0
  110. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  111. package/src/superlocalmemory/encoding/emotional.py +125 -0
  112. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  113. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  114. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  115. package/src/superlocalmemory/encoding/foresight.py +91 -0
  116. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  117. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  118. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  119. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  120. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  121. package/src/superlocalmemory/encoding/type_router.py +235 -0
  122. package/src/superlocalmemory/hooks/__init__.py +3 -0
  123. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  124. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  125. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  126. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  127. package/src/superlocalmemory/infra/__init__.py +3 -0
  128. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  129. package/src/superlocalmemory/infra/backup.py +317 -0
  130. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  131. package/src/superlocalmemory/infra/event_bus.py +381 -0
  132. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  133. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  134. package/src/superlocalmemory/learning/__init__.py +0 -0
  135. package/src/superlocalmemory/learning/adaptive.py +172 -0
  136. package/src/superlocalmemory/learning/behavioral.py +490 -0
  137. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  138. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  139. package/src/superlocalmemory/learning/cross_project.py +399 -0
  140. package/src/superlocalmemory/learning/database.py +376 -0
  141. package/src/superlocalmemory/learning/engagement.py +323 -0
  142. package/src/superlocalmemory/learning/features.py +138 -0
  143. package/src/superlocalmemory/learning/feedback.py +316 -0
  144. package/src/superlocalmemory/learning/outcomes.py +255 -0
  145. package/src/superlocalmemory/learning/project_context.py +366 -0
  146. package/src/superlocalmemory/learning/ranker.py +155 -0
  147. package/src/superlocalmemory/learning/source_quality.py +303 -0
  148. package/src/superlocalmemory/learning/workflows.py +309 -0
  149. package/src/superlocalmemory/llm/__init__.py +0 -0
  150. package/src/superlocalmemory/llm/backbone.py +316 -0
  151. package/src/superlocalmemory/math/__init__.py +0 -0
  152. package/src/superlocalmemory/math/fisher.py +356 -0
  153. package/src/superlocalmemory/math/langevin.py +398 -0
  154. package/src/superlocalmemory/math/sheaf.py +257 -0
  155. package/src/superlocalmemory/mcp/__init__.py +0 -0
  156. package/src/superlocalmemory/mcp/resources.py +245 -0
  157. package/src/superlocalmemory/mcp/server.py +61 -0
  158. package/src/superlocalmemory/mcp/tools.py +18 -0
  159. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  160. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  161. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  162. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  163. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  164. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  165. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  166. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  167. package/src/superlocalmemory/retrieval/engine.py +390 -0
  168. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  169. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  170. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  171. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  172. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  173. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  174. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  175. package/src/superlocalmemory/server/__init__.py +1 -0
  176. package/src/superlocalmemory/server/api.py +248 -0
  177. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  178. package/src/superlocalmemory/server/routes/agents.py +107 -0
  179. package/src/superlocalmemory/server/routes/backup.py +91 -0
  180. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  181. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  182. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  183. package/src/superlocalmemory/server/routes/events.py +183 -0
  184. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  185. package/src/superlocalmemory/server/routes/learning.py +273 -0
  186. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  187. package/src/superlocalmemory/server/routes/memories.py +399 -0
  188. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  189. package/src/superlocalmemory/server/routes/stats.py +346 -0
  190. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  191. package/src/superlocalmemory/server/routes/ws.py +82 -0
  192. package/src/superlocalmemory/server/security_middleware.py +57 -0
  193. package/src/superlocalmemory/server/ui.py +245 -0
  194. package/src/superlocalmemory/storage/__init__.py +0 -0
  195. package/src/superlocalmemory/storage/access_control.py +182 -0
  196. package/src/superlocalmemory/storage/database.py +594 -0
  197. package/src/superlocalmemory/storage/migrations.py +303 -0
  198. package/src/superlocalmemory/storage/models.py +406 -0
  199. package/src/superlocalmemory/storage/schema.py +726 -0
  200. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  201. package/src/superlocalmemory/trust/__init__.py +0 -0
  202. package/src/superlocalmemory/trust/gate.py +130 -0
  203. package/src/superlocalmemory/trust/provenance.py +124 -0
  204. package/src/superlocalmemory/trust/scorer.py +347 -0
  205. package/src/superlocalmemory/trust/signals.py +153 -0
  206. package/ui/index.html +278 -5
  207. package/ui/js/auto-settings.js +70 -0
  208. package/ui/js/dashboard.js +90 -0
  209. package/ui/js/fact-detail.js +92 -0
  210. package/ui/js/feedback.js +2 -2
  211. package/ui/js/ide-status.js +102 -0
  212. package/ui/js/math-health.js +98 -0
  213. package/ui/js/recall-lab.js +127 -0
  214. package/ui/js/settings.js +2 -2
  215. package/ui/js/trust-dashboard.js +73 -0
  216. package/api_server.py +0 -724
  217. package/bin/aider-smart +0 -72
  218. package/bin/superlocalmemoryv2-learning +0 -4
  219. package/bin/superlocalmemoryv2-list +0 -3
  220. package/bin/superlocalmemoryv2-patterns +0 -4
  221. package/bin/superlocalmemoryv2-profile +0 -3
  222. package/bin/superlocalmemoryv2-recall +0 -3
  223. package/bin/superlocalmemoryv2-remember +0 -3
  224. package/bin/superlocalmemoryv2-reset +0 -3
  225. package/bin/superlocalmemoryv2-status +0 -3
  226. package/configs/chatgpt-desktop-mcp.json +0 -16
  227. package/configs/cursor-mcp.json +0 -15
  228. package/docs/SECURITY-QUICK-REFERENCE.md +0 -214
  229. package/hooks/memory-list-skill.js +0 -139
  230. package/hooks/memory-profile-skill.js +0 -273
  231. package/hooks/memory-recall-skill.js +0 -114
  232. package/hooks/memory-remember-skill.js +0 -127
  233. package/hooks/memory-reset-skill.js +0 -274
  234. package/mcp_server.py +0 -1800
  235. package/requirements-core.txt +0 -22
  236. package/requirements-learning.txt +0 -12
  237. package/requirements.txt +0 -12
  238. package/src/agent_registry.py +0 -411
  239. package/src/auth_middleware.py +0 -61
  240. package/src/auto_backup.py +0 -459
  241. package/src/behavioral/__init__.py +0 -49
  242. package/src/behavioral/behavioral_listener.py +0 -203
  243. package/src/behavioral/behavioral_patterns.py +0 -275
  244. package/src/behavioral/cross_project_transfer.py +0 -206
  245. package/src/behavioral/outcome_inference.py +0 -194
  246. package/src/behavioral/outcome_tracker.py +0 -193
  247. package/src/behavioral/tests/__init__.py +0 -4
  248. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  249. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  250. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  251. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  252. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  253. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  254. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  255. package/src/cache_manager.py +0 -518
  256. package/src/compliance/__init__.py +0 -48
  257. package/src/compliance/abac_engine.py +0 -149
  258. package/src/compliance/abac_middleware.py +0 -116
  259. package/src/compliance/audit_db.py +0 -215
  260. package/src/compliance/audit_logger.py +0 -148
  261. package/src/compliance/retention_manager.py +0 -289
  262. package/src/compliance/retention_scheduler.py +0 -186
  263. package/src/compliance/tests/__init__.py +0 -4
  264. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  265. package/src/compliance/tests/test_abac_engine.py +0 -124
  266. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  267. package/src/compliance/tests/test_audit_db.py +0 -123
  268. package/src/compliance/tests/test_audit_logger.py +0 -98
  269. package/src/compliance/tests/test_mcp_audit.py +0 -128
  270. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  271. package/src/compliance/tests/test_retention_manager.py +0 -131
  272. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  273. package/src/compression/__init__.py +0 -25
  274. package/src/compression/cli.py +0 -150
  275. package/src/compression/cold_storage.py +0 -217
  276. package/src/compression/config.py +0 -72
  277. package/src/compression/orchestrator.py +0 -133
  278. package/src/compression/tier2_compressor.py +0 -228
  279. package/src/compression/tier3_compressor.py +0 -153
  280. package/src/compression/tier_classifier.py +0 -148
  281. package/src/db_connection_manager.py +0 -536
  282. package/src/embedding_engine.py +0 -63
  283. package/src/embeddings/__init__.py +0 -47
  284. package/src/embeddings/cache.py +0 -70
  285. package/src/embeddings/cli.py +0 -113
  286. package/src/embeddings/constants.py +0 -47
  287. package/src/embeddings/database.py +0 -91
  288. package/src/embeddings/engine.py +0 -247
  289. package/src/embeddings/model_loader.py +0 -145
  290. package/src/event_bus.py +0 -562
  291. package/src/graph/__init__.py +0 -36
  292. package/src/graph/build_helpers.py +0 -74
  293. package/src/graph/cli.py +0 -87
  294. package/src/graph/cluster_builder.py +0 -188
  295. package/src/graph/cluster_summary.py +0 -148
  296. package/src/graph/constants.py +0 -47
  297. package/src/graph/edge_builder.py +0 -162
  298. package/src/graph/entity_extractor.py +0 -95
  299. package/src/graph/graph_core.py +0 -226
  300. package/src/graph/graph_search.py +0 -231
  301. package/src/graph/hierarchical.py +0 -207
  302. package/src/graph/schema.py +0 -99
  303. package/src/graph_engine.py +0 -52
  304. package/src/hnsw_index.py +0 -628
  305. package/src/hybrid_search.py +0 -46
  306. package/src/learning/__init__.py +0 -217
  307. package/src/learning/adaptive_ranker.py +0 -682
  308. package/src/learning/bootstrap/__init__.py +0 -69
  309. package/src/learning/bootstrap/constants.py +0 -93
  310. package/src/learning/bootstrap/db_queries.py +0 -316
  311. package/src/learning/bootstrap/sampling.py +0 -82
  312. package/src/learning/bootstrap/text_utils.py +0 -71
  313. package/src/learning/cross_project_aggregator.py +0 -857
  314. package/src/learning/db/__init__.py +0 -40
  315. package/src/learning/db/constants.py +0 -44
  316. package/src/learning/db/schema.py +0 -279
  317. package/src/learning/engagement_tracker.py +0 -628
  318. package/src/learning/feature_extractor.py +0 -708
  319. package/src/learning/feedback_collector.py +0 -806
  320. package/src/learning/learning_db.py +0 -915
  321. package/src/learning/project_context_manager.py +0 -572
  322. package/src/learning/ranking/__init__.py +0 -33
  323. package/src/learning/ranking/constants.py +0 -84
  324. package/src/learning/ranking/helpers.py +0 -278
  325. package/src/learning/source_quality_scorer.py +0 -676
  326. package/src/learning/synthetic_bootstrap.py +0 -755
  327. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  328. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  329. package/src/learning/tests/test_aggregator.py +0 -306
  330. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  331. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  332. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  333. package/src/learning/tests/test_feedback_collector.py +0 -294
  334. package/src/learning/tests/test_learning_db.py +0 -602
  335. package/src/learning/tests/test_learning_db_v28.py +0 -110
  336. package/src/learning/tests/test_learning_init_v28.py +0 -48
  337. package/src/learning/tests/test_outcome_signals.py +0 -48
  338. package/src/learning/tests/test_project_context.py +0 -292
  339. package/src/learning/tests/test_schema_migration.py +0 -319
  340. package/src/learning/tests/test_signal_inference.py +0 -397
  341. package/src/learning/tests/test_source_quality.py +0 -351
  342. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  343. package/src/learning/tests/test_workflow_miner.py +0 -318
  344. package/src/learning/workflow_pattern_miner.py +0 -655
  345. package/src/lifecycle/__init__.py +0 -54
  346. package/src/lifecycle/bounded_growth.py +0 -239
  347. package/src/lifecycle/compaction_engine.py +0 -226
  348. package/src/lifecycle/lifecycle_engine.py +0 -355
  349. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  350. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  351. package/src/lifecycle/retention_policy.py +0 -285
  352. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  353. package/src/lifecycle/tests/test_compaction.py +0 -179
  354. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  355. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  356. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  357. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  358. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  359. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  360. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  361. package/src/mcp_tools_v28.py +0 -281
  362. package/src/memory/__init__.py +0 -36
  363. package/src/memory/cli.py +0 -205
  364. package/src/memory/constants.py +0 -39
  365. package/src/memory/helpers.py +0 -28
  366. package/src/memory/schema.py +0 -166
  367. package/src/memory-profiles.py +0 -595
  368. package/src/memory-reset.py +0 -491
  369. package/src/memory_compression.py +0 -989
  370. package/src/memory_store_v2.py +0 -1155
  371. package/src/migrate_v1_to_v2.py +0 -629
  372. package/src/pattern_learner.py +0 -34
  373. package/src/patterns/__init__.py +0 -24
  374. package/src/patterns/analyzers.py +0 -251
  375. package/src/patterns/learner.py +0 -271
  376. package/src/patterns/scoring.py +0 -171
  377. package/src/patterns/store.py +0 -225
  378. package/src/patterns/terminology.py +0 -140
  379. package/src/provenance_tracker.py +0 -312
  380. package/src/qualixar_attribution.py +0 -139
  381. package/src/qualixar_watermark.py +0 -78
  382. package/src/query_optimizer.py +0 -511
  383. package/src/rate_limiter.py +0 -83
  384. package/src/search/__init__.py +0 -20
  385. package/src/search/cli.py +0 -77
  386. package/src/search/constants.py +0 -26
  387. package/src/search/engine.py +0 -241
  388. package/src/search/fusion.py +0 -122
  389. package/src/search/index_loader.py +0 -114
  390. package/src/search/methods.py +0 -162
  391. package/src/search_engine_v2.py +0 -401
  392. package/src/setup_validator.py +0 -482
  393. package/src/subscription_manager.py +0 -391
  394. package/src/tree/__init__.py +0 -59
  395. package/src/tree/builder.py +0 -185
  396. package/src/tree/nodes.py +0 -202
  397. package/src/tree/queries.py +0 -257
  398. package/src/tree/schema.py +0 -80
  399. package/src/tree_manager.py +0 -19
  400. package/src/trust/__init__.py +0 -45
  401. package/src/trust/constants.py +0 -66
  402. package/src/trust/queries.py +0 -157
  403. package/src/trust/schema.py +0 -95
  404. package/src/trust/scorer.py +0 -299
  405. package/src/trust/signals.py +0 -95
  406. package/src/trust_scorer.py +0 -44
  407. package/ui/app.js +0 -1588
  408. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  409. package/ui/js/graph-cytoscape.js +0 -1168
  410. package/ui/js/graph-d3-backup.js +0 -32
  411. package/ui/js/graph.js +0 -32
  412. package/ui_server.py +0 -266
  413. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  414. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  415. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  416. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  417. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  418. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  419. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  420. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  421. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  422. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  423. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  424. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  425. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  426. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  427. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  428. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  429. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  430. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  431. /package/{completions → ide/completions}/slm.bash +0 -0
  432. /package/{completions → ide/completions}/slm.zsh +0 -0
  433. /package/{configs → ide/configs}/cody-commands.json +0 -0
  434. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
@@ -1,755 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- SyntheticBootstrapper — Bootstrap ML model from existing data patterns.
6
-
7
- PROBLEM: LightGBM needs 200+ feedback signals across 50+ unique queries
8
- to activate ML ranking (Phase 2). A new user has zero feedback. Without
9
- bootstrap, users must endure ~200 recalls before getting personalization.
10
- That's weeks of usage with no benefit. Users abandon before reaching Phase 2.
11
-
12
- SOLUTION: Generate synthetic (query, memory, relevance_label) tuples from
13
- EXISTING data patterns in memory.db. These aren't real user feedback, but
14
- they encode reasonable assumptions:
15
- - Frequently accessed memories are probably relevant to their keywords
16
- - High-importance memories should rank higher for their topics
17
- - Learned patterns (from pattern_learner.py) encode real preferences
18
- - Recent memories should generally outrank older ones
19
-
20
- Four Strategies:
21
- 1. Access-based: Memories accessed 5+ times -> positive for their keywords
22
- 2. Importance-based: Importance >= 8 -> positive for their tags
23
- 3. Pattern-based: Learned identity_patterns -> positive for matching memories
24
- 4. Recency decay: For any synthetic query, recent memories rank higher
25
-
26
- The bootstrap model uses MORE aggressive regularization than the real model
27
- (fewer trees, smaller depth, higher reg_lambda) to prevent overfitting
28
- on synthetic data. Once real feedback accumulates, the model is retrained
29
- with continued learning (init_model), gradually replacing synthetic signal
30
- with real signal.
31
-
32
- Research Backing:
33
- - FCS LREC 2024: Cold-start mitigation via synthetic bootstrap
34
- - eKNOW 2025: BM25 -> re-ranker pipeline effectiveness
35
- """
36
-
37
- import hashlib
38
- import logging
39
- from datetime import datetime
40
- from pathlib import Path
41
- from typing import Any, Dict, List, Optional, Set
42
-
43
- # LightGBM is OPTIONAL — bootstrap only works when LightGBM is installed
44
- try:
45
- import lightgbm as lgb
46
- HAS_LIGHTGBM = True
47
- except ImportError:
48
- lgb = None
49
- HAS_LIGHTGBM = False
50
-
51
- try:
52
- import numpy as np
53
- HAS_NUMPY = True
54
- except ImportError:
55
- np = None
56
- HAS_NUMPY = False
57
-
58
- from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
59
- from .bootstrap import (
60
- MEMORY_DB_PATH,
61
- MODELS_DIR,
62
- MODEL_PATH,
63
- MIN_MEMORIES_FOR_BOOTSTRAP,
64
- BOOTSTRAP_CONFIG,
65
- BOOTSTRAP_PARAMS,
66
- extract_keywords,
67
- get_memory_count,
68
- get_memories_by_access,
69
- get_memories_by_importance,
70
- get_recent_memories,
71
- get_learned_patterns,
72
- search_memories,
73
- find_negative_memories,
74
- diverse_sample,
75
- count_sources,
76
- )
77
-
78
- logger = logging.getLogger("superlocalmemory.learning.synthetic_bootstrap")
79
-
80
-
81
- class SyntheticBootstrapper:
82
- """
83
- Generates synthetic training data and bootstraps the ML ranking model.
84
-
85
- Usage:
86
- bootstrapper = SyntheticBootstrapper()
87
- if bootstrapper.should_bootstrap():
88
- result = bootstrapper.bootstrap_model()
89
- if result:
90
- print(f"Bootstrapped with {result['training_samples']} samples")
91
-
92
- The bootstrapped model is saved to the same path as the real model.
93
- When real feedback accumulates, AdaptiveRanker.train() uses
94
- continued learning (init_model) to incrementally replace synthetic
95
- signal with real signal.
96
- """
97
-
98
- MIN_MEMORIES_FOR_BOOTSTRAP = MIN_MEMORIES_FOR_BOOTSTRAP
99
- BOOTSTRAP_CONFIG = BOOTSTRAP_CONFIG
100
-
101
- def __init__(
102
- self,
103
- memory_db_path: Optional[Path] = None,
104
- learning_db=None,
105
- ):
106
- """
107
- Initialize SyntheticBootstrapper.
108
-
109
- Args:
110
- memory_db_path: Path to memory.db (defaults to ~/.claude-memory/memory.db).
111
- learning_db: Optional LearningDB instance for recording metadata.
112
- """
113
- self._memory_db = Path(memory_db_path) if memory_db_path else MEMORY_DB_PATH
114
- self._learning_db = learning_db
115
- self._feature_extractor = FeatureExtractor()
116
-
117
- # ========================================================================
118
- # LearningDB Access
119
- # ========================================================================
120
-
121
- def _get_learning_db(self):
122
- """Get or create the LearningDB instance."""
123
- if self._learning_db is None:
124
- try:
125
- from .learning_db import LearningDB
126
- self._learning_db = LearningDB()
127
- except Exception as e:
128
- logger.warning("Cannot access LearningDB: %s", e)
129
- return None
130
- return self._learning_db
131
-
132
- # ========================================================================
133
- # Pre-flight Checks
134
- # ========================================================================
135
-
136
- def should_bootstrap(self) -> bool:
137
- """
138
- Check if synthetic bootstrap is needed and possible.
139
-
140
- Returns True if:
141
- 1. LightGBM + NumPy are available
142
- 2. No existing model file (or forced rebuild)
143
- 3. At least MIN_MEMORIES_FOR_BOOTSTRAP memories exist in memory.db
144
- """
145
- if not HAS_LIGHTGBM or not HAS_NUMPY:
146
- logger.debug("Bootstrap unavailable: LightGBM=%s, NumPy=%s",
147
- HAS_LIGHTGBM, HAS_NUMPY)
148
- return False
149
-
150
- if MODEL_PATH.exists():
151
- logger.debug("Model already exists at %s — skipping bootstrap",
152
- MODEL_PATH)
153
- return False
154
-
155
- memory_count = self._get_memory_count()
156
- if memory_count < MIN_MEMORIES_FOR_BOOTSTRAP:
157
- logger.debug(
158
- "Not enough memories for bootstrap: %d (need %d)",
159
- memory_count, MIN_MEMORIES_FOR_BOOTSTRAP
160
- )
161
- return False
162
-
163
- return True
164
-
165
def get_tier(self) -> Optional[str]:
    """
    Pick the bootstrap tier that matches the current memory count.

    Walks ``BOOTSTRAP_CONFIG`` in its iteration order and selects the first
    entry whose inclusive ``[min_memories, max_memories]`` range contains
    the value returned by ``_get_memory_count()``.

    Returns:
        The matching tier name (a key of ``BOOTSTRAP_CONFIG``), or None
        when the count falls outside every configured range.
    """
    count = self._get_memory_count()
    matching_tiers = (
        name
        for name, limits in BOOTSTRAP_CONFIG.items()
        if limits['min_memories'] <= count <= limits['max_memories']
    )
    # First match wins; None signals "no tier applies".
    return next(matching_tiers, None)
177
-
178
def _get_memory_count(self) -> int:
    """
    Count total memories in memory.db.

    Thin wrapper: passes ``self._memory_db`` to the module-level
    ``get_memory_count`` helper and returns its result unchanged.

    NOTE(review): the helper is defined outside this excerpt; how it
    queries the database and what it returns on failure should be
    confirmed there.
    """
    return get_memory_count(self._memory_db)
181
-
182
- # ========================================================================
183
- # Synthetic Data Generation
184
- # ========================================================================
185
-
186
def generate_synthetic_training_data(self) -> List[dict]:
    """
    Produce synthetic training records by running all four strategies.

    The strategies run in a fixed order (access, importance, patterns,
    recency) and their outputs are concatenated in that order. Each record
    is whatever ``_build_record`` returns: a dict with ``query``,
    ``query_hash``, ``memory_id``, ``label``, ``source`` and ``features``.
    Labels emitted by the strategies range from 0.0 (irrelevant) to 1.0
    (highly relevant).

    Returns:
        The combined list of record dicts. May be empty when no strategy
        yields anything.
    """
    # (log format, generator) pairs; order determines record order.
    strategies = (
        ("Strategy 1 (access): %d records", self._generate_access_based),
        ("Strategy 2 (importance): %d records", self._generate_importance_based),
        ("Strategy 3 (patterns): %d records", self._generate_pattern_based),
        ("Strategy 4 (recency): %d records", self._generate_recency_based),
    )

    combined: List[dict] = []
    for log_format, generate in strategies:
        batch = generate()
        combined.extend(batch)
        logger.info(log_format, len(batch))

    logger.info("Total synthetic records: %d", len(combined))
    return combined
226
-
227
- def _generate_access_based(self) -> List[dict]:
228
- """
229
- Strategy 1: Memories accessed 5+ times are relevant for their keywords.
230
-
231
- Logic: If a user keeps coming back to a memory via certain searches,
232
- the keywords in that memory are relevant queries for it.
233
- """
234
- records = []
235
- high_access_memories = self._get_memories_by_access(min_access=5)
236
-
237
- for memory in high_access_memories:
238
- keywords = self._extract_keywords(memory.get('content', ''))
239
- if not keywords:
240
- continue
241
-
242
- query = ' '.join(keywords)
243
-
244
- # Positive: This memory is relevant to its own keywords
245
- records.append(self._build_record(
246
- query=query,
247
- memory=memory,
248
- label=1.0,
249
- source='access_positive',
250
- ))
251
-
252
- # Find some non-matching memories as negatives
253
- negatives = self._find_negative_memories(
254
- memory, exclude_ids={memory['id']}, limit=2
255
- )
256
- for neg_memory in negatives:
257
- records.append(self._build_record(
258
- query=query,
259
- memory=neg_memory,
260
- label=0.0,
261
- source='access_negative',
262
- ))
263
-
264
- return records
265
-
266
- def _generate_importance_based(self) -> List[dict]:
267
- """
268
- Strategy 2: High-importance memories (>= 8) are positive for their tags.
269
-
270
- Logic: User explicitly rated these memories as important. Their tags
271
- represent topics the user cares about.
272
- """
273
- records = []
274
- important_memories = self._get_memories_by_importance(min_importance=8)
275
-
276
- for memory in important_memories:
277
- # Use tags as synthetic query, fall back to content keywords
278
- tags = memory.get('tags', '')
279
- if isinstance(tags, str):
280
- try:
281
- import json
282
- tags_list = json.loads(tags)
283
- except (ValueError, TypeError):
284
- tags_list = [t.strip() for t in tags.split(',') if t.strip()]
285
- elif isinstance(tags, list):
286
- tags_list = tags
287
- else:
288
- tags_list = []
289
-
290
- if tags_list:
291
- query = ' '.join(tags_list[:5])
292
- else:
293
- keywords = self._extract_keywords(memory.get('content', ''))
294
- query = ' '.join(keywords) if keywords else ''
295
-
296
- if not query:
297
- continue
298
-
299
- # Positive: High-importance memory matches its tags
300
- records.append(self._build_record(
301
- query=query,
302
- memory=memory,
303
- label=1.0,
304
- source='importance_positive',
305
- ))
306
-
307
- # Find some negatives
308
- negatives = self._find_negative_memories(
309
- memory, exclude_ids={memory['id']}, limit=2
310
- )
311
- for neg_memory in negatives:
312
- records.append(self._build_record(
313
- query=query,
314
- memory=neg_memory,
315
- label=0.0,
316
- source='importance_negative',
317
- ))
318
-
319
- return records
320
-
321
- def _generate_pattern_based(self) -> List[dict]:
322
- """
323
- Strategy 3: Use learned identity_patterns to create synthetic queries.
324
-
325
- Logic: Pattern learner has already identified user's tech preferences,
326
- coding style, etc. Use these as queries and find matching memories.
327
- """
328
- records = []
329
- patterns = self._get_learned_patterns(min_confidence=0.7)
330
-
331
- if not patterns:
332
- return records
333
-
334
- for pattern in patterns:
335
- # Build query from pattern key + value
336
- query_parts = []
337
- key = pattern.get('key', '')
338
- value = pattern.get('value', '')
339
- if key:
340
- query_parts.append(key)
341
- if value and value != key:
342
- query_parts.append(value)
343
-
344
- query = ' '.join(query_parts)
345
- if not query or len(query) < 3:
346
- continue
347
-
348
- # Search for memories matching this pattern
349
- matching = self._search_memories(query, limit=10)
350
-
351
- if len(matching) < 2:
352
- continue
353
-
354
- # Top results are positive, bottom results are weak negatives
355
- for i, memory in enumerate(matching):
356
- if i < 3:
357
- label = 1.0 # Top matches are relevant
358
- elif i < 6:
359
- label = 0.5 # Middle matches are weakly relevant
360
- else:
361
- label = 0.1 # Bottom matches are marginal
362
-
363
- records.append(self._build_record(
364
- query=query,
365
- memory=memory,
366
- label=label,
367
- source='pattern',
368
- ))
369
-
370
- return records
371
-
372
- def _generate_recency_based(self) -> List[dict]:
373
- """
374
- Strategy 4: Recency decay — for shared-topic queries, recent wins.
375
-
376
- Logic: For memories about the same topic, more recent memories
377
- should generally rank higher (fresher context, more current).
378
- Generates pairs where newer = positive, older = weak negative.
379
- """
380
- records = []
381
-
382
- # Get a sample of recent and old memories
383
- recent = self._get_recent_memories(limit=30)
384
- if len(recent) < 4:
385
- return records
386
-
387
- # Take pairs: for each recent memory's keywords, create a query
388
- # then the recent memory is positive and older memories are negative
389
- processed_queries: Set[str] = set()
390
-
391
- for memory in recent[:15]:
392
- keywords = self._extract_keywords(memory.get('content', ''))
393
- query = ' '.join(keywords) if keywords else ''
394
- if not query or query in processed_queries:
395
- continue
396
- processed_queries.add(query)
397
-
398
- # This recent memory is positive
399
- records.append(self._build_record(
400
- query=query,
401
- memory=memory,
402
- label=0.8, # Good but not perfect (it's synthetic)
403
- source='recency_positive',
404
- ))
405
-
406
- # Find older memories about similar topic
407
- similar_old = self._search_memories(query, limit=5)
408
- for old_mem in similar_old:
409
- if old_mem['id'] == memory['id']:
410
- continue
411
- # Older memories get lower label
412
- records.append(self._build_record(
413
- query=query,
414
- memory=old_mem,
415
- label=0.3,
416
- source='recency_negative',
417
- ))
418
-
419
- return records
420
-
421
- # ========================================================================
422
- # Record Building
423
- # ========================================================================
424
-
425
- def _build_record(
426
- self,
427
- query: str,
428
- memory: dict,
429
- label: float,
430
- source: str,
431
- ) -> dict:
432
- """
433
- Build a training record with features.
434
-
435
- For synthetic data, we use simplified context:
436
- - No tech preferences (unknown at bootstrap time)
437
- - No current project
438
- - No workflow phase
439
- Focus on measurable features: importance, recency, access_frequency.
440
- """
441
- # Set neutral context (no query-time info for synthetic data)
442
- # Context is already set externally or defaults to neutral
443
- features = self._feature_extractor.extract_features(memory, query)
444
-
445
- return {
446
- 'query': query,
447
- 'query_hash': hashlib.sha256(query.encode()).hexdigest()[:16],
448
- 'memory_id': memory.get('id', 0),
449
- 'label': label,
450
- 'source': source,
451
- 'features': features,
452
- }
453
-
454
- # ========================================================================
455
- # Model Training
456
- # ========================================================================
457
-
458
def bootstrap_model(self) -> Optional[Dict[str, Any]]:
    """
    Generate synthetic data and train the bootstrap ranking model.

    Steps:
        1. Check that LightGBM/NumPy are available and pick a tier.
        2. Generate synthetic records; trim them to the tier's
           ``target_samples`` with ``_diverse_sample`` when there are more.
        3. Group records by ``query_hash`` and drop groups with a single
           record (a ranking group needs at least two items).
        4. Build the feature matrix, label vector and group sizes.
        5. Train with ``BOOTSTRAP_PARAMS`` plus the tier's ``max_depth``
           and ``n_estimators``, recording training metrics as it goes.
        6. Save the model to ``MODEL_PATH``.
        7. Record the run in the learning DB (best-effort) and return
           the metadata.

    Returns:
        A metadata dict (``model_version``, ``tier``, sample and group
        counts, hyper-parameters, ``ndcg_at_10``, ``model_path``,
        ``source_breakdown``, ``created_at``), or None when bootstrap is not
        possible or training/saving failed. ``ndcg_at_10`` is None when the
        training run reported no metric whose name contains ``ndcg@10``.
    """
    if not HAS_LIGHTGBM or not HAS_NUMPY:
        logger.warning("Bootstrap requires LightGBM and NumPy")
        return None

    tier = self.get_tier()
    if tier is None:
        logger.info("Not enough memories for bootstrap")
        return None

    config = BOOTSTRAP_CONFIG[tier]
    logger.info(
        "Starting bootstrap (tier=%s, target=%d samples)",
        tier, config['target_samples']
    )

    # Set neutral context for feature extraction
    self._feature_extractor.set_context()

    # Generate synthetic data
    records = self.generate_synthetic_training_data()
    if not records:
        logger.warning("No synthetic records generated")
        return None

    # Trim to target sample count, keeping a diverse mix of sources
    if len(records) > config['target_samples']:
        records = self._diverse_sample(records, config['target_samples'])

    # Group by query_hash: the ranker learns orderings within a query
    query_groups: Dict[str, List[dict]] = {}
    for record in records:
        query_groups.setdefault(record['query_hash'], []).append(record)

    # A group with a single item carries no ranking signal
    query_groups = {
        qh: recs for qh, recs in query_groups.items()
        if len(recs) >= 2
    }

    if not query_groups:
        logger.warning("No valid query groups (need 2+ records per group)")
        return None

    # Build matrices; rows of one group must stay contiguous and in the
    # same order as the group-size list.
    all_features = []
    all_labels = []
    groups = []

    for group_records in query_groups.values():
        all_features.extend(rec['features'] for rec in group_records)
        all_labels.extend(rec['label'] for rec in group_records)
        groups.append(len(group_records))

    X = np.array(all_features, dtype=np.float64)
    y = np.array(all_labels, dtype=np.float64)
    total_samples = X.shape[0]

    if total_samples < 10:
        logger.warning("Too few samples after grouping: %d", total_samples)
        return None

    logger.info(
        "Training bootstrap model: %d samples, %d groups, tier=%s",
        total_samples, len(groups), tier
    )

    # Create LightGBM dataset
    train_dataset = lgb.Dataset(
        X, label=y, group=groups,
        feature_name=list(FEATURE_NAMES),
        free_raw_data=False,
    )

    # Use tiered n_estimators and max_depth
    params = dict(BOOTSTRAP_PARAMS)
    params['max_depth'] = config['max_depth']
    n_estimators = config['n_estimators']

    # Filled in by the record_evaluation callback:
    # {'train': {metric_name: [value per boosting round]}}
    evals_result: Dict[str, Dict[str, List[float]]] = {}

    # Train
    try:
        booster = lgb.train(
            params,
            train_dataset,
            num_boost_round=n_estimators,
            valid_sets=[train_dataset],
            valid_names=['train'],
            callbacks=[
                lgb.log_evaluation(period=0),  # Silent
                lgb.record_evaluation(evals_result),
            ],
        )
    except Exception as e:
        logger.error("Bootstrap training failed: %s", e)
        return None

    # Save model; creating the directory is part of the same failure path
    try:
        MODELS_DIR.mkdir(parents=True, exist_ok=True)
        booster.save_model(str(MODEL_PATH))
        logger.info("Bootstrap model saved to %s", MODEL_PATH)
    except Exception as e:
        logger.error("Failed to save bootstrap model: %s", e)
        return None

    # Extract NDCG@10 from the metrics recorded during training. The value
    # after the last boosting round is the score of the final model.
    # (Booster.eval_train() only accepts a custom ``feval``; handing it a
    # Dataset raises, which previously left this metric permanently None.)
    ndcg_at_10 = None
    for metric_name, history in evals_result.get('train', {}).items():
        if 'ndcg@10' in metric_name and history:
            ndcg_at_10 = history[-1]
            break
    if ndcg_at_10 is None:
        logger.debug("No ndcg@10 metric was recorded during bootstrap training")

    # Record metadata in learning_db (best-effort)
    model_version = f"bootstrap_{tier}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    ldb = self._get_learning_db()
    if ldb:
        try:
            ldb.record_model_training(
                model_version=model_version,
                training_samples=total_samples,
                synthetic_samples=total_samples,
                real_samples=0,
                ndcg_at_10=ndcg_at_10,
                model_path=str(MODEL_PATH),
            )
        except Exception as e:
            logger.warning("Failed to record bootstrap metadata: %s", e)

    metadata = {
        'model_version': model_version,
        'tier': tier,
        'training_samples': total_samples,
        'synthetic_samples': total_samples,
        'query_groups': len(groups),
        'n_estimators': n_estimators,
        'max_depth': config['max_depth'],
        'ndcg_at_10': ndcg_at_10,
        'model_path': str(MODEL_PATH),
        'source_breakdown': self._count_sources(records),
        'created_at': datetime.now().isoformat(),
    }
    logger.info("Bootstrap complete: %s", metadata)
    return metadata
625
-
626
- # ========================================================================
627
- # Memory Database Queries (READ-ONLY on memory.db)
628
- # ========================================================================
629
-
630
def _get_memories_by_access(self, min_access: int = 5) -> List[dict]:
    """
    Fetch memories with access_count >= min_access from memory.db.

    These are memories the user keeps coming back to — strong positive signal.

    Thin wrapper: forwards ``self._memory_db`` and ``min_access`` to the
    module-level ``get_memories_by_access`` helper and returns its result.

    NOTE(review): the helper lives outside this excerpt; the columns
    present in each returned dict should be confirmed there (callers in
    this class read ``id`` and ``content``).
    """
    return get_memories_by_access(self._memory_db, min_access)
637
-
638
def _get_memories_by_importance(self, min_importance: int = 8) -> List[dict]:
    """
    Fetch memories with importance >= min_importance from memory.db.

    High importance = user explicitly rated these as valuable.

    Thin wrapper: forwards ``self._memory_db`` and ``min_importance`` to
    the module-level ``get_memories_by_importance`` helper and returns its
    result.

    NOTE(review): the helper lives outside this excerpt; the columns
    present in each returned dict should be confirmed there (callers in
    this class read ``id``, ``tags`` and ``content``).
    """
    return get_memories_by_importance(self._memory_db, min_importance)
645
-
646
def _get_recent_memories(self, limit: int = 30) -> List[dict]:
    """
    Fetch the N most recently created memories.

    Thin wrapper: forwards ``self._memory_db`` and ``limit`` to the
    module-level ``get_recent_memories`` helper and returns its result.

    NOTE(review): ordering and the meaning of "recent" are decided by that
    helper, which is defined outside this excerpt — confirm there.
    """
    return get_recent_memories(self._memory_db, limit)
649
-
650
def _get_learned_patterns(
    self,
    min_confidence: float = 0.7,
) -> List[dict]:
    """
    Fetch high-confidence identity_patterns from memory.db.

    These are patterns detected by pattern_learner.py (Layer 4) —
    tech preferences, coding style, terminology, etc.

    Returns empty list if identity_patterns table doesn't exist
    (backward compatible with pre-v2.3 databases).

    Thin wrapper: forwards ``self._memory_db`` and ``min_confidence`` to
    the module-level ``get_learned_patterns`` helper and returns its result.

    NOTE(review): the missing-table behaviour described above is
    implemented (if at all) in that helper, which is outside this excerpt —
    confirm there. Callers in this class read ``key`` and ``value`` from
    each returned dict.
    """
    return get_learned_patterns(self._memory_db, min_confidence)
664
-
665
def _search_memories(self, query: str, limit: int = 20) -> List[dict]:
    """
    Simple FTS5 search in memory.db.

    Used to find memories matching synthetic query terms.
    This is a lightweight search — no TF-IDF, no HNSW, just FTS5.

    Thin wrapper: forwards ``self._memory_db``, ``query`` and ``limit`` to
    the module-level ``search_memories`` helper and returns its result.

    NOTE(review): the search mechanism (FTS5) and result ordering are
    properties of that helper, which is outside this excerpt — confirm
    there. Callers in this class rely on result order for labelling.
    """
    return search_memories(self._memory_db, query, limit)
673
-
674
def _find_negative_memories(
    self,
    anchor_memory: dict,
    exclude_ids: Optional[Set[int]] = None,
    limit: int = 2,
) -> List[dict]:
    """
    Find memories dissimilar to the anchor (for negative examples).

    Simple heuristic: pick memories from a different category or project.
    Falls back to random sample if no structured differences available.

    Thin wrapper: forwards ``self._memory_db``, ``anchor_memory``,
    ``exclude_ids`` and ``limit`` to the module-level
    ``find_negative_memories`` helper and returns its result.

    NOTE(review): the selection heuristic described above is implemented
    in that helper, which is outside this excerpt — confirm there,
    including how a ``None`` value for ``exclude_ids`` is treated.
    """
    return find_negative_memories(self._memory_db, anchor_memory, exclude_ids, limit)
687
-
688
- # ========================================================================
689
- # Text Processing
690
- # ========================================================================
691
-
692
def _extract_keywords(self, content: str, top_n: int = 3) -> List[str]:
    """
    Extract meaningful keywords from memory content.

    Simple frequency-based extraction:
        1. Tokenize (alphanumeric words)
        2. Remove stopwords and short words
        3. Return top N by frequency

    No external NLP dependencies — just regex + counter.

    Thin wrapper: forwards ``content`` and ``top_n`` to the module-level
    ``extract_keywords`` helper and returns its result; no instance state
    is used.

    NOTE(review): the extraction steps listed above describe that helper,
    which is outside this excerpt — confirm there.
    """
    return extract_keywords(content, top_n)
704
-
705
- # ========================================================================
706
- # Utility
707
- # ========================================================================
708
-
709
def _diverse_sample(
    self,
    records: List[dict],
    target: int,
) -> List[dict]:
    """
    Sample records while maintaining source diversity.

    Takes proportional samples from each source strategy to ensure
    the training data isn't dominated by one strategy.

    Thin wrapper: forwards ``records`` and ``target`` to the module-level
    ``diverse_sample`` helper and returns its result; no instance state is
    used.

    NOTE(review): the proportional-sampling behaviour (and whether it is
    deterministic) is a property of that helper, which is outside this
    excerpt — confirm there.
    """
    return diverse_sample(records, target)
721
-
722
def _count_sources(self, records: List[dict]) -> Dict[str, int]:
    """
    Count records by source strategy.

    Thin wrapper: forwards ``records`` to the module-level
    ``count_sources`` helper and returns its result; no instance state is
    used.
    """
    return count_sources(records)
725
-
726
-
727
- # ============================================================================
728
- # Module-level convenience
729
- # ============================================================================
730
-
731
def should_bootstrap(memory_db_path: Optional[Path] = None) -> bool:
    """
    Report whether a bootstrap run is needed, using a throwaway bootstrapper.

    Args:
        memory_db_path: Optional path handed to ``SyntheticBootstrapper``.

    Returns:
        The result of ``SyntheticBootstrapper.should_bootstrap()``, or False
        when constructing the bootstrapper or running the check raised.
    """
    try:
        return SyntheticBootstrapper(
            memory_db_path=memory_db_path
        ).should_bootstrap()
    except Exception:
        # Any failure is treated as "do not bootstrap".
        return False
738
-
739
-
740
def run_bootstrap(
    memory_db_path: Optional[Path] = None,
    learning_db=None,
) -> Optional[Dict[str, Any]]:
    """
    Convenience entry point: bootstrap the model if the pre-flight check passes.

    Args:
        memory_db_path: Optional path handed to ``SyntheticBootstrapper``.
        learning_db: Optional learning DB handle handed to the bootstrapper.

    Returns:
        The metadata dict from ``bootstrap_model()``, or None when the
        pre-flight check declined, training produced nothing, or any
        exception was raised (the exception is logged as an error).
    """
    try:
        runner = SyntheticBootstrapper(
            memory_db_path=memory_db_path,
            learning_db=learning_db,
        )
        if not runner.should_bootstrap():
            return None
        return runner.bootstrap_model()
    except Exception as e:
        logger.error("Bootstrap failed: %s", e)
        return None