superlocalmemory 2.8.6 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. package/LICENSE +9 -1
  2. package/NOTICE +63 -0
  3. package/README.md +165 -480
  4. package/bin/slm +17 -449
  5. package/bin/slm-npm +62 -48
  6. package/conftest.py +5 -0
  7. package/docs/api-reference.md +284 -0
  8. package/docs/architecture.md +149 -0
  9. package/docs/auto-memory.md +150 -0
  10. package/docs/cli-reference.md +276 -0
  11. package/docs/compliance.md +191 -0
  12. package/docs/configuration.md +182 -0
  13. package/docs/getting-started.md +102 -0
  14. package/docs/ide-setup.md +261 -0
  15. package/docs/mcp-tools.md +220 -0
  16. package/docs/migration-from-v2.md +170 -0
  17. package/docs/profiles.md +173 -0
  18. package/docs/troubleshooting.md +310 -0
  19. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  20. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  21. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  22. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  23. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  24. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  25. package/ide/configs/cursor-mcp.json +15 -0
  26. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  27. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  28. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  29. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  30. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  31. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  32. package/{configs → ide/configs}/zed-mcp.json +2 -2
  33. package/{hooks → ide/hooks}/context-hook.js +9 -20
  34. package/ide/hooks/memory-list-skill.js +70 -0
  35. package/ide/hooks/memory-profile-skill.js +101 -0
  36. package/ide/hooks/memory-recall-skill.js +62 -0
  37. package/ide/hooks/memory-remember-skill.js +68 -0
  38. package/ide/hooks/memory-reset-skill.js +160 -0
  39. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  40. package/ide/integrations/langchain/README.md +106 -0
  41. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  42. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  43. package/ide/integrations/langchain/pyproject.toml +38 -0
  44. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  45. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  46. package/ide/integrations/langchain/tests/test_security.py +117 -0
  47. package/ide/integrations/llamaindex/README.md +81 -0
  48. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  49. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  50. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  51. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  52. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  53. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  54. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  55. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  56. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  57. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  58. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  59. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  60. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  61. package/package.json +13 -22
  62. package/pyproject.toml +85 -0
  63. package/scripts/build-dmg.sh +417 -0
  64. package/scripts/install-skills.ps1 +334 -0
  65. package/scripts/postinstall.js +2 -2
  66. package/scripts/start-dashboard.ps1 +52 -0
  67. package/scripts/start-dashboard.sh +41 -0
  68. package/scripts/sync-wiki.ps1 +127 -0
  69. package/scripts/sync-wiki.sh +82 -0
  70. package/scripts/test-dmg.sh +161 -0
  71. package/scripts/test-npm-package.ps1 +252 -0
  72. package/scripts/test-npm-package.sh +207 -0
  73. package/scripts/verify-install.ps1 +294 -0
  74. package/scripts/verify-install.sh +266 -0
  75. package/src/superlocalmemory/__init__.py +0 -0
  76. package/src/superlocalmemory/attribution/__init__.py +9 -0
  77. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  78. package/src/superlocalmemory/attribution/signer.py +153 -0
  79. package/src/superlocalmemory/attribution/watermark.py +189 -0
  80. package/src/superlocalmemory/cli/__init__.py +5 -0
  81. package/src/superlocalmemory/cli/commands.py +245 -0
  82. package/src/superlocalmemory/cli/main.py +89 -0
  83. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  84. package/src/superlocalmemory/cli/post_install.py +99 -0
  85. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  86. package/src/superlocalmemory/compliance/__init__.py +0 -0
  87. package/src/superlocalmemory/compliance/abac.py +204 -0
  88. package/src/superlocalmemory/compliance/audit.py +314 -0
  89. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  90. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  91. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  92. package/src/superlocalmemory/compliance/retention.py +232 -0
  93. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  94. package/src/superlocalmemory/core/__init__.py +0 -0
  95. package/src/superlocalmemory/core/config.py +391 -0
  96. package/src/superlocalmemory/core/embeddings.py +293 -0
  97. package/src/superlocalmemory/core/engine.py +701 -0
  98. package/src/superlocalmemory/core/hooks.py +65 -0
  99. package/src/superlocalmemory/core/maintenance.py +172 -0
  100. package/src/superlocalmemory/core/modes.py +140 -0
  101. package/src/superlocalmemory/core/profiles.py +234 -0
  102. package/src/superlocalmemory/core/registry.py +117 -0
  103. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  104. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  105. package/src/superlocalmemory/encoding/__init__.py +0 -0
  106. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  107. package/src/superlocalmemory/encoding/emotional.py +125 -0
  108. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  109. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  110. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  111. package/src/superlocalmemory/encoding/foresight.py +91 -0
  112. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  113. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  114. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  115. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  116. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  117. package/src/superlocalmemory/encoding/type_router.py +235 -0
  118. package/src/superlocalmemory/hooks/__init__.py +3 -0
  119. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  120. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  121. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  122. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  123. package/src/superlocalmemory/infra/__init__.py +3 -0
  124. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  125. package/src/superlocalmemory/infra/backup.py +317 -0
  126. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  127. package/src/superlocalmemory/infra/event_bus.py +381 -0
  128. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  129. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  130. package/src/superlocalmemory/learning/__init__.py +0 -0
  131. package/src/superlocalmemory/learning/adaptive.py +172 -0
  132. package/src/superlocalmemory/learning/behavioral.py +490 -0
  133. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  134. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  135. package/src/superlocalmemory/learning/cross_project.py +399 -0
  136. package/src/superlocalmemory/learning/database.py +376 -0
  137. package/src/superlocalmemory/learning/engagement.py +323 -0
  138. package/src/superlocalmemory/learning/features.py +138 -0
  139. package/src/superlocalmemory/learning/feedback.py +316 -0
  140. package/src/superlocalmemory/learning/outcomes.py +255 -0
  141. package/src/superlocalmemory/learning/project_context.py +366 -0
  142. package/src/superlocalmemory/learning/ranker.py +155 -0
  143. package/src/superlocalmemory/learning/source_quality.py +303 -0
  144. package/src/superlocalmemory/learning/workflows.py +309 -0
  145. package/src/superlocalmemory/llm/__init__.py +0 -0
  146. package/src/superlocalmemory/llm/backbone.py +316 -0
  147. package/src/superlocalmemory/math/__init__.py +0 -0
  148. package/src/superlocalmemory/math/fisher.py +356 -0
  149. package/src/superlocalmemory/math/langevin.py +398 -0
  150. package/src/superlocalmemory/math/sheaf.py +257 -0
  151. package/src/superlocalmemory/mcp/__init__.py +0 -0
  152. package/src/superlocalmemory/mcp/resources.py +245 -0
  153. package/src/superlocalmemory/mcp/server.py +61 -0
  154. package/src/superlocalmemory/mcp/tools.py +18 -0
  155. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  156. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  157. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  158. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  159. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  160. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  161. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  162. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  163. package/src/superlocalmemory/retrieval/engine.py +390 -0
  164. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  165. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  166. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  167. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  168. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  169. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  170. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  171. package/src/superlocalmemory/server/__init__.py +1 -0
  172. package/src/superlocalmemory/server/api.py +248 -0
  173. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  174. package/src/superlocalmemory/server/routes/agents.py +107 -0
  175. package/src/superlocalmemory/server/routes/backup.py +91 -0
  176. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  177. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  178. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  179. package/src/superlocalmemory/server/routes/events.py +183 -0
  180. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  181. package/src/superlocalmemory/server/routes/learning.py +273 -0
  182. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  183. package/src/superlocalmemory/server/routes/memories.py +399 -0
  184. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  185. package/src/superlocalmemory/server/routes/stats.py +346 -0
  186. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  187. package/src/superlocalmemory/server/routes/ws.py +82 -0
  188. package/src/superlocalmemory/server/security_middleware.py +57 -0
  189. package/src/superlocalmemory/server/ui.py +245 -0
  190. package/src/superlocalmemory/storage/__init__.py +0 -0
  191. package/src/superlocalmemory/storage/access_control.py +182 -0
  192. package/src/superlocalmemory/storage/database.py +594 -0
  193. package/src/superlocalmemory/storage/migrations.py +303 -0
  194. package/src/superlocalmemory/storage/models.py +406 -0
  195. package/src/superlocalmemory/storage/schema.py +726 -0
  196. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  197. package/src/superlocalmemory/trust/__init__.py +0 -0
  198. package/src/superlocalmemory/trust/gate.py +130 -0
  199. package/src/superlocalmemory/trust/provenance.py +124 -0
  200. package/src/superlocalmemory/trust/scorer.py +347 -0
  201. package/src/superlocalmemory/trust/signals.py +153 -0
  202. package/ui/index.html +278 -5
  203. package/ui/js/auto-settings.js +70 -0
  204. package/ui/js/dashboard.js +90 -0
  205. package/ui/js/fact-detail.js +92 -0
  206. package/ui/js/feedback.js +2 -2
  207. package/ui/js/ide-status.js +102 -0
  208. package/ui/js/math-health.js +98 -0
  209. package/ui/js/recall-lab.js +127 -0
  210. package/ui/js/settings.js +2 -2
  211. package/ui/js/trust-dashboard.js +73 -0
  212. package/api_server.py +0 -724
  213. package/bin/aider-smart +0 -72
  214. package/bin/superlocalmemoryv2-learning +0 -4
  215. package/bin/superlocalmemoryv2-list +0 -3
  216. package/bin/superlocalmemoryv2-patterns +0 -4
  217. package/bin/superlocalmemoryv2-profile +0 -3
  218. package/bin/superlocalmemoryv2-recall +0 -3
  219. package/bin/superlocalmemoryv2-remember +0 -3
  220. package/bin/superlocalmemoryv2-reset +0 -3
  221. package/bin/superlocalmemoryv2-status +0 -3
  222. package/configs/chatgpt-desktop-mcp.json +0 -16
  223. package/configs/cursor-mcp.json +0 -15
  224. package/hooks/memory-list-skill.js +0 -139
  225. package/hooks/memory-profile-skill.js +0 -273
  226. package/hooks/memory-recall-skill.js +0 -114
  227. package/hooks/memory-remember-skill.js +0 -127
  228. package/hooks/memory-reset-skill.js +0 -274
  229. package/mcp_server.py +0 -1808
  230. package/requirements-core.txt +0 -22
  231. package/requirements-learning.txt +0 -12
  232. package/requirements.txt +0 -12
  233. package/src/agent_registry.py +0 -411
  234. package/src/auth_middleware.py +0 -61
  235. package/src/auto_backup.py +0 -459
  236. package/src/behavioral/__init__.py +0 -49
  237. package/src/behavioral/behavioral_listener.py +0 -203
  238. package/src/behavioral/behavioral_patterns.py +0 -275
  239. package/src/behavioral/cross_project_transfer.py +0 -206
  240. package/src/behavioral/outcome_inference.py +0 -194
  241. package/src/behavioral/outcome_tracker.py +0 -193
  242. package/src/behavioral/tests/__init__.py +0 -4
  243. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  244. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  245. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  246. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  247. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  248. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  249. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  250. package/src/cache_manager.py +0 -518
  251. package/src/compliance/__init__.py +0 -48
  252. package/src/compliance/abac_engine.py +0 -149
  253. package/src/compliance/abac_middleware.py +0 -116
  254. package/src/compliance/audit_db.py +0 -215
  255. package/src/compliance/audit_logger.py +0 -148
  256. package/src/compliance/retention_manager.py +0 -289
  257. package/src/compliance/retention_scheduler.py +0 -186
  258. package/src/compliance/tests/__init__.py +0 -4
  259. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  260. package/src/compliance/tests/test_abac_engine.py +0 -124
  261. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  262. package/src/compliance/tests/test_audit_db.py +0 -123
  263. package/src/compliance/tests/test_audit_logger.py +0 -98
  264. package/src/compliance/tests/test_mcp_audit.py +0 -128
  265. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  266. package/src/compliance/tests/test_retention_manager.py +0 -131
  267. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  268. package/src/compression/__init__.py +0 -25
  269. package/src/compression/cli.py +0 -150
  270. package/src/compression/cold_storage.py +0 -217
  271. package/src/compression/config.py +0 -72
  272. package/src/compression/orchestrator.py +0 -133
  273. package/src/compression/tier2_compressor.py +0 -228
  274. package/src/compression/tier3_compressor.py +0 -153
  275. package/src/compression/tier_classifier.py +0 -148
  276. package/src/db_connection_manager.py +0 -536
  277. package/src/embedding_engine.py +0 -63
  278. package/src/embeddings/__init__.py +0 -47
  279. package/src/embeddings/cache.py +0 -70
  280. package/src/embeddings/cli.py +0 -113
  281. package/src/embeddings/constants.py +0 -47
  282. package/src/embeddings/database.py +0 -91
  283. package/src/embeddings/engine.py +0 -247
  284. package/src/embeddings/model_loader.py +0 -145
  285. package/src/event_bus.py +0 -562
  286. package/src/graph/__init__.py +0 -36
  287. package/src/graph/build_helpers.py +0 -74
  288. package/src/graph/cli.py +0 -87
  289. package/src/graph/cluster_builder.py +0 -188
  290. package/src/graph/cluster_summary.py +0 -148
  291. package/src/graph/constants.py +0 -47
  292. package/src/graph/edge_builder.py +0 -162
  293. package/src/graph/entity_extractor.py +0 -95
  294. package/src/graph/graph_core.py +0 -226
  295. package/src/graph/graph_search.py +0 -231
  296. package/src/graph/hierarchical.py +0 -207
  297. package/src/graph/schema.py +0 -99
  298. package/src/graph_engine.py +0 -52
  299. package/src/hnsw_index.py +0 -628
  300. package/src/hybrid_search.py +0 -46
  301. package/src/learning/__init__.py +0 -217
  302. package/src/learning/adaptive_ranker.py +0 -682
  303. package/src/learning/bootstrap/__init__.py +0 -69
  304. package/src/learning/bootstrap/constants.py +0 -93
  305. package/src/learning/bootstrap/db_queries.py +0 -316
  306. package/src/learning/bootstrap/sampling.py +0 -82
  307. package/src/learning/bootstrap/text_utils.py +0 -71
  308. package/src/learning/cross_project_aggregator.py +0 -857
  309. package/src/learning/db/__init__.py +0 -40
  310. package/src/learning/db/constants.py +0 -44
  311. package/src/learning/db/schema.py +0 -279
  312. package/src/learning/engagement_tracker.py +0 -628
  313. package/src/learning/feature_extractor.py +0 -708
  314. package/src/learning/feedback_collector.py +0 -806
  315. package/src/learning/learning_db.py +0 -915
  316. package/src/learning/project_context_manager.py +0 -572
  317. package/src/learning/ranking/__init__.py +0 -33
  318. package/src/learning/ranking/constants.py +0 -84
  319. package/src/learning/ranking/helpers.py +0 -278
  320. package/src/learning/source_quality_scorer.py +0 -676
  321. package/src/learning/synthetic_bootstrap.py +0 -755
  322. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  323. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  324. package/src/learning/tests/test_aggregator.py +0 -306
  325. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  326. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  327. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  328. package/src/learning/tests/test_feedback_collector.py +0 -294
  329. package/src/learning/tests/test_learning_db.py +0 -602
  330. package/src/learning/tests/test_learning_db_v28.py +0 -110
  331. package/src/learning/tests/test_learning_init_v28.py +0 -48
  332. package/src/learning/tests/test_outcome_signals.py +0 -48
  333. package/src/learning/tests/test_project_context.py +0 -292
  334. package/src/learning/tests/test_schema_migration.py +0 -319
  335. package/src/learning/tests/test_signal_inference.py +0 -397
  336. package/src/learning/tests/test_source_quality.py +0 -351
  337. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  338. package/src/learning/tests/test_workflow_miner.py +0 -318
  339. package/src/learning/workflow_pattern_miner.py +0 -655
  340. package/src/lifecycle/__init__.py +0 -54
  341. package/src/lifecycle/bounded_growth.py +0 -239
  342. package/src/lifecycle/compaction_engine.py +0 -226
  343. package/src/lifecycle/lifecycle_engine.py +0 -355
  344. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  345. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  346. package/src/lifecycle/retention_policy.py +0 -285
  347. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  348. package/src/lifecycle/tests/test_compaction.py +0 -179
  349. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  350. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  351. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  352. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  353. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  354. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  355. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  356. package/src/mcp_tools_v28.py +0 -281
  357. package/src/memory/__init__.py +0 -36
  358. package/src/memory/cli.py +0 -205
  359. package/src/memory/constants.py +0 -39
  360. package/src/memory/helpers.py +0 -28
  361. package/src/memory/schema.py +0 -166
  362. package/src/memory-profiles.py +0 -595
  363. package/src/memory-reset.py +0 -491
  364. package/src/memory_compression.py +0 -989
  365. package/src/memory_store_v2.py +0 -1155
  366. package/src/migrate_v1_to_v2.py +0 -629
  367. package/src/pattern_learner.py +0 -34
  368. package/src/patterns/__init__.py +0 -24
  369. package/src/patterns/analyzers.py +0 -251
  370. package/src/patterns/learner.py +0 -271
  371. package/src/patterns/scoring.py +0 -171
  372. package/src/patterns/store.py +0 -225
  373. package/src/patterns/terminology.py +0 -140
  374. package/src/provenance_tracker.py +0 -312
  375. package/src/qualixar_attribution.py +0 -139
  376. package/src/qualixar_watermark.py +0 -78
  377. package/src/query_optimizer.py +0 -511
  378. package/src/rate_limiter.py +0 -83
  379. package/src/search/__init__.py +0 -20
  380. package/src/search/cli.py +0 -77
  381. package/src/search/constants.py +0 -26
  382. package/src/search/engine.py +0 -241
  383. package/src/search/fusion.py +0 -122
  384. package/src/search/index_loader.py +0 -114
  385. package/src/search/methods.py +0 -162
  386. package/src/search_engine_v2.py +0 -401
  387. package/src/setup_validator.py +0 -482
  388. package/src/subscription_manager.py +0 -391
  389. package/src/tree/__init__.py +0 -59
  390. package/src/tree/builder.py +0 -185
  391. package/src/tree/nodes.py +0 -202
  392. package/src/tree/queries.py +0 -257
  393. package/src/tree/schema.py +0 -80
  394. package/src/tree_manager.py +0 -19
  395. package/src/trust/__init__.py +0 -45
  396. package/src/trust/constants.py +0 -66
  397. package/src/trust/queries.py +0 -157
  398. package/src/trust/schema.py +0 -95
  399. package/src/trust/scorer.py +0 -299
  400. package/src/trust/signals.py +0 -95
  401. package/src/trust_scorer.py +0 -44
  402. package/ui/app.js +0 -1588
  403. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  404. package/ui/js/graph-cytoscape.js +0 -1168
  405. package/ui/js/graph-d3-backup.js +0 -32
  406. package/ui/js/graph.js +0 -32
  407. package/ui_server.py +0 -286
  408. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  409. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  410. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  411. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  412. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  413. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  414. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  415. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  416. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  417. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  418. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  419. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  420. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  421. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  422. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  423. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  424. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  425. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  426. /package/{completions → ide/completions}/slm.bash +0 -0
  427. /package/{completions → ide/completions}/slm.zsh +0 -0
  428. /package/{configs → ide/configs}/cody-commands.json +0 -0
  429. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
  430. /package/{install.ps1 → scripts/install.ps1} +0 -0
  431. /package/{install.sh → scripts/install.sh} +0 -0
@@ -1,755 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- SyntheticBootstrapper — Bootstrap ML model from existing data patterns.
6
-
7
- PROBLEM: LightGBM needs 200+ feedback signals across 50+ unique queries
8
- to activate ML ranking (Phase 2). A new user has zero feedback. Without
9
- bootstrap, users must endure ~200 recalls before getting personalization.
10
- That's weeks of usage with no benefit. Users abandon before reaching Phase 2.
11
-
12
- SOLUTION: Generate synthetic (query, memory, relevance_label) tuples from
13
- EXISTING data patterns in memory.db. These aren't real user feedback, but
14
- they encode reasonable assumptions:
15
- - Frequently accessed memories are probably relevant to their keywords
16
- - High-importance memories should rank higher for their topics
17
- - Learned patterns (from pattern_learner.py) encode real preferences
18
- - Recent memories should generally outrank older ones
19
-
20
- Four Strategies:
21
- 1. Access-based: Memories accessed 5+ times -> positive for their keywords
22
- 2. Importance-based: Importance >= 8 -> positive for their tags
23
- 3. Pattern-based: Learned identity_patterns -> positive for matching memories
24
- 4. Recency decay: For any synthetic query, recent memories rank higher
25
-
26
- The bootstrap model uses MORE aggressive regularization than the real model
27
- (fewer trees, smaller depth, higher reg_lambda) to prevent overfitting
28
- on synthetic data. Once real feedback accumulates, the model is retrained
29
- with continued learning (init_model), gradually replacing synthetic signal
30
- with real signal.
31
-
32
- Research Backing:
33
- - FCS LREC 2024: Cold-start mitigation via synthetic bootstrap
34
- - eKNOW 2025: BM25 -> re-ranker pipeline effectiveness
35
- """
36
-
37
- import hashlib
38
- import logging
39
- from datetime import datetime
40
- from pathlib import Path
41
- from typing import Any, Dict, List, Optional, Set
42
-
43
- # LightGBM is OPTIONAL — bootstrap only works when LightGBM is installed
44
- try:
45
- import lightgbm as lgb
46
- HAS_LIGHTGBM = True
47
- except ImportError:
48
- lgb = None
49
- HAS_LIGHTGBM = False
50
-
51
- try:
52
- import numpy as np
53
- HAS_NUMPY = True
54
- except ImportError:
55
- np = None
56
- HAS_NUMPY = False
57
-
58
- from .feature_extractor import FeatureExtractor, FEATURE_NAMES, NUM_FEATURES
59
- from .bootstrap import (
60
- MEMORY_DB_PATH,
61
- MODELS_DIR,
62
- MODEL_PATH,
63
- MIN_MEMORIES_FOR_BOOTSTRAP,
64
- BOOTSTRAP_CONFIG,
65
- BOOTSTRAP_PARAMS,
66
- extract_keywords,
67
- get_memory_count,
68
- get_memories_by_access,
69
- get_memories_by_importance,
70
- get_recent_memories,
71
- get_learned_patterns,
72
- search_memories,
73
- find_negative_memories,
74
- diverse_sample,
75
- count_sources,
76
- )
77
-
78
- logger = logging.getLogger("superlocalmemory.learning.synthetic_bootstrap")
79
-
80
-
81
- class SyntheticBootstrapper:
82
- """
83
- Generates synthetic training data and bootstraps the ML ranking model.
84
-
85
- Usage:
86
- bootstrapper = SyntheticBootstrapper()
87
- if bootstrapper.should_bootstrap():
88
- result = bootstrapper.bootstrap_model()
89
- if result:
90
- print(f"Bootstrapped with {result['training_samples']} samples")
91
-
92
- The bootstrapped model is saved to the same path as the real model.
93
- When real feedback accumulates, AdaptiveRanker.train() uses
94
- continued learning (init_model) to incrementally replace synthetic
95
- signal with real signal.
96
- """
97
-
98
- MIN_MEMORIES_FOR_BOOTSTRAP = MIN_MEMORIES_FOR_BOOTSTRAP
99
- BOOTSTRAP_CONFIG = BOOTSTRAP_CONFIG
100
-
101
- def __init__(
102
- self,
103
- memory_db_path: Optional[Path] = None,
104
- learning_db=None,
105
- ):
106
- """
107
- Initialize SyntheticBootstrapper.
108
-
109
- Args:
110
- memory_db_path: Path to memory.db (defaults to ~/.claude-memory/memory.db).
111
- learning_db: Optional LearningDB instance for recording metadata.
112
- """
113
- self._memory_db = Path(memory_db_path) if memory_db_path else MEMORY_DB_PATH
114
- self._learning_db = learning_db
115
- self._feature_extractor = FeatureExtractor()
116
-
117
- # ========================================================================
118
- # LearningDB Access
119
- # ========================================================================
120
-
121
- def _get_learning_db(self):
122
- """Get or create the LearningDB instance."""
123
- if self._learning_db is None:
124
- try:
125
- from .learning_db import LearningDB
126
- self._learning_db = LearningDB()
127
- except Exception as e:
128
- logger.warning("Cannot access LearningDB: %s", e)
129
- return None
130
- return self._learning_db
131
-
132
- # ========================================================================
133
- # Pre-flight Checks
134
- # ========================================================================
135
-
136
- def should_bootstrap(self) -> bool:
137
- """
138
- Check if synthetic bootstrap is needed and possible.
139
-
140
- Returns True if:
141
- 1. LightGBM + NumPy are available
142
- 2. No existing model file (or forced rebuild)
143
- 3. At least MIN_MEMORIES_FOR_BOOTSTRAP memories exist in memory.db
144
- """
145
- if not HAS_LIGHTGBM or not HAS_NUMPY:
146
- logger.debug("Bootstrap unavailable: LightGBM=%s, NumPy=%s",
147
- HAS_LIGHTGBM, HAS_NUMPY)
148
- return False
149
-
150
- if MODEL_PATH.exists():
151
- logger.debug("Model already exists at %s — skipping bootstrap",
152
- MODEL_PATH)
153
- return False
154
-
155
- memory_count = self._get_memory_count()
156
- if memory_count < MIN_MEMORIES_FOR_BOOTSTRAP:
157
- logger.debug(
158
- "Not enough memories for bootstrap: %d (need %d)",
159
- memory_count, MIN_MEMORIES_FOR_BOOTSTRAP
160
- )
161
- return False
162
-
163
- return True
164
-
165
- def get_tier(self) -> Optional[str]:
166
- """
167
- Determine bootstrap tier based on memory count.
168
-
169
- Returns:
170
- 'small', 'medium', 'large', or None if < MIN_MEMORIES.
171
- """
172
- count = self._get_memory_count()
173
- for tier_name, config in BOOTSTRAP_CONFIG.items():
174
- if config['min_memories'] <= count <= config['max_memories']:
175
- return tier_name
176
- return None
177
-
178
- def _get_memory_count(self) -> int:
179
- """Count total memories in memory.db."""
180
- return get_memory_count(self._memory_db)
181
-
182
- # ========================================================================
183
- # Synthetic Data Generation
184
- # ========================================================================
185
-
186
def generate_synthetic_training_data(self) -> List[dict]:
    """
    Run all four generation strategies and concatenate their records.

    Strategies run in a fixed order (access, importance, patterns,
    recency); the size of each batch is logged at info level. Each record
    is whatever ``_build_record`` returns: a dict with ``query``,
    ``query_hash``, ``memory_id``, ``label``, ``source`` and ``features``.

    Returns:
        List of training record dicts. May be empty if no strategy
        produced anything.
    """
    # (log format, generator) pairs; order determines record order.
    strategies = (
        ("Strategy 1 (access): %d records", self._generate_access_based),
        ("Strategy 2 (importance): %d records", self._generate_importance_based),
        ("Strategy 3 (patterns): %d records", self._generate_pattern_based),
        ("Strategy 4 (recency): %d records", self._generate_recency_based),
    )

    combined: List[dict] = []
    for log_format, produce in strategies:
        batch = produce()
        combined.extend(batch)
        logger.info(log_format, len(batch))

    logger.info("Total synthetic records: %d", len(combined))
    return combined
226
-
227
- def _generate_access_based(self) -> List[dict]:
228
- """
229
- Strategy 1: Memories accessed 5+ times are relevant for their keywords.
230
-
231
- Logic: If a user keeps coming back to a memory via certain searches,
232
- the keywords in that memory are relevant queries for it.
233
- """
234
- records = []
235
- high_access_memories = self._get_memories_by_access(min_access=5)
236
-
237
- for memory in high_access_memories:
238
- keywords = self._extract_keywords(memory.get('content', ''))
239
- if not keywords:
240
- continue
241
-
242
- query = ' '.join(keywords)
243
-
244
- # Positive: This memory is relevant to its own keywords
245
- records.append(self._build_record(
246
- query=query,
247
- memory=memory,
248
- label=1.0,
249
- source='access_positive',
250
- ))
251
-
252
- # Find some non-matching memories as negatives
253
- negatives = self._find_negative_memories(
254
- memory, exclude_ids={memory['id']}, limit=2
255
- )
256
- for neg_memory in negatives:
257
- records.append(self._build_record(
258
- query=query,
259
- memory=neg_memory,
260
- label=0.0,
261
- source='access_negative',
262
- ))
263
-
264
- return records
265
-
266
- def _generate_importance_based(self) -> List[dict]:
267
- """
268
- Strategy 2: High-importance memories (>= 8) are positive for their tags.
269
-
270
- Logic: User explicitly rated these memories as important. Their tags
271
- represent topics the user cares about.
272
- """
273
- records = []
274
- important_memories = self._get_memories_by_importance(min_importance=8)
275
-
276
- for memory in important_memories:
277
- # Use tags as synthetic query, fall back to content keywords
278
- tags = memory.get('tags', '')
279
- if isinstance(tags, str):
280
- try:
281
- import json
282
- tags_list = json.loads(tags)
283
- except (ValueError, TypeError):
284
- tags_list = [t.strip() for t in tags.split(',') if t.strip()]
285
- elif isinstance(tags, list):
286
- tags_list = tags
287
- else:
288
- tags_list = []
289
-
290
- if tags_list:
291
- query = ' '.join(tags_list[:5])
292
- else:
293
- keywords = self._extract_keywords(memory.get('content', ''))
294
- query = ' '.join(keywords) if keywords else ''
295
-
296
- if not query:
297
- continue
298
-
299
- # Positive: High-importance memory matches its tags
300
- records.append(self._build_record(
301
- query=query,
302
- memory=memory,
303
- label=1.0,
304
- source='importance_positive',
305
- ))
306
-
307
- # Find some negatives
308
- negatives = self._find_negative_memories(
309
- memory, exclude_ids={memory['id']}, limit=2
310
- )
311
- for neg_memory in negatives:
312
- records.append(self._build_record(
313
- query=query,
314
- memory=neg_memory,
315
- label=0.0,
316
- source='importance_negative',
317
- ))
318
-
319
- return records
320
-
321
- def _generate_pattern_based(self) -> List[dict]:
322
- """
323
- Strategy 3: Use learned identity_patterns to create synthetic queries.
324
-
325
- Logic: Pattern learner has already identified user's tech preferences,
326
- coding style, etc. Use these as queries and find matching memories.
327
- """
328
- records = []
329
- patterns = self._get_learned_patterns(min_confidence=0.7)
330
-
331
- if not patterns:
332
- return records
333
-
334
- for pattern in patterns:
335
- # Build query from pattern key + value
336
- query_parts = []
337
- key = pattern.get('key', '')
338
- value = pattern.get('value', '')
339
- if key:
340
- query_parts.append(key)
341
- if value and value != key:
342
- query_parts.append(value)
343
-
344
- query = ' '.join(query_parts)
345
- if not query or len(query) < 3:
346
- continue
347
-
348
- # Search for memories matching this pattern
349
- matching = self._search_memories(query, limit=10)
350
-
351
- if len(matching) < 2:
352
- continue
353
-
354
- # Top results are positive, bottom results are weak negatives
355
- for i, memory in enumerate(matching):
356
- if i < 3:
357
- label = 1.0 # Top matches are relevant
358
- elif i < 6:
359
- label = 0.5 # Middle matches are weakly relevant
360
- else:
361
- label = 0.1 # Bottom matches are marginal
362
-
363
- records.append(self._build_record(
364
- query=query,
365
- memory=memory,
366
- label=label,
367
- source='pattern',
368
- ))
369
-
370
- return records
371
-
372
- def _generate_recency_based(self) -> List[dict]:
373
- """
374
- Strategy 4: Recency decay — for shared-topic queries, recent wins.
375
-
376
- Logic: For memories about the same topic, more recent memories
377
- should generally rank higher (fresher context, more current).
378
- Generates pairs where newer = positive, older = weak negative.
379
- """
380
- records = []
381
-
382
- # Get a sample of recent and old memories
383
- recent = self._get_recent_memories(limit=30)
384
- if len(recent) < 4:
385
- return records
386
-
387
- # Take pairs: for each recent memory's keywords, create a query
388
- # then the recent memory is positive and older memories are negative
389
- processed_queries: Set[str] = set()
390
-
391
- for memory in recent[:15]:
392
- keywords = self._extract_keywords(memory.get('content', ''))
393
- query = ' '.join(keywords) if keywords else ''
394
- if not query or query in processed_queries:
395
- continue
396
- processed_queries.add(query)
397
-
398
- # This recent memory is positive
399
- records.append(self._build_record(
400
- query=query,
401
- memory=memory,
402
- label=0.8, # Good but not perfect (it's synthetic)
403
- source='recency_positive',
404
- ))
405
-
406
- # Find older memories about similar topic
407
- similar_old = self._search_memories(query, limit=5)
408
- for old_mem in similar_old:
409
- if old_mem['id'] == memory['id']:
410
- continue
411
- # Older memories get lower label
412
- records.append(self._build_record(
413
- query=query,
414
- memory=old_mem,
415
- label=0.3,
416
- source='recency_negative',
417
- ))
418
-
419
- return records
420
-
421
- # ========================================================================
422
- # Record Building
423
- # ========================================================================
424
-
425
- def _build_record(
426
- self,
427
- query: str,
428
- memory: dict,
429
- label: float,
430
- source: str,
431
- ) -> dict:
432
- """
433
- Build a training record with features.
434
-
435
- For synthetic data, we use simplified context:
436
- - No tech preferences (unknown at bootstrap time)
437
- - No current project
438
- - No workflow phase
439
- Focus on measurable features: importance, recency, access_frequency.
440
- """
441
- # Set neutral context (no query-time info for synthetic data)
442
- # Context is already set externally or defaults to neutral
443
- features = self._feature_extractor.extract_features(memory, query)
444
-
445
- return {
446
- 'query': query,
447
- 'query_hash': hashlib.sha256(query.encode()).hexdigest()[:16],
448
- 'memory_id': memory.get('id', 0),
449
- 'label': label,
450
- 'source': source,
451
- 'features': features,
452
- }
453
-
454
- # ========================================================================
455
- # Model Training
456
- # ========================================================================
457
-
458
def bootstrap_model(self) -> Optional[Dict[str, Any]]:
    """
    Generate synthetic data and train the bootstrap model.

    Steps:
        1. Generate synthetic training data
        2. Group records by query hash and build feature/label arrays
        3. Train LightGBM with the tier's max_depth / n_estimators
        4. Save the model to MODEL_PATH
        5. Record metadata in learning_db (best effort)
        6. Return metadata

    Returns:
        Training metadata dict, or None if bootstrap is not possible
        (missing dependencies, no tier, no usable records/groups, fewer
        than 10 samples, or a training/saving failure).
    """
    if not HAS_LIGHTGBM or not HAS_NUMPY:
        logger.warning("Bootstrap requires LightGBM and NumPy")
        return None

    tier = self.get_tier()
    if tier is None:
        logger.info("Not enough memories for bootstrap")
        return None

    config = BOOTSTRAP_CONFIG[tier]
    logger.info(
        "Starting bootstrap (tier=%s, target=%d samples)",
        tier, config['target_samples']
    )

    # Set neutral context for feature extraction
    self._feature_extractor.set_context()

    records = self.generate_synthetic_training_data()
    if not records:
        logger.warning("No synthetic records generated")
        return None

    # Trim to target sample count, keeping a diverse mix of sources.
    if len(records) > config['target_samples']:
        records = self._diverse_sample(records, config['target_samples'])

    # Group by query_hash: the ranker needs contiguous per-query groups.
    query_groups: Dict[str, List[dict]] = {}
    for record in records:
        query_groups.setdefault(record['query_hash'], []).append(record)

    # A group with a single item carries no ranking signal.
    query_groups = {
        qh: recs for qh, recs in query_groups.items()
        if len(recs) >= 2
    }

    if not query_groups:
        logger.warning("No valid query groups (need 2+ records per group)")
        return None

    # Build matrices; row order must match the group-size list.
    all_features = []
    all_labels = []
    groups = []
    for group_records in query_groups.values():
        all_features.extend(rec['features'] for rec in group_records)
        all_labels.extend(rec['label'] for rec in group_records)
        groups.append(len(group_records))

    X = np.array(all_features, dtype=np.float64)
    y = np.array(all_labels, dtype=np.float64)
    total_samples = X.shape[0]

    if total_samples < 10:
        logger.warning("Too few samples after grouping: %d", total_samples)
        return None

    logger.info(
        "Training bootstrap model: %d samples, %d groups, tier=%s",
        total_samples, len(groups), tier
    )

    train_dataset = lgb.Dataset(
        X, label=y, group=groups,
        feature_name=list(FEATURE_NAMES),
        free_raw_data=False,
    )

    # Use tiered n_estimators and max_depth
    params = dict(BOOTSTRAP_PARAMS)
    params['max_depth'] = config['max_depth']
    n_estimators = config['n_estimators']

    # Metric history is captured during training. The previous post-hoc
    # Booster.eval_train(Dataset) call always failed silently: eval_train
    # only accepts ``feval``, and its tuples are
    # (dataset_name, eval_name, value, is_higher_better).
    eval_history: Dict[str, Dict[str, List[float]]] = {}

    try:
        booster = lgb.train(
            params,
            train_dataset,
            num_boost_round=n_estimators,
            valid_sets=[train_dataset],
            valid_names=['train'],
            callbacks=[
                lgb.log_evaluation(period=0),  # Silent
                lgb.record_evaluation(eval_history),
            ],
        )
    except Exception as e:
        logger.error("Bootstrap training failed: %s", e)
        return None

    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    try:
        booster.save_model(str(MODEL_PATH))
        logger.info("Bootstrap model saved to %s", MODEL_PATH)
    except Exception as e:
        logger.error("Failed to save bootstrap model: %s", e)
        return None

    # Final-round NDCG@10 on the training set, if that metric was evaluated.
    ndcg_at_10 = None
    for metric_name, values in eval_history.get('train', {}).items():
        if 'ndcg@10' in metric_name and values:
            ndcg_at_10 = values[-1]
            break
    if ndcg_at_10 is None:
        logger.debug("NDCG@10 not available from training evaluation")

    # One timestamp for both the version string and created_at.
    now = datetime.now()
    model_version = f"bootstrap_{tier}_{now.strftime('%Y%m%d_%H%M%S')}"

    ldb = self._get_learning_db()
    if ldb:
        try:
            ldb.record_model_training(
                model_version=model_version,
                training_samples=total_samples,
                synthetic_samples=total_samples,
                real_samples=0,
                ndcg_at_10=ndcg_at_10,
                model_path=str(MODEL_PATH),
            )
        except Exception as e:
            # Metadata is best effort; the trained model is already saved.
            logger.warning("Failed to record bootstrap metadata: %s", e)

    metadata = {
        'model_version': model_version,
        'tier': tier,
        'training_samples': total_samples,
        'synthetic_samples': total_samples,
        'query_groups': len(groups),
        'n_estimators': n_estimators,
        'max_depth': config['max_depth'],
        'ndcg_at_10': ndcg_at_10,
        'model_path': str(MODEL_PATH),
        'source_breakdown': self._count_sources(records),
        'created_at': now.isoformat(),
    }
    logger.info("Bootstrap complete: %s", metadata)
    return metadata
625
-
626
- # ========================================================================
627
- # Memory Database Queries (READ-ONLY on memory.db)
628
- # ========================================================================
629
-
630
def _get_memories_by_access(self, min_access: int = 5) -> List[dict]:
    """
    Return memories selected by access count.

    Delegates to ``get_memories_by_access`` with ``self._memory_db`` and
    the ``min_access`` threshold.
    NOTE(review): presumably selects access_count >= min_access — confirm
    in the helper.
    """
    rows = get_memories_by_access(self._memory_db, min_access)
    return rows
637
-
638
def _get_memories_by_importance(self, min_importance: int = 8) -> List[dict]:
    """
    Return memories selected by importance.

    Delegates to ``get_memories_by_importance`` with ``self._memory_db``
    and the ``min_importance`` threshold.
    NOTE(review): presumably selects importance >= min_importance —
    confirm in the helper.
    """
    rows = get_memories_by_importance(self._memory_db, min_importance)
    return rows
645
-
646
def _get_recent_memories(self, limit: int = 30) -> List[dict]:
    """Return up to ``limit`` recent memories via ``get_recent_memories``."""
    rows = get_recent_memories(self._memory_db, limit)
    return rows
649
-
650
def _get_learned_patterns(
    self,
    min_confidence: float = 0.7,
) -> List[dict]:
    """
    Return learned identity patterns filtered by confidence.

    Delegates to ``get_learned_patterns`` with ``self._memory_db`` and
    ``min_confidence``.
    NOTE(review): the helper is expected to return an empty list when
    the identity_patterns table is absent (older databases) — confirm
    there.
    """
    patterns = get_learned_patterns(self._memory_db, min_confidence)
    return patterns
664
-
665
def _search_memories(self, query: str, limit: int = 20) -> List[dict]:
    """
    Look up memories matching ``query``, returning at most ``limit`` rows.

    Delegates to ``search_memories`` with ``self._memory_db``; used to
    find memories for synthetic query terms.
    NOTE(review): presumably a plain FTS5 lookup with no extra ranking —
    confirm in the helper.
    """
    hits = search_memories(self._memory_db, query, limit)
    return hits
673
-
674
def _find_negative_memories(
    self,
    anchor_memory: dict,
    exclude_ids: Optional[Set[int]] = None,
    limit: int = 2,
) -> List[dict]:
    """
    Find memories to use as negative examples for ``anchor_memory``.

    Delegates to ``find_negative_memories`` with ``self._memory_db``,
    the anchor, the ids to exclude and the maximum number of results.
    NOTE(review): the selection heuristic (different category/project,
    random fallback) lives in the helper — confirm there.
    """
    negatives = find_negative_memories(
        self._memory_db, anchor_memory, exclude_ids, limit
    )
    return negatives
687
-
688
- # ========================================================================
689
- # Text Processing
690
- # ========================================================================
691
-
692
def _extract_keywords(self, content: str, top_n: int = 3) -> List[str]:
    """
    Extract up to ``top_n`` keywords from memory content.

    Delegates to the module-level ``extract_keywords`` helper.
    NOTE(review): presumably frequency-based with stopword removal —
    confirm in the helper.
    """
    keywords = extract_keywords(content, top_n)
    return keywords
704
-
705
- # ========================================================================
706
- # Utility
707
- # ========================================================================
708
-
709
def _diverse_sample(
    self,
    records: List[dict],
    target: int,
) -> List[dict]:
    """
    Reduce ``records`` towards ``target`` items via ``diverse_sample``.

    NOTE(review): the helper is expected to sample proportionally per
    source strategy so no single strategy dominates — confirm there.
    """
    sampled = diverse_sample(records, target)
    return sampled
721
-
722
def _count_sources(self, records: List[dict]) -> Dict[str, int]:
    """Return per-source record counts as computed by ``count_sources``."""
    breakdown = count_sources(records)
    return breakdown
725
-
726
-
727
- # ============================================================================
728
- # Module-level convenience
729
- # ============================================================================
730
-
731
def should_bootstrap(memory_db_path: Optional[Path] = None) -> bool:
    """
    Quick check whether a bootstrap run is needed.

    Creates a temporary SyntheticBootstrapper for ``memory_db_path`` and
    returns its ``should_bootstrap()`` verdict.

    Returns:
        The bootstrapper's verdict, or False when constructing or
        querying it raises. The failure is logged at debug level rather
        than silently discarded.
    """
    try:
        bootstrapper = SyntheticBootstrapper(memory_db_path=memory_db_path)
        return bootstrapper.should_bootstrap()
    except Exception as e:
        # Best-effort check: never propagate, but leave a trace for debugging.
        logger.debug("Bootstrap pre-check failed: %s", e)
        return False
738
-
739
-
740
def run_bootstrap(
    memory_db_path: Optional[Path] = None,
    learning_db=None,
) -> Optional[Dict[str, Any]]:
    """
    Run the bootstrap end to end and return its metadata.

    Builds a SyntheticBootstrapper from the given arguments and trains
    only when its ``should_bootstrap()`` check passes.

    Returns:
        The metadata dict from ``bootstrap_model()``, or None when the
        pre-check fails or any exception is raised (logged as an error).
    """
    try:
        bootstrapper = SyntheticBootstrapper(
            memory_db_path=memory_db_path,
            learning_db=learning_db,
        )
        if not bootstrapper.should_bootstrap():
            return None
        return bootstrapper.bootstrap_model()
    except Exception as e:
        logger.error("Bootstrap failed: %s", e)
        return None