superlocalmemory 2.8.6 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. package/LICENSE +9 -1
  2. package/NOTICE +63 -0
  3. package/README.md +165 -480
  4. package/bin/slm +17 -449
  5. package/bin/slm-npm +1 -1
  6. package/conftest.py +5 -0
  7. package/docs/api-reference.md +284 -0
  8. package/docs/architecture.md +149 -0
  9. package/docs/auto-memory.md +150 -0
  10. package/docs/cli-reference.md +276 -0
  11. package/docs/compliance.md +191 -0
  12. package/docs/configuration.md +182 -0
  13. package/docs/getting-started.md +102 -0
  14. package/docs/ide-setup.md +261 -0
  15. package/docs/mcp-tools.md +220 -0
  16. package/docs/migration-from-v2.md +170 -0
  17. package/docs/profiles.md +173 -0
  18. package/docs/troubleshooting.md +310 -0
  19. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  20. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  21. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  22. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  23. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  24. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  25. package/ide/configs/cursor-mcp.json +15 -0
  26. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  27. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  28. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  29. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  30. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  31. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  32. package/{configs → ide/configs}/zed-mcp.json +2 -2
  33. package/{hooks → ide/hooks}/context-hook.js +9 -20
  34. package/ide/hooks/memory-list-skill.js +70 -0
  35. package/ide/hooks/memory-profile-skill.js +101 -0
  36. package/ide/hooks/memory-recall-skill.js +62 -0
  37. package/ide/hooks/memory-remember-skill.js +68 -0
  38. package/ide/hooks/memory-reset-skill.js +160 -0
  39. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  40. package/ide/integrations/langchain/README.md +106 -0
  41. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  42. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  43. package/ide/integrations/langchain/pyproject.toml +38 -0
  44. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  45. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  46. package/ide/integrations/langchain/tests/test_security.py +117 -0
  47. package/ide/integrations/llamaindex/README.md +81 -0
  48. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  49. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  50. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  51. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  52. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  53. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  54. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  55. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  56. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  57. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  58. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  59. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  60. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  61. package/package.json +13 -22
  62. package/pyproject.toml +85 -0
  63. package/scripts/build-dmg.sh +417 -0
  64. package/scripts/install-skills.ps1 +334 -0
  65. package/scripts/postinstall.js +2 -2
  66. package/scripts/start-dashboard.ps1 +52 -0
  67. package/scripts/start-dashboard.sh +41 -0
  68. package/scripts/sync-wiki.ps1 +127 -0
  69. package/scripts/sync-wiki.sh +82 -0
  70. package/scripts/test-dmg.sh +161 -0
  71. package/scripts/test-npm-package.ps1 +252 -0
  72. package/scripts/test-npm-package.sh +207 -0
  73. package/scripts/verify-install.ps1 +294 -0
  74. package/scripts/verify-install.sh +266 -0
  75. package/src/superlocalmemory/__init__.py +0 -0
  76. package/src/superlocalmemory/attribution/__init__.py +9 -0
  77. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  78. package/src/superlocalmemory/attribution/signer.py +153 -0
  79. package/src/superlocalmemory/attribution/watermark.py +189 -0
  80. package/src/superlocalmemory/cli/__init__.py +5 -0
  81. package/src/superlocalmemory/cli/commands.py +245 -0
  82. package/src/superlocalmemory/cli/main.py +89 -0
  83. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  84. package/src/superlocalmemory/cli/post_install.py +99 -0
  85. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  86. package/src/superlocalmemory/compliance/__init__.py +0 -0
  87. package/src/superlocalmemory/compliance/abac.py +204 -0
  88. package/src/superlocalmemory/compliance/audit.py +314 -0
  89. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  90. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  91. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  92. package/src/superlocalmemory/compliance/retention.py +232 -0
  93. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  94. package/src/superlocalmemory/core/__init__.py +0 -0
  95. package/src/superlocalmemory/core/config.py +391 -0
  96. package/src/superlocalmemory/core/embeddings.py +293 -0
  97. package/src/superlocalmemory/core/engine.py +701 -0
  98. package/src/superlocalmemory/core/hooks.py +65 -0
  99. package/src/superlocalmemory/core/maintenance.py +172 -0
  100. package/src/superlocalmemory/core/modes.py +140 -0
  101. package/src/superlocalmemory/core/profiles.py +234 -0
  102. package/src/superlocalmemory/core/registry.py +117 -0
  103. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  104. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  105. package/src/superlocalmemory/encoding/__init__.py +0 -0
  106. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  107. package/src/superlocalmemory/encoding/emotional.py +125 -0
  108. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  109. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  110. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  111. package/src/superlocalmemory/encoding/foresight.py +91 -0
  112. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  113. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  114. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  115. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  116. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  117. package/src/superlocalmemory/encoding/type_router.py +235 -0
  118. package/src/superlocalmemory/hooks/__init__.py +3 -0
  119. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  120. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  121. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  122. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  123. package/src/superlocalmemory/infra/__init__.py +3 -0
  124. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  125. package/src/superlocalmemory/infra/backup.py +317 -0
  126. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  127. package/src/superlocalmemory/infra/event_bus.py +381 -0
  128. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  129. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  130. package/src/superlocalmemory/learning/__init__.py +0 -0
  131. package/src/superlocalmemory/learning/adaptive.py +172 -0
  132. package/src/superlocalmemory/learning/behavioral.py +490 -0
  133. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  134. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  135. package/src/superlocalmemory/learning/cross_project.py +399 -0
  136. package/src/superlocalmemory/learning/database.py +376 -0
  137. package/src/superlocalmemory/learning/engagement.py +323 -0
  138. package/src/superlocalmemory/learning/features.py +138 -0
  139. package/src/superlocalmemory/learning/feedback.py +316 -0
  140. package/src/superlocalmemory/learning/outcomes.py +255 -0
  141. package/src/superlocalmemory/learning/project_context.py +366 -0
  142. package/src/superlocalmemory/learning/ranker.py +155 -0
  143. package/src/superlocalmemory/learning/source_quality.py +303 -0
  144. package/src/superlocalmemory/learning/workflows.py +309 -0
  145. package/src/superlocalmemory/llm/__init__.py +0 -0
  146. package/src/superlocalmemory/llm/backbone.py +316 -0
  147. package/src/superlocalmemory/math/__init__.py +0 -0
  148. package/src/superlocalmemory/math/fisher.py +356 -0
  149. package/src/superlocalmemory/math/langevin.py +398 -0
  150. package/src/superlocalmemory/math/sheaf.py +257 -0
  151. package/src/superlocalmemory/mcp/__init__.py +0 -0
  152. package/src/superlocalmemory/mcp/resources.py +245 -0
  153. package/src/superlocalmemory/mcp/server.py +61 -0
  154. package/src/superlocalmemory/mcp/tools.py +18 -0
  155. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  156. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  157. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  158. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  159. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  160. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  161. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  162. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  163. package/src/superlocalmemory/retrieval/engine.py +390 -0
  164. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  165. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  166. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  167. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  168. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  169. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  170. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  171. package/src/superlocalmemory/server/__init__.py +1 -0
  172. package/src/superlocalmemory/server/api.py +248 -0
  173. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  174. package/src/superlocalmemory/server/routes/agents.py +107 -0
  175. package/src/superlocalmemory/server/routes/backup.py +91 -0
  176. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  177. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  178. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  179. package/src/superlocalmemory/server/routes/events.py +183 -0
  180. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  181. package/src/superlocalmemory/server/routes/learning.py +273 -0
  182. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  183. package/src/superlocalmemory/server/routes/memories.py +399 -0
  184. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  185. package/src/superlocalmemory/server/routes/stats.py +346 -0
  186. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  187. package/src/superlocalmemory/server/routes/ws.py +82 -0
  188. package/src/superlocalmemory/server/security_middleware.py +57 -0
  189. package/src/superlocalmemory/server/ui.py +245 -0
  190. package/src/superlocalmemory/storage/__init__.py +0 -0
  191. package/src/superlocalmemory/storage/access_control.py +182 -0
  192. package/src/superlocalmemory/storage/database.py +594 -0
  193. package/src/superlocalmemory/storage/migrations.py +303 -0
  194. package/src/superlocalmemory/storage/models.py +406 -0
  195. package/src/superlocalmemory/storage/schema.py +726 -0
  196. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  197. package/src/superlocalmemory/trust/__init__.py +0 -0
  198. package/src/superlocalmemory/trust/gate.py +130 -0
  199. package/src/superlocalmemory/trust/provenance.py +124 -0
  200. package/src/superlocalmemory/trust/scorer.py +347 -0
  201. package/src/superlocalmemory/trust/signals.py +153 -0
  202. package/ui/index.html +278 -5
  203. package/ui/js/auto-settings.js +70 -0
  204. package/ui/js/dashboard.js +90 -0
  205. package/ui/js/fact-detail.js +92 -0
  206. package/ui/js/feedback.js +2 -2
  207. package/ui/js/ide-status.js +102 -0
  208. package/ui/js/math-health.js +98 -0
  209. package/ui/js/recall-lab.js +127 -0
  210. package/ui/js/settings.js +2 -2
  211. package/ui/js/trust-dashboard.js +73 -0
  212. package/api_server.py +0 -724
  213. package/bin/aider-smart +0 -72
  214. package/bin/superlocalmemoryv2-learning +0 -4
  215. package/bin/superlocalmemoryv2-list +0 -3
  216. package/bin/superlocalmemoryv2-patterns +0 -4
  217. package/bin/superlocalmemoryv2-profile +0 -3
  218. package/bin/superlocalmemoryv2-recall +0 -3
  219. package/bin/superlocalmemoryv2-remember +0 -3
  220. package/bin/superlocalmemoryv2-reset +0 -3
  221. package/bin/superlocalmemoryv2-status +0 -3
  222. package/configs/chatgpt-desktop-mcp.json +0 -16
  223. package/configs/cursor-mcp.json +0 -15
  224. package/hooks/memory-list-skill.js +0 -139
  225. package/hooks/memory-profile-skill.js +0 -273
  226. package/hooks/memory-recall-skill.js +0 -114
  227. package/hooks/memory-remember-skill.js +0 -127
  228. package/hooks/memory-reset-skill.js +0 -274
  229. package/mcp_server.py +0 -1808
  230. package/requirements-core.txt +0 -22
  231. package/requirements-learning.txt +0 -12
  232. package/requirements.txt +0 -12
  233. package/src/agent_registry.py +0 -411
  234. package/src/auth_middleware.py +0 -61
  235. package/src/auto_backup.py +0 -459
  236. package/src/behavioral/__init__.py +0 -49
  237. package/src/behavioral/behavioral_listener.py +0 -203
  238. package/src/behavioral/behavioral_patterns.py +0 -275
  239. package/src/behavioral/cross_project_transfer.py +0 -206
  240. package/src/behavioral/outcome_inference.py +0 -194
  241. package/src/behavioral/outcome_tracker.py +0 -193
  242. package/src/behavioral/tests/__init__.py +0 -4
  243. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  244. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  245. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  246. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  247. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  248. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  249. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  250. package/src/cache_manager.py +0 -518
  251. package/src/compliance/__init__.py +0 -48
  252. package/src/compliance/abac_engine.py +0 -149
  253. package/src/compliance/abac_middleware.py +0 -116
  254. package/src/compliance/audit_db.py +0 -215
  255. package/src/compliance/audit_logger.py +0 -148
  256. package/src/compliance/retention_manager.py +0 -289
  257. package/src/compliance/retention_scheduler.py +0 -186
  258. package/src/compliance/tests/__init__.py +0 -4
  259. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  260. package/src/compliance/tests/test_abac_engine.py +0 -124
  261. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  262. package/src/compliance/tests/test_audit_db.py +0 -123
  263. package/src/compliance/tests/test_audit_logger.py +0 -98
  264. package/src/compliance/tests/test_mcp_audit.py +0 -128
  265. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  266. package/src/compliance/tests/test_retention_manager.py +0 -131
  267. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  268. package/src/compression/__init__.py +0 -25
  269. package/src/compression/cli.py +0 -150
  270. package/src/compression/cold_storage.py +0 -217
  271. package/src/compression/config.py +0 -72
  272. package/src/compression/orchestrator.py +0 -133
  273. package/src/compression/tier2_compressor.py +0 -228
  274. package/src/compression/tier3_compressor.py +0 -153
  275. package/src/compression/tier_classifier.py +0 -148
  276. package/src/db_connection_manager.py +0 -536
  277. package/src/embedding_engine.py +0 -63
  278. package/src/embeddings/__init__.py +0 -47
  279. package/src/embeddings/cache.py +0 -70
  280. package/src/embeddings/cli.py +0 -113
  281. package/src/embeddings/constants.py +0 -47
  282. package/src/embeddings/database.py +0 -91
  283. package/src/embeddings/engine.py +0 -247
  284. package/src/embeddings/model_loader.py +0 -145
  285. package/src/event_bus.py +0 -562
  286. package/src/graph/__init__.py +0 -36
  287. package/src/graph/build_helpers.py +0 -74
  288. package/src/graph/cli.py +0 -87
  289. package/src/graph/cluster_builder.py +0 -188
  290. package/src/graph/cluster_summary.py +0 -148
  291. package/src/graph/constants.py +0 -47
  292. package/src/graph/edge_builder.py +0 -162
  293. package/src/graph/entity_extractor.py +0 -95
  294. package/src/graph/graph_core.py +0 -226
  295. package/src/graph/graph_search.py +0 -231
  296. package/src/graph/hierarchical.py +0 -207
  297. package/src/graph/schema.py +0 -99
  298. package/src/graph_engine.py +0 -52
  299. package/src/hnsw_index.py +0 -628
  300. package/src/hybrid_search.py +0 -46
  301. package/src/learning/__init__.py +0 -217
  302. package/src/learning/adaptive_ranker.py +0 -682
  303. package/src/learning/bootstrap/__init__.py +0 -69
  304. package/src/learning/bootstrap/constants.py +0 -93
  305. package/src/learning/bootstrap/db_queries.py +0 -316
  306. package/src/learning/bootstrap/sampling.py +0 -82
  307. package/src/learning/bootstrap/text_utils.py +0 -71
  308. package/src/learning/cross_project_aggregator.py +0 -857
  309. package/src/learning/db/__init__.py +0 -40
  310. package/src/learning/db/constants.py +0 -44
  311. package/src/learning/db/schema.py +0 -279
  312. package/src/learning/engagement_tracker.py +0 -628
  313. package/src/learning/feature_extractor.py +0 -708
  314. package/src/learning/feedback_collector.py +0 -806
  315. package/src/learning/learning_db.py +0 -915
  316. package/src/learning/project_context_manager.py +0 -572
  317. package/src/learning/ranking/__init__.py +0 -33
  318. package/src/learning/ranking/constants.py +0 -84
  319. package/src/learning/ranking/helpers.py +0 -278
  320. package/src/learning/source_quality_scorer.py +0 -676
  321. package/src/learning/synthetic_bootstrap.py +0 -755
  322. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  323. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  324. package/src/learning/tests/test_aggregator.py +0 -306
  325. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  326. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  327. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  328. package/src/learning/tests/test_feedback_collector.py +0 -294
  329. package/src/learning/tests/test_learning_db.py +0 -602
  330. package/src/learning/tests/test_learning_db_v28.py +0 -110
  331. package/src/learning/tests/test_learning_init_v28.py +0 -48
  332. package/src/learning/tests/test_outcome_signals.py +0 -48
  333. package/src/learning/tests/test_project_context.py +0 -292
  334. package/src/learning/tests/test_schema_migration.py +0 -319
  335. package/src/learning/tests/test_signal_inference.py +0 -397
  336. package/src/learning/tests/test_source_quality.py +0 -351
  337. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  338. package/src/learning/tests/test_workflow_miner.py +0 -318
  339. package/src/learning/workflow_pattern_miner.py +0 -655
  340. package/src/lifecycle/__init__.py +0 -54
  341. package/src/lifecycle/bounded_growth.py +0 -239
  342. package/src/lifecycle/compaction_engine.py +0 -226
  343. package/src/lifecycle/lifecycle_engine.py +0 -355
  344. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  345. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  346. package/src/lifecycle/retention_policy.py +0 -285
  347. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  348. package/src/lifecycle/tests/test_compaction.py +0 -179
  349. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  350. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  351. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  352. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  353. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  354. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  355. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  356. package/src/mcp_tools_v28.py +0 -281
  357. package/src/memory/__init__.py +0 -36
  358. package/src/memory/cli.py +0 -205
  359. package/src/memory/constants.py +0 -39
  360. package/src/memory/helpers.py +0 -28
  361. package/src/memory/schema.py +0 -166
  362. package/src/memory-profiles.py +0 -595
  363. package/src/memory-reset.py +0 -491
  364. package/src/memory_compression.py +0 -989
  365. package/src/memory_store_v2.py +0 -1155
  366. package/src/migrate_v1_to_v2.py +0 -629
  367. package/src/pattern_learner.py +0 -34
  368. package/src/patterns/__init__.py +0 -24
  369. package/src/patterns/analyzers.py +0 -251
  370. package/src/patterns/learner.py +0 -271
  371. package/src/patterns/scoring.py +0 -171
  372. package/src/patterns/store.py +0 -225
  373. package/src/patterns/terminology.py +0 -140
  374. package/src/provenance_tracker.py +0 -312
  375. package/src/qualixar_attribution.py +0 -139
  376. package/src/qualixar_watermark.py +0 -78
  377. package/src/query_optimizer.py +0 -511
  378. package/src/rate_limiter.py +0 -83
  379. package/src/search/__init__.py +0 -20
  380. package/src/search/cli.py +0 -77
  381. package/src/search/constants.py +0 -26
  382. package/src/search/engine.py +0 -241
  383. package/src/search/fusion.py +0 -122
  384. package/src/search/index_loader.py +0 -114
  385. package/src/search/methods.py +0 -162
  386. package/src/search_engine_v2.py +0 -401
  387. package/src/setup_validator.py +0 -482
  388. package/src/subscription_manager.py +0 -391
  389. package/src/tree/__init__.py +0 -59
  390. package/src/tree/builder.py +0 -185
  391. package/src/tree/nodes.py +0 -202
  392. package/src/tree/queries.py +0 -257
  393. package/src/tree/schema.py +0 -80
  394. package/src/tree_manager.py +0 -19
  395. package/src/trust/__init__.py +0 -45
  396. package/src/trust/constants.py +0 -66
  397. package/src/trust/queries.py +0 -157
  398. package/src/trust/schema.py +0 -95
  399. package/src/trust/scorer.py +0 -299
  400. package/src/trust/signals.py +0 -95
  401. package/src/trust_scorer.py +0 -44
  402. package/ui/app.js +0 -1588
  403. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  404. package/ui/js/graph-cytoscape.js +0 -1168
  405. package/ui/js/graph-d3-backup.js +0 -32
  406. package/ui/js/graph.js +0 -32
  407. package/ui_server.py +0 -286
  408. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  409. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  410. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  411. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  412. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  413. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  414. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  415. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  416. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  417. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  418. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  419. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  420. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  421. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  422. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  423. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  424. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  425. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  426. /package/{completions → ide/completions}/slm.bash +0 -0
  427. /package/{completions → ide/completions}/slm.zsh +0 -0
  428. /package/{configs → ide/configs}/cody-commands.json +0 -0
  429. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
  430. /package/{install.ps1 → scripts/install.ps1} +0 -0
  431. /package/{install.sh → scripts/install.sh} +0 -0
@@ -0,0 +1,295 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — 2-Round Sufficiency Verification (EverMemOS Pattern).
6
+
7
+ Round 1: Standard retrieval → sufficiency check.
8
+ Round 2 (if insufficient): LLM generates refined queries → merge → rerank.
9
+
10
+ Design decisions:
11
+ - 2 rounds MAX (3-round decomposition BROKE relational context in S16)
12
+ - Trigger: max_score < 0.6 OR multi_hop query type OR fewer than 3 round-1 results
13
+ - Skip agentic entirely for query types listed in _SKIP_TYPES (S15 lesson: temporal queries; the set is currently empty, so no query type is skipped)
14
+ - Mode A: heuristic alias expansion (no LLM)
15
+ - Mode C: LLM sufficiency judgment with 3-way classification
16
+
17
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
18
+ License: MIT
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import logging
24
+ import re
25
+ from dataclasses import dataclass
26
+ from typing import Any, Protocol
27
+
28
+ from superlocalmemory.storage.models import AtomicFact
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ _MAX_ROUNDS = 2
33
+ _SUFFICIENCY_SCORE_THRESHOLD = 0.6
34
+ _SKIP_TYPES = frozenset() # Hotpatch: enable agentic for ALL query types including multi_hop
35
+
36
+ _SUFFICIENCY_SYSTEM = (
37
+ "You evaluate whether retrieved context is sufficient to answer a query. "
38
+ 'Respond ONLY with JSON: {"is_sufficient": true/false, "missing_information": "..."}'
39
+ )
40
+
41
+ _REWRITE_SYSTEM = (
42
+ "You rewrite queries for a memory retrieval system. "
43
+ "Respond ONLY with a JSON array of 1-3 rewritten queries: "
44
+ '["query1", "query2"]'
45
+ )
46
+
47
+
48
class LLMBackend(Protocol):
    """Structural interface of the LLM client used by this module.

    Callers in this module only invoke ``generate`` when ``is_available``
    is truthy, and read the response through its ``text`` attribute when
    present (falling back to ``str(response)``).
    """

    @property
    def is_available(self) -> bool:
        """Availability flag checked before any LLM call."""
        ...

    def generate(
        self,
        prompt: str,
        system: str = "",
        max_tokens: int = 512,
        temperature: float = 0.0,
    ) -> Any:
        """Generate a response for *prompt* under the *system* instruction."""
        ...
54
+
55
+
56
class RetrievalEngine(Protocol):
    """Structural interface of the retrieval engine used by this module."""

    def recall_facts(
        self,
        query: str,
        profile_id: str,
        top_k: int,
        skip_agentic: bool = True,
    ) -> list[tuple[AtomicFact, float]]:
        """Return ``(fact, score)`` pairs for *query* within *profile_id*."""
        ...
61
+
62
+
63
class DatabaseProtocol(Protocol):
    """Structural interface of the database used for alias expansion."""

    def get_entity_by_name(self, name: str, profile_id: str) -> Any:
        """Look up an entity by *name* for *profile_id*.

        Callers treat a falsy return as "not found" and otherwise read
        ``entity_id`` from the result.
        """
        ...

    def get_aliases_for_entity(self, entity_id: str) -> list[Any]:
        """Return alias records for *entity_id*; callers read ``.alias``."""
        ...
67
+
68
+
69
@dataclass
class RetrievalRound:
    """Bookkeeping record describing a single retrieval round."""

    # 1 for the initial retrieval, 2 for a refinement query.
    round_num: int
    # Query text that was sent to the retrieval engine in this round.
    query: str
    # Number of (fact, score) pairs the engine returned.
    result_count: int
    # Mean score over the returned pairs (0.0 when there were none).
    avg_score: float
    # Sufficiency flag recorded for this round.
    is_sufficient: bool
77
+
78
+
79
class AgenticRetriever:
    """2-round sufficiency verification (EverMemOS pattern).

    Round 1: retrieve, then judge whether the results are sufficient.
    Round 2 (only when triggered): run refined queries, merge their results
    with round 1 by ``fact_id`` (keeping the best score) and re-sort.

    Mode A (no LLM): heuristic alias expansion for round 2.
    Mode C (LLM): LLM sufficiency judgment + LLM query refinement.

    Attributes:
        rounds: Metadata for the rounds executed by the most recent
            ``retrieve`` call; reset at the start of every call.
    """

    def __init__(
        self,
        confidence_threshold: float = _SUFFICIENCY_SCORE_THRESHOLD,
        min_results_ratio: float = 0.5,
        db: DatabaseProtocol | None = None,
    ) -> None:
        """Configure the retriever.

        Args:
            confidence_threshold: Score below which round-1 results are
                considered weak (used by the sufficiency fallback and the
                round-2 trigger).
            min_results_ratio: Stored on the instance; not read by any
                method of this class.
            db: Optional database used for Mode A alias expansion. Without
                it, Mode A produces no refined queries.
        """
        self._threshold = confidence_threshold
        self._min_ratio = min_results_ratio
        self._db = db
        self.rounds: list[RetrievalRound] = []

    def retrieve(
        self, query: str, profile_id: str,
        retrieval_engine: RetrievalEngine,
        llm: LLMBackend | None = None,
        top_k: int = 20, query_type: str = "",
    ) -> list[AtomicFact]:
        """Run the 2-round retrieval and return at most *top_k* facts.

        Args:
            query: User query.
            profile_id: Profile the retrieval is scoped to.
            retrieval_engine: Engine providing ``recall_facts``.
            llm: Optional LLM backend; used only when ``is_available``.
            top_k: Maximum number of facts requested and returned.
            query_type: Query classification; ``"multi_hop"`` forces the
                round-2 trigger, and types in ``_SKIP_TYPES`` bypass the
                agentic flow entirely.

        Returns:
            Facts from round 1 when they are sufficient or round 2 is not
            triggered; otherwise the merged round-1/round-2 facts sorted by
            descending score.
        """
        self.rounds = []

        # Bypass for query types listed in _SKIP_TYPES (the S15 lesson was
        # about temporal queries; the set decides what is actually skipped).
        if query_type in _SKIP_TYPES:
            logger.debug("Skipping agentic for query_type=%s", query_type)
            return [f for f, _ in retrieval_engine.recall_facts(
                query, profile_id, top_k=top_k, skip_agentic=True)]

        # Round 1: standard retrieval
        r1 = retrieval_engine.recall_facts(
            query, profile_id, top_k=top_k, skip_agentic=True,
        )
        max_score = max((s for _, s in r1), default=0.0)

        # Sufficiency check
        is_sufficient = self._check_sufficiency(query, r1, llm)
        self.rounds.append(
            RetrievalRound(1, query, len(r1), _avg(r1), is_sufficient),
        )

        if is_sufficient:
            return [f for f, _ in r1[:top_k]]

        # Trigger round 2 only when: low score OR multi_hop OR few results.
        needs_round2 = (
            max_score < self._threshold
            or query_type == "multi_hop"
            or len(r1) < 3
        )
        if not needs_round2:
            return [f for f, _ in r1[:top_k]]

        # Round 2: refinement. The pool is keyed by fact_id and keeps the
        # highest score seen for each fact across both rounds.
        pool: dict[str, tuple[AtomicFact, float]] = {}
        self._merge(pool, r1)

        if llm is not None and getattr(llm, "is_available", False):
            # Mode C: LLM generates refined queries
            refined = self._llm_refine(query, r1, llm)
        else:
            # Mode A: heuristic alias expansion
            refined = self._heuristic_expand(query, profile_id)

        # Round 1 already ran the original query; re-running it (or running
        # a blank/duplicate refinement) cannot add new information.
        already_run = {query.strip()}
        for rq in refined:
            key = rq.strip()
            if not key or key in already_run:
                continue
            already_run.add(key)

            rn = retrieval_engine.recall_facts(
                rq, profile_id, top_k=top_k, skip_agentic=True,
            )
            self._merge(pool, rn)
            # Sufficiency is not re-evaluated after round 2; the flag is
            # recorded as True for every refinement query.
            self.rounds.append(
                RetrievalRound(2, rq, len(rn), _avg(rn), True),
            )

        merged = sorted(pool.values(), key=lambda x: x[1], reverse=True)
        return [f for f, _ in merged[:top_k]]

    @staticmethod
    def _merge(
        pool: dict[str, tuple[AtomicFact, float]],
        results: list[tuple[AtomicFact, float]],
    ) -> None:
        """Fold *results* into *pool*, keeping the best score per fact_id."""
        for fact, score in results:
            existing = pool.get(fact.fact_id)
            if existing is None or score > existing[1]:
                pool[fact.fact_id] = (fact, score)

    # -- Sufficiency check ---------------------------------------------------

    def _check_sufficiency(
        self, query: str, results: list[tuple[AtomicFact, float]],
        llm: LLMBackend | None,
    ) -> bool:
        """Decide whether *results* are sufficient to answer *query*.

        Order of evaluation:
          1. No results -> insufficient.
          2. Heuristic fast paths: top score >= 0.8 with at least 5 results
             is sufficient; top score < 0.3 or fewer than 2 results is
             insufficient.
          3. Ambiguous cases: ask the LLM when one is available and use its
             verdict if it can be parsed.
          4. Fallback: sufficient iff the top score reaches the configured
             confidence threshold.

        LLM failures are logged and fall through to step 4.
        """
        if not results:
            return False

        max_score = max((s for _, s in results), default=0.0)

        # Heuristic fast path: clearly sufficient
        if max_score >= 0.8 and len(results) >= 5:
            return True

        # Heuristic fast path: clearly insufficient
        if max_score < 0.3 or len(results) < 2:
            return False

        # LLM sufficiency check (Mode C only)
        if llm is not None and getattr(llm, "is_available", False):
            try:
                top5_context = "\n".join(
                    f"- {f.content}" for f, _ in results[:5]
                )
                prompt = (
                    f"Query: {query}\n\n"
                    f"Retrieved context:\n{top5_context}\n\n"
                    "Is this context sufficient to answer the query?"
                )
                resp = llm.generate(
                    prompt=prompt, system=_SUFFICIENCY_SYSTEM,
                    max_tokens=128, temperature=0.0,
                )
                text = getattr(resp, "text", str(resp))
                parsed = _parse_sufficiency(text)
                if parsed is not None:
                    return parsed
            except Exception as exc:
                # Best-effort: an LLM failure must not break retrieval.
                logger.warning("Sufficiency check failed: %s", exc)

        # Default: sufficient if score is above threshold
        return max_score >= self._threshold

    # -- Query refinement ----------------------------------------------------

    @staticmethod
    def _llm_refine(
        query: str,
        prev: list[tuple[AtomicFact, float]],
        llm: LLMBackend,
    ) -> list[str]:
        """Ask the LLM for refined queries (at most 3 are returned).

        The prompt includes the first 80 characters of the top 3 previous
        results as context. Returns an empty list when the LLM call fails
        or its output cannot be parsed.
        """
        ctx = ""
        if prev:
            ctx = f"\nCurrent results: {[f.content[:80] for f, _ in prev[:3]]}"
        try:
            resp = llm.generate(
                prompt=(
                    f"Original query: {query}\n"
                    f"Insufficient results.{ctx}\n"
                    "Generate 2-3 refined search queries to find missing information."
                ),
                system=_REWRITE_SYSTEM,
                max_tokens=256,
                temperature=0.0,
            )
            parsed = _parse_json_strings(getattr(resp, "text", str(resp)))
            if parsed:
                return parsed[:3]
        except Exception as exc:
            # Best-effort: fall back to "no refinement" on any LLM failure.
            logger.warning("LLM refine failed: %s", exc)
        return []

    def _heuristic_expand(
        self, query: str, profile_id: str,
    ) -> list[str]:
        """Mode A: expand query with entity aliases (no LLM).

        Capitalized words of three or more letters are treated as candidate
        entity names. For each one found in the database, up to 3 of its
        aliases are appended to the query. Repeated names are looked up
        once, and empty or duplicate aliases are dropped.

        Returns:
            A single-element list with the expanded query, or an empty list
            when no database is configured or no alias was found.
        """
        if self._db is None:
            return []

        expanded_parts: list[str] = []
        seen_aliases: set[str] = set()
        # dict.fromkeys de-duplicates names while preserving their order.
        names = dict.fromkeys(re.findall(r"\b[A-Z][a-z]{2,}\b", query))
        for name in names:
            entity = self._db.get_entity_by_name(name, profile_id)
            if not entity:
                continue
            aliases = self._db.get_aliases_for_entity(entity.entity_id)
            for a in aliases[:3]:
                alias = a.alias
                if alias and alias not in seen_aliases:
                    seen_aliases.add(alias)
                    expanded_parts.append(alias)

        if expanded_parts:
            return [query + " " + " ".join(expanded_parts)]
        return []
260
+
261
+
262
+ # ---------------------------------------------------------------------------
263
+ # Helpers
264
+ # ---------------------------------------------------------------------------
265
+
266
+ def _avg(results: list[tuple[AtomicFact, float]]) -> float:
267
+ return sum(s for _, s in results) / len(results) if results else 0.0
268
+
269
+
270
+ def _parse_json_strings(raw: str) -> list[str]:
271
+ """Extract JSON string array from LLM output."""
272
+ try:
273
+ m = re.search(r"\[.*\]", raw, re.DOTALL)
274
+ if not m:
275
+ return []
276
+ items = json.loads(m.group())
277
+ if isinstance(items, list):
278
+ return [str(q).strip() for q in items[:3] if q]
279
+ return []
280
+ except (json.JSONDecodeError, ValueError):
281
+ return []
282
+
283
+
284
+ def _parse_sufficiency(raw: str) -> bool | None:
285
+ """Parse LLM sufficiency response JSON."""
286
+ try:
287
+ m = re.search(r"\{.*\}", raw, re.DOTALL)
288
+ if not m:
289
+ return None
290
+ data = json.loads(m.group())
291
+ if isinstance(data, dict) and "is_sufficient" in data:
292
+ return bool(data["is_sufficient"])
293
+ return None
294
+ except (json.JSONDecodeError, ValueError):
295
+ return None
@@ -0,0 +1,223 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — Approximate Nearest Neighbor Index.
6
+
7
+ Numpy-based cosine similarity with thread-safe operations. Supports
8
+ rebuilding from database on cold start and incremental add/remove.
9
+
10
+ No FAISS dependency — pure numpy brute-force is sufficient for the
11
+ scale we target (up to 100K facts). At that scale, a single cosine
12
+ scan takes ~5ms on CPU which is well within our latency budget.
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ License: MIT
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import threading
22
+
23
+ import numpy as np
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class ANNIndex:
    """Thread-safe nearest neighbor index using numpy.

    Stores (fact_id, embedding) pairs and supports top-k cosine
    similarity search through an exhaustive dot-product scan. Vectors
    are L2-normalized on insertion for efficient dot-product scoring.

    Args:
        dimension: Embedding vector dimension (e.g. 768 for nomic-embed).
    """

    def __init__(self, dimension: int) -> None:
        self._dim = dimension
        self._ids: list[str] = []
        self._id_to_idx: dict[str, int] = {}
        self._vectors: list[np.ndarray] = []
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def size(self) -> int:
        """Number of indexed vectors."""
        with self._lock:
            return len(self._ids)

    @property
    def dimension(self) -> int:
        """Embedding dimension this index was created for."""
        return self._dim

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _normalized(
        self, embedding: list[float], fact_id: str,
    ) -> np.ndarray | None:
        """Return ``embedding`` as a flat float32 unit vector.

        Returns ``None`` (after logging a warning) when the flattened
        length differs from the index dimension. Vectors whose norm is
        at most 1e-10 are returned without scaling.
        """
        vec = np.asarray(embedding, dtype=np.float32).ravel()
        if vec.shape[0] != self._dim:
            logger.warning(
                "Dimension mismatch: expected %d, got %d for %s",
                self._dim, vec.shape[0], fact_id,
            )
            return None

        norm = np.linalg.norm(vec)
        if norm > 1e-10:
            vec = vec / norm
        return vec

    # ------------------------------------------------------------------
    # Mutation
    # ------------------------------------------------------------------

    def add(self, fact_id: str, embedding: list[float]) -> None:
        """Add or update a fact embedding in the index.

        The vector is L2-normalized before storage so that dot-product
        equals cosine similarity at search time. Embeddings of the wrong
        dimension are logged and ignored.

        Args:
            fact_id: Unique fact identifier.
            embedding: Raw embedding vector (will be normalized).
        """
        vec = self._normalized(embedding, fact_id)
        if vec is None:
            return

        with self._lock:
            slot = self._id_to_idx.get(fact_id)
            if slot is None:
                # Append new entry
                self._id_to_idx[fact_id] = len(self._ids)
                self._ids.append(fact_id)
                self._vectors.append(vec)
            else:
                # Update existing entry
                self._vectors[slot] = vec

    def remove(self, fact_id: str) -> None:
        """Remove a fact from the index.

        Uses swap-and-pop: the last element fills the gap left by the
        removed element, so no other entries are shifted.

        Args:
            fact_id: Fact identifier to remove. No-op if not found.
        """
        with self._lock:
            if fact_id not in self._id_to_idx:
                return

            idx = self._id_to_idx.pop(fact_id)
            last_idx = len(self._ids) - 1

            if idx != last_idx:
                # Move the last element into the vacated slot
                last_id = self._ids[last_idx]
                self._ids[idx] = last_id
                self._vectors[idx] = self._vectors[last_idx]
                self._id_to_idx[last_id] = idx

            self._ids.pop()
            self._vectors.pop()

    def clear(self) -> None:
        """Remove all indexed vectors."""
        with self._lock:
            self._ids.clear()
            self._id_to_idx.clear()
            self._vectors.clear()

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    def search(
        self,
        query_embedding: list[float],
        top_k: int = 30,
    ) -> list[tuple[str, float]]:
        """Find top-k most similar facts by cosine similarity.

        Args:
            query_embedding: Query vector (will be L2-normalized).
            top_k: Number of results to return.

        Returns:
            List of (fact_id, score) tuples sorted by score descending.
            Empty when the query has the wrong dimension or near-zero
            norm, when the index is empty, or when ``top_k`` is not
            positive.
        """
        q = np.asarray(query_embedding, dtype=np.float32).ravel()
        if q.shape[0] != self._dim:
            logger.warning(
                "Query dim mismatch: expected %d, got %d",
                self._dim, q.shape[0],
            )
            return []

        norm = np.linalg.norm(q)
        if norm < 1e-10:
            return []
        q_normed = q / norm

        with self._lock:
            if not self._vectors:
                return []

            # Stack into matrix for vectorized dot product
            mat = np.stack(self._vectors)  # shape: (N, dim)
            scores = mat @ q_normed  # shape: (N,)

            k = min(top_k, len(scores))
            if k <= 0:
                return []

            # Partial selection of the k best, then order just those k
            top_indices = np.argpartition(scores, -k)[-k:]
            top_indices = top_indices[np.argsort(scores[top_indices])[::-1]]

            return [
                (self._ids[i], float(scores[i]))
                for i in top_indices
            ]

    # ------------------------------------------------------------------
    # Bulk loading (cold start)
    # ------------------------------------------------------------------

    def rebuild(
        self,
        fact_ids: list[str],
        embeddings: list[list[float]],
    ) -> int:
        """Rebuild the entire index from database contents.

        Replaces all existing entries. The replacement contents are
        prepared first and swapped in under a single lock acquisition,
        so a concurrent search sees either the old index or the new one,
        never a partially filled one.

        Args:
            fact_ids: List of fact identifiers.
            embeddings: Corresponding embedding vectors.

        Returns:
            Number of vectors actually stored in the rebuilt index.
            Embeddings rejected for dimension mismatch are not counted,
            and a repeated fact_id counts once (its last vector wins).
            Returns 0 and leaves the index untouched when the two input
            lists differ in length.
        """
        if len(fact_ids) != len(embeddings):
            logger.error(
                "rebuild: mismatched lengths — %d ids vs %d embeddings",
                len(fact_ids), len(embeddings),
            )
            return 0

        new_ids: list[str] = []
        new_index: dict[str, int] = {}
        new_vectors: list[np.ndarray] = []
        for fid, emb in zip(fact_ids, embeddings):
            vec = self._normalized(emb, fid)
            if vec is None:
                continue
            slot = new_index.get(fid)
            if slot is None:
                new_index[fid] = len(new_ids)
                new_ids.append(fid)
                new_vectors.append(vec)
            else:
                new_vectors[slot] = vec

        with self._lock:
            self._ids = new_ids
            self._id_to_idx = new_index
            self._vectors = new_vectors

        indexed = len(new_ids)
        logger.info("ANN index rebuilt with %d vectors (dim=%d)", indexed, self._dim)
        return indexed
@@ -0,0 +1,185 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — BM25 Keyword Search Channel.
6
+
7
+ Persistent BM25Plus index over fact content. Catches exact name/date
8
+ matches that embedding similarity misses.
9
+
10
+ V1 bug fix: V1 kept BM25 tokens in-memory only — a restart lost
11
+ the entire index. This version persists tokens to the DB via
12
+ store_bm25_tokens / get_all_bm25_tokens and cold-loads on init.
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ License: MIT
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import re
22
+ from typing import TYPE_CHECKING
23
+
24
+ from rank_bm25 import BM25Plus
25
+
26
+ if TYPE_CHECKING:
27
+ from superlocalmemory.storage.database import DatabaseManager
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
# Minimal stopwords — small set to avoid stripping important terms.
# Entries are lowercase because tokenize() lowercases text before filtering.
_STOPWORDS: frozenset[str] = frozenset({
    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "shall",
    "should", "may", "might", "must", "can", "could", "of", "in", "to",
    "for", "with", "on", "at", "from", "by", "as", "into", "through",
    "and", "but", "or", "nor", "not", "so", "yet", "if", "then", "than",
    "that", "this", "it", "its", "i", "me", "my", "we", "our", "you",
    "your", "he", "him", "his", "she", "her", "they", "them", "their",
})

# Token pattern: a run that starts and ends with an ASCII letter/digit and may
# contain word characters (\w, which also admits "_"), hyphens and apostrophes
# in between; the second alternative matches a lone letter/digit.
_TOKEN_RE = re.compile(r"[a-zA-Z0-9][\w'-]*[a-zA-Z0-9]|[a-zA-Z0-9]")
44
+
45
+
46
def tokenize(text: str) -> list[str]:
    """Lowercase ``text``, split it into tokens, and drop stopwords.

    Exported so the encoding pipeline can persist tokens at ingest time.
    """
    lowered = text.lower()
    words = (found.group(0) for found in _TOKEN_RE.finditer(lowered))
    return [word for word in words if word not in _STOPWORDS]
53
+
54
+
55
class BM25Channel:
    """Persistent BM25Plus index for keyword retrieval.

    On cold start, loads all tokens from the DB. After that, new facts
    are added incrementally. The BM25Plus model is rebuilt lazily
    before each search when the corpus has changed.

    Documents from every loaded profile share one in-memory corpus (so
    term statistics are computed over all of them); the owning profile
    of each document is tracked so search results can be scoped.

    Attributes:
        document_count: Number of indexed documents.
    """

    def __init__(self, db: DatabaseManager) -> None:
        self._db = db
        # _corpus, _fact_ids and _fact_profiles are parallel lists:
        # position i holds the tokens, id and owning profile of one fact.
        self._corpus: list[list[str]] = []
        self._fact_ids: list[str] = []
        self._fact_profiles: list[str] = []
        self._fact_id_set: set[str] = set()
        self._bm25: BM25Plus | None = None
        self._dirty: bool = False
        self._loaded_profiles: set[str] = set()

    @property
    def document_count(self) -> int:
        """Number of documents currently held in memory."""
        return len(self._corpus)

    def _append(self, fact_id: str, tokens: list[str], profile_id: str) -> bool:
        """Append a document unless its fact_id is already indexed.

        Returns True when a new row was added.
        """
        if fact_id in self._fact_id_set:
            return False
        self._corpus.append(tokens)
        self._fact_ids.append(fact_id)
        self._fact_profiles.append(profile_id)
        self._fact_id_set.add(fact_id)
        return True

    def ensure_loaded(self, profile_id: str) -> None:
        """Cold-load BM25 tokens from DB for a profile (once).

        Idempotent: subsequent calls for the same profile are no-ops.
        When the DB has no stored tokens for the profile, its facts are
        tokenized directly and the tokens are persisted for next time.
        """
        if profile_id in self._loaded_profiles:
            return

        loaded = 0
        token_map = self._db.get_all_bm25_tokens(profile_id)
        if token_map:
            for fid, tokens in token_map.items():
                if self._append(fid, tokens, profile_id):
                    loaded += 1
        else:
            # Fallback: tokenize facts directly if no pre-stored tokens
            for fact in self._db.get_all_facts(profile_id):
                if fact.fact_id in self._fact_id_set:
                    continue
                tokens = tokenize(fact.content)
                if not tokens:
                    continue
                self._append(fact.fact_id, tokens, profile_id)
                # Persist for next cold start
                self._db.store_bm25_tokens(fact.fact_id, profile_id, tokens)
                loaded += 1

        if loaded:
            self._dirty = True
        self._loaded_profiles.add(profile_id)
        logger.debug(
            "BM25 cold-loaded %d documents for profile=%s",
            loaded, profile_id,
        )

    def add(self, fact_id: str, content: str, profile_id: str) -> None:
        """Add a single fact to the index and persist tokens.

        Re-adding an already indexed ``fact_id`` replaces its tokens in
        place instead of creating a duplicate row. Content that yields
        no tokens is ignored.

        Args:
            fact_id: Unique fact identifier.
            content: Raw text content to index.
            profile_id: Owner profile.
        """
        tokens = tokenize(content)
        if not tokens:
            return

        if not self._append(fact_id, tokens, profile_id):
            # Already indexed: overwrite rather than duplicate the document.
            pos = self._fact_ids.index(fact_id)
            self._corpus[pos] = tokens
            self._fact_profiles[pos] = profile_id
        self._dirty = True

        # Persist for cold start
        self._db.store_bm25_tokens(fact_id, profile_id, tokens)

    def search(
        self,
        query: str,
        profile_id: str,
        top_k: int = 30,
    ) -> list[tuple[str, float]]:
        """Search BM25 index for matching facts.

        Auto-loads from DB on first call for this profile.

        Args:
            query: Search query text.
            profile_id: Scope to this profile; facts owned by other
                profiles are never returned.
            top_k: Maximum results.

        Returns:
            List of (fact_id, bm25_score) sorted by score descending,
            limited to facts that share at least one token with the
            query.
        """
        self.ensure_loaded(profile_id)

        if not self._corpus:
            return []

        query_tokens = tokenize(query)
        if not query_tokens:
            return []

        # Rebuild BM25 model if corpus changed
        if self._dirty or self._bm25 is None:
            self._bm25 = BM25Plus(self._corpus, k1=1.2, b=0.75)
            self._dirty = False

        scores = self._bm25.get_scores(query_tokens)
        query_terms = set(query_tokens)

        scored: list[tuple[str, float]] = []
        rows = zip(self._fact_ids, self._fact_profiles, self._corpus, scores)
        for fid, owner, tokens, score in rows:
            if owner != profile_id:
                continue
            # BM25Plus adds a lower-bound term to every document, so a
            # positive score alone does not prove a match; require overlap.
            if query_terms.isdisjoint(tokens):
                continue
            if score > 0.0:
                scored.append((fid, float(score)))

        scored.sort(key=lambda x: x[1], reverse=True)
        return scored[:top_k]

    def clear(self) -> None:
        """Clear the in-memory index (does NOT delete DB tokens)."""
        self._corpus = []
        self._fact_ids = []
        self._fact_profiles = []
        self._fact_id_set = set()
        self._bm25 = None
        self._dirty = False
        self._loaded_profiles = set()