superlocalmemory 2.8.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +9 -1
  3. package/NOTICE +63 -0
  4. package/README.md +165 -480
  5. package/bin/slm +17 -449
  6. package/bin/slm-npm +2 -2
  7. package/bin/slm.bat +4 -2
  8. package/conftest.py +5 -0
  9. package/docs/api-reference.md +284 -0
  10. package/docs/architecture.md +149 -0
  11. package/docs/auto-memory.md +150 -0
  12. package/docs/cli-reference.md +276 -0
  13. package/docs/compliance.md +191 -0
  14. package/docs/configuration.md +182 -0
  15. package/docs/getting-started.md +102 -0
  16. package/docs/ide-setup.md +261 -0
  17. package/docs/mcp-tools.md +220 -0
  18. package/docs/migration-from-v2.md +170 -0
  19. package/docs/profiles.md +173 -0
  20. package/docs/troubleshooting.md +310 -0
  21. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  22. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  23. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  24. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  25. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  26. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  27. package/ide/configs/cursor-mcp.json +15 -0
  28. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  29. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  30. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  31. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  32. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  33. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  34. package/{configs → ide/configs}/zed-mcp.json +2 -2
  35. package/{hooks → ide/hooks}/context-hook.js +9 -20
  36. package/ide/hooks/memory-list-skill.js +70 -0
  37. package/ide/hooks/memory-profile-skill.js +101 -0
  38. package/ide/hooks/memory-recall-skill.js +62 -0
  39. package/ide/hooks/memory-remember-skill.js +68 -0
  40. package/ide/hooks/memory-reset-skill.js +160 -0
  41. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  42. package/ide/integrations/langchain/README.md +106 -0
  43. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  44. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  45. package/ide/integrations/langchain/pyproject.toml +38 -0
  46. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  47. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  48. package/ide/integrations/langchain/tests/test_security.py +117 -0
  49. package/ide/integrations/llamaindex/README.md +81 -0
  50. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  51. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  52. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  53. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  54. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  55. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  56. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  57. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  58. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  59. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  60. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  61. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  62. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  63. package/package.json +13 -22
  64. package/pyproject.toml +85 -0
  65. package/scripts/build-dmg.sh +417 -0
  66. package/scripts/install-skills.ps1 +334 -0
  67. package/{install.ps1 → scripts/install.ps1} +36 -4
  68. package/{install.sh → scripts/install.sh} +14 -13
  69. package/scripts/postinstall.js +2 -2
  70. package/scripts/start-dashboard.ps1 +52 -0
  71. package/scripts/start-dashboard.sh +41 -0
  72. package/scripts/sync-wiki.ps1 +127 -0
  73. package/scripts/sync-wiki.sh +82 -0
  74. package/scripts/test-dmg.sh +161 -0
  75. package/scripts/test-npm-package.ps1 +252 -0
  76. package/scripts/test-npm-package.sh +207 -0
  77. package/scripts/verify-install.ps1 +294 -0
  78. package/scripts/verify-install.sh +266 -0
  79. package/src/superlocalmemory/__init__.py +0 -0
  80. package/src/superlocalmemory/attribution/__init__.py +9 -0
  81. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  82. package/src/superlocalmemory/attribution/signer.py +153 -0
  83. package/src/superlocalmemory/attribution/watermark.py +189 -0
  84. package/src/superlocalmemory/cli/__init__.py +5 -0
  85. package/src/superlocalmemory/cli/commands.py +245 -0
  86. package/src/superlocalmemory/cli/main.py +89 -0
  87. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  88. package/src/superlocalmemory/cli/post_install.py +99 -0
  89. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  90. package/src/superlocalmemory/compliance/__init__.py +0 -0
  91. package/src/superlocalmemory/compliance/abac.py +204 -0
  92. package/src/superlocalmemory/compliance/audit.py +314 -0
  93. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  94. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  95. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  96. package/src/superlocalmemory/compliance/retention.py +232 -0
  97. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  98. package/src/superlocalmemory/core/__init__.py +0 -0
  99. package/src/superlocalmemory/core/config.py +391 -0
  100. package/src/superlocalmemory/core/embeddings.py +293 -0
  101. package/src/superlocalmemory/core/engine.py +701 -0
  102. package/src/superlocalmemory/core/hooks.py +65 -0
  103. package/src/superlocalmemory/core/maintenance.py +172 -0
  104. package/src/superlocalmemory/core/modes.py +140 -0
  105. package/src/superlocalmemory/core/profiles.py +234 -0
  106. package/src/superlocalmemory/core/registry.py +117 -0
  107. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  108. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  109. package/src/superlocalmemory/encoding/__init__.py +0 -0
  110. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  111. package/src/superlocalmemory/encoding/emotional.py +125 -0
  112. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  113. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  114. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  115. package/src/superlocalmemory/encoding/foresight.py +91 -0
  116. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  117. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  118. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  119. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  120. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  121. package/src/superlocalmemory/encoding/type_router.py +235 -0
  122. package/src/superlocalmemory/hooks/__init__.py +3 -0
  123. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  124. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  125. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  126. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  127. package/src/superlocalmemory/infra/__init__.py +3 -0
  128. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  129. package/src/superlocalmemory/infra/backup.py +317 -0
  130. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  131. package/src/superlocalmemory/infra/event_bus.py +381 -0
  132. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  133. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  134. package/src/superlocalmemory/learning/__init__.py +0 -0
  135. package/src/superlocalmemory/learning/adaptive.py +172 -0
  136. package/src/superlocalmemory/learning/behavioral.py +490 -0
  137. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  138. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  139. package/src/superlocalmemory/learning/cross_project.py +399 -0
  140. package/src/superlocalmemory/learning/database.py +376 -0
  141. package/src/superlocalmemory/learning/engagement.py +323 -0
  142. package/src/superlocalmemory/learning/features.py +138 -0
  143. package/src/superlocalmemory/learning/feedback.py +316 -0
  144. package/src/superlocalmemory/learning/outcomes.py +255 -0
  145. package/src/superlocalmemory/learning/project_context.py +366 -0
  146. package/src/superlocalmemory/learning/ranker.py +155 -0
  147. package/src/superlocalmemory/learning/source_quality.py +303 -0
  148. package/src/superlocalmemory/learning/workflows.py +309 -0
  149. package/src/superlocalmemory/llm/__init__.py +0 -0
  150. package/src/superlocalmemory/llm/backbone.py +316 -0
  151. package/src/superlocalmemory/math/__init__.py +0 -0
  152. package/src/superlocalmemory/math/fisher.py +356 -0
  153. package/src/superlocalmemory/math/langevin.py +398 -0
  154. package/src/superlocalmemory/math/sheaf.py +257 -0
  155. package/src/superlocalmemory/mcp/__init__.py +0 -0
  156. package/src/superlocalmemory/mcp/resources.py +245 -0
  157. package/src/superlocalmemory/mcp/server.py +61 -0
  158. package/src/superlocalmemory/mcp/tools.py +18 -0
  159. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  160. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  161. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  162. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  163. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  164. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  165. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  166. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  167. package/src/superlocalmemory/retrieval/engine.py +390 -0
  168. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  169. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  170. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  171. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  172. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  173. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  174. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  175. package/src/superlocalmemory/server/__init__.py +1 -0
  176. package/src/superlocalmemory/server/api.py +248 -0
  177. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  178. package/src/superlocalmemory/server/routes/agents.py +107 -0
  179. package/src/superlocalmemory/server/routes/backup.py +91 -0
  180. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  181. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  182. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  183. package/src/superlocalmemory/server/routes/events.py +183 -0
  184. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  185. package/src/superlocalmemory/server/routes/learning.py +273 -0
  186. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  187. package/src/superlocalmemory/server/routes/memories.py +399 -0
  188. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  189. package/src/superlocalmemory/server/routes/stats.py +346 -0
  190. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  191. package/src/superlocalmemory/server/routes/ws.py +82 -0
  192. package/src/superlocalmemory/server/security_middleware.py +57 -0
  193. package/src/superlocalmemory/server/ui.py +245 -0
  194. package/src/superlocalmemory/storage/__init__.py +0 -0
  195. package/src/superlocalmemory/storage/access_control.py +182 -0
  196. package/src/superlocalmemory/storage/database.py +594 -0
  197. package/src/superlocalmemory/storage/migrations.py +303 -0
  198. package/src/superlocalmemory/storage/models.py +406 -0
  199. package/src/superlocalmemory/storage/schema.py +726 -0
  200. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  201. package/src/superlocalmemory/trust/__init__.py +0 -0
  202. package/src/superlocalmemory/trust/gate.py +130 -0
  203. package/src/superlocalmemory/trust/provenance.py +124 -0
  204. package/src/superlocalmemory/trust/scorer.py +347 -0
  205. package/src/superlocalmemory/trust/signals.py +153 -0
  206. package/ui/index.html +278 -5
  207. package/ui/js/auto-settings.js +70 -0
  208. package/ui/js/dashboard.js +90 -0
  209. package/ui/js/fact-detail.js +92 -0
  210. package/ui/js/feedback.js +2 -2
  211. package/ui/js/ide-status.js +102 -0
  212. package/ui/js/math-health.js +98 -0
  213. package/ui/js/recall-lab.js +127 -0
  214. package/ui/js/settings.js +2 -2
  215. package/ui/js/trust-dashboard.js +73 -0
  216. package/api_server.py +0 -724
  217. package/bin/aider-smart +0 -72
  218. package/bin/superlocalmemoryv2-learning +0 -4
  219. package/bin/superlocalmemoryv2-list +0 -3
  220. package/bin/superlocalmemoryv2-patterns +0 -4
  221. package/bin/superlocalmemoryv2-profile +0 -3
  222. package/bin/superlocalmemoryv2-recall +0 -3
  223. package/bin/superlocalmemoryv2-remember +0 -3
  224. package/bin/superlocalmemoryv2-reset +0 -3
  225. package/bin/superlocalmemoryv2-status +0 -3
  226. package/configs/chatgpt-desktop-mcp.json +0 -16
  227. package/configs/cursor-mcp.json +0 -15
  228. package/docs/SECURITY-QUICK-REFERENCE.md +0 -214
  229. package/hooks/memory-list-skill.js +0 -139
  230. package/hooks/memory-profile-skill.js +0 -273
  231. package/hooks/memory-recall-skill.js +0 -114
  232. package/hooks/memory-remember-skill.js +0 -127
  233. package/hooks/memory-reset-skill.js +0 -274
  234. package/mcp_server.py +0 -1800
  235. package/requirements-core.txt +0 -22
  236. package/requirements-learning.txt +0 -12
  237. package/requirements.txt +0 -12
  238. package/src/agent_registry.py +0 -411
  239. package/src/auth_middleware.py +0 -61
  240. package/src/auto_backup.py +0 -459
  241. package/src/behavioral/__init__.py +0 -49
  242. package/src/behavioral/behavioral_listener.py +0 -203
  243. package/src/behavioral/behavioral_patterns.py +0 -275
  244. package/src/behavioral/cross_project_transfer.py +0 -206
  245. package/src/behavioral/outcome_inference.py +0 -194
  246. package/src/behavioral/outcome_tracker.py +0 -193
  247. package/src/behavioral/tests/__init__.py +0 -4
  248. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  249. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  250. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  251. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  252. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  253. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  254. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  255. package/src/cache_manager.py +0 -518
  256. package/src/compliance/__init__.py +0 -48
  257. package/src/compliance/abac_engine.py +0 -149
  258. package/src/compliance/abac_middleware.py +0 -116
  259. package/src/compliance/audit_db.py +0 -215
  260. package/src/compliance/audit_logger.py +0 -148
  261. package/src/compliance/retention_manager.py +0 -289
  262. package/src/compliance/retention_scheduler.py +0 -186
  263. package/src/compliance/tests/__init__.py +0 -4
  264. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  265. package/src/compliance/tests/test_abac_engine.py +0 -124
  266. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  267. package/src/compliance/tests/test_audit_db.py +0 -123
  268. package/src/compliance/tests/test_audit_logger.py +0 -98
  269. package/src/compliance/tests/test_mcp_audit.py +0 -128
  270. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  271. package/src/compliance/tests/test_retention_manager.py +0 -131
  272. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  273. package/src/compression/__init__.py +0 -25
  274. package/src/compression/cli.py +0 -150
  275. package/src/compression/cold_storage.py +0 -217
  276. package/src/compression/config.py +0 -72
  277. package/src/compression/orchestrator.py +0 -133
  278. package/src/compression/tier2_compressor.py +0 -228
  279. package/src/compression/tier3_compressor.py +0 -153
  280. package/src/compression/tier_classifier.py +0 -148
  281. package/src/db_connection_manager.py +0 -536
  282. package/src/embedding_engine.py +0 -63
  283. package/src/embeddings/__init__.py +0 -47
  284. package/src/embeddings/cache.py +0 -70
  285. package/src/embeddings/cli.py +0 -113
  286. package/src/embeddings/constants.py +0 -47
  287. package/src/embeddings/database.py +0 -91
  288. package/src/embeddings/engine.py +0 -247
  289. package/src/embeddings/model_loader.py +0 -145
  290. package/src/event_bus.py +0 -562
  291. package/src/graph/__init__.py +0 -36
  292. package/src/graph/build_helpers.py +0 -74
  293. package/src/graph/cli.py +0 -87
  294. package/src/graph/cluster_builder.py +0 -188
  295. package/src/graph/cluster_summary.py +0 -148
  296. package/src/graph/constants.py +0 -47
  297. package/src/graph/edge_builder.py +0 -162
  298. package/src/graph/entity_extractor.py +0 -95
  299. package/src/graph/graph_core.py +0 -226
  300. package/src/graph/graph_search.py +0 -231
  301. package/src/graph/hierarchical.py +0 -207
  302. package/src/graph/schema.py +0 -99
  303. package/src/graph_engine.py +0 -52
  304. package/src/hnsw_index.py +0 -628
  305. package/src/hybrid_search.py +0 -46
  306. package/src/learning/__init__.py +0 -217
  307. package/src/learning/adaptive_ranker.py +0 -682
  308. package/src/learning/bootstrap/__init__.py +0 -69
  309. package/src/learning/bootstrap/constants.py +0 -93
  310. package/src/learning/bootstrap/db_queries.py +0 -316
  311. package/src/learning/bootstrap/sampling.py +0 -82
  312. package/src/learning/bootstrap/text_utils.py +0 -71
  313. package/src/learning/cross_project_aggregator.py +0 -857
  314. package/src/learning/db/__init__.py +0 -40
  315. package/src/learning/db/constants.py +0 -44
  316. package/src/learning/db/schema.py +0 -279
  317. package/src/learning/engagement_tracker.py +0 -628
  318. package/src/learning/feature_extractor.py +0 -708
  319. package/src/learning/feedback_collector.py +0 -806
  320. package/src/learning/learning_db.py +0 -915
  321. package/src/learning/project_context_manager.py +0 -572
  322. package/src/learning/ranking/__init__.py +0 -33
  323. package/src/learning/ranking/constants.py +0 -84
  324. package/src/learning/ranking/helpers.py +0 -278
  325. package/src/learning/source_quality_scorer.py +0 -676
  326. package/src/learning/synthetic_bootstrap.py +0 -755
  327. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  328. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  329. package/src/learning/tests/test_aggregator.py +0 -306
  330. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  331. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  332. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  333. package/src/learning/tests/test_feedback_collector.py +0 -294
  334. package/src/learning/tests/test_learning_db.py +0 -602
  335. package/src/learning/tests/test_learning_db_v28.py +0 -110
  336. package/src/learning/tests/test_learning_init_v28.py +0 -48
  337. package/src/learning/tests/test_outcome_signals.py +0 -48
  338. package/src/learning/tests/test_project_context.py +0 -292
  339. package/src/learning/tests/test_schema_migration.py +0 -319
  340. package/src/learning/tests/test_signal_inference.py +0 -397
  341. package/src/learning/tests/test_source_quality.py +0 -351
  342. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  343. package/src/learning/tests/test_workflow_miner.py +0 -318
  344. package/src/learning/workflow_pattern_miner.py +0 -655
  345. package/src/lifecycle/__init__.py +0 -54
  346. package/src/lifecycle/bounded_growth.py +0 -239
  347. package/src/lifecycle/compaction_engine.py +0 -226
  348. package/src/lifecycle/lifecycle_engine.py +0 -355
  349. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  350. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  351. package/src/lifecycle/retention_policy.py +0 -285
  352. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  353. package/src/lifecycle/tests/test_compaction.py +0 -179
  354. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  355. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  356. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  357. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  358. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  359. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  360. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  361. package/src/mcp_tools_v28.py +0 -281
  362. package/src/memory/__init__.py +0 -36
  363. package/src/memory/cli.py +0 -205
  364. package/src/memory/constants.py +0 -39
  365. package/src/memory/helpers.py +0 -28
  366. package/src/memory/schema.py +0 -166
  367. package/src/memory-profiles.py +0 -595
  368. package/src/memory-reset.py +0 -491
  369. package/src/memory_compression.py +0 -989
  370. package/src/memory_store_v2.py +0 -1155
  371. package/src/migrate_v1_to_v2.py +0 -629
  372. package/src/pattern_learner.py +0 -34
  373. package/src/patterns/__init__.py +0 -24
  374. package/src/patterns/analyzers.py +0 -251
  375. package/src/patterns/learner.py +0 -271
  376. package/src/patterns/scoring.py +0 -171
  377. package/src/patterns/store.py +0 -225
  378. package/src/patterns/terminology.py +0 -140
  379. package/src/provenance_tracker.py +0 -312
  380. package/src/qualixar_attribution.py +0 -139
  381. package/src/qualixar_watermark.py +0 -78
  382. package/src/query_optimizer.py +0 -511
  383. package/src/rate_limiter.py +0 -83
  384. package/src/search/__init__.py +0 -20
  385. package/src/search/cli.py +0 -77
  386. package/src/search/constants.py +0 -26
  387. package/src/search/engine.py +0 -241
  388. package/src/search/fusion.py +0 -122
  389. package/src/search/index_loader.py +0 -114
  390. package/src/search/methods.py +0 -162
  391. package/src/search_engine_v2.py +0 -401
  392. package/src/setup_validator.py +0 -482
  393. package/src/subscription_manager.py +0 -391
  394. package/src/tree/__init__.py +0 -59
  395. package/src/tree/builder.py +0 -185
  396. package/src/tree/nodes.py +0 -202
  397. package/src/tree/queries.py +0 -257
  398. package/src/tree/schema.py +0 -80
  399. package/src/tree_manager.py +0 -19
  400. package/src/trust/__init__.py +0 -45
  401. package/src/trust/constants.py +0 -66
  402. package/src/trust/queries.py +0 -157
  403. package/src/trust/schema.py +0 -95
  404. package/src/trust/scorer.py +0 -299
  405. package/src/trust/signals.py +0 -95
  406. package/src/trust_scorer.py +0 -44
  407. package/ui/app.js +0 -1588
  408. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  409. package/ui/js/graph-cytoscape.js +0 -1168
  410. package/ui/js/graph-d3-backup.js +0 -32
  411. package/ui/js/graph.js +0 -32
  412. package/ui_server.py +0 -266
  413. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  414. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  415. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  416. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  417. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  418. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  419. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  420. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  421. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  422. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  423. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  424. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  425. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  426. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  427. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  428. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  429. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  430. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  431. /package/{completions → ide/completions}/slm.bash +0 -0
  432. /package/{completions → ide/completions}/slm.zsh +0 -0
  433. /package/{configs → ide/configs}/cody-commands.json +0 -0
  434. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
@@ -0,0 +1,295 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — 2-Round Sufficiency Verification (EverMemOS Pattern).
6
+
7
+ Round 1: Standard retrieval → sufficiency check.
8
+ Round 2 (if insufficient): LLM generates refined queries → merge → rerank.
9
+
10
+ Design decisions:
11
+ - 2 rounds MAX (3-round decomposition BROKE relational context in S16)
12
+ - Trigger: max_score < 0.6 OR multi_hop query type OR fewer than 3 results
13
+ - Skip agentic for query types listed in _SKIP_TYPES (temporal per the S15 lesson; the set is currently empty)
14
+ - Mode A: heuristic alias expansion (no LLM)
15
+ - Mode C: LLM sufficiency judgment with 3-way classification
16
+
17
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
18
+ License: MIT
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import logging
24
+ import re
25
+ from dataclasses import dataclass
26
+ from typing import Any, Protocol
27
+
28
+ from superlocalmemory.storage.models import AtomicFact
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
# Design cap on retrieval rounds (see module docstring). Nothing in this
# module reads it; the two-round flow is hard-coded in AgenticRetriever.retrieve.
_MAX_ROUNDS = 2
# Default ``confidence_threshold`` for AgenticRetriever.
_SUFFICIENCY_SCORE_THRESHOLD = 0.6
# Query types for which AgenticRetriever.retrieve performs one plain retrieval
# with no sufficiency check and no second round. Currently empty.
_SKIP_TYPES = frozenset() # Hotpatch: enable agentic for ALL query types including multi_hop

# System prompt for the LLM sufficiency check; the reply is parsed by
# _parse_sufficiency, which looks for the "is_sufficient" key.
_SUFFICIENCY_SYSTEM = (
    "You evaluate whether retrieved context is sufficient to answer a query. "
    'Respond ONLY with JSON: {"is_sufficient": true/false, "missing_information": "..."}'
)

# System prompt for LLM query refinement; the reply is parsed by
# _parse_json_strings, which expects a JSON array of strings.
_REWRITE_SYSTEM = (
    "You rewrite queries for a memory retrieval system. "
    "Respond ONLY with a JSON array of 1-3 rewritten queries: "
    '["query1", "query2"]'
)
46
+
47
+
48
class LLMBackend(Protocol):
    """Structural type for the LLM client consumed by agentic retrieval.

    Any object exposing these two members satisfies the protocol; no
    inheritance is required.
    """

    @property
    def is_available(self) -> bool:
        """Flag consulted before any ``generate`` call is attempted."""
        ...

    def generate(
        self,
        prompt: str,
        system: str = "",
        max_tokens: int = 512,
        temperature: float = 0.0,
    ) -> Any:
        """Run one completion for ``prompt`` under the ``system`` instruction.

        The return type is left open: callers in this module read a ``text``
        attribute when present and otherwise fall back to ``str(response)``.
        """
        ...
54
+
55
+
56
class RetrievalEngine(Protocol):
    """Structural type for the engine that performs a single retrieval pass."""

    def recall_facts(
        self,
        query: str,
        profile_id: str,
        top_k: int,
        skip_agentic: bool = True,
    ) -> list[tuple[AtomicFact, float]]:
        """Return ``(fact, score)`` pairs for ``query`` within ``profile_id``.

        This module always passes ``skip_agentic=True``.
        NOTE(review): presumably that stops the engine from re-entering
        agentic retrieval — confirm against the engine implementation.
        """
        ...
61
+
62
+
63
class DatabaseProtocol(Protocol):
    """Structural type for the entity/alias lookups used by alias expansion."""

    def get_entity_by_name(self, name: str, profile_id: str) -> Any:
        """Look up an entity by name.

        Callers in this module treat a falsy result as "no such entity" and
        read ``entity_id`` from a truthy one.
        """
        ...

    def get_aliases_for_entity(self, entity_id: str) -> list[Any]:
        """List alias records for ``entity_id``.

        Callers in this module read an ``alias`` attribute from each record.
        """
        ...
67
+
68
+
69
@dataclass
class RetrievalRound:
    """Bookkeeping record for a single retrieval pass."""

    # 1 for the initial retrieval, 2 for each refined-query retrieval.
    round_num: int
    # Query text that was sent to the retrieval engine for this pass.
    query: str
    # Number of (fact, score) pairs the engine returned.
    result_count: int
    # Mean score over the returned pairs (0.0 when nothing was returned).
    avg_score: float
    # Sufficiency verdict recorded for this pass.
    is_sufficient: bool
77
+
78
+
79
class AgenticRetriever:
    """2-round sufficiency verification (EverMemOS pattern).

    Round 1: retrieve → check sufficiency.
    Round 2: if insufficient, issue refined queries → merge by best score.

    Mode A (no usable LLM): heuristic alias expansion for round 2.
    Mode C (usable LLM): LLM sufficiency judgment + LLM query refinement.

    Args:
        confidence_threshold: Top-score cut-off used both as the fallback
            sufficiency verdict and as one of the round-2 triggers.
        min_results_ratio: Stored on the instance; no method of this class
            reads it.
        db: Optional entity/alias lookup for Mode A. Without it, Mode A
            produces no refined queries.

    Attributes:
        rounds: Per-round metadata recorded by the most recent ``retrieve``
            call (reset at the start of every call).
    """

    # Capitalised words of three or more letters are treated as candidate
    # entity names for Mode A alias expansion.
    _ENTITY_RE = re.compile(r"\b[A-Z][a-z]{2,}\b")

    def __init__(
        self,
        confidence_threshold: float = _SUFFICIENCY_SCORE_THRESHOLD,
        min_results_ratio: float = 0.5,
        db: DatabaseProtocol | None = None,
    ) -> None:
        self._threshold = confidence_threshold
        self._min_ratio = min_results_ratio
        self._db = db
        self.rounds: list[RetrievalRound] = []

    def retrieve(
        self, query: str, profile_id: str,
        retrieval_engine: RetrievalEngine,
        llm: LLMBackend | None = None,
        top_k: int = 20, query_type: str = "",
    ) -> list[AtomicFact]:
        """Run up to two retrieval rounds and return at most ``top_k`` facts.

        Args:
            query: User query text.
            profile_id: Profile whose memory is searched.
            retrieval_engine: Engine used for every retrieval pass.
            llm: Optional LLM; when usable it drives both the sufficiency
                judgment and the query refinement.
            top_k: Maximum number of facts to return.
            query_type: Query classification; ``"multi_hop"`` forces round 2
                whenever round 1 is judged insufficient.

        Returns:
            Facts ordered by descending score when round 2 ran, otherwise in
            the order the engine returned them.
        """
        self.rounds = []

        # Query types in _SKIP_TYPES bypass the agentic flow entirely
        # (no sufficiency check, no round metadata).
        if query_type in _SKIP_TYPES:
            logger.debug("Skipping agentic for query_type=%s", query_type)
            return [f for f, _ in retrieval_engine.recall_facts(
                query, profile_id, top_k=top_k, skip_agentic=True)]

        # Round 1: standard retrieval
        r1 = retrieval_engine.recall_facts(
            query, profile_id, top_k=top_k, skip_agentic=True,
        )
        max_score = max((s for _, s in r1), default=0.0)

        is_sufficient = self._check_sufficiency(query, r1, llm)
        self.rounds.append(
            RetrievalRound(1, query, len(r1), _avg(r1), is_sufficient),
        )

        if is_sufficient:
            return [f for f, _ in r1[:top_k]]

        # Round 2 runs only on a low top score, a multi_hop query, or a
        # near-empty result set.
        needs_round2 = (
            max_score < self._threshold
            or query_type == "multi_hop"
            or len(r1) < 3
        )
        if not needs_round2:
            return [f for f, _ in r1[:top_k]]

        # Round 2: refinement. The pool keeps the best score seen per fact,
        # for round-1 duplicates as well as for round-2 results.
        pool: dict[str, tuple[AtomicFact, float]] = {}
        self._merge_best(pool, r1)

        if self._llm_ready(llm):
            # Mode C: LLM generates refined queries
            refined = self._llm_refine(query, r1, llm)
        else:
            # Mode A: heuristic alias expansion
            refined = self._heuristic_expand(query, profile_id)

        # Re-running the original query (or the same refinement twice) can
        # only reproduce results already in the pool, so skip those.
        issued = {query}
        for rq in refined:
            if not rq or rq in issued:
                continue
            issued.add(rq)
            rn = retrieval_engine.recall_facts(
                rq, profile_id, top_k=top_k, skip_agentic=True,
            )
            self._merge_best(pool, rn)
            # No sufficiency re-check happens after round 2; the flag is
            # recorded as True because this is the final round.
            self.rounds.append(
                RetrievalRound(2, rq, len(rn), _avg(rn), True),
            )

        merged = sorted(pool.values(), key=lambda x: x[1], reverse=True)
        return [f for f, _ in merged[:top_k]]

    @staticmethod
    def _merge_best(
        pool: dict[str, tuple[AtomicFact, float]],
        scored: list[tuple[AtomicFact, float]],
    ) -> None:
        """Fold ``scored`` into ``pool``, keeping the highest score per fact_id."""
        for fact, score in scored:
            known = pool.get(fact.fact_id)
            if known is None or score > known[1]:
                pool[fact.fact_id] = (fact, score)

    @staticmethod
    def _llm_ready(llm: LLMBackend | None) -> bool:
        """True when an LLM was supplied and reports itself available."""
        return llm is not None and bool(getattr(llm, "is_available", False))

    # -- Sufficiency check ---------------------------------------------------

    def _check_sufficiency(
        self, query: str, results: list[tuple[AtomicFact, float]],
        llm: LLMBackend | None,
    ) -> bool:
        """Decide whether ``results`` are enough to answer ``query``.

        Clear cases are settled by heuristics; the ambiguous middle is
        handed to the LLM when one is usable; otherwise (or when the LLM
        reply cannot be parsed) the verdict is ``max_score >= threshold``.
        """
        if not results:
            return False

        max_score = max((s for _, s in results), default=0.0)

        # Heuristic fast path: clearly sufficient
        if max_score >= 0.8 and len(results) >= 5:
            return True

        # Heuristic fast path: clearly insufficient
        if max_score < 0.3 or len(results) < 2:
            return False

        # LLM sufficiency check (Mode C only)
        if self._llm_ready(llm):
            try:
                top5_context = "\n".join(
                    f"- {f.content}" for f, _ in results[:5]
                )
                prompt = (
                    f"Query: {query}\n\n"
                    f"Retrieved context:\n{top5_context}\n\n"
                    "Is this context sufficient to answer the query?"
                )
                resp = llm.generate(
                    prompt=prompt, system=_SUFFICIENCY_SYSTEM,
                    max_tokens=128, temperature=0.0,
                )
                text = getattr(resp, "text", str(resp))
                parsed = _parse_sufficiency(text)
                if parsed is not None:
                    return parsed
            except Exception as exc:
                # Best effort: an LLM failure falls through to the threshold.
                logger.warning("Sufficiency check failed: %s", exc)

        # Default: sufficient if score is above threshold
        return max_score >= self._threshold

    # -- Query refinement ----------------------------------------------------

    @staticmethod
    def _llm_refine(
        query: str,
        prev: list[tuple[AtomicFact, float]],
        llm: LLMBackend,
    ) -> list[str]:
        """Ask the LLM for up to three refined queries.

        Returns an empty list when the LLM call fails or its reply contains
        no usable query strings.
        """
        ctx = ""
        if prev:
            # Show the LLM a short preview of the three best current hits.
            ctx = f"\nCurrent results: {[f.content[:80] for f, _ in prev[:3]]}"
        try:
            resp = llm.generate(
                prompt=(
                    f"Original query: {query}\n"
                    f"Insufficient results.{ctx}\n"
                    "Generate 2-3 refined search queries to find missing information."
                ),
                system=_REWRITE_SYSTEM,
                max_tokens=256,
                temperature=0.0,
            )
            parsed = _parse_json_strings(getattr(resp, "text", str(resp)))
            if parsed:
                return parsed[:3]
        except Exception as exc:
            logger.warning("LLM refine failed: %s", exc)
        return []

    def _heuristic_expand(
        self, query: str, profile_id: str,
    ) -> list[str]:
        """Mode A: expand ``query`` with entity aliases (no LLM).

        Returns a single-element list holding the query followed by the
        collected aliases, or an empty list when there is no DB, no entity
        match, or no alias. Lookup failures are logged and skipped so that
        round-1 results are never lost to an expansion error.
        """
        if self._db is None:
            return []

        # dict.fromkeys de-duplicates while preserving first-seen order.
        names = dict.fromkeys(self._ENTITY_RE.findall(query))
        aliases: dict[str, None] = {}
        for name in names:
            try:
                entity = self._db.get_entity_by_name(name, profile_id)
                if not entity:
                    continue
                records = self._db.get_aliases_for_entity(entity.entity_id)
                # At most three aliases per entity.
                for record in records[:3]:
                    alias = record.alias
                    if isinstance(alias, str) and alias:
                        aliases.setdefault(alias, None)
            except Exception as exc:
                logger.warning("Alias expansion failed for %r: %s", name, exc)

        if aliases:
            return [query + " " + " ".join(aliases)]
        return []
260
+
261
+
262
+ # ---------------------------------------------------------------------------
263
+ # Helpers
264
+ # ---------------------------------------------------------------------------
265
+
266
def _avg(results: list[tuple[AtomicFact, float]]) -> float:
    """Return the mean score of ``(fact, score)`` pairs, or 0.0 when empty."""
    if not results:
        return 0.0
    scores = [score for _fact, score in results]
    return sum(scores) / len(scores)
268
+
269
+
270
def _parse_json_strings(raw: str) -> list[str]:
    """Extract up to three non-empty query strings from LLM output.

    Scans ``raw`` for the first JSON array that contains at least one
    non-blank string and returns its stripped string items (at most three).
    Surrounding prose, stray brackets, non-string items, and blank strings
    are ignored. Returns an empty list when nothing usable is found.
    """
    if not isinstance(raw, str):
        return []

    decoder = json.JSONDecoder()
    start = raw.find("[")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at ``start`` and
            # tolerates trailing text, unlike a greedy "[...]" regex match.
            items, _end = decoder.raw_decode(raw, start)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            items = None
        if isinstance(items, list):
            queries = [
                q.strip() for q in items
                if isinstance(q, str) and q.strip()
            ]
            if queries:
                return queries[:3]
        start = raw.find("[", start + 1)
    return []
282
+
283
+
284
def _parse_sufficiency(raw: str) -> bool | None:
    """Parse the LLM sufficiency verdict from ``raw``.

    Scans ``raw`` for the first JSON object carrying an ``"is_sufficient"``
    key and interprets its value:

    * JSON booleans are returned as-is;
    * numbers are truth-tested;
    * the strings "true"/"yes" and "false"/"no" (any case) map to
      True/False, so a quoted ``"false"`` is not mistaken for True.

    Returns ``None`` when no such object exists or the value cannot be
    interpreted, letting the caller fall back to its score threshold.
    """
    if not isinstance(raw, str):
        return None

    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at ``start`` and
            # tolerates trailing text, unlike a greedy "{...}" regex match.
            data, _end = decoder.raw_decode(raw, start)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            data = None
        if isinstance(data, dict) and "is_sufficient" in data:
            verdict = data["is_sufficient"]
            if isinstance(verdict, bool):
                return verdict
            if isinstance(verdict, (int, float)):
                return bool(verdict)
            if isinstance(verdict, str):
                token = verdict.strip().lower()
                if token in ("true", "yes"):
                    return True
                if token in ("false", "no"):
                    return False
            return None
        start = raw.find("{", start + 1)
    return None
@@ -0,0 +1,223 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — Approximate Nearest Neighbor Index.
6
+
7
+ Numpy-based cosine similarity with thread-safe operations. Supports
8
+ rebuilding from database on cold start and incremental add/remove.
9
+
10
+ No FAISS dependency — pure numpy brute-force is sufficient for the
11
+ scale we target (up to 100K facts). At that scale, a single cosine
12
+ scan takes ~5ms on CPU which is well within our latency budget.
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ License: MIT
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import threading
22
+
23
+ import numpy as np
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class ANNIndex:
    """Thread-safe nearest neighbor index using numpy.

    Stores (fact_id, embedding) pairs and supports top-k cosine
    similarity search via a brute-force scan over all stored vectors.
    Vectors are L2-normalized on insertion so that a dot product equals
    cosine similarity at search time. All access to the internal lists
    is guarded by a single lock.

    Args:
        dimension: Embedding vector dimension (e.g. 768 for nomic-embed).
    """

    def __init__(self, dimension: int) -> None:
        self._dim = dimension
        self._ids: list[str] = []
        # fact_id -> position in _ids / _vectors
        self._id_to_idx: dict[str, int] = {}
        self._vectors: list[np.ndarray] = []
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def size(self) -> int:
        """Number of indexed vectors."""
        with self._lock:
            return len(self._ids)

    @property
    def dimension(self) -> int:
        """Embedding dimension this index was created for."""
        return self._dim

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _prepare(self, fact_id: str, embedding: list[float]) -> np.ndarray | None:
        """Return the L2-normalized float32 vector, or None on dim mismatch."""
        vec = np.asarray(embedding, dtype=np.float32).ravel()
        if vec.shape[0] != self._dim:
            logger.warning(
                "Dimension mismatch: expected %d, got %d for %s",
                self._dim, vec.shape[0], fact_id,
            )
            return None

        norm = np.linalg.norm(vec)
        # Near-zero vectors are stored as-is to avoid dividing by ~0.
        if norm > 1e-10:
            vec = vec / norm
        return vec

    def _store_locked(self, fact_id: str, vec: np.ndarray) -> None:
        """Insert or overwrite one entry. Caller must hold ``self._lock``."""
        idx = self._id_to_idx.get(fact_id)
        if idx is not None:
            self._vectors[idx] = vec
            return
        self._id_to_idx[fact_id] = len(self._ids)
        self._ids.append(fact_id)
        self._vectors.append(vec)

    # ------------------------------------------------------------------
    # Mutation
    # ------------------------------------------------------------------

    def add(self, fact_id: str, embedding: list[float]) -> None:
        """Add or update a fact embedding in the index.

        The vector is L2-normalized before storage so that dot-product
        equals cosine similarity at search time. An embedding whose
        length differs from the index dimension is logged and ignored.

        Args:
            fact_id: Unique fact identifier.
            embedding: Raw embedding vector (will be normalized).
        """
        vec = self._prepare(fact_id, embedding)
        if vec is None:
            return
        with self._lock:
            self._store_locked(fact_id, vec)

    def remove(self, fact_id: str) -> None:
        """Remove a fact from the index.

        Uses swap-and-pop for O(1) removal: the last element fills
        the gap left by the removed element.

        Args:
            fact_id: Fact identifier to remove. No-op if not found.
        """
        with self._lock:
            idx = self._id_to_idx.pop(fact_id, None)
            if idx is None:
                return

            last_idx = len(self._ids) - 1
            if idx != last_idx:
                # Move the last entry into the vacated slot.
                last_id = self._ids[last_idx]
                self._ids[idx] = last_id
                self._vectors[idx] = self._vectors[last_idx]
                self._id_to_idx[last_id] = idx

            self._ids.pop()
            self._vectors.pop()

    def clear(self) -> None:
        """Remove all indexed vectors."""
        with self._lock:
            self._ids.clear()
            self._id_to_idx.clear()
            self._vectors.clear()

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    def search(
        self,
        query_embedding: list[float],
        top_k: int = 30,
    ) -> list[tuple[str, float]]:
        """Find top-k most similar facts by cosine similarity.

        Args:
            query_embedding: Query vector (will be L2-normalized).
            top_k: Number of results to return.

        Returns:
            List of (fact_id, score) tuples sorted by score descending.
            Score is cosine similarity in [-1.0, 1.0]. Empty when the
            query has the wrong dimension or near-zero norm, the index
            is empty, or ``top_k`` is not positive.
        """
        q = np.asarray(query_embedding, dtype=np.float32).ravel()
        if q.shape[0] != self._dim:
            logger.warning(
                "Query dim mismatch: expected %d, got %d",
                self._dim, q.shape[0],
            )
            return []

        norm = np.linalg.norm(q)
        if norm < 1e-10:
            return []
        q_normed = q / norm

        with self._lock:
            if not self._vectors:
                return []

            # Stack into matrix for vectorized dot product
            mat = np.stack(self._vectors)  # shape: (N, dim)
            scores = mat @ q_normed  # shape: (N,)

            # Partial sort for top-k (faster than full sort for large N)
            k = min(top_k, len(scores))
            if k <= 0:
                return []

            top_indices = np.argpartition(scores, -k)[-k:]
            top_indices = top_indices[np.argsort(scores[top_indices])[::-1]]

            return [
                (self._ids[i], float(scores[i]))
                for i in top_indices
            ]

    # ------------------------------------------------------------------
    # Bulk loading (cold start)
    # ------------------------------------------------------------------

    def rebuild(
        self,
        fact_ids: list[str],
        embeddings: list[list[float]],
    ) -> int:
        """Rebuild the entire index from database contents.

        Replaces all existing entries. Used on cold start to populate
        the index from persisted embeddings. All vectors are validated
        and normalized before the index is touched, and the swap happens
        under one lock acquisition, so concurrent searches never observe
        a half-built index and a conversion error leaves the previous
        contents in place.

        Args:
            fact_ids: List of fact identifiers.
            embeddings: Corresponding embedding vectors.

        Returns:
            Number of vectors actually held by the index afterwards.
            Embeddings with the wrong dimension are skipped and repeated
            ids count once (the last vector wins). Returns 0, leaving the
            index untouched, when the two lists differ in length.
        """
        if len(fact_ids) != len(embeddings):
            logger.error(
                "rebuild: mismatched lengths — %d ids vs %d embeddings",
                len(fact_ids), len(embeddings),
            )
            return 0

        prepared: list[tuple[str, np.ndarray]] = []
        for fid, emb in zip(fact_ids, embeddings):
            vec = self._prepare(fid, emb)
            if vec is not None:
                prepared.append((fid, vec))

        with self._lock:
            self._ids.clear()
            self._id_to_idx.clear()
            self._vectors.clear()
            for fid, vec in prepared:
                self._store_locked(fid, vec)
            indexed = len(self._ids)

        logger.info("ANN index rebuilt with %d vectors (dim=%d)", indexed, self._dim)
        return indexed
@@ -0,0 +1,185 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — BM25 Keyword Search Channel.
6
+
7
+ Persistent BM25Plus index over fact content. Catches exact name/date
8
+ matches that embedding similarity misses.
9
+
10
+ V1 bug fix: V1 kept BM25 tokens in-memory only — a restart lost
11
+ the entire index. This version persists tokens to the DB via
12
+ store_bm25_tokens / get_all_bm25_tokens and cold-loads on init.
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ License: MIT
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import re
22
+ from typing import TYPE_CHECKING
23
+
24
+ from rank_bm25 import BM25Plus
25
+
26
+ if TYPE_CHECKING:
27
+ from superlocalmemory.storage.database import DatabaseManager
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
# Stopword list kept deliberately small so meaningful terms are not stripped.
_STOPWORDS: frozenset[str] = frozenset({
    # articles
    "a", "an", "the",
    # forms of "to be"
    "is", "are", "was", "were", "be", "been", "being",
    # auxiliaries and modals
    "have", "has", "had", "do", "does", "did", "will", "would", "shall",
    "should", "may", "might", "must", "can", "could",
    # prepositions
    "of", "in", "to", "for", "with", "on", "at", "from", "by", "as",
    "into", "through",
    # conjunctions and connectives
    "and", "but", "or", "nor", "not", "so", "yet", "if", "then", "than",
    # pronouns and determiners
    "that", "this", "it", "its", "i", "me", "my", "we", "our", "you",
    "your", "he", "him", "his", "she", "her", "they", "them", "their",
})

# A token starts and ends with an ASCII letter or digit; word characters,
# hyphens and apostrophes may appear in between. Single characters also match.
_TOKEN_RE = re.compile(r"[a-zA-Z0-9][\w'-]*[a-zA-Z0-9]|[a-zA-Z0-9]")


def tokenize(text: str) -> list[str]:
    """Lowercase ``text``, split it into tokens and drop stopwords.

    Exported so the encoding pipeline can persist tokens at ingest time.
    """
    kept: list[str] = []
    for token in _TOKEN_RE.findall(text.lower()):
        if token in _STOPWORDS:
            continue
        kept.append(token)
    return kept
53
+
54
+
55
class BM25Channel:
    """Persistent BM25Plus index for keyword retrieval.

    On cold start, loads all tokens from the DB. After that, new facts
    are added incrementally. The BM25Plus model is rebuilt lazily
    before each search when the corpus has changed.

    Documents from every loaded profile share one in-memory corpus; each
    document remembers its owning profile so that search results can be
    restricted to the requested profile.

    Attributes:
        document_count: Number of indexed documents.
    """

    def __init__(self, db: DatabaseManager) -> None:
        self._db = db
        self._corpus: list[list[str]] = []
        self._fact_ids: list[str] = []
        # Owning profile of each document, parallel to _corpus / _fact_ids.
        self._fact_profiles: list[str] = []
        self._fact_id_set: set[str] = set()
        self._bm25: BM25Plus | None = None
        self._dirty: bool = False
        self._loaded_profiles: set[str] = set()

    @property
    def document_count(self) -> int:
        """Number of documents currently held in memory (all profiles)."""
        return len(self._corpus)

    def _append(self, fact_id: str, profile_id: str, tokens: list[str]) -> None:
        """Append one new document to the parallel in-memory lists."""
        self._corpus.append(tokens)
        self._fact_ids.append(fact_id)
        self._fact_profiles.append(profile_id)
        self._fact_id_set.add(fact_id)

    def ensure_loaded(self, profile_id: str) -> None:
        """Cold-load BM25 tokens from DB for a profile (once).

        Idempotent: subsequent calls for the same profile are no-ops.
        When the DB holds no stored tokens for the profile, the facts
        themselves are tokenized and the tokens are persisted for the
        next cold start.
        """
        if profile_id in self._loaded_profiles:
            return

        loaded = 0
        token_map = self._db.get_all_bm25_tokens(profile_id)
        if token_map:
            for fid, tokens in token_map.items():
                if fid in self._fact_id_set:
                    continue
                self._append(fid, profile_id, tokens)
                loaded += 1
        else:
            # Fallback: tokenize facts directly if no pre-stored tokens
            for fact in self._db.get_all_facts(profile_id):
                if fact.fact_id in self._fact_id_set:
                    continue
                tokens = tokenize(fact.content)
                if not tokens:
                    continue
                self._append(fact.fact_id, profile_id, tokens)
                loaded += 1
                # Persist for next cold start
                self._db.store_bm25_tokens(fact.fact_id, profile_id, tokens)

        self._dirty = True
        self._loaded_profiles.add(profile_id)
        logger.debug(
            "BM25 cold-loaded %d documents for profile=%s",
            loaded, profile_id,
        )

    def add(self, fact_id: str, content: str, profile_id: str) -> None:
        """Add a single fact to the index and persist tokens.

        Re-adding a ``fact_id`` that is already indexed replaces its
        tokens in place instead of creating a second entry. Content that
        yields no tokens is ignored.

        Args:
            fact_id: Unique fact identifier.
            content: Raw text content to index.
            profile_id: Owner profile.
        """
        tokens = tokenize(content)
        if not tokens:
            return

        if fact_id in self._fact_id_set:
            # Linear lookup is acceptable: updates are the rare path.
            pos = self._fact_ids.index(fact_id)
            self._corpus[pos] = tokens
            self._fact_profiles[pos] = profile_id
        else:
            self._append(fact_id, profile_id, tokens)
        self._dirty = True

        # Persist for cold start
        self._db.store_bm25_tokens(fact_id, profile_id, tokens)

    def search(
        self,
        query: str,
        profile_id: str,
        top_k: int = 30,
    ) -> list[tuple[str, float]]:
        """Search BM25 index for matching facts.

        Auto-loads from DB on first call for this profile. Only facts
        owned by ``profile_id`` that share at least one token with the
        query are returned.

        Args:
            query: Search query text.
            profile_id: Scope to this profile.
            top_k: Maximum results.

        Returns:
            List of (fact_id, bm25_score) sorted by score descending.
        """
        self.ensure_loaded(profile_id)

        if not self._corpus:
            return []

        query_tokens = tokenize(query)
        if not query_tokens:
            return []

        # Rebuild BM25 model if corpus changed
        if self._dirty or self._bm25 is None:
            self._bm25 = BM25Plus(self._corpus, k1=1.2, b=0.75)
            self._dirty = False

        scores = self._bm25.get_scores(query_tokens)

        # BM25Plus adds a positive delta term to every document for each
        # query term known to the corpus, so "score > 0" alone does not
        # prove a match; require real token overlap as well.
        query_terms = set(query_tokens)
        scored: list[tuple[str, float]] = []
        for fid, owner, doc_tokens, score in zip(
            self._fact_ids, self._fact_profiles, self._corpus, scores,
        ):
            if owner != profile_id or score <= 0.0:
                continue
            if query_terms.isdisjoint(doc_tokens):
                continue
            scored.append((fid, float(score)))

        scored.sort(key=lambda item: item[1], reverse=True)
        return scored[:top_k]

    def clear(self) -> None:
        """Clear the in-memory index (does NOT delete DB tokens)."""
        self._corpus = []
        self._fact_ids = []
        self._fact_profiles = []
        self._fact_id_set = set()
        self._bm25 = None
        self._dirty = False
        self._loaded_profiles = set()