superlocalmemory 2.8.6 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/LICENSE +9 -1
  2. package/NOTICE +63 -0
  3. package/README.md +165 -480
  4. package/bin/slm +17 -449
  5. package/bin/slm-npm +62 -48
  6. package/conftest.py +5 -0
  7. package/docs/api-reference.md +284 -0
  8. package/docs/architecture.md +149 -0
  9. package/docs/auto-memory.md +150 -0
  10. package/docs/cli-reference.md +276 -0
  11. package/docs/compliance.md +191 -0
  12. package/docs/configuration.md +182 -0
  13. package/docs/getting-started.md +102 -0
  14. package/docs/ide-setup.md +261 -0
  15. package/docs/mcp-tools.md +220 -0
  16. package/docs/migration-from-v2.md +170 -0
  17. package/docs/profiles.md +173 -0
  18. package/docs/troubleshooting.md +310 -0
  19. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  20. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  21. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  22. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  23. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  24. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  25. package/ide/configs/cursor-mcp.json +15 -0
  26. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  27. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  28. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  29. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  30. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  31. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  32. package/{configs → ide/configs}/zed-mcp.json +2 -2
  33. package/{hooks → ide/hooks}/context-hook.js +9 -20
  34. package/ide/hooks/memory-list-skill.js +70 -0
  35. package/ide/hooks/memory-profile-skill.js +101 -0
  36. package/ide/hooks/memory-recall-skill.js +62 -0
  37. package/ide/hooks/memory-remember-skill.js +68 -0
  38. package/ide/hooks/memory-reset-skill.js +160 -0
  39. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  40. package/ide/integrations/langchain/README.md +106 -0
  41. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  42. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  43. package/ide/integrations/langchain/pyproject.toml +38 -0
  44. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  45. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  46. package/ide/integrations/langchain/tests/test_security.py +117 -0
  47. package/ide/integrations/llamaindex/README.md +81 -0
  48. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  49. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  50. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  51. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  52. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  53. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  54. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  55. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  56. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  57. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  58. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  59. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  60. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  61. package/package.json +13 -22
  62. package/pyproject.toml +85 -0
  63. package/scripts/build-dmg.sh +417 -0
  64. package/scripts/install-skills.ps1 +334 -0
  65. package/scripts/postinstall.js +2 -2
  66. package/scripts/start-dashboard.ps1 +52 -0
  67. package/scripts/start-dashboard.sh +41 -0
  68. package/scripts/sync-wiki.ps1 +127 -0
  69. package/scripts/sync-wiki.sh +82 -0
  70. package/scripts/test-dmg.sh +161 -0
  71. package/scripts/test-npm-package.ps1 +252 -0
  72. package/scripts/test-npm-package.sh +207 -0
  73. package/scripts/verify-install.ps1 +294 -0
  74. package/scripts/verify-install.sh +266 -0
  75. package/src/superlocalmemory/__init__.py +0 -0
  76. package/src/superlocalmemory/attribution/__init__.py +9 -0
  77. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  78. package/src/superlocalmemory/attribution/signer.py +153 -0
  79. package/src/superlocalmemory/attribution/watermark.py +189 -0
  80. package/src/superlocalmemory/cli/__init__.py +5 -0
  81. package/src/superlocalmemory/cli/commands.py +245 -0
  82. package/src/superlocalmemory/cli/main.py +89 -0
  83. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  84. package/src/superlocalmemory/cli/post_install.py +99 -0
  85. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  86. package/src/superlocalmemory/compliance/__init__.py +0 -0
  87. package/src/superlocalmemory/compliance/abac.py +204 -0
  88. package/src/superlocalmemory/compliance/audit.py +314 -0
  89. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  90. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  91. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  92. package/src/superlocalmemory/compliance/retention.py +232 -0
  93. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  94. package/src/superlocalmemory/core/__init__.py +0 -0
  95. package/src/superlocalmemory/core/config.py +391 -0
  96. package/src/superlocalmemory/core/embeddings.py +293 -0
  97. package/src/superlocalmemory/core/engine.py +701 -0
  98. package/src/superlocalmemory/core/hooks.py +65 -0
  99. package/src/superlocalmemory/core/maintenance.py +172 -0
  100. package/src/superlocalmemory/core/modes.py +140 -0
  101. package/src/superlocalmemory/core/profiles.py +234 -0
  102. package/src/superlocalmemory/core/registry.py +117 -0
  103. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  104. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  105. package/src/superlocalmemory/encoding/__init__.py +0 -0
  106. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  107. package/src/superlocalmemory/encoding/emotional.py +125 -0
  108. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  109. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  110. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  111. package/src/superlocalmemory/encoding/foresight.py +91 -0
  112. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  113. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  114. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  115. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  116. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  117. package/src/superlocalmemory/encoding/type_router.py +235 -0
  118. package/src/superlocalmemory/hooks/__init__.py +3 -0
  119. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  120. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  121. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  122. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  123. package/src/superlocalmemory/infra/__init__.py +3 -0
  124. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  125. package/src/superlocalmemory/infra/backup.py +317 -0
  126. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  127. package/src/superlocalmemory/infra/event_bus.py +381 -0
  128. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  129. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  130. package/src/superlocalmemory/learning/__init__.py +0 -0
  131. package/src/superlocalmemory/learning/adaptive.py +172 -0
  132. package/src/superlocalmemory/learning/behavioral.py +490 -0
  133. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  134. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  135. package/src/superlocalmemory/learning/cross_project.py +399 -0
  136. package/src/superlocalmemory/learning/database.py +376 -0
  137. package/src/superlocalmemory/learning/engagement.py +323 -0
  138. package/src/superlocalmemory/learning/features.py +138 -0
  139. package/src/superlocalmemory/learning/feedback.py +316 -0
  140. package/src/superlocalmemory/learning/outcomes.py +255 -0
  141. package/src/superlocalmemory/learning/project_context.py +366 -0
  142. package/src/superlocalmemory/learning/ranker.py +155 -0
  143. package/src/superlocalmemory/learning/source_quality.py +303 -0
  144. package/src/superlocalmemory/learning/workflows.py +309 -0
  145. package/src/superlocalmemory/llm/__init__.py +0 -0
  146. package/src/superlocalmemory/llm/backbone.py +316 -0
  147. package/src/superlocalmemory/math/__init__.py +0 -0
  148. package/src/superlocalmemory/math/fisher.py +356 -0
  149. package/src/superlocalmemory/math/langevin.py +398 -0
  150. package/src/superlocalmemory/math/sheaf.py +257 -0
  151. package/src/superlocalmemory/mcp/__init__.py +0 -0
  152. package/src/superlocalmemory/mcp/resources.py +245 -0
  153. package/src/superlocalmemory/mcp/server.py +61 -0
  154. package/src/superlocalmemory/mcp/tools.py +18 -0
  155. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  156. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  157. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  158. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  159. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  160. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  161. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  162. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  163. package/src/superlocalmemory/retrieval/engine.py +390 -0
  164. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  165. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  166. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  167. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  168. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  169. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  170. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  171. package/src/superlocalmemory/server/__init__.py +1 -0
  172. package/src/superlocalmemory/server/api.py +248 -0
  173. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  174. package/src/superlocalmemory/server/routes/agents.py +107 -0
  175. package/src/superlocalmemory/server/routes/backup.py +91 -0
  176. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  177. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  178. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  179. package/src/superlocalmemory/server/routes/events.py +183 -0
  180. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  181. package/src/superlocalmemory/server/routes/learning.py +273 -0
  182. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  183. package/src/superlocalmemory/server/routes/memories.py +399 -0
  184. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  185. package/src/superlocalmemory/server/routes/stats.py +346 -0
  186. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  187. package/src/superlocalmemory/server/routes/ws.py +82 -0
  188. package/src/superlocalmemory/server/security_middleware.py +57 -0
  189. package/src/superlocalmemory/server/ui.py +245 -0
  190. package/src/superlocalmemory/storage/__init__.py +0 -0
  191. package/src/superlocalmemory/storage/access_control.py +182 -0
  192. package/src/superlocalmemory/storage/database.py +594 -0
  193. package/src/superlocalmemory/storage/migrations.py +303 -0
  194. package/src/superlocalmemory/storage/models.py +406 -0
  195. package/src/superlocalmemory/storage/schema.py +726 -0
  196. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  197. package/src/superlocalmemory/trust/__init__.py +0 -0
  198. package/src/superlocalmemory/trust/gate.py +130 -0
  199. package/src/superlocalmemory/trust/provenance.py +124 -0
  200. package/src/superlocalmemory/trust/scorer.py +347 -0
  201. package/src/superlocalmemory/trust/signals.py +153 -0
  202. package/ui/index.html +278 -5
  203. package/ui/js/auto-settings.js +70 -0
  204. package/ui/js/dashboard.js +90 -0
  205. package/ui/js/fact-detail.js +92 -0
  206. package/ui/js/feedback.js +2 -2
  207. package/ui/js/ide-status.js +102 -0
  208. package/ui/js/math-health.js +98 -0
  209. package/ui/js/recall-lab.js +127 -0
  210. package/ui/js/settings.js +2 -2
  211. package/ui/js/trust-dashboard.js +73 -0
  212. package/api_server.py +0 -724
  213. package/bin/aider-smart +0 -72
  214. package/bin/superlocalmemoryv2-learning +0 -4
  215. package/bin/superlocalmemoryv2-list +0 -3
  216. package/bin/superlocalmemoryv2-patterns +0 -4
  217. package/bin/superlocalmemoryv2-profile +0 -3
  218. package/bin/superlocalmemoryv2-recall +0 -3
  219. package/bin/superlocalmemoryv2-remember +0 -3
  220. package/bin/superlocalmemoryv2-reset +0 -3
  221. package/bin/superlocalmemoryv2-status +0 -3
  222. package/configs/chatgpt-desktop-mcp.json +0 -16
  223. package/configs/cursor-mcp.json +0 -15
  224. package/hooks/memory-list-skill.js +0 -139
  225. package/hooks/memory-profile-skill.js +0 -273
  226. package/hooks/memory-recall-skill.js +0 -114
  227. package/hooks/memory-remember-skill.js +0 -127
  228. package/hooks/memory-reset-skill.js +0 -274
  229. package/mcp_server.py +0 -1808
  230. package/requirements-core.txt +0 -22
  231. package/requirements-learning.txt +0 -12
  232. package/requirements.txt +0 -12
  233. package/src/agent_registry.py +0 -411
  234. package/src/auth_middleware.py +0 -61
  235. package/src/auto_backup.py +0 -459
  236. package/src/behavioral/__init__.py +0 -49
  237. package/src/behavioral/behavioral_listener.py +0 -203
  238. package/src/behavioral/behavioral_patterns.py +0 -275
  239. package/src/behavioral/cross_project_transfer.py +0 -206
  240. package/src/behavioral/outcome_inference.py +0 -194
  241. package/src/behavioral/outcome_tracker.py +0 -193
  242. package/src/behavioral/tests/__init__.py +0 -4
  243. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  244. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  245. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  246. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  247. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  248. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  249. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  250. package/src/cache_manager.py +0 -518
  251. package/src/compliance/__init__.py +0 -48
  252. package/src/compliance/abac_engine.py +0 -149
  253. package/src/compliance/abac_middleware.py +0 -116
  254. package/src/compliance/audit_db.py +0 -215
  255. package/src/compliance/audit_logger.py +0 -148
  256. package/src/compliance/retention_manager.py +0 -289
  257. package/src/compliance/retention_scheduler.py +0 -186
  258. package/src/compliance/tests/__init__.py +0 -4
  259. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  260. package/src/compliance/tests/test_abac_engine.py +0 -124
  261. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  262. package/src/compliance/tests/test_audit_db.py +0 -123
  263. package/src/compliance/tests/test_audit_logger.py +0 -98
  264. package/src/compliance/tests/test_mcp_audit.py +0 -128
  265. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  266. package/src/compliance/tests/test_retention_manager.py +0 -131
  267. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  268. package/src/compression/__init__.py +0 -25
  269. package/src/compression/cli.py +0 -150
  270. package/src/compression/cold_storage.py +0 -217
  271. package/src/compression/config.py +0 -72
  272. package/src/compression/orchestrator.py +0 -133
  273. package/src/compression/tier2_compressor.py +0 -228
  274. package/src/compression/tier3_compressor.py +0 -153
  275. package/src/compression/tier_classifier.py +0 -148
  276. package/src/db_connection_manager.py +0 -536
  277. package/src/embedding_engine.py +0 -63
  278. package/src/embeddings/__init__.py +0 -47
  279. package/src/embeddings/cache.py +0 -70
  280. package/src/embeddings/cli.py +0 -113
  281. package/src/embeddings/constants.py +0 -47
  282. package/src/embeddings/database.py +0 -91
  283. package/src/embeddings/engine.py +0 -247
  284. package/src/embeddings/model_loader.py +0 -145
  285. package/src/event_bus.py +0 -562
  286. package/src/graph/__init__.py +0 -36
  287. package/src/graph/build_helpers.py +0 -74
  288. package/src/graph/cli.py +0 -87
  289. package/src/graph/cluster_builder.py +0 -188
  290. package/src/graph/cluster_summary.py +0 -148
  291. package/src/graph/constants.py +0 -47
  292. package/src/graph/edge_builder.py +0 -162
  293. package/src/graph/entity_extractor.py +0 -95
  294. package/src/graph/graph_core.py +0 -226
  295. package/src/graph/graph_search.py +0 -231
  296. package/src/graph/hierarchical.py +0 -207
  297. package/src/graph/schema.py +0 -99
  298. package/src/graph_engine.py +0 -52
  299. package/src/hnsw_index.py +0 -628
  300. package/src/hybrid_search.py +0 -46
  301. package/src/learning/__init__.py +0 -217
  302. package/src/learning/adaptive_ranker.py +0 -682
  303. package/src/learning/bootstrap/__init__.py +0 -69
  304. package/src/learning/bootstrap/constants.py +0 -93
  305. package/src/learning/bootstrap/db_queries.py +0 -316
  306. package/src/learning/bootstrap/sampling.py +0 -82
  307. package/src/learning/bootstrap/text_utils.py +0 -71
  308. package/src/learning/cross_project_aggregator.py +0 -857
  309. package/src/learning/db/__init__.py +0 -40
  310. package/src/learning/db/constants.py +0 -44
  311. package/src/learning/db/schema.py +0 -279
  312. package/src/learning/engagement_tracker.py +0 -628
  313. package/src/learning/feature_extractor.py +0 -708
  314. package/src/learning/feedback_collector.py +0 -806
  315. package/src/learning/learning_db.py +0 -915
  316. package/src/learning/project_context_manager.py +0 -572
  317. package/src/learning/ranking/__init__.py +0 -33
  318. package/src/learning/ranking/constants.py +0 -84
  319. package/src/learning/ranking/helpers.py +0 -278
  320. package/src/learning/source_quality_scorer.py +0 -676
  321. package/src/learning/synthetic_bootstrap.py +0 -755
  322. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  323. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  324. package/src/learning/tests/test_aggregator.py +0 -306
  325. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  326. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  327. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  328. package/src/learning/tests/test_feedback_collector.py +0 -294
  329. package/src/learning/tests/test_learning_db.py +0 -602
  330. package/src/learning/tests/test_learning_db_v28.py +0 -110
  331. package/src/learning/tests/test_learning_init_v28.py +0 -48
  332. package/src/learning/tests/test_outcome_signals.py +0 -48
  333. package/src/learning/tests/test_project_context.py +0 -292
  334. package/src/learning/tests/test_schema_migration.py +0 -319
  335. package/src/learning/tests/test_signal_inference.py +0 -397
  336. package/src/learning/tests/test_source_quality.py +0 -351
  337. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  338. package/src/learning/tests/test_workflow_miner.py +0 -318
  339. package/src/learning/workflow_pattern_miner.py +0 -655
  340. package/src/lifecycle/__init__.py +0 -54
  341. package/src/lifecycle/bounded_growth.py +0 -239
  342. package/src/lifecycle/compaction_engine.py +0 -226
  343. package/src/lifecycle/lifecycle_engine.py +0 -355
  344. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  345. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  346. package/src/lifecycle/retention_policy.py +0 -285
  347. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  348. package/src/lifecycle/tests/test_compaction.py +0 -179
  349. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  350. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  351. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  352. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  353. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  354. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  355. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  356. package/src/mcp_tools_v28.py +0 -281
  357. package/src/memory/__init__.py +0 -36
  358. package/src/memory/cli.py +0 -205
  359. package/src/memory/constants.py +0 -39
  360. package/src/memory/helpers.py +0 -28
  361. package/src/memory/schema.py +0 -166
  362. package/src/memory-profiles.py +0 -595
  363. package/src/memory-reset.py +0 -491
  364. package/src/memory_compression.py +0 -989
  365. package/src/memory_store_v2.py +0 -1155
  366. package/src/migrate_v1_to_v2.py +0 -629
  367. package/src/pattern_learner.py +0 -34
  368. package/src/patterns/__init__.py +0 -24
  369. package/src/patterns/analyzers.py +0 -251
  370. package/src/patterns/learner.py +0 -271
  371. package/src/patterns/scoring.py +0 -171
  372. package/src/patterns/store.py +0 -225
  373. package/src/patterns/terminology.py +0 -140
  374. package/src/provenance_tracker.py +0 -312
  375. package/src/qualixar_attribution.py +0 -139
  376. package/src/qualixar_watermark.py +0 -78
  377. package/src/query_optimizer.py +0 -511
  378. package/src/rate_limiter.py +0 -83
  379. package/src/search/__init__.py +0 -20
  380. package/src/search/cli.py +0 -77
  381. package/src/search/constants.py +0 -26
  382. package/src/search/engine.py +0 -241
  383. package/src/search/fusion.py +0 -122
  384. package/src/search/index_loader.py +0 -114
  385. package/src/search/methods.py +0 -162
  386. package/src/search_engine_v2.py +0 -401
  387. package/src/setup_validator.py +0 -482
  388. package/src/subscription_manager.py +0 -391
  389. package/src/tree/__init__.py +0 -59
  390. package/src/tree/builder.py +0 -185
  391. package/src/tree/nodes.py +0 -202
  392. package/src/tree/queries.py +0 -257
  393. package/src/tree/schema.py +0 -80
  394. package/src/tree_manager.py +0 -19
  395. package/src/trust/__init__.py +0 -45
  396. package/src/trust/constants.py +0 -66
  397. package/src/trust/queries.py +0 -157
  398. package/src/trust/schema.py +0 -95
  399. package/src/trust/scorer.py +0 -299
  400. package/src/trust/signals.py +0 -95
  401. package/src/trust_scorer.py +0 -44
  402. package/ui/app.js +0 -1588
  403. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  404. package/ui/js/graph-cytoscape.js +0 -1168
  405. package/ui/js/graph-d3-backup.js +0 -32
  406. package/ui/js/graph.js +0 -32
  407. package/ui_server.py +0 -286
  408. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  409. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  410. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  411. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  412. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  413. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  414. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  415. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  416. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  417. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  418. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  419. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  420. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  421. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  422. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  423. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  424. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  425. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  426. /package/{completions → ide/completions}/slm.bash +0 -0
  427. /package/{completions → ide/completions}/slm.zsh +0 -0
  428. /package/{configs → ide/configs}/cody-commands.json +0 -0
  429. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
  430. /package/{install.ps1 → scripts/install.ps1} +0 -0
  431. /package/{install.sh → scripts/install.sh} +0 -0
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Bootstrap utilities package.
6
-
7
- Re-exports all constants, functions, and utilities used by SyntheticBootstrapper.
8
- """
9
-
10
- # Constants
11
- from .constants import (
12
- MEMORY_DB_PATH,
13
- MODELS_DIR,
14
- MODEL_PATH,
15
- MIN_MEMORIES_FOR_BOOTSTRAP,
16
- BOOTSTRAP_CONFIG,
17
- BOOTSTRAP_PARAMS,
18
- STOPWORDS,
19
- MIN_KEYWORD_LENGTH,
20
- )
21
-
22
- # Text utilities
23
- from .text_utils import (
24
- extract_keywords,
25
- clean_fts_query,
26
- )
27
-
28
- # Database queries
29
- from .db_queries import (
30
- get_memory_count,
31
- get_memories_by_access,
32
- get_memories_by_importance,
33
- get_recent_memories,
34
- get_learned_patterns,
35
- search_memories,
36
- find_negative_memories,
37
- )
38
-
39
- # Sampling utilities
40
- from .sampling import (
41
- diverse_sample,
42
- count_sources,
43
- )
44
-
45
- __all__ = [
46
- # Constants
47
- 'MEMORY_DB_PATH',
48
- 'MODELS_DIR',
49
- 'MODEL_PATH',
50
- 'MIN_MEMORIES_FOR_BOOTSTRAP',
51
- 'BOOTSTRAP_CONFIG',
52
- 'BOOTSTRAP_PARAMS',
53
- 'STOPWORDS',
54
- 'MIN_KEYWORD_LENGTH',
55
- # Text utilities
56
- 'extract_keywords',
57
- 'clean_fts_query',
58
- # Database queries
59
- 'get_memory_count',
60
- 'get_memories_by_access',
61
- 'get_memories_by_importance',
62
- 'get_recent_memories',
63
- 'get_learned_patterns',
64
- 'search_memories',
65
- 'find_negative_memories',
66
- # Sampling
67
- 'diverse_sample',
68
- 'count_sources',
69
- ]
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Bootstrap constants and configuration.
6
-
7
- All constant values, configuration dicts, and static data used
8
- by SyntheticBootstrapper are defined here.
9
- """
10
-
11
- from pathlib import Path
12
-
13
- # ============================================================================
14
- # Paths
15
- # ============================================================================
16
-
17
- MEMORY_DB_PATH = Path.home() / ".claude-memory" / "memory.db"
18
- MODELS_DIR = Path.home() / ".claude-memory" / "models"
19
- MODEL_PATH = MODELS_DIR / "ranker.txt"
20
-
21
- # ============================================================================
22
- # Bootstrap Configuration
23
- # ============================================================================
24
-
25
- # Minimum memories needed before bootstrap makes sense
26
- MIN_MEMORIES_FOR_BOOTSTRAP = 50
27
-
28
- # Tiered config — bootstrap model complexity scales with data size
29
- BOOTSTRAP_CONFIG = {
30
- 'small': {
31
- 'min_memories': 50,
32
- 'max_memories': 499,
33
- 'target_samples': 200,
34
- 'n_estimators': 30,
35
- 'max_depth': 3,
36
- },
37
- 'medium': {
38
- 'min_memories': 500,
39
- 'max_memories': 4999,
40
- 'target_samples': 1000,
41
- 'n_estimators': 50,
42
- 'max_depth': 4,
43
- },
44
- 'large': {
45
- 'min_memories': 5000,
46
- 'max_memories': float('inf'),
47
- 'target_samples': 2000,
48
- 'n_estimators': 100,
49
- 'max_depth': 6,
50
- },
51
- }
52
-
53
- # ============================================================================
54
- # LightGBM Parameters
55
- # ============================================================================
56
-
57
- # LightGBM bootstrap parameters — MORE aggressive regularization than
58
- # real training because synthetic data has systematic biases
59
- BOOTSTRAP_PARAMS = {
60
- 'objective': 'lambdarank',
61
- 'metric': 'ndcg',
62
- 'ndcg_eval_at': [5, 10],
63
- 'learning_rate': 0.1,
64
- 'num_leaves': 8,
65
- 'max_depth': 3,
66
- 'min_child_samples': 5,
67
- 'subsample': 0.7,
68
- 'reg_alpha': 0.5,
69
- 'reg_lambda': 2.0,
70
- 'boosting_type': 'dart',
71
- 'verbose': -1,
72
- }
73
-
74
- # ============================================================================
75
- # Text Processing
76
- # ============================================================================
77
-
78
- # English stopwords for keyword extraction (no external deps)
79
- STOPWORDS = frozenset({
80
- 'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
81
- 'of', 'with', 'by', 'from', 'is', 'it', 'this', 'that', 'was', 'are',
82
- 'be', 'has', 'have', 'had', 'do', 'does', 'did', 'will', 'would',
83
- 'could', 'should', 'may', 'might', 'can', 'not', 'no', 'if', 'then',
84
- 'so', 'as', 'up', 'out', 'about', 'into', 'over', 'after', 'before',
85
- 'when', 'where', 'how', 'what', 'which', 'who', 'whom', 'why',
86
- 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other',
87
- 'some', 'such', 'than', 'too', 'very', 'just', 'also', 'now',
88
- 'here', 'there', 'use', 'used', 'using', 'make', 'made',
89
- 'need', 'needed', 'get', 'got', 'set', 'new', 'old', 'one', 'two',
90
- })
91
-
92
- # Minimum word length for keyword extraction
93
- MIN_KEYWORD_LENGTH = 3
@@ -1,316 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Database query utilities for synthetic bootstrap.
6
-
7
- All read-only queries against memory.db used by SyntheticBootstrapper.
8
- These functions are stateless and take db_path as parameter.
9
- """
10
-
11
- import logging
12
- import sqlite3
13
- from pathlib import Path
14
- from typing import List, Optional, Set
15
-
16
- from .text_utils import clean_fts_query
17
-
18
# Module-level logger; every query helper below reports failures through it
# instead of raising.
logger = logging.getLogger("superlocalmemory.learning.bootstrap.db_queries")
19
-
20
-
21
def get_memory_count(db_path: Path) -> int:
    """
    Count total memories in memory.db.

    Args:
        db_path: Path to memory.db.

    Returns:
        Number of rows in the ``memories`` table, or 0 if the file does not
        exist or the query fails for any reason (the failure is logged).
    """
    if not db_path.exists():
        return 0
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM memories')
            return cursor.fetchone()[0]
        finally:
            # Release the handle even when the query raises (e.g. the
            # table is missing); otherwise the connection would leak.
            conn.close()
    except Exception as e:
        logger.warning("Failed to count memories: %s", e)
        return 0
43
-
44
-
45
def get_memories_by_access(db_path: Path, min_access: int = 5) -> List[dict]:
    """
    Fetch memories with access_count >= min_access from memory.db.

    These are memories the user keeps coming back to — strong positive signal.

    Args:
        db_path: Path to memory.db.
        min_access: Minimum access_count threshold.

    Returns:
        Up to 100 memory dicts ordered by access_count descending. Empty
        list if the file does not exist or the query fails (logged).
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            # Row factory lets each row be converted to a plain dict.
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE access_count >= ?
                ORDER BY access_count DESC
                LIMIT 100
            ''', (min_access,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close on every path so a failing query cannot leak the handle.
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch high-access memories: %s", e)
        return []
78
-
79
-
80
def get_memories_by_importance(db_path: Path, min_importance: int = 8) -> List[dict]:
    """
    Fetch memories with importance >= min_importance from memory.db.

    High importance = user explicitly rated these as valuable.

    Args:
        db_path: Path to memory.db.
        min_importance: Minimum importance threshold.

    Returns:
        Up to 100 memory dicts ordered by importance descending. Empty
        list if the file does not exist or the query fails (logged).
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            # Row factory lets each row be converted to a plain dict.
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE importance >= ?
                ORDER BY importance DESC
                LIMIT 100
            ''', (min_importance,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close on every path so a failing query cannot leak the handle.
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch high-importance memories: %s", e)
        return []
113
-
114
-
115
def get_recent_memories(db_path: Path, limit: int = 30) -> List[dict]:
    """
    Fetch the N most recently created memories.

    Args:
        db_path: Path to memory.db.
        limit: Maximum number of memories to return.

    Returns:
        List of memory dicts, sorted by created_at DESC. Empty list if the
        file does not exist or the query fails (logged).
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            # Row factory lets each row be converted to a plain dict.
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                ORDER BY created_at DESC
                LIMIT ?
            ''', (limit,))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close on every path so a failing query cannot leak the handle.
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch recent memories: %s", e)
        return []
145
-
146
-
147
def get_learned_patterns(
    db_path: Path,
    min_confidence: float = 0.7,
) -> List[dict]:
    """
    Read high-confidence rows from the identity_patterns table.

    The table is optional: databases that do not contain it simply yield
    an empty list, as does a missing database file or any query failure
    (failures are logged as warnings).

    Args:
        db_path: Path to memory.db.
        min_confidence: Lowest confidence value a pattern may have.

    Returns:
        At most 50 pattern dicts, highest confidence first.
    """
    if not db_path.exists():
        return []

    try:
        connection = sqlite3.connect(str(db_path), timeout=5)
        try:
            connection.row_factory = sqlite3.Row
            cur = connection.cursor()

            # Older databases may not have the table at all.
            cur.execute('''
                SELECT name FROM sqlite_master
                WHERE type='table' AND name='identity_patterns'
            ''')
            table_row = cur.fetchone()
            if table_row is None:
                return []

            cur.execute('''
                SELECT id, pattern_type, key, value, confidence,
                       evidence_count, category
                FROM identity_patterns
                WHERE confidence >= ?
                ORDER BY confidence DESC
                LIMIT 50
            ''', (min_confidence,))
            patterns = []
            for record in cur.fetchall():
                patterns.append(dict(record))
            return patterns
        finally:
            connection.close()
    except Exception as e:
        logger.warning("Failed to fetch learned patterns: %s", e)
        return []
198
-
199
-
200
def search_memories(db_path: Path, query: str, limit: int = 20) -> List[dict]:
    """
    Run a plain FTS5 lookup against memory.db.

    The raw query is converted with clean_fts_query and matched against
    the memories_fts index; matching rows are joined back to memories and
    returned in rank order. No other ranking signal is applied.

    Args:
        db_path: Path to memory.db.
        query: Search query string.
        limit: Maximum results to return.

    Returns:
        List of memory dicts matching the query. Empty when the database
        file is missing, the query is blank, no usable tokens remain, or
        the search raises (logged at debug level).
    """
    if not db_path.exists():
        return []
    is_blank = not query or not query.strip()
    if is_blank:
        return []

    try:
        connection = sqlite3.connect(str(db_path), timeout=5)
        try:
            connection.row_factory = sqlite3.Row
            cur = connection.cursor()

            # Convert free text into an FTS5 MATCH expression.
            match_expr = clean_fts_query(query)
            if not match_expr:
                return []

            cur.execute('''
                SELECT m.id, m.content, m.summary, m.project_name, m.tags,
                       m.category, m.importance, m.created_at, m.access_count
                FROM memories m
                JOIN memories_fts fts ON m.id = fts.rowid
                WHERE memories_fts MATCH ?
                ORDER BY rank
                LIMIT ?
            ''', (match_expr, limit))
            hits = []
            for record in cur.fetchall():
                hits.append(dict(record))
            return hits
        finally:
            connection.close()
    except Exception as e:
        logger.debug("FTS5 search failed (may not exist yet): %s", e)
        return []
247
-
248
-
249
def find_negative_memories(
    db_path: Path,
    anchor_memory: dict,
    exclude_ids: Optional[Set[int]] = None,
    limit: int = 2,
) -> List[dict]:
    """
    Find memories dissimilar to the anchor (for negative examples).

    Heuristic: pick random memories whose project AND category both differ
    from the anchor's (each filter is applied only when the anchor carries
    that field). When the anchor has neither, any memory qualifies.

    The anchor itself is never returned: its own ``id`` (when present) is
    excluded together with ``exclude_ids``. Without this, an anchor lacking
    project/category could be chosen as its own negative example.

    Args:
        db_path: Path to memory.db.
        anchor_memory: The reference memory to find negatives for.
        exclude_ids: Set of memory IDs to exclude from results. Not mutated.
        limit: Maximum number of negatives to return.

    Returns:
        List of negative example memory dicts in random order. Empty list
        if the file does not exist or the query fails (logged at debug).
    """
    if not db_path.exists():
        return []

    # Work on a copy so the caller's set is left untouched.
    excluded = set(exclude_ids) if exclude_ids else set()
    anchor_id = anchor_memory.get('id')
    if anchor_id is not None:
        excluded.add(anchor_id)

    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            anchor_project = anchor_memory.get('project_name', '')
            anchor_category = anchor_memory.get('category', '')

            # Build the WHERE clause from fixed SQL fragments only; every
            # value is bound as a parameter.
            conditions = []
            params: list = []

            if anchor_project:
                conditions.append('project_name != ?')
                params.append(anchor_project)
            if anchor_category:
                conditions.append('category != ?')
                params.append(anchor_category)

            if excluded:
                placeholders = ','.join('?' for _ in excluded)
                conditions.append(f'id NOT IN ({placeholders})')
                params.extend(excluded)

            where_clause = ' AND '.join(conditions) if conditions else '1=1'

            cursor.execute(f'''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE {where_clause}
                ORDER BY RANDOM()
                LIMIT ?
            ''', (*params, limit))
            return [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()
    except Exception as e:
        logger.debug("Failed to find negative memories: %s", e)
        return []
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Sampling utilities for synthetic bootstrap.
6
-
7
- Functions for diverse sampling and record aggregation.
8
- """
9
-
10
- from typing import Dict, List
11
-
12
-
13
def diverse_sample(
    records: List[dict],
    target: int,
) -> List[dict]:
    """
    Sample records while maintaining source diversity.

    Takes an equal share (``target // number_of_sources``, at least 1) from
    each source strategy, in input order, so the training data isn't
    dominated by one strategy. If that leaves the sample short of
    ``target``, the remainder is filled with not-yet-selected records in
    their original order.

    Selection is tracked by position in ``records``, so records are only
    required to carry the optional 'source' field; no other key is needed.

    Args:
        records: List of training records with 'source' field
            (records without it are grouped under 'unknown').
        target: Target number of samples to return.

    Returns:
        Sampled list of records (at most target items). The input list
        itself is returned when it already fits within target; an empty
        list is returned for a non-positive target.
    """
    if len(records) <= target:
        return records
    if target <= 0:
        # Nothing requested; also avoids a negative slice bound below.
        return []

    # Group record positions by source, preserving first-seen order.
    by_source: Dict[str, List[int]] = {}
    for index, record in enumerate(records):
        by_source.setdefault(record.get('source', 'unknown'), []).append(index)

    # records is non-empty here, so there is at least one source.
    per_source = max(1, target // len(by_source))

    chosen: List[int] = []
    for indices in by_source.values():
        # Take up to per_source from each, or all if fewer.
        chosen.extend(indices[:per_source])

    # If under target, fill from the records not selected yet.
    if len(chosen) < target:
        taken = set(chosen)
        for index in range(len(records)):
            if len(chosen) >= target:
                break
            if index not in taken:
                chosen.append(index)
                taken.add(index)

    return [records[index] for index in chosen[:target]]
66
-
67
-
68
def count_sources(records: List[dict]) -> Dict[str, int]:
    """
    Tally how many records came from each source strategy.

    Records that carry no 'source' key are tallied under 'unknown'.

    Args:
        records: List of training records with 'source' field.

    Returns:
        Dict mapping source name to count, keyed in first-seen order.
    """
    tally: Dict[str, int] = {}
    for record in records:
        label = record.get('source', 'unknown')
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1
    return tally
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Text processing utilities for synthetic bootstrap.
6
-
7
- Simple keyword extraction and text processing functions
8
- with no external NLP dependencies.
9
- """
10
-
11
- import re
12
- from collections import Counter
13
- from typing import List
14
-
15
- from .constants import STOPWORDS, MIN_KEYWORD_LENGTH
16
-
17
-
18
def extract_keywords(content: str, top_n: int = 3) -> List[str]:
    """
    Pick the most frequent meaningful words out of a piece of text.

    The text is lower-cased and split into word tokens (a letter followed
    by letters, digits, '_', '.' or '-', ending in a letter or digit; or a
    single letter). Tokens found in STOPWORDS or shorter than
    MIN_KEYWORD_LENGTH are dropped, and the rest are ranked by frequency.

    Uses only the standard library (regex + Counter).

    Args:
        content: Text content to extract keywords from.
        top_n: Number of top keywords to return.

    Returns:
        List of top N keywords by frequency; empty when the content is
        empty or contains no meaningful tokens.
    """
    if not content:
        return []

    tokens = re.findall(
        r'[a-zA-Z][a-zA-Z0-9_.-]*[a-zA-Z0-9]|[a-zA-Z]',
        content.lower(),
    )

    # Counter keeps first-seen order among equally frequent words, so
    # feeding it the filtered tokens in order preserves tie-breaking.
    frequency = Counter(
        token
        for token in tokens
        if len(token) >= MIN_KEYWORD_LENGTH and token not in STOPWORDS
    )

    ranked = frequency.most_common(top_n)
    return [keyword for keyword, _occurrences in ranked]
54
-
55
-
56
def clean_fts_query(query: str) -> str:
    """
    Clean and prepare query for FTS5 search.

    Extracts word tokens and joins them with OR for FTS5 MATCH syntax.

    A token that is itself an FTS5 boolean operator (the case-sensitive
    words AND, OR, NOT) is wrapped in double quotes so it is matched as a
    search term. Left bare, such a token would be parsed as an operator
    and make the whole expression invalid (e.g. "rock AND roll" would
    become ``rock OR AND OR roll``). All other tokens are emitted unchanged.

    Args:
        query: Raw query string. ``None`` or empty yields an empty result.

    Returns:
        FTS5-compatible query string, or empty string if no valid tokens.
    """
    if not query:
        return ''

    fts_operators = ('AND', 'OR', 'NOT')
    # \w+ tokens never contain a double quote, so quoting needs no escaping.
    fts_tokens = [
        f'"{token}"' if token in fts_operators else token
        for token in re.findall(r'\w+', query)
    ]
    return ' OR '.join(fts_tokens)