superlocalmemory 2.8.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +9 -1
  3. package/NOTICE +63 -0
  4. package/README.md +165 -480
  5. package/bin/slm +17 -449
  6. package/bin/slm-npm +2 -2
  7. package/bin/slm.bat +4 -2
  8. package/conftest.py +5 -0
  9. package/docs/api-reference.md +284 -0
  10. package/docs/architecture.md +149 -0
  11. package/docs/auto-memory.md +150 -0
  12. package/docs/cli-reference.md +276 -0
  13. package/docs/compliance.md +191 -0
  14. package/docs/configuration.md +182 -0
  15. package/docs/getting-started.md +102 -0
  16. package/docs/ide-setup.md +261 -0
  17. package/docs/mcp-tools.md +220 -0
  18. package/docs/migration-from-v2.md +170 -0
  19. package/docs/profiles.md +173 -0
  20. package/docs/troubleshooting.md +310 -0
  21. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  22. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  23. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  24. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  25. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  26. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  27. package/ide/configs/cursor-mcp.json +15 -0
  28. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  29. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  30. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  31. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  32. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  33. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  34. package/{configs → ide/configs}/zed-mcp.json +2 -2
  35. package/{hooks → ide/hooks}/context-hook.js +9 -20
  36. package/ide/hooks/memory-list-skill.js +70 -0
  37. package/ide/hooks/memory-profile-skill.js +101 -0
  38. package/ide/hooks/memory-recall-skill.js +62 -0
  39. package/ide/hooks/memory-remember-skill.js +68 -0
  40. package/ide/hooks/memory-reset-skill.js +160 -0
  41. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  42. package/ide/integrations/langchain/README.md +106 -0
  43. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  44. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  45. package/ide/integrations/langchain/pyproject.toml +38 -0
  46. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  47. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  48. package/ide/integrations/langchain/tests/test_security.py +117 -0
  49. package/ide/integrations/llamaindex/README.md +81 -0
  50. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  51. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  52. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  53. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  54. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  55. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  56. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  57. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  58. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  59. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  60. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  61. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  62. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  63. package/package.json +13 -22
  64. package/pyproject.toml +85 -0
  65. package/scripts/build-dmg.sh +417 -0
  66. package/scripts/install-skills.ps1 +334 -0
  67. package/{install.ps1 → scripts/install.ps1} +36 -4
  68. package/{install.sh → scripts/install.sh} +14 -13
  69. package/scripts/postinstall.js +2 -2
  70. package/scripts/start-dashboard.ps1 +52 -0
  71. package/scripts/start-dashboard.sh +41 -0
  72. package/scripts/sync-wiki.ps1 +127 -0
  73. package/scripts/sync-wiki.sh +82 -0
  74. package/scripts/test-dmg.sh +161 -0
  75. package/scripts/test-npm-package.ps1 +252 -0
  76. package/scripts/test-npm-package.sh +207 -0
  77. package/scripts/verify-install.ps1 +294 -0
  78. package/scripts/verify-install.sh +266 -0
  79. package/src/superlocalmemory/__init__.py +0 -0
  80. package/src/superlocalmemory/attribution/__init__.py +9 -0
  81. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  82. package/src/superlocalmemory/attribution/signer.py +153 -0
  83. package/src/superlocalmemory/attribution/watermark.py +189 -0
  84. package/src/superlocalmemory/cli/__init__.py +5 -0
  85. package/src/superlocalmemory/cli/commands.py +245 -0
  86. package/src/superlocalmemory/cli/main.py +89 -0
  87. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  88. package/src/superlocalmemory/cli/post_install.py +99 -0
  89. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  90. package/src/superlocalmemory/compliance/__init__.py +0 -0
  91. package/src/superlocalmemory/compliance/abac.py +204 -0
  92. package/src/superlocalmemory/compliance/audit.py +314 -0
  93. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  94. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  95. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  96. package/src/superlocalmemory/compliance/retention.py +232 -0
  97. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  98. package/src/superlocalmemory/core/__init__.py +0 -0
  99. package/src/superlocalmemory/core/config.py +391 -0
  100. package/src/superlocalmemory/core/embeddings.py +293 -0
  101. package/src/superlocalmemory/core/engine.py +701 -0
  102. package/src/superlocalmemory/core/hooks.py +65 -0
  103. package/src/superlocalmemory/core/maintenance.py +172 -0
  104. package/src/superlocalmemory/core/modes.py +140 -0
  105. package/src/superlocalmemory/core/profiles.py +234 -0
  106. package/src/superlocalmemory/core/registry.py +117 -0
  107. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  108. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  109. package/src/superlocalmemory/encoding/__init__.py +0 -0
  110. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  111. package/src/superlocalmemory/encoding/emotional.py +125 -0
  112. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  113. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  114. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  115. package/src/superlocalmemory/encoding/foresight.py +91 -0
  116. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  117. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  118. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  119. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  120. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  121. package/src/superlocalmemory/encoding/type_router.py +235 -0
  122. package/src/superlocalmemory/hooks/__init__.py +3 -0
  123. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  124. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  125. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  126. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  127. package/src/superlocalmemory/infra/__init__.py +3 -0
  128. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  129. package/src/superlocalmemory/infra/backup.py +317 -0
  130. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  131. package/src/superlocalmemory/infra/event_bus.py +381 -0
  132. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  133. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  134. package/src/superlocalmemory/learning/__init__.py +0 -0
  135. package/src/superlocalmemory/learning/adaptive.py +172 -0
  136. package/src/superlocalmemory/learning/behavioral.py +490 -0
  137. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  138. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  139. package/src/superlocalmemory/learning/cross_project.py +399 -0
  140. package/src/superlocalmemory/learning/database.py +376 -0
  141. package/src/superlocalmemory/learning/engagement.py +323 -0
  142. package/src/superlocalmemory/learning/features.py +138 -0
  143. package/src/superlocalmemory/learning/feedback.py +316 -0
  144. package/src/superlocalmemory/learning/outcomes.py +255 -0
  145. package/src/superlocalmemory/learning/project_context.py +366 -0
  146. package/src/superlocalmemory/learning/ranker.py +155 -0
  147. package/src/superlocalmemory/learning/source_quality.py +303 -0
  148. package/src/superlocalmemory/learning/workflows.py +309 -0
  149. package/src/superlocalmemory/llm/__init__.py +0 -0
  150. package/src/superlocalmemory/llm/backbone.py +316 -0
  151. package/src/superlocalmemory/math/__init__.py +0 -0
  152. package/src/superlocalmemory/math/fisher.py +356 -0
  153. package/src/superlocalmemory/math/langevin.py +398 -0
  154. package/src/superlocalmemory/math/sheaf.py +257 -0
  155. package/src/superlocalmemory/mcp/__init__.py +0 -0
  156. package/src/superlocalmemory/mcp/resources.py +245 -0
  157. package/src/superlocalmemory/mcp/server.py +61 -0
  158. package/src/superlocalmemory/mcp/tools.py +18 -0
  159. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  160. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  161. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  162. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  163. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  164. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  165. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  166. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  167. package/src/superlocalmemory/retrieval/engine.py +390 -0
  168. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  169. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  170. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  171. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  172. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  173. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  174. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  175. package/src/superlocalmemory/server/__init__.py +1 -0
  176. package/src/superlocalmemory/server/api.py +248 -0
  177. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  178. package/src/superlocalmemory/server/routes/agents.py +107 -0
  179. package/src/superlocalmemory/server/routes/backup.py +91 -0
  180. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  181. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  182. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  183. package/src/superlocalmemory/server/routes/events.py +183 -0
  184. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  185. package/src/superlocalmemory/server/routes/learning.py +273 -0
  186. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  187. package/src/superlocalmemory/server/routes/memories.py +399 -0
  188. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  189. package/src/superlocalmemory/server/routes/stats.py +346 -0
  190. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  191. package/src/superlocalmemory/server/routes/ws.py +82 -0
  192. package/src/superlocalmemory/server/security_middleware.py +57 -0
  193. package/src/superlocalmemory/server/ui.py +245 -0
  194. package/src/superlocalmemory/storage/__init__.py +0 -0
  195. package/src/superlocalmemory/storage/access_control.py +182 -0
  196. package/src/superlocalmemory/storage/database.py +594 -0
  197. package/src/superlocalmemory/storage/migrations.py +303 -0
  198. package/src/superlocalmemory/storage/models.py +406 -0
  199. package/src/superlocalmemory/storage/schema.py +726 -0
  200. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  201. package/src/superlocalmemory/trust/__init__.py +0 -0
  202. package/src/superlocalmemory/trust/gate.py +130 -0
  203. package/src/superlocalmemory/trust/provenance.py +124 -0
  204. package/src/superlocalmemory/trust/scorer.py +347 -0
  205. package/src/superlocalmemory/trust/signals.py +153 -0
  206. package/ui/index.html +278 -5
  207. package/ui/js/auto-settings.js +70 -0
  208. package/ui/js/dashboard.js +90 -0
  209. package/ui/js/fact-detail.js +92 -0
  210. package/ui/js/feedback.js +2 -2
  211. package/ui/js/ide-status.js +102 -0
  212. package/ui/js/math-health.js +98 -0
  213. package/ui/js/recall-lab.js +127 -0
  214. package/ui/js/settings.js +2 -2
  215. package/ui/js/trust-dashboard.js +73 -0
  216. package/api_server.py +0 -724
  217. package/bin/aider-smart +0 -72
  218. package/bin/superlocalmemoryv2-learning +0 -4
  219. package/bin/superlocalmemoryv2-list +0 -3
  220. package/bin/superlocalmemoryv2-patterns +0 -4
  221. package/bin/superlocalmemoryv2-profile +0 -3
  222. package/bin/superlocalmemoryv2-recall +0 -3
  223. package/bin/superlocalmemoryv2-remember +0 -3
  224. package/bin/superlocalmemoryv2-reset +0 -3
  225. package/bin/superlocalmemoryv2-status +0 -3
  226. package/configs/chatgpt-desktop-mcp.json +0 -16
  227. package/configs/cursor-mcp.json +0 -15
  228. package/docs/SECURITY-QUICK-REFERENCE.md +0 -214
  229. package/hooks/memory-list-skill.js +0 -139
  230. package/hooks/memory-profile-skill.js +0 -273
  231. package/hooks/memory-recall-skill.js +0 -114
  232. package/hooks/memory-remember-skill.js +0 -127
  233. package/hooks/memory-reset-skill.js +0 -274
  234. package/mcp_server.py +0 -1800
  235. package/requirements-core.txt +0 -22
  236. package/requirements-learning.txt +0 -12
  237. package/requirements.txt +0 -12
  238. package/src/agent_registry.py +0 -411
  239. package/src/auth_middleware.py +0 -61
  240. package/src/auto_backup.py +0 -459
  241. package/src/behavioral/__init__.py +0 -49
  242. package/src/behavioral/behavioral_listener.py +0 -203
  243. package/src/behavioral/behavioral_patterns.py +0 -275
  244. package/src/behavioral/cross_project_transfer.py +0 -206
  245. package/src/behavioral/outcome_inference.py +0 -194
  246. package/src/behavioral/outcome_tracker.py +0 -193
  247. package/src/behavioral/tests/__init__.py +0 -4
  248. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  249. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  250. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  251. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  252. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  253. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  254. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  255. package/src/cache_manager.py +0 -518
  256. package/src/compliance/__init__.py +0 -48
  257. package/src/compliance/abac_engine.py +0 -149
  258. package/src/compliance/abac_middleware.py +0 -116
  259. package/src/compliance/audit_db.py +0 -215
  260. package/src/compliance/audit_logger.py +0 -148
  261. package/src/compliance/retention_manager.py +0 -289
  262. package/src/compliance/retention_scheduler.py +0 -186
  263. package/src/compliance/tests/__init__.py +0 -4
  264. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  265. package/src/compliance/tests/test_abac_engine.py +0 -124
  266. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  267. package/src/compliance/tests/test_audit_db.py +0 -123
  268. package/src/compliance/tests/test_audit_logger.py +0 -98
  269. package/src/compliance/tests/test_mcp_audit.py +0 -128
  270. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  271. package/src/compliance/tests/test_retention_manager.py +0 -131
  272. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  273. package/src/compression/__init__.py +0 -25
  274. package/src/compression/cli.py +0 -150
  275. package/src/compression/cold_storage.py +0 -217
  276. package/src/compression/config.py +0 -72
  277. package/src/compression/orchestrator.py +0 -133
  278. package/src/compression/tier2_compressor.py +0 -228
  279. package/src/compression/tier3_compressor.py +0 -153
  280. package/src/compression/tier_classifier.py +0 -148
  281. package/src/db_connection_manager.py +0 -536
  282. package/src/embedding_engine.py +0 -63
  283. package/src/embeddings/__init__.py +0 -47
  284. package/src/embeddings/cache.py +0 -70
  285. package/src/embeddings/cli.py +0 -113
  286. package/src/embeddings/constants.py +0 -47
  287. package/src/embeddings/database.py +0 -91
  288. package/src/embeddings/engine.py +0 -247
  289. package/src/embeddings/model_loader.py +0 -145
  290. package/src/event_bus.py +0 -562
  291. package/src/graph/__init__.py +0 -36
  292. package/src/graph/build_helpers.py +0 -74
  293. package/src/graph/cli.py +0 -87
  294. package/src/graph/cluster_builder.py +0 -188
  295. package/src/graph/cluster_summary.py +0 -148
  296. package/src/graph/constants.py +0 -47
  297. package/src/graph/edge_builder.py +0 -162
  298. package/src/graph/entity_extractor.py +0 -95
  299. package/src/graph/graph_core.py +0 -226
  300. package/src/graph/graph_search.py +0 -231
  301. package/src/graph/hierarchical.py +0 -207
  302. package/src/graph/schema.py +0 -99
  303. package/src/graph_engine.py +0 -52
  304. package/src/hnsw_index.py +0 -628
  305. package/src/hybrid_search.py +0 -46
  306. package/src/learning/__init__.py +0 -217
  307. package/src/learning/adaptive_ranker.py +0 -682
  308. package/src/learning/bootstrap/__init__.py +0 -69
  309. package/src/learning/bootstrap/constants.py +0 -93
  310. package/src/learning/bootstrap/db_queries.py +0 -316
  311. package/src/learning/bootstrap/sampling.py +0 -82
  312. package/src/learning/bootstrap/text_utils.py +0 -71
  313. package/src/learning/cross_project_aggregator.py +0 -857
  314. package/src/learning/db/__init__.py +0 -40
  315. package/src/learning/db/constants.py +0 -44
  316. package/src/learning/db/schema.py +0 -279
  317. package/src/learning/engagement_tracker.py +0 -628
  318. package/src/learning/feature_extractor.py +0 -708
  319. package/src/learning/feedback_collector.py +0 -806
  320. package/src/learning/learning_db.py +0 -915
  321. package/src/learning/project_context_manager.py +0 -572
  322. package/src/learning/ranking/__init__.py +0 -33
  323. package/src/learning/ranking/constants.py +0 -84
  324. package/src/learning/ranking/helpers.py +0 -278
  325. package/src/learning/source_quality_scorer.py +0 -676
  326. package/src/learning/synthetic_bootstrap.py +0 -755
  327. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  328. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  329. package/src/learning/tests/test_aggregator.py +0 -306
  330. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  331. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  332. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  333. package/src/learning/tests/test_feedback_collector.py +0 -294
  334. package/src/learning/tests/test_learning_db.py +0 -602
  335. package/src/learning/tests/test_learning_db_v28.py +0 -110
  336. package/src/learning/tests/test_learning_init_v28.py +0 -48
  337. package/src/learning/tests/test_outcome_signals.py +0 -48
  338. package/src/learning/tests/test_project_context.py +0 -292
  339. package/src/learning/tests/test_schema_migration.py +0 -319
  340. package/src/learning/tests/test_signal_inference.py +0 -397
  341. package/src/learning/tests/test_source_quality.py +0 -351
  342. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  343. package/src/learning/tests/test_workflow_miner.py +0 -318
  344. package/src/learning/workflow_pattern_miner.py +0 -655
  345. package/src/lifecycle/__init__.py +0 -54
  346. package/src/lifecycle/bounded_growth.py +0 -239
  347. package/src/lifecycle/compaction_engine.py +0 -226
  348. package/src/lifecycle/lifecycle_engine.py +0 -355
  349. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  350. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  351. package/src/lifecycle/retention_policy.py +0 -285
  352. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  353. package/src/lifecycle/tests/test_compaction.py +0 -179
  354. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  355. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  356. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  357. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  358. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  359. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  360. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  361. package/src/mcp_tools_v28.py +0 -281
  362. package/src/memory/__init__.py +0 -36
  363. package/src/memory/cli.py +0 -205
  364. package/src/memory/constants.py +0 -39
  365. package/src/memory/helpers.py +0 -28
  366. package/src/memory/schema.py +0 -166
  367. package/src/memory-profiles.py +0 -595
  368. package/src/memory-reset.py +0 -491
  369. package/src/memory_compression.py +0 -989
  370. package/src/memory_store_v2.py +0 -1155
  371. package/src/migrate_v1_to_v2.py +0 -629
  372. package/src/pattern_learner.py +0 -34
  373. package/src/patterns/__init__.py +0 -24
  374. package/src/patterns/analyzers.py +0 -251
  375. package/src/patterns/learner.py +0 -271
  376. package/src/patterns/scoring.py +0 -171
  377. package/src/patterns/store.py +0 -225
  378. package/src/patterns/terminology.py +0 -140
  379. package/src/provenance_tracker.py +0 -312
  380. package/src/qualixar_attribution.py +0 -139
  381. package/src/qualixar_watermark.py +0 -78
  382. package/src/query_optimizer.py +0 -511
  383. package/src/rate_limiter.py +0 -83
  384. package/src/search/__init__.py +0 -20
  385. package/src/search/cli.py +0 -77
  386. package/src/search/constants.py +0 -26
  387. package/src/search/engine.py +0 -241
  388. package/src/search/fusion.py +0 -122
  389. package/src/search/index_loader.py +0 -114
  390. package/src/search/methods.py +0 -162
  391. package/src/search_engine_v2.py +0 -401
  392. package/src/setup_validator.py +0 -482
  393. package/src/subscription_manager.py +0 -391
  394. package/src/tree/__init__.py +0 -59
  395. package/src/tree/builder.py +0 -185
  396. package/src/tree/nodes.py +0 -202
  397. package/src/tree/queries.py +0 -257
  398. package/src/tree/schema.py +0 -80
  399. package/src/tree_manager.py +0 -19
  400. package/src/trust/__init__.py +0 -45
  401. package/src/trust/constants.py +0 -66
  402. package/src/trust/queries.py +0 -157
  403. package/src/trust/schema.py +0 -95
  404. package/src/trust/scorer.py +0 -299
  405. package/src/trust/signals.py +0 -95
  406. package/src/trust_scorer.py +0 -44
  407. package/ui/app.js +0 -1588
  408. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  409. package/ui/js/graph-cytoscape.js +0 -1168
  410. package/ui/js/graph-d3-backup.js +0 -32
  411. package/ui/js/graph.js +0 -32
  412. package/ui_server.py +0 -266
  413. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  414. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  415. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  416. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  417. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  418. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  419. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  420. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  421. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  422. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  423. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  424. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  425. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  426. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  427. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  428. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  429. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  430. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  431. /package/{completions → ide/completions}/slm.bash +0 -0
  432. /package/{completions → ide/completions}/slm.zsh +0 -0
  433. /package/{configs → ide/configs}/cody-commands.json +0 -0
  434. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Bootstrap utilities package.
6
-
7
- Re-exports all constants, functions, and utilities used by SyntheticBootstrapper.
8
- """
9
-
10
- # Constants
11
- from .constants import (
12
- MEMORY_DB_PATH,
13
- MODELS_DIR,
14
- MODEL_PATH,
15
- MIN_MEMORIES_FOR_BOOTSTRAP,
16
- BOOTSTRAP_CONFIG,
17
- BOOTSTRAP_PARAMS,
18
- STOPWORDS,
19
- MIN_KEYWORD_LENGTH,
20
- )
21
-
22
- # Text utilities
23
- from .text_utils import (
24
- extract_keywords,
25
- clean_fts_query,
26
- )
27
-
28
- # Database queries
29
- from .db_queries import (
30
- get_memory_count,
31
- get_memories_by_access,
32
- get_memories_by_importance,
33
- get_recent_memories,
34
- get_learned_patterns,
35
- search_memories,
36
- find_negative_memories,
37
- )
38
-
39
- # Sampling utilities
40
- from .sampling import (
41
- diverse_sample,
42
- count_sources,
43
- )
44
-
45
- __all__ = [
46
- # Constants
47
- 'MEMORY_DB_PATH',
48
- 'MODELS_DIR',
49
- 'MODEL_PATH',
50
- 'MIN_MEMORIES_FOR_BOOTSTRAP',
51
- 'BOOTSTRAP_CONFIG',
52
- 'BOOTSTRAP_PARAMS',
53
- 'STOPWORDS',
54
- 'MIN_KEYWORD_LENGTH',
55
- # Text utilities
56
- 'extract_keywords',
57
- 'clean_fts_query',
58
- # Database queries
59
- 'get_memory_count',
60
- 'get_memories_by_access',
61
- 'get_memories_by_importance',
62
- 'get_recent_memories',
63
- 'get_learned_patterns',
64
- 'search_memories',
65
- 'find_negative_memories',
66
- # Sampling
67
- 'diverse_sample',
68
- 'count_sources',
69
- ]
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Bootstrap constants and configuration.
6
-
7
- All constant values, configuration dicts, and static data used
8
- by SyntheticBootstrapper are defined here.
9
- """
10
-
11
- from pathlib import Path
12
-
13
- # ============================================================================
14
- # Paths
15
- # ============================================================================
16
-
17
- MEMORY_DB_PATH = Path.home() / ".claude-memory" / "memory.db"
18
- MODELS_DIR = Path.home() / ".claude-memory" / "models"
19
- MODEL_PATH = MODELS_DIR / "ranker.txt"
20
-
21
- # ============================================================================
22
- # Bootstrap Configuration
23
- # ============================================================================
24
-
25
- # Minimum memories needed before bootstrap makes sense
26
- MIN_MEMORIES_FOR_BOOTSTRAP = 50
27
-
28
- # Tiered config — bootstrap model complexity scales with data size
29
- BOOTSTRAP_CONFIG = {
30
- 'small': {
31
- 'min_memories': 50,
32
- 'max_memories': 499,
33
- 'target_samples': 200,
34
- 'n_estimators': 30,
35
- 'max_depth': 3,
36
- },
37
- 'medium': {
38
- 'min_memories': 500,
39
- 'max_memories': 4999,
40
- 'target_samples': 1000,
41
- 'n_estimators': 50,
42
- 'max_depth': 4,
43
- },
44
- 'large': {
45
- 'min_memories': 5000,
46
- 'max_memories': float('inf'),
47
- 'target_samples': 2000,
48
- 'n_estimators': 100,
49
- 'max_depth': 6,
50
- },
51
- }
52
-
53
- # ============================================================================
54
- # LightGBM Parameters
55
- # ============================================================================
56
-
57
- # LightGBM bootstrap parameters — MORE aggressive regularization than
58
- # real training because synthetic data has systematic biases
59
- BOOTSTRAP_PARAMS = {
60
- 'objective': 'lambdarank',
61
- 'metric': 'ndcg',
62
- 'ndcg_eval_at': [5, 10],
63
- 'learning_rate': 0.1,
64
- 'num_leaves': 8,
65
- 'max_depth': 3,
66
- 'min_child_samples': 5,
67
- 'subsample': 0.7,
68
- 'reg_alpha': 0.5,
69
- 'reg_lambda': 2.0,
70
- 'boosting_type': 'dart',
71
- 'verbose': -1,
72
- }
73
-
74
- # ============================================================================
75
- # Text Processing
76
- # ============================================================================
77
-
78
- # English stopwords for keyword extraction (no external deps)
79
- STOPWORDS = frozenset({
80
- 'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
81
- 'of', 'with', 'by', 'from', 'is', 'it', 'this', 'that', 'was', 'are',
82
- 'be', 'has', 'have', 'had', 'do', 'does', 'did', 'will', 'would',
83
- 'could', 'should', 'may', 'might', 'can', 'not', 'no', 'if', 'then',
84
- 'so', 'as', 'up', 'out', 'about', 'into', 'over', 'after', 'before',
85
- 'when', 'where', 'how', 'what', 'which', 'who', 'whom', 'why',
86
- 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other',
87
- 'some', 'such', 'than', 'too', 'very', 'just', 'also', 'now',
88
- 'here', 'there', 'use', 'used', 'using', 'make', 'made',
89
- 'need', 'needed', 'get', 'got', 'set', 'new', 'old', 'one', 'two',
90
- })
91
-
92
- # Minimum word length for keyword extraction
93
- MIN_KEYWORD_LENGTH = 3
@@ -1,316 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Database query utilities for synthetic bootstrap.
6
-
7
- All read-only queries against memory.db used by SyntheticBootstrapper.
8
- These functions are stateless and take db_path as parameter.
9
- """
10
-
11
- import logging
12
- import sqlite3
13
- from pathlib import Path
14
- from typing import List, Optional, Set
15
-
16
- from .text_utils import clean_fts_query
17
-
18
- logger = logging.getLogger("superlocalmemory.learning.bootstrap.db_queries")
19
-
20
-
21
def get_memory_count(db_path: Path) -> int:
    """
    Count total memories in memory.db.

    Args:
        db_path: Path to memory.db.

    Returns:
        Number of rows in the ``memories`` table, or 0 if the file does not
        exist or the query fails (the failure is logged as a warning).
    """
    if not db_path.exists():
        return 0
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            row = conn.execute('SELECT COUNT(*) FROM memories').fetchone()
            return row[0]
        finally:
            # Close on every path: a failing query (e.g. missing table)
            # must not leak the connection.
            conn.close()
    except Exception as e:
        logger.warning("Failed to count memories: %s", e)
        return 0
43
-
44
-
45
def get_memories_by_access(db_path: Path, min_access: int = 5) -> List[dict]:
    """
    Fetch memories with access_count >= min_access from memory.db.

    These are memories the user keeps coming back to — strong positive signal.

    Args:
        db_path: Path to memory.db.
        min_access: Minimum access_count threshold.

    Returns:
        Up to 100 memory dicts ordered by access_count descending, or an
        empty list if the file does not exist or the query fails (the
        failure is logged as a warning).
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE access_count >= ?
                ORDER BY access_count DESC
                LIMIT 100
            ''', (min_access,)).fetchall()
            return [dict(row) for row in rows]
        finally:
            # Close on every path so a failing query does not leak the
            # connection.
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch high-access memories: %s", e)
        return []
78
-
79
-
80
def get_memories_by_importance(db_path: Path, min_importance: int = 8) -> List[dict]:
    """
    Fetch memories with importance >= min_importance from memory.db.

    High importance = user explicitly rated these as valuable.

    Args:
        db_path: Path to memory.db.
        min_importance: Minimum importance threshold.

    Returns:
        Up to 100 memory dicts ordered by importance descending, or an
        empty list if the file does not exist or the query fails (the
        failure is logged as a warning).
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE importance >= ?
                ORDER BY importance DESC
                LIMIT 100
            ''', (min_importance,)).fetchall()
            return [dict(row) for row in rows]
        finally:
            # Close on every path so a failing query does not leak the
            # connection.
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch high-importance memories: %s", e)
        return []
113
-
114
-
115
def get_recent_memories(db_path: Path, limit: int = 30) -> List[dict]:
    """
    Fetch the N most recently created memories.

    Args:
        db_path: Path to memory.db.
        limit: Maximum number of memories to return.

    Returns:
        List of memory dicts, sorted by created_at DESC, or an empty list
        if the file does not exist or the query fails (the failure is
        logged as a warning).
    """
    if not db_path.exists():
        return []
    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute('''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                ORDER BY created_at DESC
                LIMIT ?
            ''', (limit,)).fetchall()
            return [dict(row) for row in rows]
        finally:
            # Close on every path so a failing query does not leak the
            # connection.
            conn.close()
    except Exception as e:
        logger.warning("Failed to fetch recent memories: %s", e)
        return []
145
-
146
-
147
def get_learned_patterns(
    db_path: Path,
    min_confidence: float = 0.7,
) -> List[dict]:
    """
    Read high-confidence rows from the identity_patterns table of memory.db.

    The table is optional: databases that do not have it (the original notes
    pre-v2.3 databases) yield an empty list rather than an error.

    Args:
        db_path: Path to memory.db.
        min_confidence: Minimum confidence threshold.

    Returns:
        Up to 50 pattern dicts ordered by confidence descending; an empty
        list when the file or table is missing or the query fails.
    """
    if not db_path.exists():
        return []
    try:
        connection = sqlite3.connect(str(db_path), timeout=5)
        try:
            connection.row_factory = sqlite3.Row
            cur = connection.cursor()

            # Probe the schema first so older databases stay supported.
            cur.execute('''
                SELECT name FROM sqlite_master
                WHERE type='table' AND name='identity_patterns'
            ''')
            table_row = cur.fetchone()
            if table_row is None:
                return []

            cur.execute('''
                SELECT id, pattern_type, key, value, confidence,
                       evidence_count, category
                FROM identity_patterns
                WHERE confidence >= ?
                ORDER BY confidence DESC
                LIMIT 50
            ''', (min_confidence,))
            return list(map(dict, cur.fetchall()))
        finally:
            connection.close()
    except Exception as e:
        logger.warning("Failed to fetch learned patterns: %s", e)
        return []
198
-
199
-
200
def search_memories(db_path: Path, query: str, limit: int = 20) -> List[dict]:
    """
    Run a plain FTS5 MATCH against memory.db.

    The raw query is converted with clean_fts_query and matched against the
    memories_fts table, joined back to memories by rowid and ordered by rank.

    Args:
        db_path: Path to memory.db.
        query: Search query string.
        limit: Maximum results to return.

    Returns:
        List of matching memory dicts; empty when the file is missing, the
        query is blank or yields no tokens, or the search fails.
    """
    if not db_path.exists():
        return []
    is_blank = (not query) or (not query.strip())
    if is_blank:
        return []

    try:
        connection = sqlite3.connect(str(db_path), timeout=5)
        try:
            connection.row_factory = sqlite3.Row
            cur = connection.cursor()

            match_expr = clean_fts_query(query)
            if not match_expr:
                return []

            cur.execute('''
                SELECT m.id, m.content, m.summary, m.project_name, m.tags,
                       m.category, m.importance, m.created_at, m.access_count
                FROM memories m
                JOIN memories_fts fts ON m.id = fts.rowid
                WHERE memories_fts MATCH ?
                ORDER BY rank
                LIMIT ?
            ''', (match_expr, limit))
            return list(map(dict, cur.fetchall()))
        finally:
            connection.close()
    except Exception as e:
        # Logged at debug level: the FTS table may simply not exist yet.
        logger.debug("FTS5 search failed (may not exist yet): %s", e)
        return []
247
-
248
-
249
def find_negative_memories(
    db_path: Path,
    anchor_memory: dict,
    exclude_ids: Optional[Set[int]] = None,
    limit: int = 2,
) -> List[dict]:
    """
    Find memories dissimilar to the anchor (for negative examples).

    Simple heuristic: pick random memories whose project differs from the
    anchor's project AND whose category differs from the anchor's category.
    Each filter is applied only when the anchor defines that field; with
    neither defined, any other memory qualifies.

    The anchor itself (by its 'id', when present) is never returned, and
    memories with no project/category set count as "different".

    Args:
        db_path: Path to memory.db.
        anchor_memory: The reference memory to find negatives for.
        exclude_ids: Set of memory IDs to exclude from results. Not mutated.
        limit: Maximum number of negatives to return.

    Returns:
        List of negative example memory dicts in random order, or an empty
        list if the file does not exist or the query fails.
    """
    if not db_path.exists():
        return []

    try:
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            conn.row_factory = sqlite3.Row

            # Work on a copy so the caller's set is left untouched, and make
            # sure a memory can never be its own negative example.
            excluded = set(exclude_ids or ())
            anchor_id = anchor_memory.get('id')
            if anchor_id is not None:
                excluded.add(anchor_id)

            conditions = []
            params: list = []

            # "IS NOT" is the NULL-safe inequality in SQLite; plain "!="
            # evaluates to NULL for rows without a project/category and
            # would silently drop them from the candidate pool.
            anchor_project = anchor_memory.get('project_name', '')
            if anchor_project:
                conditions.append('project_name IS NOT ?')
                params.append(anchor_project)

            anchor_category = anchor_memory.get('category', '')
            if anchor_category:
                conditions.append('category IS NOT ?')
                params.append(anchor_category)

            if excluded:
                placeholders = ','.join('?' for _ in excluded)
                conditions.append(f'id NOT IN ({placeholders})')
                params.extend(excluded)

            # Only fixed fragments and '?' placeholders are interpolated;
            # every value is bound as a parameter.
            where_clause = ' AND '.join(conditions) if conditions else '1=1'

            rows = conn.execute(f'''
                SELECT id, content, summary, project_name, tags,
                       category, importance, created_at, access_count
                FROM memories
                WHERE {where_clause}
                ORDER BY RANDOM()
                LIMIT ?
            ''', (*params, limit)).fetchall()
            return [dict(row) for row in rows]
        finally:
            conn.close()
    except Exception as e:
        logger.debug("Failed to find negative memories: %s", e)
        return []
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Sampling utilities for synthetic bootstrap.
6
-
7
- Functions for diverse sampling and record aggregation.
8
- """
9
-
10
- from typing import Dict, List
11
-
12
-
13
def diverse_sample(
    records: List[dict],
    target: int,
) -> List[dict]:
    """
    Sample records while maintaining source diversity.

    Every source strategy gets an equal share of the target
    (``target // number_of_sources``, at least 1), taken from the front of
    that source's records. Any shortfall is then filled with not-yet-chosen
    records in their original order, so the training data isn't dominated
    by one strategy.

    Args:
        records: List of training records. 'source' groups them (missing
            means 'unknown'); 'query_hash' and 'memory_id', when both are
            present, identify a record during the fill step.
        target: Target number of samples to return.

    Returns:
        Sampled list of records (at most target items). The input list
        itself is returned when it already fits within target; a
        non-positive target yields an empty list.
    """
    if target <= 0:
        # A negative target would otherwise slice from the end of the list.
        return []
    if len(records) <= target:
        return records

    def record_key(record: dict):
        # Prefer the (query_hash, memory_id) pair; fall back to object
        # identity so records without those fields do not raise KeyError.
        if 'query_hash' in record and 'memory_id' in record:
            return (record['query_hash'], record['memory_id'])
        return ('__object__', id(record))

    # Group by source, preserving first-seen order of sources and records.
    by_source: Dict[str, List[dict]] = {}
    for r in records:
        by_source.setdefault(r.get('source', 'unknown'), []).append(r)

    # records is non-empty here, so there is at least one source.
    per_source = max(1, target // len(by_source))
    sampled: List[dict] = []
    for source_records in by_source.values():
        sampled.extend(source_records[:per_source])

    # If under target, fill from the remaining records.
    if len(sampled) < target:
        used = {record_key(r) for r in sampled}
        for r in records:
            if len(sampled) >= target:
                break
            key = record_key(r)
            if key not in used:
                sampled.append(r)
                used.add(key)

    # With more sources than target the per-source pass can overshoot.
    return sampled[:target]
66
-
67
-
68
def count_sources(records: List[dict]) -> Dict[str, int]:
    """
    Tally how many records each source strategy produced.

    Args:
        records: List of training records; a record without a 'source'
            field is counted under 'unknown'.

    Returns:
        Dict mapping source name to count, keyed in first-seen order.
    """
    tally: Dict[str, int] = {}
    for record in records:
        label = record.get('source', 'unknown')
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1
    return tally
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
- """
5
- Text processing utilities for synthetic bootstrap.
6
-
7
- Simple keyword extraction and text processing functions
8
- with no external NLP dependencies.
9
- """
10
-
11
- import re
12
- from collections import Counter
13
- from typing import List
14
-
15
- from .constants import STOPWORDS, MIN_KEYWORD_LENGTH
16
-
17
-
18
def extract_keywords(content: str, top_n: int = 3) -> List[str]:
    """
    Pick the most frequent meaningful words from memory content.

    The text is lower-cased and tokenized with a regex (tokens start with an
    ASCII letter and may contain letters, digits, '_', '.' and '-'). Tokens
    found in STOPWORDS or shorter than MIN_KEYWORD_LENGTH are discarded, and
    the remainder is ranked by frequency.

    No external NLP dependencies — just regex + counter.

    Args:
        content: Text content to extract keywords from.
        top_n: Number of top keywords to return.

    Returns:
        List of top N keywords by frequency; empty when content is empty or
        nothing survives filtering.
    """
    if not content:
        return []

    token_pattern = r'[a-zA-Z][a-zA-Z0-9_.-]*[a-zA-Z0-9]|[a-zA-Z]'
    frequencies = Counter(
        token
        for token in re.findall(token_pattern, content.lower())
        if len(token) >= MIN_KEYWORD_LENGTH and token not in STOPWORDS
    )
    if not frequencies:
        return []

    return [keyword for keyword, _hits in frequencies.most_common(top_n)]
54
-
55
-
56
def clean_fts_query(query: str) -> str:
    """
    Clean and prepare query for FTS5 search.

    Extracts word tokens and joins them with OR for FTS5 MATCH syntax.
    Tokens that spell an FTS5 keyword (AND, OR, NOT, NEAR — upper-case
    only, as FTS5 keywords are case sensitive) are wrapped in double quotes
    so they are searched as plain terms instead of being parsed as
    operators, which would otherwise produce a MATCH syntax error.

    Args:
        query: Raw query string.

    Returns:
        FTS5-compatible query string, or empty string if no valid tokens.
    """
    fts_keywords = ('AND', 'OR', 'NOT', 'NEAR')
    fts_tokens = re.findall(r'\w+', query)
    if not fts_tokens:
        return ''
    # \w+ tokens never contain a double quote, so plain wrapping is safe.
    return ' OR '.join(
        f'"{token}"' if token in fts_keywords else token
        for token in fts_tokens
    )