superlocalmemory 2.8.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +9 -1
  3. package/NOTICE +63 -0
  4. package/README.md +165 -480
  5. package/bin/slm +17 -449
  6. package/bin/slm-npm +2 -2
  7. package/bin/slm.bat +4 -2
  8. package/conftest.py +5 -0
  9. package/docs/api-reference.md +284 -0
  10. package/docs/architecture.md +149 -0
  11. package/docs/auto-memory.md +150 -0
  12. package/docs/cli-reference.md +276 -0
  13. package/docs/compliance.md +191 -0
  14. package/docs/configuration.md +182 -0
  15. package/docs/getting-started.md +102 -0
  16. package/docs/ide-setup.md +261 -0
  17. package/docs/mcp-tools.md +220 -0
  18. package/docs/migration-from-v2.md +170 -0
  19. package/docs/profiles.md +173 -0
  20. package/docs/troubleshooting.md +310 -0
  21. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  22. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  23. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  24. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  25. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  26. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  27. package/ide/configs/cursor-mcp.json +15 -0
  28. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  29. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  30. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  31. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  32. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  33. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  34. package/{configs → ide/configs}/zed-mcp.json +2 -2
  35. package/{hooks → ide/hooks}/context-hook.js +9 -20
  36. package/ide/hooks/memory-list-skill.js +70 -0
  37. package/ide/hooks/memory-profile-skill.js +101 -0
  38. package/ide/hooks/memory-recall-skill.js +62 -0
  39. package/ide/hooks/memory-remember-skill.js +68 -0
  40. package/ide/hooks/memory-reset-skill.js +160 -0
  41. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  42. package/ide/integrations/langchain/README.md +106 -0
  43. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  44. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  45. package/ide/integrations/langchain/pyproject.toml +38 -0
  46. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  47. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  48. package/ide/integrations/langchain/tests/test_security.py +117 -0
  49. package/ide/integrations/llamaindex/README.md +81 -0
  50. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  51. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  52. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  53. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  54. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  55. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  56. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  57. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  58. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  59. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  60. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  61. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  62. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  63. package/package.json +13 -22
  64. package/pyproject.toml +85 -0
  65. package/scripts/build-dmg.sh +417 -0
  66. package/scripts/install-skills.ps1 +334 -0
  67. package/{install.ps1 → scripts/install.ps1} +36 -4
  68. package/{install.sh → scripts/install.sh} +14 -13
  69. package/scripts/postinstall.js +2 -2
  70. package/scripts/start-dashboard.ps1 +52 -0
  71. package/scripts/start-dashboard.sh +41 -0
  72. package/scripts/sync-wiki.ps1 +127 -0
  73. package/scripts/sync-wiki.sh +82 -0
  74. package/scripts/test-dmg.sh +161 -0
  75. package/scripts/test-npm-package.ps1 +252 -0
  76. package/scripts/test-npm-package.sh +207 -0
  77. package/scripts/verify-install.ps1 +294 -0
  78. package/scripts/verify-install.sh +266 -0
  79. package/src/superlocalmemory/__init__.py +0 -0
  80. package/src/superlocalmemory/attribution/__init__.py +9 -0
  81. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  82. package/src/superlocalmemory/attribution/signer.py +153 -0
  83. package/src/superlocalmemory/attribution/watermark.py +189 -0
  84. package/src/superlocalmemory/cli/__init__.py +5 -0
  85. package/src/superlocalmemory/cli/commands.py +245 -0
  86. package/src/superlocalmemory/cli/main.py +89 -0
  87. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  88. package/src/superlocalmemory/cli/post_install.py +99 -0
  89. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  90. package/src/superlocalmemory/compliance/__init__.py +0 -0
  91. package/src/superlocalmemory/compliance/abac.py +204 -0
  92. package/src/superlocalmemory/compliance/audit.py +314 -0
  93. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  94. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  95. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  96. package/src/superlocalmemory/compliance/retention.py +232 -0
  97. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  98. package/src/superlocalmemory/core/__init__.py +0 -0
  99. package/src/superlocalmemory/core/config.py +391 -0
  100. package/src/superlocalmemory/core/embeddings.py +293 -0
  101. package/src/superlocalmemory/core/engine.py +701 -0
  102. package/src/superlocalmemory/core/hooks.py +65 -0
  103. package/src/superlocalmemory/core/maintenance.py +172 -0
  104. package/src/superlocalmemory/core/modes.py +140 -0
  105. package/src/superlocalmemory/core/profiles.py +234 -0
  106. package/src/superlocalmemory/core/registry.py +117 -0
  107. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  108. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  109. package/src/superlocalmemory/encoding/__init__.py +0 -0
  110. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  111. package/src/superlocalmemory/encoding/emotional.py +125 -0
  112. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  113. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  114. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  115. package/src/superlocalmemory/encoding/foresight.py +91 -0
  116. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  117. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  118. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  119. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  120. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  121. package/src/superlocalmemory/encoding/type_router.py +235 -0
  122. package/src/superlocalmemory/hooks/__init__.py +3 -0
  123. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  124. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  125. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  126. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  127. package/src/superlocalmemory/infra/__init__.py +3 -0
  128. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  129. package/src/superlocalmemory/infra/backup.py +317 -0
  130. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  131. package/src/superlocalmemory/infra/event_bus.py +381 -0
  132. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  133. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  134. package/src/superlocalmemory/learning/__init__.py +0 -0
  135. package/src/superlocalmemory/learning/adaptive.py +172 -0
  136. package/src/superlocalmemory/learning/behavioral.py +490 -0
  137. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  138. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  139. package/src/superlocalmemory/learning/cross_project.py +399 -0
  140. package/src/superlocalmemory/learning/database.py +376 -0
  141. package/src/superlocalmemory/learning/engagement.py +323 -0
  142. package/src/superlocalmemory/learning/features.py +138 -0
  143. package/src/superlocalmemory/learning/feedback.py +316 -0
  144. package/src/superlocalmemory/learning/outcomes.py +255 -0
  145. package/src/superlocalmemory/learning/project_context.py +366 -0
  146. package/src/superlocalmemory/learning/ranker.py +155 -0
  147. package/src/superlocalmemory/learning/source_quality.py +303 -0
  148. package/src/superlocalmemory/learning/workflows.py +309 -0
  149. package/src/superlocalmemory/llm/__init__.py +0 -0
  150. package/src/superlocalmemory/llm/backbone.py +316 -0
  151. package/src/superlocalmemory/math/__init__.py +0 -0
  152. package/src/superlocalmemory/math/fisher.py +356 -0
  153. package/src/superlocalmemory/math/langevin.py +398 -0
  154. package/src/superlocalmemory/math/sheaf.py +257 -0
  155. package/src/superlocalmemory/mcp/__init__.py +0 -0
  156. package/src/superlocalmemory/mcp/resources.py +245 -0
  157. package/src/superlocalmemory/mcp/server.py +61 -0
  158. package/src/superlocalmemory/mcp/tools.py +18 -0
  159. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  160. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  161. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  162. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  163. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  164. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  165. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  166. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  167. package/src/superlocalmemory/retrieval/engine.py +390 -0
  168. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  169. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  170. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  171. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  172. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  173. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  174. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  175. package/src/superlocalmemory/server/__init__.py +1 -0
  176. package/src/superlocalmemory/server/api.py +248 -0
  177. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  178. package/src/superlocalmemory/server/routes/agents.py +107 -0
  179. package/src/superlocalmemory/server/routes/backup.py +91 -0
  180. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  181. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  182. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  183. package/src/superlocalmemory/server/routes/events.py +183 -0
  184. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  185. package/src/superlocalmemory/server/routes/learning.py +273 -0
  186. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  187. package/src/superlocalmemory/server/routes/memories.py +399 -0
  188. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  189. package/src/superlocalmemory/server/routes/stats.py +346 -0
  190. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  191. package/src/superlocalmemory/server/routes/ws.py +82 -0
  192. package/src/superlocalmemory/server/security_middleware.py +57 -0
  193. package/src/superlocalmemory/server/ui.py +245 -0
  194. package/src/superlocalmemory/storage/__init__.py +0 -0
  195. package/src/superlocalmemory/storage/access_control.py +182 -0
  196. package/src/superlocalmemory/storage/database.py +594 -0
  197. package/src/superlocalmemory/storage/migrations.py +303 -0
  198. package/src/superlocalmemory/storage/models.py +406 -0
  199. package/src/superlocalmemory/storage/schema.py +726 -0
  200. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  201. package/src/superlocalmemory/trust/__init__.py +0 -0
  202. package/src/superlocalmemory/trust/gate.py +130 -0
  203. package/src/superlocalmemory/trust/provenance.py +124 -0
  204. package/src/superlocalmemory/trust/scorer.py +347 -0
  205. package/src/superlocalmemory/trust/signals.py +153 -0
  206. package/ui/index.html +278 -5
  207. package/ui/js/auto-settings.js +70 -0
  208. package/ui/js/dashboard.js +90 -0
  209. package/ui/js/fact-detail.js +92 -0
  210. package/ui/js/feedback.js +2 -2
  211. package/ui/js/ide-status.js +102 -0
  212. package/ui/js/math-health.js +98 -0
  213. package/ui/js/recall-lab.js +127 -0
  214. package/ui/js/settings.js +2 -2
  215. package/ui/js/trust-dashboard.js +73 -0
  216. package/api_server.py +0 -724
  217. package/bin/aider-smart +0 -72
  218. package/bin/superlocalmemoryv2-learning +0 -4
  219. package/bin/superlocalmemoryv2-list +0 -3
  220. package/bin/superlocalmemoryv2-patterns +0 -4
  221. package/bin/superlocalmemoryv2-profile +0 -3
  222. package/bin/superlocalmemoryv2-recall +0 -3
  223. package/bin/superlocalmemoryv2-remember +0 -3
  224. package/bin/superlocalmemoryv2-reset +0 -3
  225. package/bin/superlocalmemoryv2-status +0 -3
  226. package/configs/chatgpt-desktop-mcp.json +0 -16
  227. package/configs/cursor-mcp.json +0 -15
  228. package/docs/SECURITY-QUICK-REFERENCE.md +0 -214
  229. package/hooks/memory-list-skill.js +0 -139
  230. package/hooks/memory-profile-skill.js +0 -273
  231. package/hooks/memory-recall-skill.js +0 -114
  232. package/hooks/memory-remember-skill.js +0 -127
  233. package/hooks/memory-reset-skill.js +0 -274
  234. package/mcp_server.py +0 -1800
  235. package/requirements-core.txt +0 -22
  236. package/requirements-learning.txt +0 -12
  237. package/requirements.txt +0 -12
  238. package/src/agent_registry.py +0 -411
  239. package/src/auth_middleware.py +0 -61
  240. package/src/auto_backup.py +0 -459
  241. package/src/behavioral/__init__.py +0 -49
  242. package/src/behavioral/behavioral_listener.py +0 -203
  243. package/src/behavioral/behavioral_patterns.py +0 -275
  244. package/src/behavioral/cross_project_transfer.py +0 -206
  245. package/src/behavioral/outcome_inference.py +0 -194
  246. package/src/behavioral/outcome_tracker.py +0 -193
  247. package/src/behavioral/tests/__init__.py +0 -4
  248. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  249. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  250. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  251. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  252. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  253. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  254. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  255. package/src/cache_manager.py +0 -518
  256. package/src/compliance/__init__.py +0 -48
  257. package/src/compliance/abac_engine.py +0 -149
  258. package/src/compliance/abac_middleware.py +0 -116
  259. package/src/compliance/audit_db.py +0 -215
  260. package/src/compliance/audit_logger.py +0 -148
  261. package/src/compliance/retention_manager.py +0 -289
  262. package/src/compliance/retention_scheduler.py +0 -186
  263. package/src/compliance/tests/__init__.py +0 -4
  264. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  265. package/src/compliance/tests/test_abac_engine.py +0 -124
  266. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  267. package/src/compliance/tests/test_audit_db.py +0 -123
  268. package/src/compliance/tests/test_audit_logger.py +0 -98
  269. package/src/compliance/tests/test_mcp_audit.py +0 -128
  270. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  271. package/src/compliance/tests/test_retention_manager.py +0 -131
  272. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  273. package/src/compression/__init__.py +0 -25
  274. package/src/compression/cli.py +0 -150
  275. package/src/compression/cold_storage.py +0 -217
  276. package/src/compression/config.py +0 -72
  277. package/src/compression/orchestrator.py +0 -133
  278. package/src/compression/tier2_compressor.py +0 -228
  279. package/src/compression/tier3_compressor.py +0 -153
  280. package/src/compression/tier_classifier.py +0 -148
  281. package/src/db_connection_manager.py +0 -536
  282. package/src/embedding_engine.py +0 -63
  283. package/src/embeddings/__init__.py +0 -47
  284. package/src/embeddings/cache.py +0 -70
  285. package/src/embeddings/cli.py +0 -113
  286. package/src/embeddings/constants.py +0 -47
  287. package/src/embeddings/database.py +0 -91
  288. package/src/embeddings/engine.py +0 -247
  289. package/src/embeddings/model_loader.py +0 -145
  290. package/src/event_bus.py +0 -562
  291. package/src/graph/__init__.py +0 -36
  292. package/src/graph/build_helpers.py +0 -74
  293. package/src/graph/cli.py +0 -87
  294. package/src/graph/cluster_builder.py +0 -188
  295. package/src/graph/cluster_summary.py +0 -148
  296. package/src/graph/constants.py +0 -47
  297. package/src/graph/edge_builder.py +0 -162
  298. package/src/graph/entity_extractor.py +0 -95
  299. package/src/graph/graph_core.py +0 -226
  300. package/src/graph/graph_search.py +0 -231
  301. package/src/graph/hierarchical.py +0 -207
  302. package/src/graph/schema.py +0 -99
  303. package/src/graph_engine.py +0 -52
  304. package/src/hnsw_index.py +0 -628
  305. package/src/hybrid_search.py +0 -46
  306. package/src/learning/__init__.py +0 -217
  307. package/src/learning/adaptive_ranker.py +0 -682
  308. package/src/learning/bootstrap/__init__.py +0 -69
  309. package/src/learning/bootstrap/constants.py +0 -93
  310. package/src/learning/bootstrap/db_queries.py +0 -316
  311. package/src/learning/bootstrap/sampling.py +0 -82
  312. package/src/learning/bootstrap/text_utils.py +0 -71
  313. package/src/learning/cross_project_aggregator.py +0 -857
  314. package/src/learning/db/__init__.py +0 -40
  315. package/src/learning/db/constants.py +0 -44
  316. package/src/learning/db/schema.py +0 -279
  317. package/src/learning/engagement_tracker.py +0 -628
  318. package/src/learning/feature_extractor.py +0 -708
  319. package/src/learning/feedback_collector.py +0 -806
  320. package/src/learning/learning_db.py +0 -915
  321. package/src/learning/project_context_manager.py +0 -572
  322. package/src/learning/ranking/__init__.py +0 -33
  323. package/src/learning/ranking/constants.py +0 -84
  324. package/src/learning/ranking/helpers.py +0 -278
  325. package/src/learning/source_quality_scorer.py +0 -676
  326. package/src/learning/synthetic_bootstrap.py +0 -755
  327. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  328. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  329. package/src/learning/tests/test_aggregator.py +0 -306
  330. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  331. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  332. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  333. package/src/learning/tests/test_feedback_collector.py +0 -294
  334. package/src/learning/tests/test_learning_db.py +0 -602
  335. package/src/learning/tests/test_learning_db_v28.py +0 -110
  336. package/src/learning/tests/test_learning_init_v28.py +0 -48
  337. package/src/learning/tests/test_outcome_signals.py +0 -48
  338. package/src/learning/tests/test_project_context.py +0 -292
  339. package/src/learning/tests/test_schema_migration.py +0 -319
  340. package/src/learning/tests/test_signal_inference.py +0 -397
  341. package/src/learning/tests/test_source_quality.py +0 -351
  342. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  343. package/src/learning/tests/test_workflow_miner.py +0 -318
  344. package/src/learning/workflow_pattern_miner.py +0 -655
  345. package/src/lifecycle/__init__.py +0 -54
  346. package/src/lifecycle/bounded_growth.py +0 -239
  347. package/src/lifecycle/compaction_engine.py +0 -226
  348. package/src/lifecycle/lifecycle_engine.py +0 -355
  349. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  350. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  351. package/src/lifecycle/retention_policy.py +0 -285
  352. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  353. package/src/lifecycle/tests/test_compaction.py +0 -179
  354. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  355. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  356. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  357. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  358. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  359. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  360. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  361. package/src/mcp_tools_v28.py +0 -281
  362. package/src/memory/__init__.py +0 -36
  363. package/src/memory/cli.py +0 -205
  364. package/src/memory/constants.py +0 -39
  365. package/src/memory/helpers.py +0 -28
  366. package/src/memory/schema.py +0 -166
  367. package/src/memory-profiles.py +0 -595
  368. package/src/memory-reset.py +0 -491
  369. package/src/memory_compression.py +0 -989
  370. package/src/memory_store_v2.py +0 -1155
  371. package/src/migrate_v1_to_v2.py +0 -629
  372. package/src/pattern_learner.py +0 -34
  373. package/src/patterns/__init__.py +0 -24
  374. package/src/patterns/analyzers.py +0 -251
  375. package/src/patterns/learner.py +0 -271
  376. package/src/patterns/scoring.py +0 -171
  377. package/src/patterns/store.py +0 -225
  378. package/src/patterns/terminology.py +0 -140
  379. package/src/provenance_tracker.py +0 -312
  380. package/src/qualixar_attribution.py +0 -139
  381. package/src/qualixar_watermark.py +0 -78
  382. package/src/query_optimizer.py +0 -511
  383. package/src/rate_limiter.py +0 -83
  384. package/src/search/__init__.py +0 -20
  385. package/src/search/cli.py +0 -77
  386. package/src/search/constants.py +0 -26
  387. package/src/search/engine.py +0 -241
  388. package/src/search/fusion.py +0 -122
  389. package/src/search/index_loader.py +0 -114
  390. package/src/search/methods.py +0 -162
  391. package/src/search_engine_v2.py +0 -401
  392. package/src/setup_validator.py +0 -482
  393. package/src/subscription_manager.py +0 -391
  394. package/src/tree/__init__.py +0 -59
  395. package/src/tree/builder.py +0 -185
  396. package/src/tree/nodes.py +0 -202
  397. package/src/tree/queries.py +0 -257
  398. package/src/tree/schema.py +0 -80
  399. package/src/tree_manager.py +0 -19
  400. package/src/trust/__init__.py +0 -45
  401. package/src/trust/constants.py +0 -66
  402. package/src/trust/queries.py +0 -157
  403. package/src/trust/schema.py +0 -95
  404. package/src/trust/scorer.py +0 -299
  405. package/src/trust/signals.py +0 -95
  406. package/src/trust_scorer.py +0 -44
  407. package/ui/app.js +0 -1588
  408. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  409. package/ui/js/graph-cytoscape.js +0 -1168
  410. package/ui/js/graph-d3-backup.js +0 -32
  411. package/ui/js/graph.js +0 -32
  412. package/ui_server.py +0 -266
  413. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  414. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  415. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  416. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  417. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  418. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  419. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  420. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  421. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  422. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  423. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  424. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  425. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  426. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  427. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  428. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  429. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  430. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  431. /package/{completions → ide/completions}/slm.bash +0 -0
  432. /package/{completions → ide/completions}/slm.zsh +0 -0
  433. /package/{configs → ide/configs}/cody-commands.json +0 -0
  434. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
@@ -0,0 +1,775 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Fact extraction — converts raw conversation turns into structured AtomicFacts.
6
+
7
+ Three extraction strategies aligned to operating modes:
8
+ Mode A Zero LLM — regex entities, date inference, keyword type classification.
9
+ Mode B Local Ollama — LLM-guided extraction with JSON output, Mode A fallback.
10
+ Mode C Cloud LLM — narrative fact extraction (2-5 per chunk), richest quality.
11
+
12
+ This module is the primary driver of encoding quality. Competitor analysis
13
+ (EverMemOS 93%, Hindsight 89.6%, Mastra 94.9%) shows that structured
14
+ extraction at encoding time — not retrieval sophistication — accounts for
15
+ the majority of benchmark score differences.
16
+
17
+ Key patterns implemented:
18
+ - Conversation chunking (5-10 turns, 2-turn overlap)
19
+ - Three-date temporal model (observation, referenced, interval)
20
+ - Typed fact classification (episodic / semantic / opinion / temporal)
21
+ - Importance scoring (entity frequency + emotional markers + recency)
22
+ - Narrative fact extraction in LLM modes (self-contained, context-rich)
23
+
24
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
25
+ License: MIT
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import logging
32
+ import re
33
+ import uuid
34
+ from typing import Any, Protocol, runtime_checkable
35
+
36
+ from superlocalmemory.core.config import EncodingConfig
37
+ from superlocalmemory.storage.models import AtomicFact, FactType, Mode, SignalType
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Protocols — accept any LLM / embedder without importing concrete classes
44
+ # ---------------------------------------------------------------------------
45
+
46
@runtime_checkable
class LLMBackboneProtocol(Protocol):
    """Minimal interface the fact extractor needs from an LLM.

    This is a structural (duck-typed) protocol: any object exposing
    ``is_available`` and ``generate`` satisfies it, so concrete LLM
    classes never have to be imported here.  Because the class is
    ``runtime_checkable``, ``isinstance`` checks work, but they only
    verify that the methods exist — not their signatures.
    """

    def is_available(self) -> bool:
        """Return True when the backend can currently serve requests."""
        ...

    def generate(
        self,
        prompt: str,
        system: str = "",
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> str:
        """Return the model's text completion for *prompt*.

        Args:
            prompt: User-level prompt text.
            system: Optional system prompt; empty string by default.
            temperature: Sampling temperature; ``None`` by default.
            max_tokens: Completion length cap; ``None`` by default.
        """
        ...
58
+
59
+
60
@runtime_checkable
class EmbedderProtocol(Protocol):
    """Minimal interface for computing embeddings (Mode A type classification).

    Structural protocol: any object with an ``embed`` method satisfies it.
    ``isinstance`` checks only confirm the method is present.
    """

    def embed(self, text: str) -> list[float]:
        """Return the embedding vector for *text* as a list of floats."""
        ...
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Constants — regex patterns, markers, templates
69
+ # ---------------------------------------------------------------------------
70
+
71
# Matches the first date-like expression in free text (case-insensitive).
# Exactly one of the four capture groups is populated per match:
#   group 1 — ISO date            (2024-01-15)
#   group 2 — slash-separated     (1/15/2024, 1/15/24)
#   group 3 — "Month Day[, Year]" (January 15, 2024)
#   group 4 — relative expression (yesterday, last <word>, next <word>)
_DATE_RE = re.compile(
    r"\b(\d{4}-\d{2}-\d{2})"  # ISO
    r"|\b(\d{1,2}/\d{1,2}/\d{2,4})"  # US
    r"|\b((?:January|February|March|April|May|June|July"
    r"|August|September|October|November|December)"
    r"\s+\d{1,2}(?:,?\s+\d{4})?)"  # Month Day Year
    r"|\b(yesterday|today|tomorrow|last\s+\w+|next\s+\w+)\b",
    re.IGNORECASE,
)
80
+
81
# Matches "from X to Y" / "between X and Y" style intervals (case-insensitive).
# Group 1 is the start expression, group 2 the end expression; the end is
# terminated by the first '.', ',' or ';' or by the end of the string.
_INTERVAL_RE = re.compile(
    r"\b(?:from|between)\s+(.+?)\s+(?:to|and|until|through)\s+(.+?)(?:[.,;]|$)",
    re.IGNORECASE,
)
85
+
86
# Candidate named entities: one capitalized word optionally followed by up to
# three more capitalized words, each separated by a single whitespace char.
_ENTITY_RE = re.compile(
    r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b"  # Capitalized word sequences
)
89
+
90
# Non-empty text between a pair of double quotes; group 1 excludes the quotes.
_QUOTED_RE = re.compile(r'"([^"]+)"')  # Quoted strings as entities
91
+
92
# Words and phrases that signal a subjective statement: first-person stance
# phrases, hedges, third-person attitude verbs, evaluative adjectives and
# modal/comparative cues.  Matched on word boundaries, case-insensitively.
_OPINION_MARKERS = re.compile(
    r"\b(?:I think|I believe|I feel|in my opinion|I prefer|I like|I love|"
    r"I hate|I want|I need|I wish|personally|my favorite|"
    r"probably|seems like|might be|could be|I guess|"
    r"thinks?|believes?|prefers?|preferred|likes?|liked|loves?|loved|hates?|hated|"
    r"overrated|underrated|best|worst|favorite|"
    r"should|shouldn't|ought to|better|rather)\b",
    re.IGNORECASE,
)
101
+
102
# First-person (singular or plural) phrases that signal a recounted personal
# experience.  Matched on word boundaries, case-insensitively.
_EXPERIENCE_MARKERS = re.compile(
    r"\b(?:I went|I visited|I saw|I met|I did|I made|I had|I was|"
    r"we went|we visited|we had|I've been|I've done|I used to|"
    r"I remember|I once|last time I|when I was|my experience)\b",
    re.IGNORECASE,
)
108
+
109
# Cues that a statement is time-bound: scheduling nouns, weekday and clock
# expressions, "by <word>", relative ordering words, "in N <unit>" spans and
# common relative-date phrases.  Matched on word boundaries, case-insensitively.
_TEMPORAL_MARKERS = re.compile(
    r"\b(?:deadline|due date|expires?|scheduled|appointment|meeting|"
    r"on \w+day|at \d{1,2}:\d{2}|by \w+|until|before|after|"
    r"in \d+ (?:days?|weeks?|months?|years?)|"
    r"next week|next month|this weekend|tomorrow|yesterday)\b",
    re.IGNORECASE,
)
116
+
117
# Lowercase single words treated as emotional markers.  Stored as a frozenset
# so membership tests are O(1) and the collection cannot be mutated.
_EMOTIONAL_KEYWORDS = frozenset({
    "love", "hate", "amazing", "terrible", "wonderful", "awful", "excited",
    "angry", "happy", "sad", "scared", "thrilled", "devastated", "furious",
    "anxious", "grateful", "disappointed", "proud", "embarrassed", "jealous",
    "best", "worst", "incredible", "horrible", "fantastic", "miserable",
})
123
+
124
# Lowercase prefixes of greetings and pleasantries.  Kept as a tuple so it can
# be passed directly to ``str.startswith``.  The trailing space in "hi " and
# "hey " is part of the prefix, so words such as "history" do not match.
_FILLER_PREFIXES = (
    "good to see", "nice to", "hello", "hi ", "hey ", "how are you",
    "thanks", "thank you", "bye", "goodbye", "see you", "take care",
    "sure thing", "no problem", "okay",
)
129
+
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # LLM Prompt Templates
133
+ # ---------------------------------------------------------------------------
134
+
135
# System prompt for LLM-guided extraction.  It asks for 2-5 atomic,
# self-contained facts per input, each classified as episodic / semantic /
# opinion / temporal, and demands a bare JSON array whose objects carry the
# keys shown in the embedded example: text, fact_type, entities,
# referenced_date, importance, confidence.
_SYSTEM_PROMPT = (
    "You are a precise fact extraction engine for a memory system.\n"
    "Given conversation turns, extract 2-5 atomic facts. Rules:\n"
    "1. Use EXPLICIT NAMES — never pronouns (he/she/they/it). Every fact "
    "must name the subject explicitly.\n"
    "2. Each fact must be a COMPLETE, STANDALONE statement understandable "
    "without the original conversation.\n"
    "3. Convert ALL relative time to ABSOLUTE dates when possible. "
    "'Yesterday' with session date 2024-01-15 becomes '2024-01-14'. "
    "'Next month' becomes the actual month and year.\n"
    "4. Resolve ALL coreferences. 'He went there' must become "
    "'[Person name] went to [Place name]'.\n"
    "5. Extract relationships between people when mentioned.\n"
    "6. Extract preferences, opinions, and experiences as SEPARATE facts.\n"
    "7. Skip greetings, filler, social pleasantries, and confirmations.\n"
    "8. For opinions, include a confidence between 0.0-1.0.\n\n"
    "Classify each fact:\n"
    "- episodic: personal event or experience (visited, attended, did)\n"
    "- semantic: objective fact about the world (jobs, locations, relations)\n"
    "- opinion: subjective belief or preference (likes, thinks, prefers)\n"
    "- temporal: time-bound fact with dates or deadlines\n\n"
    "Respond ONLY with a JSON array. Example:\n"
    '[{"text":"Alice works at Google as a software engineer",'
    '"fact_type":"semantic","entities":["Alice","Google"],'
    '"referenced_date":null,"importance":7,"confidence":0.95},'
    '{"text":"Alice prefers Python over Java",'
    '"fact_type":"opinion","entities":["Alice"],'
    '"referenced_date":null,"importance":5,"confidence":0.8}]'
)
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Helpers
168
+ # ---------------------------------------------------------------------------
169
+
170
+ def _new_id() -> str:
171
+ return uuid.uuid4().hex[:16]
172
+
173
+
174
+ def _split_sentences(text: str) -> list[str]:
175
+ """Split text into sentences using punctuation boundaries."""
176
+ parts = re.split(r"(?<=[.!?])\s+", text.strip())
177
+ return [p.strip() for p in parts if len(p.strip()) >= 8]
178
+
179
+
180
def _extract_date_string(text: str) -> str | None:
    """Return the first date-like substring matched by ``_DATE_RE``.

    The pattern is searched once; the first capture group of that match
    holding a non-empty value is returned with surrounding whitespace
    removed. ``None`` is returned when the pattern does not match or when
    none of its groups captured anything.
    """
    found = _DATE_RE.search(text)
    if found is None:
        return None
    captured = next((value for value in found.groups() if value), None)
    if captured is None:
        return None
    return captured.strip()
189
+
190
+
191
+ def _try_parse_date(raw: str, reference_date: str | None = None) -> str | None:
192
+ """Attempt to resolve a date string to ISO format.
193
+
194
+ Uses dateutil.parser for structured dates and dateparser for
195
+ relative expressions ("last Monday", "next week").
196
+ Returns None on failure — never raises.
197
+ """
198
+ if not raw:
199
+ return None
200
+
201
+ # Fast path: already ISO
202
+ iso_match = re.match(r"^\d{4}-\d{2}-\d{2}$", raw.strip())
203
+ if iso_match:
204
+ return raw.strip()
205
+
206
+ # dateutil for structured dates (March 15, 2026 / 3/15/2026)
207
+ try:
208
+ from dateutil import parser as du_parser
209
+ result = du_parser.parse(raw, fuzzy=True)
210
+ return result.date().isoformat()
211
+ except Exception:
212
+ pass
213
+
214
+ # dateparser for relative dates (yesterday, last week, next Friday)
215
+ try:
216
+ import dateparser
217
+ settings: dict[str, Any] = {"PREFER_DATES_FROM": "past"}
218
+ if reference_date:
219
+ ref = dateparser.parse(reference_date)
220
+ if ref:
221
+ settings["RELATIVE_BASE"] = ref
222
+ result = dateparser.parse(raw, settings=settings)
223
+ if result:
224
+ return result.date().isoformat()
225
+ except Exception:
226
+ pass
227
+
228
+ return None
229
+
230
+
231
def _extract_interval(text: str, ref_date: str | None = None) -> tuple[str | None, str | None]:
    """Return the ``(start, end)`` dates of an interval mentioned in text.

    ``_INTERVAL_RE`` is searched once; its first and second capture groups
    are taken as the raw start and end expressions and each is resolved
    with ``_try_parse_date`` against ``ref_date``. When the pattern does
    not match, ``(None, None)`` is returned.
    """
    found = _INTERVAL_RE.search(text)
    if found is None:
        return (None, None)

    raw_start, raw_end = (found.group(index).strip() for index in (1, 2))
    resolved_start = _try_parse_date(raw_start, ref_date)
    resolved_end = _try_parse_date(raw_end, ref_date)
    return resolved_start, resolved_end
238
+
239
+
240
def _extract_entities(text: str) -> list[str]:
    """Collect candidate entity names from text with regex heuristics.

    Two sources are merged:
      * every ``_ENTITY_RE`` match, unless the first word of the match is
        a common word from the stop list below;
      * every ``_QUOTED_RE`` match whose stripped content has at least
        two characters.

    Returns:
        The distinct candidates in sorted order.
    """
    found: set[str] = set()

    # Capitalized word sequences (proper nouns)
    for hit in _ENTITY_RE.finditer(text):
        name = hit.group(1).strip()
        # Only the first word of a multi-word candidate is checked.
        lead = name.split()[0].lower() if name else ""
        if lead in {
            # Common English words that start sentences
            "the", "this", "that", "these", "those", "what", "when", "where",
            "which", "how", "who", "why", "also", "then", "just", "very",
            "really", "actually", "maybe", "well", "still", "even",
            "she", "he", "they", "them", "her", "him", "his", "its",
            "but", "and", "not", "yes", "yeah", "sure", "okay", "ok",
            "here", "there", "now", "today", "some", "all", "any",
            "been", "being", "have", "has", "had", "was", "were",
            "for", "with", "from", "about", "into", "over",
            # Sentence starters and conversational words
            "wow", "did", "so", "gonna", "got", "by", "thanks", "thank",
            "hey", "hi", "hello", "bye", "good", "great", "nice", "cool",
            "right", "like", "know", "think", "feel", "want", "need",
            "make", "take", "give", "tell", "said", "told", "get",
            "let", "can", "will", "would", "could", "should", "might",
            "much", "many", "more", "most", "lot", "way", "thing",
            "something", "anything", "everything", "nothing", "someone",
            "it", "my", "your", "our", "their", "me", "you", "we", "us",
            "do", "does", "if", "or", "no", "to", "at", "on", "in",
            "up", "out", "off", "too", "go", "come", "see", "look",
            "say", "ask", "try", "keep", "put", "run", "set", "move",
            "call", "end", "start", "find", "show", "hear", "play",
            "work", "read", "talk", "turn", "help", "miss", "hope",
            "love", "hate", "wish", "seem", "mean", "mind", "care",
        }:
            continue
        found.add(name)

    # Quoted strings
    quoted_values = (hit.group(1).strip() for hit in _QUOTED_RE.finditer(text))
    found.update(value for value in quoted_values if len(value) >= 2)

    return sorted(found)
284
+
285
+
286
def _classify_sentence(sentence: str) -> FactType:
    """Assign a FactType to a sentence from its keyword markers.

    The marker patterns are tried in a fixed priority order (temporal,
    then opinion, then experience) and the first one that matches
    decides the type. A sentence matching none of them is SEMANTIC.
    """
    ordered_checks = (
        (_TEMPORAL_MARKERS, FactType.TEMPORAL),
        (_OPINION_MARKERS, FactType.OPINION),
        (_EXPERIENCE_MARKERS, FactType.EPISODIC),
    )
    for pattern, label in ordered_checks:
        if pattern.search(sentence):
            return label
    return FactType.SEMANTIC
295
+
296
+
297
def _score_importance(
    text: str,
    entities: list[str],
    entity_frequency: dict[str, int],
    has_date: bool,
) -> float:
    """Score importance 0.0-1.0 based on entity frequency, emotion, temporality.

    Scoring formula:
        base = 0.3
        +0.2 if contains emotional keywords
        +0.2 if temporally grounded (has a date reference)
        +0.3 scaled by entity prominence (max entity frequency / total)

    Args:
        text: Sentence being scored.
        entities: Entities found in the sentence.
        entity_frequency: Occurrence count per entity across the chunk.
        has_date: Whether a date or interval was resolved for the sentence.

    Returns:
        The score rounded to three decimals and capped at 1.0.
    """
    score = 0.3

    # Emotional boost. Words are taken as runs of letters rather than
    # whitespace-separated tokens, so punctuation does not hide a keyword:
    # "That was amazing!" must still count "amazing".
    words = set(re.findall(r"[a-z]+", text.lower()))
    if words & _EMOTIONAL_KEYWORDS:
        score += 0.2

    # Temporal boost
    if has_date:
        score += 0.2

    # Entity prominence boost (frequent entities are important)
    if entities and entity_frequency:
        # ``or 1`` guards the division when every count is zero.
        total = sum(entity_frequency.values()) or 1
        max_freq = max((entity_frequency.get(e, 0) for e in entities), default=0)
        score += 0.3 * (max_freq / total)

    return min(1.0, round(score, 3))
329
+
330
+
331
def _signal_from_fact_type(ft: FactType) -> SignalType:
    """Translate a FactType into the SignalType kept for V2 compatibility.

    OPINION and TEMPORAL map to the signal of the same name. Every other
    value, including EPISODIC and SEMANTIC, maps to FACTUAL.
    """
    non_factual = {
        FactType.OPINION: SignalType.OPINION,
        FactType.TEMPORAL: SignalType.TEMPORAL,
    }
    return non_factual.get(ft, SignalType.FACTUAL)
340
+
341
+
342
def _is_filler(text: str) -> bool:
    """Return True if text is a greeting, filler, or social pleasantry.

    The text is stripped and lower-cased, then compared against each entry
    of ``_FILLER_PREFIXES``. A prefix only counts when it ends on a word
    boundary, so "thanks a lot" is filler but "Thanksgiving is at my
    mom's house" is not. Prefixes that already end in whitespace
    ("hi ", "hey ") carry their own boundary and are matched as-is.
    """
    low = text.strip().lower()
    for prefix in _FILLER_PREFIXES:
        if not low.startswith(prefix):
            continue
        if prefix[-1:].isspace():
            return True
        # Character right after the prefix; empty when the text ends there.
        following = low[len(prefix):len(prefix) + 1]
        if not following or not following.isalnum():
            return True
    return False
346
+
347
+
348
+ # ---------------------------------------------------------------------------
349
+ # Chunk builder
350
+ # ---------------------------------------------------------------------------
351
+
352
def chunk_turns(
    turns: list[str],
    chunk_size: int = 10,
    overlap: int = 2,
) -> list[list[str]]:
    """Group conversation turns into overlapping chunks.

    Each chunk is up to ``chunk_size`` turns with ``overlap`` turns
    carried over from the previous chunk to preserve cross-boundary context.
    Trailing fragments smaller than ``overlap + 1`` are merged into the
    final chunk to avoid low-context extraction passes.

    Out-of-range settings are clamped rather than rejected: a
    ``chunk_size`` below 1 is treated as 1 (it would otherwise yield empty
    chunks) and a negative ``overlap`` is treated as 0 (it would otherwise
    make the window advance past turns and drop them).

    Args:
        turns: Conversation turns in order.
        chunk_size: Maximum number of turns per chunk.
        overlap: Number of turns shared between consecutive chunks.

    Returns:
        The chunks in order; an empty list when ``turns`` is empty. Every
        turn appears in at least one chunk.
    """
    if not turns:
        return []

    chunk_size = max(1, chunk_size)
    overlap = max(0, overlap)

    if len(turns) <= chunk_size:
        return [list(turns)]

    total = len(turns)
    # The window always advances by at least one turn, even when
    # overlap >= chunk_size.
    step = max(1, chunk_size - overlap)

    chunks: list[list[str]] = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        remaining_after = total - end
        # Merge tiny trailing fragment into current chunk
        if 0 < remaining_after < overlap + 1:
            end = total
        chunks.append(list(turns[start:end]))
        if end >= total:
            break
        start += step

    return chunks
385
+
386
+
387
+ # ---------------------------------------------------------------------------
388
+ # FactExtractor
389
+ # ---------------------------------------------------------------------------
390
+
391
+ class FactExtractor:
392
+ """Extract structured AtomicFacts from conversation turns.
393
+
394
+ Strategies:
395
+ Mode A — Rule-based: regex entities, keyword classification, heuristic importance.
396
+ Mode B — Local LLM (Ollama): structured JSON extraction, Mode A fallback.
397
+ Mode C — Cloud LLM: narrative fact extraction (2-5 per chunk), richest output.
398
+ """
399
+
400
+ def __init__(
401
+ self,
402
+ config: EncodingConfig,
403
+ llm: LLMBackboneProtocol | None = None,
404
+ embedder: EmbedderProtocol | None = None,
405
+ mode: Mode = Mode.A,
406
+ ) -> None:
407
+ self._config = config
408
+ self._llm = llm
409
+ self._embedder = embedder
410
+ self._mode = mode
411
+
412
+ # ------------------------------------------------------------------
413
+ # Public API
414
+ # ------------------------------------------------------------------
415
+
416
+ def extract_facts(
417
+ self,
418
+ turns: list[str],
419
+ session_id: str,
420
+ session_date: str | None = None,
421
+ speaker_a: str = "",
422
+ speaker_b: str = "",
423
+ ) -> list[AtomicFact]:
424
+ """Extract structured atomic facts from conversation turns.
425
+
426
+ Chunks the conversation into overlapping windows, extracts facts from
427
+ each chunk, and deduplicates the merged results.
428
+
429
+ Args:
430
+ turns: Raw conversation turn strings.
431
+ session_id: Identifier for the conversation session.
432
+ session_date: ISO-8601 date of the session (observation date).
433
+ speaker_a: Name/identifier for the first speaker (e.g. user).
434
+ speaker_b: Name/identifier for the second speaker (e.g. assistant).
435
+
436
+ Returns:
437
+ Deduplicated list of AtomicFact objects.
438
+ """
439
+ if not turns:
440
+ return []
441
+
442
+ chunks = chunk_turns(turns, self._config.chunk_size, overlap=2)
443
+ all_facts: list[AtomicFact] = []
444
+
445
+ for chunk in chunks:
446
+ chunk_facts = self._extract_chunk(
447
+ chunk, session_id, session_date, speaker_a, speaker_b,
448
+ )
449
+ all_facts.extend(chunk_facts)
450
+
451
+ return self._deduplicate(all_facts)
452
+
453
+ # ------------------------------------------------------------------
454
+ # Chunk-level dispatch
455
+ # ------------------------------------------------------------------
456
+
457
+ def _extract_chunk(
458
+ self,
459
+ turns: list[str],
460
+ session_id: str,
461
+ session_date: str | None,
462
+ speaker_a: str,
463
+ speaker_b: str,
464
+ ) -> list[AtomicFact]:
465
+ """Extract facts from a single chunk — dispatches by mode."""
466
+ use_llm = (
467
+ self._mode in (Mode.B, Mode.C)
468
+ and self._llm is not None
469
+ and self._llm.is_available()
470
+ )
471
+ if use_llm:
472
+ facts = self._extract_llm(
473
+ turns, session_id, session_date, speaker_a, speaker_b,
474
+ )
475
+ if facts:
476
+ return facts
477
+ # Fallback to local if LLM produced nothing
478
+ logger.info("LLM extraction returned no facts, falling back to local.")
479
+
480
+ return self._extract_local(
481
+ turns, session_id, session_date, speaker_a, speaker_b,
482
+ )
483
+
484
+ # ------------------------------------------------------------------
485
+ # Mode A: Rule-based extraction
486
+ # ------------------------------------------------------------------
487
+
488
+ def _extract_local(
489
+ self,
490
+ turns: list[str],
491
+ session_id: str,
492
+ session_date: str | None,
493
+ speaker_a: str,
494
+ speaker_b: str,
495
+ ) -> list[AtomicFact]:
496
+ """Rule-based extraction: regex entities, keyword classification, scoring."""
497
+ combined = "\n".join(turns)
498
+ sentences = _split_sentences(combined)
499
+ if not sentences:
500
+ # If no proper sentences, treat each turn as a sentence
501
+ sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
502
+
503
+ # Build entity frequency map for importance scoring
504
+ entity_freq: dict[str, int] = {}
505
+ for sent in sentences:
506
+ for ent in _extract_entities(sent):
507
+ entity_freq[ent] = entity_freq.get(ent, 0) + 1
508
+
509
+ facts: list[AtomicFact] = []
510
+ seen_texts: set[str] = set()
511
+
512
+ for sent in sentences:
513
+ if _is_filler(sent):
514
+ continue
515
+ normalized = sent.strip()
516
+ if normalized in seen_texts or len(normalized) < 10:
517
+ continue
518
+ seen_texts.add(normalized)
519
+
520
+ # Resolve [Speaker]: prefix to "Speaker" in content
521
+ # "[Caroline]: I went to..." → "Caroline: I went to..."
522
+ import re as _re
523
+ _spk_match = _re.match(r"^\[([A-Za-z ]+)\]:\s*", normalized)
524
+ if _spk_match:
525
+ speaker_name = _spk_match.group(1)
526
+ normalized = f"{speaker_name}: {normalized[_spk_match.end():]}"
527
+
528
+ entities = _extract_entities(normalized)
529
+ fact_type = _classify_sentence(normalized)
530
+
531
+ # Three-date model: extract and resolve relative dates
532
+ raw_date = _extract_date_string(normalized)
533
+ referenced_date = _try_parse_date(raw_date, session_date) if raw_date else None
534
+ interval_start, interval_end = _extract_interval(normalized, session_date)
535
+
536
+ # Resolve relative dates in content for better retrieval
537
+ # "I went yesterday" + session_date=2023-05-08 → "I went on 2023-05-07"
538
+ if raw_date and referenced_date and raw_date.lower() in (
539
+ "yesterday", "today", "last week", "last month", "last year",
540
+ "this morning", "this afternoon", "this evening",
541
+ "the other day", "recently", "the day before",
542
+ ):
543
+ date_str = referenced_date[:10] # YYYY-MM-DD
544
+ normalized = normalized.replace(raw_date, f"on {date_str}")
545
+
546
+ has_date = referenced_date is not None or interval_start is not None
547
+ importance = _score_importance(normalized, entities, entity_freq, has_date)
548
+
549
+ if importance < self._config.min_fact_confidence:
550
+ continue
551
+
552
+ # Determine speaker from turn position heuristic
553
+ speaker = self._infer_speaker(normalized, turns, speaker_a, speaker_b)
554
+
555
+ facts.append(AtomicFact(
556
+ fact_id=_new_id(),
557
+ content=normalized,
558
+ fact_type=fact_type,
559
+ entities=entities,
560
+ observation_date=session_date,
561
+ referenced_date=referenced_date,
562
+ interval_start=interval_start,
563
+ interval_end=interval_end,
564
+ confidence=0.7 if fact_type == FactType.SEMANTIC else 0.6,
565
+ importance=importance,
566
+ session_id=session_id,
567
+ signal_type=_signal_from_fact_type(fact_type),
568
+ ))
569
+
570
+ # Cap at max_facts_per_chunk, keeping highest importance
571
+ facts.sort(key=lambda f: f.importance, reverse=True)
572
+ return facts[: self._config.max_facts_per_chunk]
573
+
574
+ # ------------------------------------------------------------------
575
+ # Mode B/C: LLM-based extraction
576
+ # ------------------------------------------------------------------
577
+
578
+ def _extract_llm(
579
+ self,
580
+ turns: list[str],
581
+ session_id: str,
582
+ session_date: str | None,
583
+ speaker_a: str,
584
+ speaker_b: str,
585
+ ) -> list[AtomicFact]:
586
+ """LLM-guided extraction: structured JSON prompt, parsed into AtomicFacts."""
587
+ conversation_text = "\n".join(turns)
588
+ speakers = []
589
+ if speaker_a:
590
+ speakers.append(f"Speaker A: {speaker_a}")
591
+ if speaker_b:
592
+ speakers.append(f"Speaker B: {speaker_b}")
593
+ speaker_info = ", ".join(speakers) if speakers else "unknown"
594
+
595
+ prompt = (
596
+ f"Extract atomic facts from the following conversation.\n"
597
+ f"Speakers: {speaker_info}\n"
598
+ f"Conversation date: {session_date or 'unknown'}\n\n"
599
+ f"--- CONVERSATION ---\n{conversation_text}\n--- END ---\n\n"
600
+ f"Rules:\n"
601
+ f"- Extract 2-5 comprehensive, self-contained facts.\n"
602
+ f"- Use explicit names (never pronouns).\n"
603
+ f"- Each fact must make sense WITHOUT the original conversation.\n"
604
+ f"- For dates mentioned (\"yesterday\", \"next week\"), resolve to "
605
+ f"ISO format relative to {session_date or 'today'}.\n"
606
+ f"- Skip greetings, filler, and confirmations.\n"
607
+ f"- importance: 1 (trivial) to 10 (critical)\n"
608
+ f"- confidence: 0.0 (uncertain) to 1.0 (definite)\n\n"
609
+ f"Respond with ONLY a JSON array."
610
+ )
611
+
612
+ try:
613
+ raw = self._llm.generate( # type: ignore[union-attr]
614
+ prompt=prompt,
615
+ system=_SYSTEM_PROMPT,
616
+ temperature=0.0,
617
+ max_tokens=1024,
618
+ )
619
+ return self._parse_llm_response(raw, session_id, session_date)
620
+ except Exception as exc:
621
+ logger.warning("LLM fact extraction failed: %s", exc)
622
+ return []
623
+
624
+ def _parse_llm_response(
625
+ self,
626
+ raw: str,
627
+ session_id: str,
628
+ session_date: str | None,
629
+ ) -> list[AtomicFact]:
630
+ """Parse JSON array from LLM response into AtomicFact list."""
631
+ if not raw or not raw.strip():
632
+ return []
633
+
634
+ # Extract JSON array from potentially wrapped response
635
+ try:
636
+ match = re.search(r"\[.*\]", raw, re.DOTALL)
637
+ if not match:
638
+ logger.warning("No JSON array found in LLM response.")
639
+ return []
640
+ items = json.loads(match.group())
641
+ if not isinstance(items, list):
642
+ return []
643
+ except (json.JSONDecodeError, ValueError) as exc:
644
+ logger.warning("JSON parse error in LLM fact response: %s", exc)
645
+ return []
646
+
647
+ facts: list[AtomicFact] = []
648
+ for item in items[:10]: # Hard cap at 10 per chunk
649
+ if not isinstance(item, dict):
650
+ continue
651
+ fact = self._item_to_fact(item, session_id, session_date)
652
+ if fact is not None:
653
+ facts.append(fact)
654
+
655
+ return facts
656
+
657
+ def _item_to_fact(
658
+ self,
659
+ item: dict[str, Any],
660
+ session_id: str,
661
+ session_date: str | None,
662
+ ) -> AtomicFact | None:
663
+ """Convert a single LLM JSON item to an AtomicFact.
664
+
665
+ Returns None if the item is malformed or is filler.
666
+ """
667
+ text = str(item.get("text", "")).strip()
668
+ if not text or len(text) < 8 or _is_filler(text):
669
+ return None
670
+
671
+ # Fact type
672
+ raw_type = str(item.get("fact_type", item.get("type", "semantic"))).lower()
673
+ type_map = {
674
+ "episodic": FactType.EPISODIC,
675
+ "experience": FactType.EPISODIC,
676
+ "semantic": FactType.SEMANTIC,
677
+ "world": FactType.SEMANTIC,
678
+ "opinion": FactType.OPINION,
679
+ "temporal": FactType.TEMPORAL,
680
+ }
681
+ fact_type = type_map.get(raw_type, FactType.SEMANTIC)
682
+
683
+ # Entities
684
+ raw_entities = item.get("entities", [])
685
+ if isinstance(raw_entities, list):
686
+ entities = [str(e).strip() for e in raw_entities if str(e).strip()]
687
+ elif isinstance(raw_entities, str):
688
+ entities = [raw_entities.strip()] if raw_entities.strip() else []
689
+ else:
690
+ entities = _extract_entities(text)
691
+
692
+ # Referenced date — from LLM or inferred
693
+ ref_date_raw = item.get("referenced_date") or item.get("date")
694
+ referenced_date: str | None = None
695
+ if ref_date_raw and str(ref_date_raw).strip().lower() != "null":
696
+ referenced_date = _try_parse_date(str(ref_date_raw), session_date)
697
+
698
+ # Interval
699
+ interval_start = item.get("interval_start")
700
+ interval_end = item.get("interval_end")
701
+ if interval_start:
702
+ interval_start = _try_parse_date(str(interval_start), session_date)
703
+ if interval_end:
704
+ interval_end = _try_parse_date(str(interval_end), session_date)
705
+
706
+ # Importance (LLM returns 1-10, we normalize to 0.0-1.0)
707
+ raw_importance = item.get("importance", 5)
708
+ try:
709
+ importance = min(1.0, max(0.0, float(raw_importance) / 10.0))
710
+ except (TypeError, ValueError):
711
+ importance = 0.5
712
+
713
+ # Confidence
714
+ raw_conf = item.get("confidence", 0.8)
715
+ try:
716
+ confidence = min(1.0, max(0.0, float(raw_conf)))
717
+ except (TypeError, ValueError):
718
+ confidence = 0.8
719
+
720
+ return AtomicFact(
721
+ fact_id=_new_id(),
722
+ content=text,
723
+ fact_type=fact_type,
724
+ entities=entities,
725
+ observation_date=session_date,
726
+ referenced_date=referenced_date,
727
+ interval_start=interval_start,
728
+ interval_end=interval_end,
729
+ confidence=confidence,
730
+ importance=importance,
731
+ session_id=session_id,
732
+ signal_type=_signal_from_fact_type(fact_type),
733
+ )
734
+
735
+ # ------------------------------------------------------------------
736
+ # Speaker inference (Mode A heuristic)
737
+ # ------------------------------------------------------------------
738
+
739
+ @staticmethod
740
+ def _infer_speaker(
741
+ sentence: str,
742
+ turns: list[str],
743
+ speaker_a: str,
744
+ speaker_b: str,
745
+ ) -> str:
746
+ """Infer which speaker said a sentence based on turn position.
747
+
748
+ Checks which turn contains the sentence and uses even/odd indexing
749
+ (even = speaker_a, odd = speaker_b by convention).
750
+ """
751
+ if not speaker_a and not speaker_b:
752
+ return ""
753
+ for i, turn in enumerate(turns):
754
+ if sentence in turn:
755
+ return speaker_a if i % 2 == 0 else speaker_b
756
+ return speaker_a or speaker_b
757
+
758
+ # ------------------------------------------------------------------
759
+ # Deduplication
760
+ # ------------------------------------------------------------------
761
+
762
+ @staticmethod
763
+ def _deduplicate(facts: list[AtomicFact]) -> list[AtomicFact]:
764
+ """Remove near-duplicate facts by content normalization.
765
+
766
+ Uses lowercased, whitespace-collapsed content as dedup key.
767
+ When duplicates exist, keeps the one with higher importance.
768
+ """
769
+ seen: dict[str, AtomicFact] = {}
770
+ for fact in facts:
771
+ key = re.sub(r"\s+", " ", fact.content.lower().strip())
772
+ existing = seen.get(key)
773
+ if existing is None or fact.importance > existing.importance:
774
+ seen[key] = fact
775
+ return list(seen.values())