superlocalmemory 2.8.6 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. package/LICENSE +9 -1
  2. package/NOTICE +63 -0
  3. package/README.md +165 -480
  4. package/bin/slm +17 -449
  5. package/bin/slm-npm +62 -48
  6. package/conftest.py +5 -0
  7. package/docs/api-reference.md +284 -0
  8. package/docs/architecture.md +149 -0
  9. package/docs/auto-memory.md +150 -0
  10. package/docs/cli-reference.md +276 -0
  11. package/docs/compliance.md +191 -0
  12. package/docs/configuration.md +182 -0
  13. package/docs/getting-started.md +102 -0
  14. package/docs/ide-setup.md +261 -0
  15. package/docs/mcp-tools.md +220 -0
  16. package/docs/migration-from-v2.md +170 -0
  17. package/docs/profiles.md +173 -0
  18. package/docs/troubleshooting.md +310 -0
  19. package/{configs → ide/configs}/antigravity-mcp.json +3 -3
  20. package/ide/configs/chatgpt-desktop-mcp.json +16 -0
  21. package/{configs → ide/configs}/claude-desktop-mcp.json +3 -3
  22. package/{configs → ide/configs}/codex-mcp.toml +4 -4
  23. package/{configs → ide/configs}/continue-mcp.yaml +4 -3
  24. package/{configs → ide/configs}/continue-skills.yaml +6 -6
  25. package/ide/configs/cursor-mcp.json +15 -0
  26. package/{configs → ide/configs}/gemini-cli-mcp.json +2 -2
  27. package/{configs → ide/configs}/jetbrains-mcp.json +2 -2
  28. package/{configs → ide/configs}/opencode-mcp.json +2 -2
  29. package/{configs → ide/configs}/perplexity-mcp.json +2 -2
  30. package/{configs → ide/configs}/vscode-copilot-mcp.json +2 -2
  31. package/{configs → ide/configs}/windsurf-mcp.json +3 -3
  32. package/{configs → ide/configs}/zed-mcp.json +2 -2
  33. package/{hooks → ide/hooks}/context-hook.js +9 -20
  34. package/ide/hooks/memory-list-skill.js +70 -0
  35. package/ide/hooks/memory-profile-skill.js +101 -0
  36. package/ide/hooks/memory-recall-skill.js +62 -0
  37. package/ide/hooks/memory-remember-skill.js +68 -0
  38. package/ide/hooks/memory-reset-skill.js +160 -0
  39. package/{hooks → ide/hooks}/post-recall-hook.js +2 -2
  40. package/ide/integrations/langchain/README.md +106 -0
  41. package/ide/integrations/langchain/langchain_superlocalmemory/__init__.py +9 -0
  42. package/ide/integrations/langchain/langchain_superlocalmemory/chat_message_history.py +201 -0
  43. package/ide/integrations/langchain/pyproject.toml +38 -0
  44. package/{src/learning → ide/integrations/langchain}/tests/__init__.py +1 -0
  45. package/ide/integrations/langchain/tests/test_chat_message_history.py +215 -0
  46. package/ide/integrations/langchain/tests/test_security.py +117 -0
  47. package/ide/integrations/llamaindex/README.md +81 -0
  48. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/__init__.py +9 -0
  49. package/ide/integrations/llamaindex/llama_index/storage/chat_store/superlocalmemory/base.py +316 -0
  50. package/ide/integrations/llamaindex/pyproject.toml +43 -0
  51. package/{src/lifecycle → ide/integrations/llamaindex}/tests/__init__.py +1 -2
  52. package/ide/integrations/llamaindex/tests/test_chat_store.py +294 -0
  53. package/ide/integrations/llamaindex/tests/test_security.py +241 -0
  54. package/{skills → ide/skills}/slm-build-graph/SKILL.md +6 -6
  55. package/{skills → ide/skills}/slm-list-recent/SKILL.md +5 -5
  56. package/{skills → ide/skills}/slm-recall/SKILL.md +5 -5
  57. package/{skills → ide/skills}/slm-remember/SKILL.md +6 -6
  58. package/{skills → ide/skills}/slm-show-patterns/SKILL.md +7 -7
  59. package/{skills → ide/skills}/slm-status/SKILL.md +9 -9
  60. package/{skills → ide/skills}/slm-switch-profile/SKILL.md +9 -9
  61. package/package.json +13 -22
  62. package/pyproject.toml +85 -0
  63. package/scripts/build-dmg.sh +417 -0
  64. package/scripts/install-skills.ps1 +334 -0
  65. package/scripts/postinstall.js +2 -2
  66. package/scripts/start-dashboard.ps1 +52 -0
  67. package/scripts/start-dashboard.sh +41 -0
  68. package/scripts/sync-wiki.ps1 +127 -0
  69. package/scripts/sync-wiki.sh +82 -0
  70. package/scripts/test-dmg.sh +161 -0
  71. package/scripts/test-npm-package.ps1 +252 -0
  72. package/scripts/test-npm-package.sh +207 -0
  73. package/scripts/verify-install.ps1 +294 -0
  74. package/scripts/verify-install.sh +266 -0
  75. package/src/superlocalmemory/__init__.py +0 -0
  76. package/src/superlocalmemory/attribution/__init__.py +9 -0
  77. package/src/superlocalmemory/attribution/mathematical_dna.py +235 -0
  78. package/src/superlocalmemory/attribution/signer.py +153 -0
  79. package/src/superlocalmemory/attribution/watermark.py +189 -0
  80. package/src/superlocalmemory/cli/__init__.py +5 -0
  81. package/src/superlocalmemory/cli/commands.py +245 -0
  82. package/src/superlocalmemory/cli/main.py +89 -0
  83. package/src/superlocalmemory/cli/migrate_cmd.py +55 -0
  84. package/src/superlocalmemory/cli/post_install.py +99 -0
  85. package/src/superlocalmemory/cli/setup_wizard.py +129 -0
  86. package/src/superlocalmemory/compliance/__init__.py +0 -0
  87. package/src/superlocalmemory/compliance/abac.py +204 -0
  88. package/src/superlocalmemory/compliance/audit.py +314 -0
  89. package/src/superlocalmemory/compliance/eu_ai_act.py +131 -0
  90. package/src/superlocalmemory/compliance/gdpr.py +294 -0
  91. package/src/superlocalmemory/compliance/lifecycle.py +158 -0
  92. package/src/superlocalmemory/compliance/retention.py +232 -0
  93. package/src/superlocalmemory/compliance/scheduler.py +148 -0
  94. package/src/superlocalmemory/core/__init__.py +0 -0
  95. package/src/superlocalmemory/core/config.py +391 -0
  96. package/src/superlocalmemory/core/embeddings.py +293 -0
  97. package/src/superlocalmemory/core/engine.py +701 -0
  98. package/src/superlocalmemory/core/hooks.py +65 -0
  99. package/src/superlocalmemory/core/maintenance.py +172 -0
  100. package/src/superlocalmemory/core/modes.py +140 -0
  101. package/src/superlocalmemory/core/profiles.py +234 -0
  102. package/src/superlocalmemory/core/registry.py +117 -0
  103. package/src/superlocalmemory/dynamics/__init__.py +0 -0
  104. package/src/superlocalmemory/dynamics/fisher_langevin_coupling.py +223 -0
  105. package/src/superlocalmemory/encoding/__init__.py +0 -0
  106. package/src/superlocalmemory/encoding/consolidator.py +485 -0
  107. package/src/superlocalmemory/encoding/emotional.py +125 -0
  108. package/src/superlocalmemory/encoding/entity_resolver.py +525 -0
  109. package/src/superlocalmemory/encoding/entropy_gate.py +104 -0
  110. package/src/superlocalmemory/encoding/fact_extractor.py +775 -0
  111. package/src/superlocalmemory/encoding/foresight.py +91 -0
  112. package/src/superlocalmemory/encoding/graph_builder.py +302 -0
  113. package/src/superlocalmemory/encoding/observation_builder.py +160 -0
  114. package/src/superlocalmemory/encoding/scene_builder.py +183 -0
  115. package/src/superlocalmemory/encoding/signal_inference.py +90 -0
  116. package/src/superlocalmemory/encoding/temporal_parser.py +426 -0
  117. package/src/superlocalmemory/encoding/type_router.py +235 -0
  118. package/src/superlocalmemory/hooks/__init__.py +3 -0
  119. package/src/superlocalmemory/hooks/auto_capture.py +111 -0
  120. package/src/superlocalmemory/hooks/auto_recall.py +93 -0
  121. package/src/superlocalmemory/hooks/ide_connector.py +204 -0
  122. package/src/superlocalmemory/hooks/rules_engine.py +99 -0
  123. package/src/superlocalmemory/infra/__init__.py +3 -0
  124. package/src/superlocalmemory/infra/auth_middleware.py +82 -0
  125. package/src/superlocalmemory/infra/backup.py +317 -0
  126. package/src/superlocalmemory/infra/cache_manager.py +267 -0
  127. package/src/superlocalmemory/infra/event_bus.py +381 -0
  128. package/src/superlocalmemory/infra/rate_limiter.py +135 -0
  129. package/src/{webhook_dispatcher.py → superlocalmemory/infra/webhook_dispatcher.py} +104 -101
  130. package/src/superlocalmemory/learning/__init__.py +0 -0
  131. package/src/superlocalmemory/learning/adaptive.py +172 -0
  132. package/src/superlocalmemory/learning/behavioral.py +490 -0
  133. package/src/superlocalmemory/learning/behavioral_listener.py +94 -0
  134. package/src/superlocalmemory/learning/bootstrap.py +298 -0
  135. package/src/superlocalmemory/learning/cross_project.py +399 -0
  136. package/src/superlocalmemory/learning/database.py +376 -0
  137. package/src/superlocalmemory/learning/engagement.py +323 -0
  138. package/src/superlocalmemory/learning/features.py +138 -0
  139. package/src/superlocalmemory/learning/feedback.py +316 -0
  140. package/src/superlocalmemory/learning/outcomes.py +255 -0
  141. package/src/superlocalmemory/learning/project_context.py +366 -0
  142. package/src/superlocalmemory/learning/ranker.py +155 -0
  143. package/src/superlocalmemory/learning/source_quality.py +303 -0
  144. package/src/superlocalmemory/learning/workflows.py +309 -0
  145. package/src/superlocalmemory/llm/__init__.py +0 -0
  146. package/src/superlocalmemory/llm/backbone.py +316 -0
  147. package/src/superlocalmemory/math/__init__.py +0 -0
  148. package/src/superlocalmemory/math/fisher.py +356 -0
  149. package/src/superlocalmemory/math/langevin.py +398 -0
  150. package/src/superlocalmemory/math/sheaf.py +257 -0
  151. package/src/superlocalmemory/mcp/__init__.py +0 -0
  152. package/src/superlocalmemory/mcp/resources.py +245 -0
  153. package/src/superlocalmemory/mcp/server.py +61 -0
  154. package/src/superlocalmemory/mcp/tools.py +18 -0
  155. package/src/superlocalmemory/mcp/tools_core.py +305 -0
  156. package/src/superlocalmemory/mcp/tools_v28.py +223 -0
  157. package/src/superlocalmemory/mcp/tools_v3.py +286 -0
  158. package/src/superlocalmemory/retrieval/__init__.py +0 -0
  159. package/src/superlocalmemory/retrieval/agentic.py +295 -0
  160. package/src/superlocalmemory/retrieval/ann_index.py +223 -0
  161. package/src/superlocalmemory/retrieval/bm25_channel.py +185 -0
  162. package/src/superlocalmemory/retrieval/bridge_discovery.py +170 -0
  163. package/src/superlocalmemory/retrieval/engine.py +390 -0
  164. package/src/superlocalmemory/retrieval/entity_channel.py +179 -0
  165. package/src/superlocalmemory/retrieval/fusion.py +78 -0
  166. package/src/superlocalmemory/retrieval/profile_channel.py +105 -0
  167. package/src/superlocalmemory/retrieval/reranker.py +154 -0
  168. package/src/superlocalmemory/retrieval/semantic_channel.py +232 -0
  169. package/src/superlocalmemory/retrieval/strategy.py +96 -0
  170. package/src/superlocalmemory/retrieval/temporal_channel.py +175 -0
  171. package/src/superlocalmemory/server/__init__.py +1 -0
  172. package/src/superlocalmemory/server/api.py +248 -0
  173. package/src/superlocalmemory/server/routes/__init__.py +4 -0
  174. package/src/superlocalmemory/server/routes/agents.py +107 -0
  175. package/src/superlocalmemory/server/routes/backup.py +91 -0
  176. package/src/superlocalmemory/server/routes/behavioral.py +127 -0
  177. package/src/superlocalmemory/server/routes/compliance.py +160 -0
  178. package/src/superlocalmemory/server/routes/data_io.py +188 -0
  179. package/src/superlocalmemory/server/routes/events.py +183 -0
  180. package/src/superlocalmemory/server/routes/helpers.py +85 -0
  181. package/src/superlocalmemory/server/routes/learning.py +273 -0
  182. package/src/superlocalmemory/server/routes/lifecycle.py +116 -0
  183. package/src/superlocalmemory/server/routes/memories.py +399 -0
  184. package/src/superlocalmemory/server/routes/profiles.py +219 -0
  185. package/src/superlocalmemory/server/routes/stats.py +346 -0
  186. package/src/superlocalmemory/server/routes/v3_api.py +365 -0
  187. package/src/superlocalmemory/server/routes/ws.py +82 -0
  188. package/src/superlocalmemory/server/security_middleware.py +57 -0
  189. package/src/superlocalmemory/server/ui.py +245 -0
  190. package/src/superlocalmemory/storage/__init__.py +0 -0
  191. package/src/superlocalmemory/storage/access_control.py +182 -0
  192. package/src/superlocalmemory/storage/database.py +594 -0
  193. package/src/superlocalmemory/storage/migrations.py +303 -0
  194. package/src/superlocalmemory/storage/models.py +406 -0
  195. package/src/superlocalmemory/storage/schema.py +726 -0
  196. package/src/superlocalmemory/storage/v2_migrator.py +317 -0
  197. package/src/superlocalmemory/trust/__init__.py +0 -0
  198. package/src/superlocalmemory/trust/gate.py +130 -0
  199. package/src/superlocalmemory/trust/provenance.py +124 -0
  200. package/src/superlocalmemory/trust/scorer.py +347 -0
  201. package/src/superlocalmemory/trust/signals.py +153 -0
  202. package/ui/index.html +278 -5
  203. package/ui/js/auto-settings.js +70 -0
  204. package/ui/js/dashboard.js +90 -0
  205. package/ui/js/fact-detail.js +92 -0
  206. package/ui/js/feedback.js +2 -2
  207. package/ui/js/ide-status.js +102 -0
  208. package/ui/js/math-health.js +98 -0
  209. package/ui/js/recall-lab.js +127 -0
  210. package/ui/js/settings.js +2 -2
  211. package/ui/js/trust-dashboard.js +73 -0
  212. package/api_server.py +0 -724
  213. package/bin/aider-smart +0 -72
  214. package/bin/superlocalmemoryv2-learning +0 -4
  215. package/bin/superlocalmemoryv2-list +0 -3
  216. package/bin/superlocalmemoryv2-patterns +0 -4
  217. package/bin/superlocalmemoryv2-profile +0 -3
  218. package/bin/superlocalmemoryv2-recall +0 -3
  219. package/bin/superlocalmemoryv2-remember +0 -3
  220. package/bin/superlocalmemoryv2-reset +0 -3
  221. package/bin/superlocalmemoryv2-status +0 -3
  222. package/configs/chatgpt-desktop-mcp.json +0 -16
  223. package/configs/cursor-mcp.json +0 -15
  224. package/hooks/memory-list-skill.js +0 -139
  225. package/hooks/memory-profile-skill.js +0 -273
  226. package/hooks/memory-recall-skill.js +0 -114
  227. package/hooks/memory-remember-skill.js +0 -127
  228. package/hooks/memory-reset-skill.js +0 -274
  229. package/mcp_server.py +0 -1808
  230. package/requirements-core.txt +0 -22
  231. package/requirements-learning.txt +0 -12
  232. package/requirements.txt +0 -12
  233. package/src/agent_registry.py +0 -411
  234. package/src/auth_middleware.py +0 -61
  235. package/src/auto_backup.py +0 -459
  236. package/src/behavioral/__init__.py +0 -49
  237. package/src/behavioral/behavioral_listener.py +0 -203
  238. package/src/behavioral/behavioral_patterns.py +0 -275
  239. package/src/behavioral/cross_project_transfer.py +0 -206
  240. package/src/behavioral/outcome_inference.py +0 -194
  241. package/src/behavioral/outcome_tracker.py +0 -193
  242. package/src/behavioral/tests/__init__.py +0 -4
  243. package/src/behavioral/tests/test_behavioral_integration.py +0 -108
  244. package/src/behavioral/tests/test_behavioral_patterns.py +0 -150
  245. package/src/behavioral/tests/test_cross_project_transfer.py +0 -142
  246. package/src/behavioral/tests/test_mcp_behavioral.py +0 -139
  247. package/src/behavioral/tests/test_mcp_report_outcome.py +0 -117
  248. package/src/behavioral/tests/test_outcome_inference.py +0 -107
  249. package/src/behavioral/tests/test_outcome_tracker.py +0 -96
  250. package/src/cache_manager.py +0 -518
  251. package/src/compliance/__init__.py +0 -48
  252. package/src/compliance/abac_engine.py +0 -149
  253. package/src/compliance/abac_middleware.py +0 -116
  254. package/src/compliance/audit_db.py +0 -215
  255. package/src/compliance/audit_logger.py +0 -148
  256. package/src/compliance/retention_manager.py +0 -289
  257. package/src/compliance/retention_scheduler.py +0 -186
  258. package/src/compliance/tests/__init__.py +0 -4
  259. package/src/compliance/tests/test_abac_enforcement.py +0 -95
  260. package/src/compliance/tests/test_abac_engine.py +0 -124
  261. package/src/compliance/tests/test_abac_mcp_integration.py +0 -118
  262. package/src/compliance/tests/test_audit_db.py +0 -123
  263. package/src/compliance/tests/test_audit_logger.py +0 -98
  264. package/src/compliance/tests/test_mcp_audit.py +0 -128
  265. package/src/compliance/tests/test_mcp_retention_policy.py +0 -125
  266. package/src/compliance/tests/test_retention_manager.py +0 -131
  267. package/src/compliance/tests/test_retention_scheduler.py +0 -99
  268. package/src/compression/__init__.py +0 -25
  269. package/src/compression/cli.py +0 -150
  270. package/src/compression/cold_storage.py +0 -217
  271. package/src/compression/config.py +0 -72
  272. package/src/compression/orchestrator.py +0 -133
  273. package/src/compression/tier2_compressor.py +0 -228
  274. package/src/compression/tier3_compressor.py +0 -153
  275. package/src/compression/tier_classifier.py +0 -148
  276. package/src/db_connection_manager.py +0 -536
  277. package/src/embedding_engine.py +0 -63
  278. package/src/embeddings/__init__.py +0 -47
  279. package/src/embeddings/cache.py +0 -70
  280. package/src/embeddings/cli.py +0 -113
  281. package/src/embeddings/constants.py +0 -47
  282. package/src/embeddings/database.py +0 -91
  283. package/src/embeddings/engine.py +0 -247
  284. package/src/embeddings/model_loader.py +0 -145
  285. package/src/event_bus.py +0 -562
  286. package/src/graph/__init__.py +0 -36
  287. package/src/graph/build_helpers.py +0 -74
  288. package/src/graph/cli.py +0 -87
  289. package/src/graph/cluster_builder.py +0 -188
  290. package/src/graph/cluster_summary.py +0 -148
  291. package/src/graph/constants.py +0 -47
  292. package/src/graph/edge_builder.py +0 -162
  293. package/src/graph/entity_extractor.py +0 -95
  294. package/src/graph/graph_core.py +0 -226
  295. package/src/graph/graph_search.py +0 -231
  296. package/src/graph/hierarchical.py +0 -207
  297. package/src/graph/schema.py +0 -99
  298. package/src/graph_engine.py +0 -52
  299. package/src/hnsw_index.py +0 -628
  300. package/src/hybrid_search.py +0 -46
  301. package/src/learning/__init__.py +0 -217
  302. package/src/learning/adaptive_ranker.py +0 -682
  303. package/src/learning/bootstrap/__init__.py +0 -69
  304. package/src/learning/bootstrap/constants.py +0 -93
  305. package/src/learning/bootstrap/db_queries.py +0 -316
  306. package/src/learning/bootstrap/sampling.py +0 -82
  307. package/src/learning/bootstrap/text_utils.py +0 -71
  308. package/src/learning/cross_project_aggregator.py +0 -857
  309. package/src/learning/db/__init__.py +0 -40
  310. package/src/learning/db/constants.py +0 -44
  311. package/src/learning/db/schema.py +0 -279
  312. package/src/learning/engagement_tracker.py +0 -628
  313. package/src/learning/feature_extractor.py +0 -708
  314. package/src/learning/feedback_collector.py +0 -806
  315. package/src/learning/learning_db.py +0 -915
  316. package/src/learning/project_context_manager.py +0 -572
  317. package/src/learning/ranking/__init__.py +0 -33
  318. package/src/learning/ranking/constants.py +0 -84
  319. package/src/learning/ranking/helpers.py +0 -278
  320. package/src/learning/source_quality_scorer.py +0 -676
  321. package/src/learning/synthetic_bootstrap.py +0 -755
  322. package/src/learning/tests/test_adaptive_ranker.py +0 -325
  323. package/src/learning/tests/test_adaptive_ranker_v28.py +0 -60
  324. package/src/learning/tests/test_aggregator.py +0 -306
  325. package/src/learning/tests/test_auto_retrain_v28.py +0 -35
  326. package/src/learning/tests/test_e2e_ranking_v28.py +0 -82
  327. package/src/learning/tests/test_feature_extractor_v28.py +0 -93
  328. package/src/learning/tests/test_feedback_collector.py +0 -294
  329. package/src/learning/tests/test_learning_db.py +0 -602
  330. package/src/learning/tests/test_learning_db_v28.py +0 -110
  331. package/src/learning/tests/test_learning_init_v28.py +0 -48
  332. package/src/learning/tests/test_outcome_signals.py +0 -48
  333. package/src/learning/tests/test_project_context.py +0 -292
  334. package/src/learning/tests/test_schema_migration.py +0 -319
  335. package/src/learning/tests/test_signal_inference.py +0 -397
  336. package/src/learning/tests/test_source_quality.py +0 -351
  337. package/src/learning/tests/test_synthetic_bootstrap.py +0 -429
  338. package/src/learning/tests/test_workflow_miner.py +0 -318
  339. package/src/learning/workflow_pattern_miner.py +0 -655
  340. package/src/lifecycle/__init__.py +0 -54
  341. package/src/lifecycle/bounded_growth.py +0 -239
  342. package/src/lifecycle/compaction_engine.py +0 -226
  343. package/src/lifecycle/lifecycle_engine.py +0 -355
  344. package/src/lifecycle/lifecycle_evaluator.py +0 -257
  345. package/src/lifecycle/lifecycle_scheduler.py +0 -130
  346. package/src/lifecycle/retention_policy.py +0 -285
  347. package/src/lifecycle/tests/test_bounded_growth.py +0 -193
  348. package/src/lifecycle/tests/test_compaction.py +0 -179
  349. package/src/lifecycle/tests/test_lifecycle_engine.py +0 -137
  350. package/src/lifecycle/tests/test_lifecycle_evaluation.py +0 -177
  351. package/src/lifecycle/tests/test_lifecycle_scheduler.py +0 -127
  352. package/src/lifecycle/tests/test_lifecycle_search.py +0 -109
  353. package/src/lifecycle/tests/test_mcp_compact.py +0 -149
  354. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +0 -114
  355. package/src/lifecycle/tests/test_retention_policy.py +0 -162
  356. package/src/mcp_tools_v28.py +0 -281
  357. package/src/memory/__init__.py +0 -36
  358. package/src/memory/cli.py +0 -205
  359. package/src/memory/constants.py +0 -39
  360. package/src/memory/helpers.py +0 -28
  361. package/src/memory/schema.py +0 -166
  362. package/src/memory-profiles.py +0 -595
  363. package/src/memory-reset.py +0 -491
  364. package/src/memory_compression.py +0 -989
  365. package/src/memory_store_v2.py +0 -1155
  366. package/src/migrate_v1_to_v2.py +0 -629
  367. package/src/pattern_learner.py +0 -34
  368. package/src/patterns/__init__.py +0 -24
  369. package/src/patterns/analyzers.py +0 -251
  370. package/src/patterns/learner.py +0 -271
  371. package/src/patterns/scoring.py +0 -171
  372. package/src/patterns/store.py +0 -225
  373. package/src/patterns/terminology.py +0 -140
  374. package/src/provenance_tracker.py +0 -312
  375. package/src/qualixar_attribution.py +0 -139
  376. package/src/qualixar_watermark.py +0 -78
  377. package/src/query_optimizer.py +0 -511
  378. package/src/rate_limiter.py +0 -83
  379. package/src/search/__init__.py +0 -20
  380. package/src/search/cli.py +0 -77
  381. package/src/search/constants.py +0 -26
  382. package/src/search/engine.py +0 -241
  383. package/src/search/fusion.py +0 -122
  384. package/src/search/index_loader.py +0 -114
  385. package/src/search/methods.py +0 -162
  386. package/src/search_engine_v2.py +0 -401
  387. package/src/setup_validator.py +0 -482
  388. package/src/subscription_manager.py +0 -391
  389. package/src/tree/__init__.py +0 -59
  390. package/src/tree/builder.py +0 -185
  391. package/src/tree/nodes.py +0 -202
  392. package/src/tree/queries.py +0 -257
  393. package/src/tree/schema.py +0 -80
  394. package/src/tree_manager.py +0 -19
  395. package/src/trust/__init__.py +0 -45
  396. package/src/trust/constants.py +0 -66
  397. package/src/trust/queries.py +0 -157
  398. package/src/trust/schema.py +0 -95
  399. package/src/trust/scorer.py +0 -299
  400. package/src/trust/signals.py +0 -95
  401. package/src/trust_scorer.py +0 -44
  402. package/ui/app.js +0 -1588
  403. package/ui/js/graph-cytoscape-monolithic-backup.js +0 -1168
  404. package/ui/js/graph-cytoscape.js +0 -1168
  405. package/ui/js/graph-d3-backup.js +0 -32
  406. package/ui/js/graph.js +0 -32
  407. package/ui_server.py +0 -286
  408. /package/docs/{ACCESSIBILITY.md → v2-archive/ACCESSIBILITY.md} +0 -0
  409. /package/docs/{ARCHITECTURE.md → v2-archive/ARCHITECTURE.md} +0 -0
  410. /package/docs/{CLI-COMMANDS-REFERENCE.md → v2-archive/CLI-COMMANDS-REFERENCE.md} +0 -0
  411. /package/docs/{COMPRESSION-README.md → v2-archive/COMPRESSION-README.md} +0 -0
  412. /package/docs/{FRAMEWORK-INTEGRATIONS.md → v2-archive/FRAMEWORK-INTEGRATIONS.md} +0 -0
  413. /package/docs/{MCP-MANUAL-SETUP.md → v2-archive/MCP-MANUAL-SETUP.md} +0 -0
  414. /package/docs/{MCP-TROUBLESHOOTING.md → v2-archive/MCP-TROUBLESHOOTING.md} +0 -0
  415. /package/docs/{PATTERN-LEARNING.md → v2-archive/PATTERN-LEARNING.md} +0 -0
  416. /package/docs/{PROFILES-GUIDE.md → v2-archive/PROFILES-GUIDE.md} +0 -0
  417. /package/docs/{RESET-GUIDE.md → v2-archive/RESET-GUIDE.md} +0 -0
  418. /package/docs/{SEARCH-ENGINE-V2.2.0.md → v2-archive/SEARCH-ENGINE-V2.2.0.md} +0 -0
  419. /package/docs/{SEARCH-INTEGRATION-GUIDE.md → v2-archive/SEARCH-INTEGRATION-GUIDE.md} +0 -0
  420. /package/docs/{UI-SERVER.md → v2-archive/UI-SERVER.md} +0 -0
  421. /package/docs/{UNIVERSAL-INTEGRATION.md → v2-archive/UNIVERSAL-INTEGRATION.md} +0 -0
  422. /package/docs/{V2.2.0-OPTIONAL-SEARCH.md → v2-archive/V2.2.0-OPTIONAL-SEARCH.md} +0 -0
  423. /package/docs/{WINDOWS-INSTALL-README.txt → v2-archive/WINDOWS-INSTALL-README.txt} +0 -0
  424. /package/docs/{WINDOWS-POST-INSTALL.txt → v2-archive/WINDOWS-POST-INSTALL.txt} +0 -0
  425. /package/docs/{example_graph_usage.py → v2-archive/example_graph_usage.py} +0 -0
  426. /package/{completions → ide/completions}/slm.bash +0 -0
  427. /package/{completions → ide/completions}/slm.zsh +0 -0
  428. /package/{configs → ide/configs}/cody-commands.json +0 -0
  429. /package/{install-skills.sh → scripts/install-skills.sh} +0 -0
  430. /package/{install.ps1 → scripts/install.ps1} +0 -0
  431. /package/{install.sh → scripts/install.sh} +0 -0
@@ -0,0 +1,775 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Fact extraction — converts raw conversation turns into structured AtomicFacts.
6
+
7
+ Three extraction strategies aligned to operating modes:
8
+ Mode A Zero LLM — regex entities, date inference, keyword type classification.
9
+ Mode B Local Ollama — LLM-guided extraction with JSON output, Mode A fallback.
10
+ Mode C Cloud LLM — narrative fact extraction (2-5 per chunk), richest quality.
11
+
12
+ This module is the primary driver of encoding quality. Competitor analysis
13
+ (EverMemOS 93%, Hindsight 89.6%, Mastra 94.9%) shows that structured
14
+ extraction at encoding time — not retrieval sophistication — accounts for
15
+ the majority of benchmark score differences.
16
+
17
+ Key patterns implemented:
18
+ - Conversation chunking (5-10 turns, 2-turn overlap)
19
+ - Three-date temporal model (observation, referenced, interval)
20
+ - Typed fact classification (episodic / semantic / opinion / temporal)
21
+ - Importance scoring (entity frequency + emotional markers + recency)
22
+ - Narrative fact extraction in LLM modes (self-contained, context-rich)
23
+
24
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
25
+ License: MIT
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import logging
32
+ import re
33
+ import uuid
34
+ from typing import Any, Protocol, runtime_checkable
35
+
36
+ from superlocalmemory.core.config import EncodingConfig
37
+ from superlocalmemory.storage.models import AtomicFact, FactType, Mode, SignalType
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Protocols — accept any LLM / embedder without importing concrete classes
44
+ # ---------------------------------------------------------------------------
45
+
46
@runtime_checkable
class LLMBackboneProtocol(Protocol):
    """Minimal interface the fact extractor needs from an LLM.

    Structural typing: any object exposing these two methods satisfies
    the protocol, so no concrete LLM backbone class is imported here.
    """

    def is_available(self) -> bool:
        """Return True when the backing LLM can currently serve requests."""
        ...

    def generate(
        self,
        prompt: str,
        system: str = "",
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> str:
        """Return the model completion for *prompt*.

        ``system`` is the system message; ``temperature`` / ``max_tokens``
        of None defer to the implementation's defaults.
        """
        ...
58
+
59
+
60
@runtime_checkable
class EmbedderProtocol(Protocol):
    """Minimal interface for computing embeddings (Mode A type classification)."""

    def embed(self, text: str) -> list[float]:
        """Return the embedding vector for *text*."""
        ...
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Constants — regex patterns, markers, templates
69
+ # ---------------------------------------------------------------------------
70
+
71
# Date-like substrings: ISO, US slash format, "Month Day[, Year]", and
# relative words ("yesterday", "last week", ...). The alternatives are
# separate capture groups, so exactly one group is non-None per match.
_DATE_RE = re.compile(
    r"\b(\d{4}-\d{2}-\d{2})"  # ISO
    r"|\b(\d{1,2}/\d{1,2}/\d{2,4})"  # US
    r"|\b((?:January|February|March|April|May|June|July"
    r"|August|September|October|November|December)"
    r"\s+\d{1,2}(?:,?\s+\d{4})?)"  # Month Day Year
    r"|\b(yesterday|today|tomorrow|last\s+\w+|next\s+\w+)\b",
    re.IGNORECASE,
)

# Date ranges: "from X to Y" / "between X and Y" — two capture groups,
# terminated by punctuation or end of string.
_INTERVAL_RE = re.compile(
    r"\b(?:from|between)\s+(.+?)\s+(?:to|and|until|through)\s+(.+?)(?:[.,;]|$)",
    re.IGNORECASE,
)

# Heuristic entity detection: runs of one to four Capitalized words.
_ENTITY_RE = re.compile(
    r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b"  # Capitalized word sequences
)

_QUOTED_RE = re.compile(r'"([^"]+)"')  # Quoted strings as entities

# Phrases signaling a subjective opinion or preference (fact_type "opinion").
_OPINION_MARKERS = re.compile(
    r"\b(?:I think|I believe|I feel|in my opinion|I prefer|I like|I love|"
    r"I hate|I want|I need|I wish|personally|my favorite|"
    r"probably|seems like|might be|could be|I guess|"
    r"thinks?|believes?|prefers?|preferred|likes?|liked|loves?|loved|hates?|hated|"
    r"overrated|underrated|best|worst|favorite|"
    r"should|shouldn't|ought to|better|rather)\b",
    re.IGNORECASE,
)

# First-person event/experience phrasing (episodic facts).
_EXPERIENCE_MARKERS = re.compile(
    r"\b(?:I went|I visited|I saw|I met|I did|I made|I had|I was|"
    r"we went|we visited|we had|I've been|I've done|I used to|"
    r"I remember|I once|last time I|when I was|my experience)\b",
    re.IGNORECASE,
)

# Time-bound phrasing: deadlines, schedules, clock times, relative offsets.
_TEMPORAL_MARKERS = re.compile(
    r"\b(?:deadline|due date|expires?|scheduled|appointment|meeting|"
    r"on \w+day|at \d{1,2}:\d{2}|by \w+|until|before|after|"
    r"in \d+ (?:days?|weeks?|months?|years?)|"
    r"next week|next month|this weekend|tomorrow|yesterday)\b",
    re.IGNORECASE,
)

# Emotionally charged words — presumably used to boost importance
# scoring (per the module docstring); confirm against the scorer below.
_EMOTIONAL_KEYWORDS = frozenset({
    "love", "hate", "amazing", "terrible", "wonderful", "awful", "excited",
    "angry", "happy", "sad", "scared", "thrilled", "devastated", "furious",
    "anxious", "grateful", "disappointed", "proud", "embarrassed", "jealous",
    "best", "worst", "incredible", "horrible", "fantastic", "miserable",
})

# Lowercase sentence prefixes treated as greetings/filler and skipped.
# NOTE(review): "hi " and "hey " carry trailing spaces so bare "high"/"they"
# style words don't match — keep them if editing this tuple.
_FILLER_PREFIXES = (
    "good to see", "nice to", "hello", "hi ", "hey ", "how are you",
    "thanks", "thank you", "bye", "goodbye", "see you", "take care",
    "sure thing", "no problem", "okay",
)
129
+
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # LLM Prompt Templates
133
+ # ---------------------------------------------------------------------------
134
+
135
# System prompt for LLM-guided extraction (Modes B/C). The model is
# instructed to reply with ONLY a JSON array of fact objects
# (text, fact_type, entities, referenced_date, importance, confidence);
# downstream code parses that array as JSON. Runtime string — do not
# reflow or reword without re-validating the parser.
_SYSTEM_PROMPT = (
    "You are a precise fact extraction engine for a memory system.\n"
    "Given conversation turns, extract 2-5 atomic facts. Rules:\n"
    "1. Use EXPLICIT NAMES — never pronouns (he/she/they/it). Every fact "
    "must name the subject explicitly.\n"
    "2. Each fact must be a COMPLETE, STANDALONE statement understandable "
    "without the original conversation.\n"
    "3. Convert ALL relative time to ABSOLUTE dates when possible. "
    "'Yesterday' with session date 2024-01-15 becomes '2024-01-14'. "
    "'Next month' becomes the actual month and year.\n"
    "4. Resolve ALL coreferences. 'He went there' must become "
    "'[Person name] went to [Place name]'.\n"
    "5. Extract relationships between people when mentioned.\n"
    "6. Extract preferences, opinions, and experiences as SEPARATE facts.\n"
    "7. Skip greetings, filler, social pleasantries, and confirmations.\n"
    "8. For opinions, include a confidence between 0.0-1.0.\n\n"
    "Classify each fact:\n"
    "- episodic: personal event or experience (visited, attended, did)\n"
    "- semantic: objective fact about the world (jobs, locations, relations)\n"
    "- opinion: subjective belief or preference (likes, thinks, prefers)\n"
    "- temporal: time-bound fact with dates or deadlines\n\n"
    "Respond ONLY with a JSON array. Example:\n"
    '[{"text":"Alice works at Google as a software engineer",'
    '"fact_type":"semantic","entities":["Alice","Google"],'
    '"referenced_date":null,"importance":7,"confidence":0.95},'
    '{"text":"Alice prefers Python over Java",'
    '"fact_type":"opinion","entities":["Alice"],'
    '"referenced_date":null,"importance":5,"confidence":0.8}]'
)
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Helpers
168
+ # ---------------------------------------------------------------------------
169
+
170
+ def _new_id() -> str:
171
+ return uuid.uuid4().hex[:16]
172
+
173
+
174
+ def _split_sentences(text: str) -> list[str]:
175
+ """Split text into sentences using punctuation boundaries."""
176
+ parts = re.split(r"(?<=[.!?])\s+", text.strip())
177
+ return [p.strip() for p in parts if len(p.strip()) >= 8]
178
+
179
+
180
def _extract_date_string(text: str) -> str | None:
    """Return the first recognizable date substring in *text*, or None."""
    found = _DATE_RE.search(text)
    if found is None:
        return None
    # The pattern has alternative groups; return the first one that matched.
    return next((g.strip() for g in found.groups() if g), None)
189
+
190
+
191
+ def _try_parse_date(raw: str, reference_date: str | None = None) -> str | None:
192
+ """Attempt to resolve a date string to ISO format.
193
+
194
+ Uses dateutil.parser for structured dates and dateparser for
195
+ relative expressions ("last Monday", "next week").
196
+ Returns None on failure — never raises.
197
+ """
198
+ if not raw:
199
+ return None
200
+
201
+ # Fast path: already ISO
202
+ iso_match = re.match(r"^\d{4}-\d{2}-\d{2}$", raw.strip())
203
+ if iso_match:
204
+ return raw.strip()
205
+
206
+ # dateutil for structured dates (March 15, 2026 / 3/15/2026)
207
+ try:
208
+ from dateutil import parser as du_parser
209
+ result = du_parser.parse(raw, fuzzy=True)
210
+ return result.date().isoformat()
211
+ except Exception:
212
+ pass
213
+
214
+ # dateparser for relative dates (yesterday, last week, next Friday)
215
+ try:
216
+ import dateparser
217
+ settings: dict[str, Any] = {"PREFER_DATES_FROM": "past"}
218
+ if reference_date:
219
+ ref = dateparser.parse(reference_date)
220
+ if ref:
221
+ settings["RELATIVE_BASE"] = ref
222
+ result = dateparser.parse(raw, settings=settings)
223
+ if result:
224
+ return result.date().isoformat()
225
+ except Exception:
226
+ pass
227
+
228
+ return None
229
+
230
+
231
def _extract_interval(text: str, ref_date: str | None = None) -> tuple[str | None, str | None]:
    """Extract a temporal interval (start, end) from text; (None, None) if absent."""
    found = _INTERVAL_RE.search(text)
    if found is None:
        return None, None
    start_iso = _try_parse_date(found.group(1).strip(), ref_date)
    end_iso = _try_parse_date(found.group(2).strip(), ref_date)
    return start_iso, end_iso
238
+
239
+
240
def _extract_entities(text: str) -> list[str]:
    """Extract candidate entity names from text using regex heuristics.

    Two sources are combined:
      1. Capitalized word sequences matched by _ENTITY_RE, filtered by a
         stop list of common sentence-starting/conversational words
         (checked against the lowercased first word of each candidate).
      2. Quoted strings matched by _QUOTED_RE, when at least 2 chars long.

    Returns:
        Sorted list of unique candidate entity strings.
    """
    # Hoisted out of the match loop: the stop list is loop-invariant, and a
    # named frozenset makes the membership test's intent explicit.
    stop_words = frozenset({
        "the", "this", "that", "these", "those", "what", "when", "where",
        "which", "how", "who", "why", "also", "then", "just", "very",
        "really", "actually", "maybe", "well", "still", "even",
        "she", "he", "they", "them", "her", "him", "his", "its",
        "but", "and", "not", "yes", "yeah", "sure", "okay", "ok",
        "here", "there", "now", "today", "some", "all", "any",
        "been", "being", "have", "has", "had", "was", "were",
        "for", "with", "from", "about", "into", "over",
        # Sentence starters and conversational words
        "wow", "did", "so", "gonna", "got", "by", "thanks", "thank",
        "hey", "hi", "hello", "bye", "good", "great", "nice", "cool",
        "right", "like", "know", "think", "feel", "want", "need",
        "make", "take", "give", "tell", "said", "told", "get",
        "let", "can", "will", "would", "could", "should", "might",
        "much", "many", "more", "most", "lot", "way", "thing",
        "something", "anything", "everything", "nothing", "someone",
        "it", "my", "your", "our", "their", "me", "you", "we", "us",
        "do", "does", "if", "or", "no", "to", "at", "on", "in",
        "up", "out", "off", "too", "go", "come", "see", "look",
        "say", "ask", "try", "keep", "put", "run", "set", "move",
        "call", "end", "start", "find", "show", "hear", "play",
        "work", "read", "talk", "turn", "help", "miss", "hope",
        "love", "hate", "wish", "seem", "mean", "mind", "care",
    })

    entities: set[str] = set()

    # Capitalized word sequences (proper nouns)
    for match in _ENTITY_RE.finditer(text):
        candidate = match.group(1).strip()
        # Drop candidates whose first word is a common sentence starter.
        first_word = candidate.split()[0].lower() if candidate else ""
        if first_word not in stop_words:
            entities.add(candidate)

    # Quoted strings
    for match in _QUOTED_RE.finditer(text):
        quoted = match.group(1).strip()
        if len(quoted) >= 2:
            entities.add(quoted)

    return sorted(entities)
284
+
285
+
286
def _classify_sentence(sentence: str) -> FactType:
    """Classify a sentence by keyword markers.

    Precedence is temporal > opinion > episodic; anything unmarked is
    treated as a semantic (world) fact.
    """
    ordered_checks = (
        (_TEMPORAL_MARKERS, FactType.TEMPORAL),
        (_OPINION_MARKERS, FactType.OPINION),
        (_EXPERIENCE_MARKERS, FactType.EPISODIC),
    )
    for pattern, label in ordered_checks:
        if pattern.search(sentence):
            return label
    return FactType.SEMANTIC
295
+
296
+
297
def _score_importance(
    text: str,
    entities: list[str],
    entity_frequency: dict[str, int],
    has_date: bool,
) -> float:
    """Score importance in [0, 1] from emotion, temporality, entity prominence.

    Formula: base 0.3; +0.2 when an emotional keyword appears among the
    text's tokens; +0.2 when temporally grounded; +0.3 scaled by the most
    frequent mentioned entity's share of all entity mentions.
    """
    boost = 0.0

    # Emotional boost: any token overlap with the emotion word list.
    tokens = set(text.lower().split())
    if tokens & _EMOTIONAL_KEYWORDS:
        boost += 0.2

    # Temporal boost.
    if has_date:
        boost += 0.2

    # Entity prominence boost — frequently-mentioned entities matter more.
    if entities and entity_frequency:
        denominator = sum(entity_frequency.values()) or 1
        top_count = max((entity_frequency.get(name, 0) for name in entities), default=0)
        boost += 0.3 * (top_count / denominator)

    return min(1.0, round(0.3 + boost, 3))
329
+
330
+
331
def _signal_from_fact_type(ft: FactType) -> SignalType:
    """Map FactType to SignalType for V2 compatibility."""
    if ft is FactType.OPINION:
        return SignalType.OPINION
    if ft is FactType.TEMPORAL:
        return SignalType.TEMPORAL
    # EPISODIC, SEMANTIC, and any unrecognized value collapse to FACTUAL.
    return SignalType.FACTUAL
340
+
341
+
342
def _is_filler(text: str) -> bool:
    """Return True if text is a greeting, filler, or social pleasantry.

    Case-insensitive prefix match against _FILLER_PREFIXES.
    """
    # str.startswith accepts a tuple of prefixes — a single C-level call
    # replaces the per-prefix any() generator loop.
    return text.strip().lower().startswith(_FILLER_PREFIXES)
346
+
347
+
348
+ # ---------------------------------------------------------------------------
349
+ # Chunk builder
350
+ # ---------------------------------------------------------------------------
351
+
352
def chunk_turns(
    turns: list[str],
    chunk_size: int = 10,
    overlap: int = 2,
) -> list[list[str]]:
    """Group conversation turns into overlapping chunks.

    Produces windows of up to ``chunk_size`` turns, stepping forward by
    ``chunk_size - overlap`` so each chunk shares ``overlap`` trailing
    turns with its predecessor. A trailing remainder of at most
    ``overlap`` turns is folded into the final chunk instead of becoming
    a low-context fragment of its own.
    """
    if not turns:
        return []

    total = len(turns)
    if total <= chunk_size:
        return [list(turns)]

    stride = max(1, chunk_size - overlap)
    chunks: list[list[str]] = []
    begin = 0

    while begin < total:
        stop = min(begin + chunk_size, total)
        # Fold a tiny tail (<= overlap turns) into this chunk.
        if 0 < total - stop <= overlap:
            stop = total
        chunks.append(list(turns[begin:stop]))
        if stop >= total:
            break
        begin += stride

    return chunks
385
+
386
+
387
+ # ---------------------------------------------------------------------------
388
+ # FactExtractor
389
+ # ---------------------------------------------------------------------------
390
+
391
+ class FactExtractor:
392
+ """Extract structured AtomicFacts from conversation turns.
393
+
394
+ Strategies:
395
+ Mode A — Rule-based: regex entities, keyword classification, heuristic importance.
396
+ Mode B — Local LLM (Ollama): structured JSON extraction, Mode A fallback.
397
+ Mode C — Cloud LLM: narrative fact extraction (2-5 per chunk), richest output.
398
+ """
399
+
400
    def __init__(
        self,
        config: EncodingConfig,
        llm: LLMBackboneProtocol | None = None,
        embedder: EmbedderProtocol | None = None,
        mode: Mode = Mode.A,
    ) -> None:
        """Initialize the extractor.

        Args:
            config: Encoding settings; this class reads ``chunk_size``,
                ``min_fact_confidence``, and ``max_facts_per_chunk``.
            llm: Optional LLM backbone used in Mode B/C extraction;
                when None, extraction always falls back to rule-based.
            embedder: Optional embedder dependency (stored; not used by
                the extraction paths visible in this module).
            mode: Extraction strategy selector (defaults to rule-based
                Mode A).
        """
        self._config = config
        self._llm = llm
        self._embedder = embedder
        self._mode = mode
411
+
412
+ # ------------------------------------------------------------------
413
+ # Public API
414
+ # ------------------------------------------------------------------
415
+
416
+ def extract_facts(
417
+ self,
418
+ turns: list[str],
419
+ session_id: str,
420
+ session_date: str | None = None,
421
+ speaker_a: str = "",
422
+ speaker_b: str = "",
423
+ ) -> list[AtomicFact]:
424
+ """Extract structured atomic facts from conversation turns.
425
+
426
+ Chunks the conversation into overlapping windows, extracts facts from
427
+ each chunk, and deduplicates the merged results.
428
+
429
+ Args:
430
+ turns: Raw conversation turn strings.
431
+ session_id: Identifier for the conversation session.
432
+ session_date: ISO-8601 date of the session (observation date).
433
+ speaker_a: Name/identifier for the first speaker (e.g. user).
434
+ speaker_b: Name/identifier for the second speaker (e.g. assistant).
435
+
436
+ Returns:
437
+ Deduplicated list of AtomicFact objects.
438
+ """
439
+ if not turns:
440
+ return []
441
+
442
+ chunks = chunk_turns(turns, self._config.chunk_size, overlap=2)
443
+ all_facts: list[AtomicFact] = []
444
+
445
+ for chunk in chunks:
446
+ chunk_facts = self._extract_chunk(
447
+ chunk, session_id, session_date, speaker_a, speaker_b,
448
+ )
449
+ all_facts.extend(chunk_facts)
450
+
451
+ return self._deduplicate(all_facts)
452
+
453
+ # ------------------------------------------------------------------
454
+ # Chunk-level dispatch
455
+ # ------------------------------------------------------------------
456
+
457
+ def _extract_chunk(
458
+ self,
459
+ turns: list[str],
460
+ session_id: str,
461
+ session_date: str | None,
462
+ speaker_a: str,
463
+ speaker_b: str,
464
+ ) -> list[AtomicFact]:
465
+ """Extract facts from a single chunk — dispatches by mode."""
466
+ use_llm = (
467
+ self._mode in (Mode.B, Mode.C)
468
+ and self._llm is not None
469
+ and self._llm.is_available()
470
+ )
471
+ if use_llm:
472
+ facts = self._extract_llm(
473
+ turns, session_id, session_date, speaker_a, speaker_b,
474
+ )
475
+ if facts:
476
+ return facts
477
+ # Fallback to local if LLM produced nothing
478
+ logger.info("LLM extraction returned no facts, falling back to local.")
479
+
480
+ return self._extract_local(
481
+ turns, session_id, session_date, speaker_a, speaker_b,
482
+ )
483
+
484
+ # ------------------------------------------------------------------
485
+ # Mode A: Rule-based extraction
486
+ # ------------------------------------------------------------------
487
+
488
+ def _extract_local(
489
+ self,
490
+ turns: list[str],
491
+ session_id: str,
492
+ session_date: str | None,
493
+ speaker_a: str,
494
+ speaker_b: str,
495
+ ) -> list[AtomicFact]:
496
+ """Rule-based extraction: regex entities, keyword classification, scoring."""
497
+ combined = "\n".join(turns)
498
+ sentences = _split_sentences(combined)
499
+ if not sentences:
500
+ # If no proper sentences, treat each turn as a sentence
501
+ sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
502
+
503
+ # Build entity frequency map for importance scoring
504
+ entity_freq: dict[str, int] = {}
505
+ for sent in sentences:
506
+ for ent in _extract_entities(sent):
507
+ entity_freq[ent] = entity_freq.get(ent, 0) + 1
508
+
509
+ facts: list[AtomicFact] = []
510
+ seen_texts: set[str] = set()
511
+
512
+ for sent in sentences:
513
+ if _is_filler(sent):
514
+ continue
515
+ normalized = sent.strip()
516
+ if normalized in seen_texts or len(normalized) < 10:
517
+ continue
518
+ seen_texts.add(normalized)
519
+
520
+ # Resolve [Speaker]: prefix to "Speaker" in content
521
+ # "[Caroline]: I went to..." → "Caroline: I went to..."
522
+ import re as _re
523
+ _spk_match = _re.match(r"^\[([A-Za-z ]+)\]:\s*", normalized)
524
+ if _spk_match:
525
+ speaker_name = _spk_match.group(1)
526
+ normalized = f"{speaker_name}: {normalized[_spk_match.end():]}"
527
+
528
+ entities = _extract_entities(normalized)
529
+ fact_type = _classify_sentence(normalized)
530
+
531
+ # Three-date model: extract and resolve relative dates
532
+ raw_date = _extract_date_string(normalized)
533
+ referenced_date = _try_parse_date(raw_date, session_date) if raw_date else None
534
+ interval_start, interval_end = _extract_interval(normalized, session_date)
535
+
536
+ # Resolve relative dates in content for better retrieval
537
+ # "I went yesterday" + session_date=2023-05-08 → "I went on 2023-05-07"
538
+ if raw_date and referenced_date and raw_date.lower() in (
539
+ "yesterday", "today", "last week", "last month", "last year",
540
+ "this morning", "this afternoon", "this evening",
541
+ "the other day", "recently", "the day before",
542
+ ):
543
+ date_str = referenced_date[:10] # YYYY-MM-DD
544
+ normalized = normalized.replace(raw_date, f"on {date_str}")
545
+
546
+ has_date = referenced_date is not None or interval_start is not None
547
+ importance = _score_importance(normalized, entities, entity_freq, has_date)
548
+
549
+ if importance < self._config.min_fact_confidence:
550
+ continue
551
+
552
+ # Determine speaker from turn position heuristic
553
+ speaker = self._infer_speaker(normalized, turns, speaker_a, speaker_b)
554
+
555
+ facts.append(AtomicFact(
556
+ fact_id=_new_id(),
557
+ content=normalized,
558
+ fact_type=fact_type,
559
+ entities=entities,
560
+ observation_date=session_date,
561
+ referenced_date=referenced_date,
562
+ interval_start=interval_start,
563
+ interval_end=interval_end,
564
+ confidence=0.7 if fact_type == FactType.SEMANTIC else 0.6,
565
+ importance=importance,
566
+ session_id=session_id,
567
+ signal_type=_signal_from_fact_type(fact_type),
568
+ ))
569
+
570
+ # Cap at max_facts_per_chunk, keeping highest importance
571
+ facts.sort(key=lambda f: f.importance, reverse=True)
572
+ return facts[: self._config.max_facts_per_chunk]
573
+
574
+ # ------------------------------------------------------------------
575
+ # Mode B/C: LLM-based extraction
576
+ # ------------------------------------------------------------------
577
+
578
    def _extract_llm(
        self,
        turns: list[str],
        session_id: str,
        session_date: str | None,
        speaker_a: str,
        speaker_b: str,
    ) -> list[AtomicFact]:
        """LLM-guided extraction: structured JSON prompt, parsed into AtomicFacts.

        Builds a user prompt embedding the raw conversation and speaker
        labels, sends it with the module-level _SYSTEM_PROMPT, and hands
        the raw completion to _parse_llm_response. Any backend failure is
        logged and yields an empty list (callers then fall back to the
        rule-based path).
        """
        conversation_text = "\n".join(turns)
        # Only include the speaker labels that were actually supplied.
        speakers = []
        if speaker_a:
            speakers.append(f"Speaker A: {speaker_a}")
        if speaker_b:
            speakers.append(f"Speaker B: {speaker_b}")
        speaker_info = ", ".join(speakers) if speakers else "unknown"

        prompt = (
            f"Extract atomic facts from the following conversation.\n"
            f"Speakers: {speaker_info}\n"
            f"Conversation date: {session_date or 'unknown'}\n\n"
            f"--- CONVERSATION ---\n{conversation_text}\n--- END ---\n\n"
            f"Rules:\n"
            f"- Extract 2-5 comprehensive, self-contained facts.\n"
            f"- Use explicit names (never pronouns).\n"
            f"- Each fact must make sense WITHOUT the original conversation.\n"
            f"- For dates mentioned (\"yesterday\", \"next week\"), resolve to "
            f"ISO format relative to {session_date or 'today'}.\n"
            f"- Skip greetings, filler, and confirmations.\n"
            f"- importance: 1 (trivial) to 10 (critical)\n"
            f"- confidence: 0.0 (uncertain) to 1.0 (definite)\n\n"
            f"Respond with ONLY a JSON array."
        )

        try:
            # Temperature 0 for deterministic extraction; callers guarantee
            # self._llm is non-None on this path (see _extract_chunk).
            raw = self._llm.generate(  # type: ignore[union-attr]
                prompt=prompt,
                system=_SYSTEM_PROMPT,
                temperature=0.0,
                max_tokens=1024,
            )
            return self._parse_llm_response(raw, session_id, session_date)
        except Exception as exc:
            logger.warning("LLM fact extraction failed: %s", exc)
            return []
623
+
624
+ def _parse_llm_response(
625
+ self,
626
+ raw: str,
627
+ session_id: str,
628
+ session_date: str | None,
629
+ ) -> list[AtomicFact]:
630
+ """Parse JSON array from LLM response into AtomicFact list."""
631
+ if not raw or not raw.strip():
632
+ return []
633
+
634
+ # Extract JSON array from potentially wrapped response
635
+ try:
636
+ match = re.search(r"\[.*\]", raw, re.DOTALL)
637
+ if not match:
638
+ logger.warning("No JSON array found in LLM response.")
639
+ return []
640
+ items = json.loads(match.group())
641
+ if not isinstance(items, list):
642
+ return []
643
+ except (json.JSONDecodeError, ValueError) as exc:
644
+ logger.warning("JSON parse error in LLM fact response: %s", exc)
645
+ return []
646
+
647
+ facts: list[AtomicFact] = []
648
+ for item in items[:10]: # Hard cap at 10 per chunk
649
+ if not isinstance(item, dict):
650
+ continue
651
+ fact = self._item_to_fact(item, session_id, session_date)
652
+ if fact is not None:
653
+ facts.append(fact)
654
+
655
+ return facts
656
+
657
+ def _item_to_fact(
658
+ self,
659
+ item: dict[str, Any],
660
+ session_id: str,
661
+ session_date: str | None,
662
+ ) -> AtomicFact | None:
663
+ """Convert a single LLM JSON item to an AtomicFact.
664
+
665
+ Returns None if the item is malformed or is filler.
666
+ """
667
+ text = str(item.get("text", "")).strip()
668
+ if not text or len(text) < 8 or _is_filler(text):
669
+ return None
670
+
671
+ # Fact type
672
+ raw_type = str(item.get("fact_type", item.get("type", "semantic"))).lower()
673
+ type_map = {
674
+ "episodic": FactType.EPISODIC,
675
+ "experience": FactType.EPISODIC,
676
+ "semantic": FactType.SEMANTIC,
677
+ "world": FactType.SEMANTIC,
678
+ "opinion": FactType.OPINION,
679
+ "temporal": FactType.TEMPORAL,
680
+ }
681
+ fact_type = type_map.get(raw_type, FactType.SEMANTIC)
682
+
683
+ # Entities
684
+ raw_entities = item.get("entities", [])
685
+ if isinstance(raw_entities, list):
686
+ entities = [str(e).strip() for e in raw_entities if str(e).strip()]
687
+ elif isinstance(raw_entities, str):
688
+ entities = [raw_entities.strip()] if raw_entities.strip() else []
689
+ else:
690
+ entities = _extract_entities(text)
691
+
692
+ # Referenced date — from LLM or inferred
693
+ ref_date_raw = item.get("referenced_date") or item.get("date")
694
+ referenced_date: str | None = None
695
+ if ref_date_raw and str(ref_date_raw).strip().lower() != "null":
696
+ referenced_date = _try_parse_date(str(ref_date_raw), session_date)
697
+
698
+ # Interval
699
+ interval_start = item.get("interval_start")
700
+ interval_end = item.get("interval_end")
701
+ if interval_start:
702
+ interval_start = _try_parse_date(str(interval_start), session_date)
703
+ if interval_end:
704
+ interval_end = _try_parse_date(str(interval_end), session_date)
705
+
706
+ # Importance (LLM returns 1-10, we normalize to 0.0-1.0)
707
+ raw_importance = item.get("importance", 5)
708
+ try:
709
+ importance = min(1.0, max(0.0, float(raw_importance) / 10.0))
710
+ except (TypeError, ValueError):
711
+ importance = 0.5
712
+
713
+ # Confidence
714
+ raw_conf = item.get("confidence", 0.8)
715
+ try:
716
+ confidence = min(1.0, max(0.0, float(raw_conf)))
717
+ except (TypeError, ValueError):
718
+ confidence = 0.8
719
+
720
+ return AtomicFact(
721
+ fact_id=_new_id(),
722
+ content=text,
723
+ fact_type=fact_type,
724
+ entities=entities,
725
+ observation_date=session_date,
726
+ referenced_date=referenced_date,
727
+ interval_start=interval_start,
728
+ interval_end=interval_end,
729
+ confidence=confidence,
730
+ importance=importance,
731
+ session_id=session_id,
732
+ signal_type=_signal_from_fact_type(fact_type),
733
+ )
734
+
735
+ # ------------------------------------------------------------------
736
+ # Speaker inference (Mode A heuristic)
737
+ # ------------------------------------------------------------------
738
+
739
+ @staticmethod
740
+ def _infer_speaker(
741
+ sentence: str,
742
+ turns: list[str],
743
+ speaker_a: str,
744
+ speaker_b: str,
745
+ ) -> str:
746
+ """Infer which speaker said a sentence based on turn position.
747
+
748
+ Checks which turn contains the sentence and uses even/odd indexing
749
+ (even = speaker_a, odd = speaker_b by convention).
750
+ """
751
+ if not speaker_a and not speaker_b:
752
+ return ""
753
+ for i, turn in enumerate(turns):
754
+ if sentence in turn:
755
+ return speaker_a if i % 2 == 0 else speaker_b
756
+ return speaker_a or speaker_b
757
+
758
+ # ------------------------------------------------------------------
759
+ # Deduplication
760
+ # ------------------------------------------------------------------
761
+
762
+ @staticmethod
763
+ def _deduplicate(facts: list[AtomicFact]) -> list[AtomicFact]:
764
+ """Remove near-duplicate facts by content normalization.
765
+
766
+ Uses lowercased, whitespace-collapsed content as dedup key.
767
+ When duplicates exist, keeps the one with higher importance.
768
+ """
769
+ seen: dict[str, AtomicFact] = {}
770
+ for fact in facts:
771
+ key = re.sub(r"\s+", " ", fact.content.lower().strip())
772
+ existing = seen.get(key)
773
+ if existing is None or fact.importance > existing.importance:
774
+ seen[key] = fact
775
+ return list(seen.values())