rubino-agent 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (376) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +115 -0
  4. data/.rubocop_todo.yml +955 -0
  5. data/.ruby-version +1 -0
  6. data/AGENTS.md +97 -0
  7. data/CHANGELOG.md +344 -0
  8. data/CONTRIBUTING.md +69 -0
  9. data/LICENSE +21 -0
  10. data/README.md +200 -0
  11. data/Rakefile +8 -0
  12. data/docs/agents.md +190 -0
  13. data/docs/api/v1.md +414 -0
  14. data/docs/architecture.md +177 -0
  15. data/docs/commands.md +375 -0
  16. data/docs/configuration.md +590 -0
  17. data/docs/getting-started.md +143 -0
  18. data/docs/jobs.md +332 -0
  19. data/docs/mcp.md +128 -0
  20. data/docs/memory.md +98 -0
  21. data/docs/models-and-keys.md +173 -0
  22. data/docs/oauth-providers.md +145 -0
  23. data/docs/plugins.md +195 -0
  24. data/docs/security.md +145 -0
  25. data/docs/skills.md +322 -0
  26. data/docs/tools.md +395 -0
  27. data/docs/troubleshooting.md +73 -0
  28. data/exe/rubino +9 -0
  29. data/install.sh +275 -0
  30. data/lib/rubino/active_skill.rb +50 -0
  31. data/lib/rubino/agent/agent_registry.rb +120 -0
  32. data/lib/rubino/agent/backoff_policy.rb +116 -0
  33. data/lib/rubino/agent/definition.rb +128 -0
  34. data/lib/rubino/agent/degenerate_recovery.rb +271 -0
  35. data/lib/rubino/agent/fallback_chain.rb +194 -0
  36. data/lib/rubino/agent/iteration_budget.rb +50 -0
  37. data/lib/rubino/agent/loop.rb +617 -0
  38. data/lib/rubino/agent/model_call_runner.rb +383 -0
  39. data/lib/rubino/agent/prompts/build.txt +69 -0
  40. data/lib/rubino/agent/prompts/compaction.txt +20 -0
  41. data/lib/rubino/agent/prompts/explore.txt +19 -0
  42. data/lib/rubino/agent/prompts/general.txt +20 -0
  43. data/lib/rubino/agent/prompts/plan.txt +31 -0
  44. data/lib/rubino/agent/response_validator.rb +70 -0
  45. data/lib/rubino/agent/router.rb +65 -0
  46. data/lib/rubino/agent/runner.rb +195 -0
  47. data/lib/rubino/agent/tool_executor.rb +402 -0
  48. data/lib/rubino/agent/truncation_continuation.rb +137 -0
  49. data/lib/rubino/api/middleware/auth.rb +43 -0
  50. data/lib/rubino/api/middleware/error_handler.rb +65 -0
  51. data/lib/rubino/api/middleware/json_parser.rb +100 -0
  52. data/lib/rubino/api/middleware/observability.rb +59 -0
  53. data/lib/rubino/api/middleware/rate_limit.rb +136 -0
  54. data/lib/rubino/api/operations/approvals/decide_operation.rb +49 -0
  55. data/lib/rubino/api/operations/clarifications/decide_operation.rb +44 -0
  56. data/lib/rubino/api/operations/cron_jobs/create_operation.rb +46 -0
  57. data/lib/rubino/api/operations/cron_jobs/delete_operation.rb +36 -0
  58. data/lib/rubino/api/operations/cron_jobs/list_operation.rb +55 -0
  59. data/lib/rubino/api/operations/cron_jobs/pause_operation.rb +34 -0
  60. data/lib/rubino/api/operations/cron_jobs/resume_operation.rb +34 -0
  61. data/lib/rubino/api/operations/cron_jobs/schedule_validation.rb +30 -0
  62. data/lib/rubino/api/operations/cron_jobs/show_operation.rb +32 -0
  63. data/lib/rubino/api/operations/cron_jobs/trigger_operation.rb +38 -0
  64. data/lib/rubino/api/operations/cron_jobs/update_operation.rb +42 -0
  65. data/lib/rubino/api/operations/files/read_operation.rb +40 -0
  66. data/lib/rubino/api/operations/files/upload_operation.rb +175 -0
  67. data/lib/rubino/api/operations/health_operation.rb +46 -0
  68. data/lib/rubino/api/operations/memory/delete_operation.rb +32 -0
  69. data/lib/rubino/api/operations/memory/index_operation.rb +80 -0
  70. data/lib/rubino/api/operations/memory/stats_operation.rb +28 -0
  71. data/lib/rubino/api/operations/metrics_operation.rb +18 -0
  72. data/lib/rubino/api/operations/mode/show_operation.rb +29 -0
  73. data/lib/rubino/api/operations/mode/update_operation.rb +42 -0
  74. data/lib/rubino/api/operations/models/list_operation.rb +45 -0
  75. data/lib/rubino/api/operations/oauth/connections/disconnect_operation.rb +77 -0
  76. data/lib/rubino/api/operations/oauth/connections/list_operation.rb +36 -0
  77. data/lib/rubino/api/operations/oauth/providers/callback_operation.rb +82 -0
  78. data/lib/rubino/api/operations/oauth/providers/connect_operation.rb +44 -0
  79. data/lib/rubino/api/operations/oauth/providers/list_operation.rb +35 -0
  80. data/lib/rubino/api/operations/oauth/serializer.rb +21 -0
  81. data/lib/rubino/api/operations/runs/create_operation.rb +77 -0
  82. data/lib/rubino/api/operations/runs/events_operation.rb +195 -0
  83. data/lib/rubino/api/operations/runs/stop_operation.rb +34 -0
  84. data/lib/rubino/api/operations/sessions/create_operation.rb +46 -0
  85. data/lib/rubino/api/operations/sessions/delete_operation.rb +33 -0
  86. data/lib/rubino/api/operations/sessions/index_operation.rb +82 -0
  87. data/lib/rubino/api/operations/sessions/retry_operation.rb +45 -0
  88. data/lib/rubino/api/operations/sessions/show_operation.rb +59 -0
  89. data/lib/rubino/api/operations/sessions/undo_operation.rb +38 -0
  90. data/lib/rubino/api/operations/skills/list_operation.rb +34 -0
  91. data/lib/rubino/api/operations/skills/toggle_operation.rb +40 -0
  92. data/lib/rubino/api/operations/tasks/index_operation.rb +30 -0
  93. data/lib/rubino/api/operations/tasks/serializer.rb +60 -0
  94. data/lib/rubino/api/operations/tasks/show_operation.rb +33 -0
  95. data/lib/rubino/api/operations/tasks/stop_operation.rb +47 -0
  96. data/lib/rubino/api/request.rb +54 -0
  97. data/lib/rubino/api/responses.rb +64 -0
  98. data/lib/rubino/api/router.rb +72 -0
  99. data/lib/rubino/api/schemas.rb +103 -0
  100. data/lib/rubino/api/server.rb +102 -0
  101. data/lib/rubino/api/tls.rb +108 -0
  102. data/lib/rubino/attachments/classification.rb +16 -0
  103. data/lib/rubino/attachments/classify.rb +171 -0
  104. data/lib/rubino/attachments/defang.rb +47 -0
  105. data/lib/rubino/attachments/policy.rb +36 -0
  106. data/lib/rubino/attachments/preamble.rb +120 -0
  107. data/lib/rubino/boot/encryption_key.rb +32 -0
  108. data/lib/rubino/cli/chat/bang_shell.rb +257 -0
  109. data/lib/rubino/cli/chat/completion_builder.rb +290 -0
  110. data/lib/rubino/cli/chat/idle_card_host.rb +69 -0
  111. data/lib/rubino/cli/chat/image_inbox.rb +168 -0
  112. data/lib/rubino/cli/chat/session_resolver.rb +176 -0
  113. data/lib/rubino/cli/chat_command.rb +1674 -0
  114. data/lib/rubino/cli/commands.rb +250 -0
  115. data/lib/rubino/cli/config_command.rb +96 -0
  116. data/lib/rubino/cli/doctor_command.rb +251 -0
  117. data/lib/rubino/cli/jobs_command.rb +60 -0
  118. data/lib/rubino/cli/memory_command.rb +135 -0
  119. data/lib/rubino/cli/onboarding_wizard.rb +207 -0
  120. data/lib/rubino/cli/server_command.rb +139 -0
  121. data/lib/rubino/cli/session_command.rb +125 -0
  122. data/lib/rubino/cli/setup_command.rb +107 -0
  123. data/lib/rubino/cli/skills_command.rb +85 -0
  124. data/lib/rubino/cli/tools_command.rb +81 -0
  125. data/lib/rubino/cli/trust_gate.rb +71 -0
  126. data/lib/rubino/commands/built_ins.rb +46 -0
  127. data/lib/rubino/commands/command.rb +116 -0
  128. data/lib/rubino/commands/executor.rb +550 -0
  129. data/lib/rubino/commands/handlers/agents.rb +510 -0
  130. data/lib/rubino/commands/handlers/config.rb +88 -0
  131. data/lib/rubino/commands/handlers/help.rb +148 -0
  132. data/lib/rubino/commands/handlers/jobs.rb +71 -0
  133. data/lib/rubino/commands/handlers/mcp.rb +229 -0
  134. data/lib/rubino/commands/handlers/memory.rb +200 -0
  135. data/lib/rubino/commands/handlers/sessions.rb +207 -0
  136. data/lib/rubino/commands/handlers/skills.rb +195 -0
  137. data/lib/rubino/commands/handlers/status.rb +211 -0
  138. data/lib/rubino/commands/loader.rb +90 -0
  139. data/lib/rubino/config/configuration.rb +455 -0
  140. data/lib/rubino/config/defaults.rb +569 -0
  141. data/lib/rubino/config/loader.rb +115 -0
  142. data/lib/rubino/config/reasoning_prefs.rb +67 -0
  143. data/lib/rubino/config/writer.rb +72 -0
  144. data/lib/rubino/context/compressor.rb +149 -0
  145. data/lib/rubino/context/environment_inspector.rb +176 -0
  146. data/lib/rubino/context/file_discovery.rb +45 -0
  147. data/lib/rubino/context/message_boundary.rb +39 -0
  148. data/lib/rubino/context/prompt_assembler.rb +382 -0
  149. data/lib/rubino/context/summary_builder.rb +159 -0
  150. data/lib/rubino/context/token_budget.rb +68 -0
  151. data/lib/rubino/context/tool_pair_sanitizer.rb +70 -0
  152. data/lib/rubino/database/connection.rb +77 -0
  153. data/lib/rubino/database/migrations/001_create_initial_schema.rb +156 -0
  154. data/lib/rubino/database/migrations/002_create_runs.rb +45 -0
  155. data/lib/rubino/database/migrations/003_create_skill_states.rb +15 -0
  156. data/lib/rubino/database/migrations/004_create_cron_jobs.rb +36 -0
  157. data/lib/rubino/database/migrations/005_create_oauth_connections.rb +27 -0
  158. data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +34 -0
  159. data/lib/rubino/database/migrations/007_create_messages_fts.rb +59 -0
  160. data/lib/rubino/database/migrations/008_create_memory_facts.rb +75 -0
  161. data/lib/rubino/database/migrations/009_create_memory_graph.rb +55 -0
  162. data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +20 -0
  163. data/lib/rubino/database/migrator.rb +48 -0
  164. data/lib/rubino/documents/converters/csv.rb +79 -0
  165. data/lib/rubino/documents/converters/docx.rb +129 -0
  166. data/lib/rubino/documents/converters/html.rb +28 -0
  167. data/lib/rubino/documents/converters/json.rb +35 -0
  168. data/lib/rubino/documents/converters/pdf.rb +59 -0
  169. data/lib/rubino/documents/converters/plain.rb +68 -0
  170. data/lib/rubino/documents/converters/pptx.rb +64 -0
  171. data/lib/rubino/documents/converters/xlsx.rb +62 -0
  172. data/lib/rubino/documents/converters/xml.rb +45 -0
  173. data/lib/rubino/documents/html.rb +71 -0
  174. data/lib/rubino/documents/registry.rb +68 -0
  175. data/lib/rubino/documents/table.rb +63 -0
  176. data/lib/rubino/documents.rb +50 -0
  177. data/lib/rubino/errors.rb +119 -0
  178. data/lib/rubino/files/workspace.rb +93 -0
  179. data/lib/rubino/interaction/cancel_token.rb +43 -0
  180. data/lib/rubino/interaction/clipboard_image.rb +84 -0
  181. data/lib/rubino/interaction/event_bus.rb +48 -0
  182. data/lib/rubino/interaction/events.rb +101 -0
  183. data/lib/rubino/interaction/image_input.rb +127 -0
  184. data/lib/rubino/interaction/input_queue.rb +117 -0
  185. data/lib/rubino/interaction/lifecycle.rb +299 -0
  186. data/lib/rubino/interaction/probe.rb +65 -0
  187. data/lib/rubino/interaction/state.rb +56 -0
  188. data/lib/rubino/jobs/cron_job_repository.rb +75 -0
  189. data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +32 -0
  190. data/lib/rubino/jobs/handlers/compact_session_job.rb +21 -0
  191. data/lib/rubino/jobs/handlers/distill_skill_job.rb +186 -0
  192. data/lib/rubino/jobs/handlers/extract_memory_job.rb +37 -0
  193. data/lib/rubino/jobs/handlers/summarize_session_job.rb +21 -0
  194. data/lib/rubino/jobs/queue.rb +184 -0
  195. data/lib/rubino/jobs/registry.rb +45 -0
  196. data/lib/rubino/jobs/runner.rb +79 -0
  197. data/lib/rubino/jobs/scheduler.rb +138 -0
  198. data/lib/rubino/jobs/webhook_delivery.rb +225 -0
  199. data/lib/rubino/jobs/worker.rb +59 -0
  200. data/lib/rubino/llm/adapter_factory.rb +47 -0
  201. data/lib/rubino/llm/adapter_response.rb +65 -0
  202. data/lib/rubino/llm/auxiliary_client.rb +61 -0
  203. data/lib/rubino/llm/bedrock_bearer_client.rb +235 -0
  204. data/lib/rubino/llm/content_builder.rb +55 -0
  205. data/lib/rubino/llm/credential_check.rb +93 -0
  206. data/lib/rubino/llm/error_classifier.rb +364 -0
  207. data/lib/rubino/llm/fake_provider.rb +292 -0
  208. data/lib/rubino/llm/inline_think_filter.rb +58 -0
  209. data/lib/rubino/llm/model_catalog.rb +29 -0
  210. data/lib/rubino/llm/provider_resolver.rb +48 -0
  211. data/lib/rubino/llm/reasoning_manager.rb +100 -0
  212. data/lib/rubino/llm/request.rb +56 -0
  213. data/lib/rubino/llm/ruby_llm_adapter.rb +794 -0
  214. data/lib/rubino/llm/scenario_loader.rb +68 -0
  215. data/lib/rubino/llm/scenario_selector.rb +80 -0
  216. data/lib/rubino/llm/scenarios/agent-creates-cron-failure.yml +29 -0
  217. data/lib/rubino/llm/scenarios/agent-creates-cron.yml +36 -0
  218. data/lib/rubino/llm/scenarios/analysis.yml +501 -0
  219. data/lib/rubino/llm/scenarios/complex-analysis.yml +598 -0
  220. data/lib/rubino/llm/scenarios/failure.yml +65 -0
  221. data/lib/rubino/llm/scenarios/happy-path.yml +24 -0
  222. data/lib/rubino/llm/scenarios/provider-quota-completed.yml +14 -0
  223. data/lib/rubino/llm/scenarios/wide-table.yml +121 -0
  224. data/lib/rubino/llm/scenarios/with-approvals.yml +50 -0
  225. data/lib/rubino/llm/scenarios/with-artifacts.yml +98 -0
  226. data/lib/rubino/llm/scenarios/with-clarify.yml +32 -0
  227. data/lib/rubino/llm/scenarios/with-reasoning.yml +175 -0
  228. data/lib/rubino/llm/scenarios/with-uploads.yml +104 -0
  229. data/lib/rubino/llm/thinking_support.rb +84 -0
  230. data/lib/rubino/llm/tool_bridge.rb +89 -0
  231. data/lib/rubino/logger.rb +99 -0
  232. data/lib/rubino/mcp/manager.rb +180 -0
  233. data/lib/rubino/mcp/mcp_tool_wrapper.rb +69 -0
  234. data/lib/rubino/mcp.rb +57 -0
  235. data/lib/rubino/memory/backend.rb +104 -0
  236. data/lib/rubino/memory/backends/default.rb +101 -0
  237. data/lib/rubino/memory/backends/sqlite.rb +653 -0
  238. data/lib/rubino/memory/backends.rb +53 -0
  239. data/lib/rubino/memory/deduplicator.rb +74 -0
  240. data/lib/rubino/memory/extractor.rb +85 -0
  241. data/lib/rubino/memory/flusher.rb +31 -0
  242. data/lib/rubino/memory/retriever.rb +50 -0
  243. data/lib/rubino/memory/sqlite_extraction_prompt.rb +70 -0
  244. data/lib/rubino/memory/sqlite_graph.rb +154 -0
  245. data/lib/rubino/memory/store.rb +228 -0
  246. data/lib/rubino/memory/threat_scanner.rb +68 -0
  247. data/lib/rubino/metrics.rb +175 -0
  248. data/lib/rubino/modes.rb +93 -0
  249. data/lib/rubino/oauth/connection_repository.rb +95 -0
  250. data/lib/rubino/oauth/provider/github.rb +75 -0
  251. data/lib/rubino/oauth/provider/google.rb +59 -0
  252. data/lib/rubino/oauth/provider.rb +149 -0
  253. data/lib/rubino/oauth/registry.rb +86 -0
  254. data/lib/rubino/oauth/token_encryptor.rb +87 -0
  255. data/lib/rubino/plugins/registry.rb +75 -0
  256. data/lib/rubino/plugins.rb +86 -0
  257. data/lib/rubino/run/approval_gate.rb +243 -0
  258. data/lib/rubino/run/attachment_downloader.rb +166 -0
  259. data/lib/rubino/run/event_store.rb +74 -0
  260. data/lib/rubino/run/executor.rb +383 -0
  261. data/lib/rubino/run/gate_registry.rb +39 -0
  262. data/lib/rubino/run/recorder.rb +69 -0
  263. data/lib/rubino/run/repository.rb +118 -0
  264. data/lib/rubino/run/session_approval_cache.rb +118 -0
  265. data/lib/rubino/security/allowlist_persister.rb +55 -0
  266. data/lib/rubino/security/approval_policy.rb +227 -0
  267. data/lib/rubino/security/command_allowlist.rb +24 -0
  268. data/lib/rubino/security/dangerous_patterns.rb +118 -0
  269. data/lib/rubino/security/deny_persister.rb +73 -0
  270. data/lib/rubino/security/doom_loop_detector.rb +43 -0
  271. data/lib/rubino/security/hardline_guard.rb +105 -0
  272. data/lib/rubino/security/pattern_matcher.rb +62 -0
  273. data/lib/rubino/security/prefix_deriver.rb +124 -0
  274. data/lib/rubino/security/readonly_commands.rb +211 -0
  275. data/lib/rubino/session/exporter.rb +101 -0
  276. data/lib/rubino/session/message.rb +77 -0
  277. data/lib/rubino/session/repository.rb +295 -0
  278. data/lib/rubino/session/store.rb +198 -0
  279. data/lib/rubino/session/summary_store.rb +65 -0
  280. data/lib/rubino/skills/prompt_index.rb +85 -0
  281. data/lib/rubino/skills/registry.rb +208 -0
  282. data/lib/rubino/skills/skill.rb +176 -0
  283. data/lib/rubino/skills/skill_tool.rb +215 -0
  284. data/lib/rubino/skills/state_repository.rb +37 -0
  285. data/lib/rubino/skills/toggle.rb +26 -0
  286. data/lib/rubino/tools/answer_child_tool.rb +83 -0
  287. data/lib/rubino/tools/ask_parent_tool.rb +232 -0
  288. data/lib/rubino/tools/attach_file_tool.rb +120 -0
  289. data/lib/rubino/tools/background_tasks.rb +520 -0
  290. data/lib/rubino/tools/base.rb +222 -0
  291. data/lib/rubino/tools/custom_tool_loader.rb +119 -0
  292. data/lib/rubino/tools/edit_tool.rb +122 -0
  293. data/lib/rubino/tools/git_tool.rb +71 -0
  294. data/lib/rubino/tools/github_tool.rb +233 -0
  295. data/lib/rubino/tools/glob_tool.rb +69 -0
  296. data/lib/rubino/tools/grep_tool.rb +206 -0
  297. data/lib/rubino/tools/memory_tool.rb +184 -0
  298. data/lib/rubino/tools/multi_edit_tool.rb +110 -0
  299. data/lib/rubino/tools/patch_tool.rb +260 -0
  300. data/lib/rubino/tools/probe_tool.rb +175 -0
  301. data/lib/rubino/tools/question_tool.rb +128 -0
  302. data/lib/rubino/tools/read_attachment_tool.rb +180 -0
  303. data/lib/rubino/tools/read_tool.rb +212 -0
  304. data/lib/rubino/tools/read_tracker.rb +98 -0
  305. data/lib/rubino/tools/registry.rb +166 -0
  306. data/lib/rubino/tools/result.rb +113 -0
  307. data/lib/rubino/tools/ruby_tool.rb +0 -0
  308. data/lib/rubino/tools/session_search_tool.rb +103 -0
  309. data/lib/rubino/tools/shell_input_tool.rb +96 -0
  310. data/lib/rubino/tools/shell_kill_tool.rb +76 -0
  311. data/lib/rubino/tools/shell_output_tool.rb +72 -0
  312. data/lib/rubino/tools/shell_registry.rb +158 -0
  313. data/lib/rubino/tools/shell_tail_tool.rb +118 -0
  314. data/lib/rubino/tools/shell_tool.rb +330 -0
  315. data/lib/rubino/tools/steer_tool.rb +118 -0
  316. data/lib/rubino/tools/subagent_probe.rb +89 -0
  317. data/lib/rubino/tools/summarize_file_tool.rb +182 -0
  318. data/lib/rubino/tools/task_result_tool.rb +90 -0
  319. data/lib/rubino/tools/task_stop_tool.rb +80 -0
  320. data/lib/rubino/tools/task_tool.rb +622 -0
  321. data/lib/rubino/tools/test_tool.rb +454 -0
  322. data/lib/rubino/tools/todo_tool.rb +93 -0
  323. data/lib/rubino/tools/tool_call_repository.rb +33 -0
  324. data/lib/rubino/tools/vision_tool.rb +85 -0
  325. data/lib/rubino/tools/webfetch_tool.rb +153 -0
  326. data/lib/rubino/tools/websearch_tool.rb +179 -0
  327. data/lib/rubino/tools/write_tool.rb +61 -0
  328. data/lib/rubino/trust.rb +88 -0
  329. data/lib/rubino/ui/api.rb +296 -0
  330. data/lib/rubino/ui/base.rb +252 -0
  331. data/lib/rubino/ui/bottom_composer.rb +1599 -0
  332. data/lib/rubino/ui/cli.rb +1987 -0
  333. data/lib/rubino/ui/completion_menu.rb +321 -0
  334. data/lib/rubino/ui/completion_source.rb +284 -0
  335. data/lib/rubino/ui/escape_reader.rb +169 -0
  336. data/lib/rubino/ui/indented_io.rb +88 -0
  337. data/lib/rubino/ui/input_history.rb +108 -0
  338. data/lib/rubino/ui/live_region.rb +183 -0
  339. data/lib/rubino/ui/markdown_renderer.rb +506 -0
  340. data/lib/rubino/ui/notifier.rb +163 -0
  341. data/lib/rubino/ui/null.rb +195 -0
  342. data/lib/rubino/ui/paste_store.rb +176 -0
  343. data/lib/rubino/ui/printer_base.rb +79 -0
  344. data/lib/rubino/ui/probe_wait_indicator.rb +75 -0
  345. data/lib/rubino/ui/queued_indicators.rb +66 -0
  346. data/lib/rubino/ui/status_bar.rb +100 -0
  347. data/lib/rubino/ui/stdout_proxy.rb +161 -0
  348. data/lib/rubino/ui/streaming_markdown.rb +186 -0
  349. data/lib/rubino/ui/subagent_cards.rb +134 -0
  350. data/lib/rubino/ui/subagent_view.rb +255 -0
  351. data/lib/rubino/ui.rb +21 -0
  352. data/lib/rubino/update_check.rb +187 -0
  353. data/lib/rubino/util/duration.rb +23 -0
  354. data/lib/rubino/util/hyperlink.rb +105 -0
  355. data/lib/rubino/util/output.rb +145 -0
  356. data/lib/rubino/util/secrets_mask.rb +83 -0
  357. data/lib/rubino/version.rb +5 -0
  358. data/lib/rubino/workspace.rb +85 -0
  359. data/lib/rubino-agent.rb +5 -0
  360. data/lib/rubino.rb +318 -0
  361. data/mise.toml +2 -0
  362. data/rubino-agent.gemspec +103 -0
  363. data/skills/ruby-expert/SKILL.md +67 -0
  364. data/skills/ruby-expert/references/concurrency.md +357 -0
  365. data/skills/ruby-expert/references/datetime-and-encoding.md +363 -0
  366. data/skills/ruby-expert/references/errors-and-types.md +460 -0
  367. data/skills/ruby-expert/references/gem-authoring.md +459 -0
  368. data/skills/ruby-expert/references/language-idioms.md +465 -0
  369. data/skills/ruby-expert/references/metaprogramming.md +339 -0
  370. data/skills/ruby-expert/references/oo-design.md +553 -0
  371. data/skills/ruby-expert/references/performance.md +383 -0
  372. data/skills/ruby-expert/references/rails.md +424 -0
  373. data/skills/ruby-expert/references/security.md +404 -0
  374. data/skills/ruby-expert/references/testing.md +473 -0
  375. data/skills/ruby-expert/references/tooling.md +466 -0
  376. metadata +856 -0
@@ -0,0 +1,653 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+ require "json"
5
+ require "time"
6
+
7
+ module Rubino
8
+ module Memory
9
+ module Backends
10
+ # "Tiny-Zep" memory backend on embedded SQLite (Zep/Graphiti-inspired,
11
+ # minus the graph DB, the server, and the six-LLM-call pipeline).
12
+ #
13
+ # Three ideas are kept from Zep:
14
+ # * ATOMIC LLM-extracted facts (one declarative fact per row), via a
15
+ # single aux-LLM call per turn that both ADDs new facts and SUPERSEDES
16
+ # contradicted ones (Graphiti edge-invalidation, collapsed to 1 call).
17
+ # * BI-TEMPORAL supersession — a contradicted fact is soft-retired
18
+ # (valid_to set), not deleted; "live" memory = valid_to IS NULL, so we
19
+ # get temporal correctness without losing provenance.
20
+ # * HYBRID ranked recall — FTS5/BM25 (+ optional vector KNN) fused with
21
+ # Reciprocal Rank Fusion and lightly kind-weighted, top-k under the
22
+ # char budget. Graph (1-hop) and recency are tail SUPPLEMENTS that only
23
+ # backfill the budget after direct content matches — never outranking
24
+ # them. (Optional vector KNN via sqlite-vec when available; see #vector?.)
25
+ #
26
+ # The injection-defense floor (ThreatScanner + char-budget) is enforced on
27
+ # the write path exactly as Memory::Store does, so no fact can splice
28
+ # tainted or over-budget content into a future system prompt.
29
+ class Sqlite < Backend
30
+ include SqliteGraph
31
+
32
+ TABLE = :memory_facts
33
+ FTS = :memory_facts_fts
34
+ RRF_K = 60
35
+ DEFAULT_K = 20
36
+
37
+ # Weighted-RRF list weights for the DIRECT relevance signals (FTS/BM25 and
38
+ # vector KNN). Graph (1-hop) and recency are no longer fused here — they
39
+ # are tail supplements (see #rank) so they can never outrank a direct
40
+ # content match.
41
+ FTS_WEIGHT = 3.0
42
+ VECTOR_WEIGHT = 3.0
43
+
44
+ # Trivial words that appear in almost every fact ("user", "project") or
45
+ # carry no retrieval signal — excluded from the FTS MATCH so a probe
46
+ # like "what package manager does the user use" doesn't match every
47
+ # "User ..." fact on the word "user".
48
+ STOPWORDS = %w[
49
+ the a an of to in on at for and or is are was were be been being do does did
50
+ how what where when which who whom whose why this that these those it its
51
+ use uses used user users project projects right now
52
+ ].to_set.freeze
53
+
54
+ # Maps the backend's fact `kind` onto Memory::Store's budget group so a
55
+ # user_profile fact is metered against the user budget and everything
56
+ # else against the shared memory budget — same split as the default
57
+ # backend.
58
+ USER_KIND = "user_profile"
59
+
60
+ # Light kind weighting applied after RRF so durable user facts outrank
61
+ # one-off facts on ties.
62
+ KIND_WEIGHT = Hash.new(1.0).merge(
63
+ "user_profile" => 1.3,
64
+ "preference" => 1.2,
65
+ "env" => 1.1
66
+ ).freeze
67
+
68
# Name this backend reports for itself.
#
# @return [String] always "sqlite"
def self.backend_name
  "sqlite"
end
71
+
72
# @param config [Object, nil] forwarded unchanged to the parent Backend initializer
# @param db [Object, nil] database handle; when omitted, Rubino.database.db is used
# @param aux_client [Object, nil] stored as-is for later use by this backend
def initialize(config: nil, db: nil, aux_client: nil)
  super(config: config)
  @aux_client = aux_client
  # An explicitly injected handle wins over the process-wide database.
  @db = db || Rubino.database.db
end
77
+
78
+ # FTS5 ships with the sqlite3 gem, so the backend is always available.
79
+ # (Vector mode is a best-effort upgrade gated separately by #vector?.)
80
# Availability probe for this backend.
#
# @return [Boolean] always true — no runtime check is performed here
def available?
  true
end
83
+
84
+ # -- WRITE path --
85
+
86
# Store one fact by delegating to #insert_fact.
#
# @param kind [Object] fact kind; passed through #normalize_kind first
# @param content [String] the fact text
# @param source_session_id [Object, nil] forwarded unchanged
# @param confidence [Numeric] forwarded unchanged (default 1.0)
# @param metadata [Hash] only the symbol keys :entities and :valid_from are read
# @return [Object] whatever #insert_fact returns
def store(kind:, content:, source_session_id: nil, confidence: 1.0, metadata: {})
  attributes = {
    text: content,
    kind: normalize_kind(kind),
    # Array() lets callers pass nil, a single entity, or a list.
    entities: Array(metadata[:entities]),
    source_session_id: source_session_id,
    confidence: confidence,
    valid_from: metadata[:valid_from]
  }
  insert_fact(**attributes)
end
96
+
97
+ # Replace the first LIVE fact of `kind` whose text includes `old_text`.
98
+ # Modelled as a supersession so history is preserved.
99
# @param kind [Object] fact kind; passed through #normalize_kind
# @param old_text [String] literal substring identifying the fact to replace
# @param content [String] text of the replacement fact
# @return [Hash, nil] the row that was retired, or nil when `old_text` is
#   blank or no live fact of `kind` contains it
def replace(kind:, old_text:, content:)
  needle = old_text.to_s
  # A blank needle would turn into the pattern "%%" and match (and retire)
  # an arbitrary live fact, so treat it as "nothing to replace".
  return nil if needle.empty?

  scope = live_dataset
  # Escape LIKE metacharacters so "%" and "_" inside old_text are matched
  # literally instead of acting as wildcards.
  pattern = "%#{scope.escape_like(needle)}%"
  target = scope.where(kind: normalize_kind(kind))
                .where(Sequel.like(:text, pattern)).first
  return nil unless target

  # Retire first so the old row's chars free up before the new fact is
  # budget-checked (a same-size replace must always fit).
  # NOTE(review): retire! and insert_fact run as separate steps; if
  # insert_fact can reject the content the old row stays retired — confirm
  # whether this pair should be wrapped in a transaction.
  new_id = SecureRandom.uuid
  retire!(target[:id], new_id)
  insert_fact(text: content, kind: target[:kind],
              entities: parse_entities(target[:entities_json]),
              source_session_id: target[:source_session_id], id: new_id)
  target
end
113
+
114
+ # Hard-delete the first LIVE fact of `kind` whose text includes
115
+ # `old_text` (forget = remove from the record entirely, vs supersede).
116
# @param kind [Object] fact kind; passed through #normalize_kind
# @param old_text [String] literal substring identifying the fact to delete
# @return [Hash, nil] the deleted row, or nil when `old_text` is blank or no
#   live fact of `kind` contains it
def forget(kind:, old_text:)
  needle = old_text.to_s
  # A blank needle would become the pattern "%%" and hard-delete an
  # arbitrary live fact; refuse it instead.
  return nil if needle.empty?

  scope = live_dataset
  # Escape LIKE metacharacters so "%" and "_" inside old_text are matched
  # literally — this is a destructive path, a wildcard hit must not happen.
  pattern = "%#{scope.escape_like(needle)}%"
  target = scope.where(kind: normalize_kind(kind))
                .where(Sequel.like(:text, pattern)).first
  return nil unless target

  @db[TABLE].where(id: target[:id]).delete
  target
end
124
+
125
+ # ONE aux-LLM call over the recent turn(s): returns {add, supersede}.
126
+ # Apply is pure Ruby — insert adds (deduped + guarded), retire
127
+ # superseded rows and insert their replacement.
128
# @param session_id [Object] session whose recent turn text is extracted
# @return [Array, Object] [] when the turn text is blank or the LLM call
#   yields nothing; otherwise whatever #apply returns
def extract(session_id)
  transcript = recent_turn_text(session_id)
  return [] if transcript.strip.empty?

  extraction = call_llm(session_id: session_id, turn: transcript)
  extraction ? apply(extraction, session_id) : []
end
137
+
138
+ # -- READ path --
139
+
140
# Joins the text of every live USER_KIND fact, newest first, one per line.
#
# @return [String, nil] nil when the "memory"/"user_profile_enabled" config
#   flag is falsy or there are no such facts; otherwise the joined text,
#   cut to @config.memory_user_char_limit characters when longer
def user_profile
  return nil unless @config.dig("memory", "user_profile_enabled")

  texts = live_dataset.where(kind: USER_KIND)
                      .order(Sequel.desc(:created_at))
                      .all
                      .map { |row| row[:text] }
  return nil if texts.empty?

  profile = texts.join("\n")
  cap = @config.memory_user_char_limit
  return profile unless profile.length > cap

  profile[0...cap]
end
150
+
151
# Joins the text of the 10 newest live "project" / "env" facts, one per line.
#
# @return [String, nil] nil when the "memory"/"project_context_enabled"
#   config flag is falsy or no such fact exists
def project_context
  return nil unless @config.dig("memory", "project_context_enabled")

  texts = live_dataset.where(kind: %w[project env])
                      .order(Sequel.desc(:created_at))
                      .limit(10)
                      .all
                      .map { |row| row[:text] }
  texts.empty? ? nil : texts.join("\n")
end
159
+
160
+ # HYBRID recall over LIVE facts: FTS5/BM25 on `query` (and vector KNN when
161
+ # available) fused via RRF and kind-weighted as the direct relevance
162
+ # ranking, then graph/recency-supplemented and greedily packed under the
163
+ # memory char budget. Returns rows shaped like the default backend
164
+ # ({id:, kind:, content:, ...}) so the prompt assembler is unchanged.
165
# @param session_id [Object] accepted but not read by this method
# @param query [String, nil] forwarded to #rank
# @param k [Integer] forwarded to #rank (defaults to DEFAULT_K)
# @return [Array] #present-shaped rows, in rank order, up to the char budget
def retrieve(session_id:, query: nil, k: DEFAULT_K)
  candidates = rank(query: query, k: k)
  budget = @config.memory_char_limit
  # A nil or non-positive budget means "no cap".
  capped = budget&.positive?

  used = 0
  # Stop at the FIRST row that would overflow the budget; later (lower
  # ranked) rows are not considered even if they would still fit.
  kept = candidates.take_while do |row|
    used += row[:text].to_s.length
    !capped || used <= budget
  end
  kept.map { |row| present(row) }
end
179
+
180
+ # -- admin --
181
+
182
+ # LIVE facts only by default — a superseded fact is a tombstone, not a
183
+ # current memory, so listing it undecorated next to its replacement
184
+ # presents contradicted data as true and makes the rows disagree with
185
+ # #count/#retrieve (#82). `include_retired: true` opts into the full
186
+ # supersession history (`rubino memory list --all`).
187
# @param kind [Object, nil] when given, restrict to this kind (after #normalize_kind)
# @param limit [Integer] maximum number of rows (default 20)
# @param include_retired [Boolean] true reads the whole table instead of
#   only the live dataset
# @return [Array] #present-shaped rows, newest first
def list(kind: nil, limit: 20, include_retired: false)
  source = include_retired ? @db[TABLE] : live_dataset
  scoped = source.order(Sequel.desc(:created_at)).limit(limit)
  scoped = scoped.where(kind: normalize_kind(kind)) if kind
  scoped.all.map { |row| present(row) }
end
192
+
193
# Look up one fact (live or retired) by id prefix.
#
# @param id [String] full id or leading part of it
# @return [Object, nil] the #present-shaped first matching row, or nil when
#   `id` is blank or nothing matches
def find(id)
  prefix = id.to_s
  # A blank prefix would become the pattern "%" and return an arbitrary row.
  return nil if prefix.empty?

  table = @db[TABLE]
  # Escape LIKE metacharacters so the lookup is a literal prefix match.
  row = table.where(Sequel.like(:id, "#{table.escape_like(prefix)}%")).first
  row && present(row)
end
197
+
198
# Hard-delete facts (live or retired) whose id starts with `id`.
#
# @param id [String] full id or leading part of it
# @return [Boolean] true when at least one row was removed; false when
#   `id` is blank or nothing matched
def delete(id)
  prefix = id.to_s
  # A blank prefix would become the pattern "%" and wipe the whole table,
  # supersession history included.
  return false if prefix.empty?

  table = @db[TABLE]
  # Escape LIKE metacharacters so an id such as "%" or "_" cannot act as a
  # wildcard on this destructive path.
  # NOTE(review): every row sharing the prefix is removed, so a very short
  # prefix can delete several facts — confirm callers pass unambiguous ids.
  table.where(Sequel.like(:id, "#{table.escape_like(prefix)}%")).delete.positive?
end
201
+
202
+ # Count only LIVE facts (valid_to IS NULL) — retired/superseded rows are
203
+ # tombstones the admin surface and #list already hide.
204
# @return [Integer] number of rows in the live dataset
def count
  live_dataset.count
end
207
+
208
+ private
209
+
210
+ # ---- ranking ----
211
+
212
# Rank live facts for `query`: direct relevance hits first, then backfill.
#
# @param query [String, nil] probe text; vector matching is skipped when nil
# @param k [Integer] number of direct hits to keep before backfilling
# @return [Array<Hash>] up to k direct hits followed by #tail_backfill rows
def rank(query:, k:)
  # Direct relevance signals only: FTS/BM25, plus vector KNN when wired.
  # Each list is over-fetched (3x k) before fusion.
  pool = k * 3
  signals = [[fts_match(query, pool), FTS_WEIGHT]]
  signals << [vector_match(query, pool), VECTOR_WEIGHT] if vector? && query

  # Weighted Reciprocal Rank Fusion: weight / (RRF_K + 1-based position).
  fused = Hash.new(0.0)
  signals.each do |ids, weight|
    ids.each.with_index(1) { |id, position| fused[id] += weight / (RRF_K + position) }
  end

  # Ids that are no longer live simply drop out here (values_at -> nil).
  live_by_id = live_dataset.where(id: fused.keys).all.to_h { |row| [row[:id], row] }
  direct = live_by_id.values_at(*fused.keys)
                     .compact
                     .sort_by { |row| -(fused[row[:id]] * KIND_WEIGHT[row[:kind]]) }

  # Graph neighbours and recency are appended AFTER the direct hits rather
  # than fused into the score, so they can fill spare slots but can never
  # outrank a fact that matched the query's content.
  direct.first(k) + tail_backfill(direct, k, query)
end
241
+
242
+ # Fill the remaining budget (k − direct hits) with supplementary facts
243
+ # NOT already ranked: 1-hop graph neighbours of the query first, then
244
+ # recency. Returns [] when direct relevance already covers k.
245
# @param ranked [Array<Hash>] rows already chosen by direct relevance
# @param k [Integer] target number of rows overall
# @param query [String, nil] probe text; graph lookup is skipped when nil
# @return [Array<Hash>] at most (k - ranked.size) live rows not in `ranked`
def tail_backfill(ranked, k, query)
  open_slots = k - ranked.size
  return [] unless open_slots.positive?

  taken = ranked.map { |row| row[:id] }.to_set
  # Graph neighbours are listed before recency so they win the open slots.
  graph_ids = query && graph? ? graph_neighbors(query, k * 2) : []
  picks = (graph_ids + recency(k * 2)).reject { |id| taken.include?(id) }
                                      .uniq
                                      .first(open_slots)
  return [] if picks.empty?

  live_by_id = live_dataset.where(id: picks).all.to_h { |row| [row[:id], row] }
  # values_at keeps the pick order; ids without a live row drop out.
  live_by_id.values_at(*picks).compact
end
258
+
259
+ # BM25 ranking over live facts. FTS5's MATCH needs a sanitized query
260
+ # (bare words OR-ed) so user punctuation never raises a syntax error.
261
# @param query [String, nil] raw probe text; sanitized through #fts_terms
# @param limit [Integer] maximum number of ids to return
# @return [Array] ids of live facts in bm25 order; [] when the query yields
#   no terms or the database raises Sequel::DatabaseError
def fts_match(query, limit)
  match_expr = fts_terms(query)
  return [] if match_expr.empty?

  hits = @db[FTS]
         .select(Sequel.lit("memory_facts.id").as(:id))
         .join(Sequel.lit("memory_facts"), Sequel.lit("memory_facts.rowid = memory_facts_fts.rowid"))
         .where(Sequel.lit("memory_facts_fts MATCH ?", match_expr))
         .where(Sequel.lit("memory_facts.valid_to IS NULL"))
         .order(Sequel.lit("bm25(memory_facts_fts)"))
         .limit(limit)
  hits.all.map { |hit| hit[:id] }
rescue Sequel::DatabaseError
  # Best-effort: a failing FTS query degrades to "no direct hits".
  []
end
277
+
278
# @param limit [Integer] maximum number of ids
# @return [Array] ids of live facts ordered by created_at, newest first
def recency(limit)
  newest_first = live_dataset.order(Sequel.desc(:created_at))
  newest_first.limit(limit).select_map(:id)
end
281
+
282
+ # Best-effort vector KNN — only when sqlite-vec is wired (see #vector?).
283
+ # Kept tiny: cosine over an in-Ruby decode of the embedding blobs.
284
def vector_match(query, limit)
  probe = embed(query)
  return [] unless probe

  # Score every live fact that carries an embedding; rows whose blob cannot
  # be decoded are skipped.
  scored = live_dataset.exclude(embedding: nil).all.filter_map do |row|
    candidate = decode_embedding(row[:embedding])
    [row[:id], cosine(probe, candidate)] if candidate
  end

  # Highest similarity first, then keep only the ids.
  scored.sort_by { |(_, sim)| -sim }.first(limit).map(&:first)
rescue StandardError
  # Best-effort signal: any failure degrades to "no vector hits".
  []
end
295
+
296
# Turns free text into an FTS5 MATCH expression: up to 12 double-quoted
# lowercase tokens joined with OR. Returns "" when there is nothing to match.
def fts_terms(query)
  return "" if query.nil?

  tokens = query.to_s.downcase.scan(/[\p{L}\p{N}]+/)
  keywords = tokens.reject { |w| w.length < 2 || STOPWORDS.include?(w) }.uniq
  # An all-stopword query falls back to its raw tokens so a match is still
  # attempted instead of returning nothing.
  keywords = tokens.uniq if keywords.empty?
  keywords.first(12).map { |w| "\"#{w}\"" }.join(" OR ")
end
306
+
307
+ # ---- extraction apply ----
308
+
309
# Applies one extraction result (the parsed JSON emitted by the extractor
# model) to the store and returns the facts that were actually inserted.
#
# +result+     Hash with optional "supersede", "add" and "edges" keys. Any
#              other value (nil, an Array, a scalar) is treated as "nothing
#              to apply" and yields [].
# +session_id+ provenance recorded on every inserted fact.
#
# Model output is untrusted in shape: a lone object where a list was expected
# is accepted as a one-element list, and non-object entries are skipped.
def apply(result, session_id)
  return [] unless result.is_a?(Hash)

  stored = []
  now = Time.now.utc.iso8601

  extraction_entries(result["supersede"]).each do |s|
    old = resolve_supersede_target(s)
    # A self-supersede is a no-op (#223): when the replacement text is
    # IDENTICAL to the very row it would retire, retire-and-reinsert would
    # just mint a byte-identical twin and a useless 1-link chain. The #157
    # exclude guard hides this row from the duplicate_of check below, so it
    # has to be caught here first. Identity only, not near-dup: a genuine
    # rephrase of the retired row must still land.
    next if old && old[:text].to_s.strip == s["by_text"].to_s.strip

    # The replacement passes the SAME near-dup check a plain add runs (#157):
    # when the new fact already exists live, retire the old row pointing at
    # it instead of inserting a twin.
    if (existing_id = duplicate_of(s["by_text"], exclude_id: old && old[:id]))
      retire!(old[:id], existing_id) if old
      next
    end

    # Retire the contradicted fact before inserting its replacement so the
    # old row's chars free up for the budget check.
    # NOTE(review): if guarded_insert then refuses the replacement (it returns
    # nil for blank text and for guard refusals), the old row stays retired
    # with superseded_by pointing at an id that was never written. Confirm
    # whether that is intended or the retirement should be rolled back.
    new_id = SecureRandom.uuid
    retire!(old[:id], new_id) if old
    replacement = guarded_insert(
      text: s["by_text"], kind: s["kind"],
      entities: s["entities"], session_id: session_id, valid_from: now, id: new_id
    )
    stored << replacement if replacement
  end

  extraction_entries(result["add"]).each do |a|
    next if duplicate_of(a["text"])

    row = guarded_insert(
      text: a["text"], kind: a["kind"], entities: a["entities"],
      session_id: session_id, valid_from: a["valid_from"]
    )
    stored << row if row
  end

  # Turn-level TYPED relations (the extractor's optional edges[]) are indexed
  # once for the whole turn, tied to the first stored fact for provenance.
  index_typed_edges(result["edges"], stored.first)

  stored.compact
end

# Normalizes one extractor list field to an Array of Hash entries: a lone
# Hash is wrapped (Array() would explode it into key/value pairs) and
# anything that is not a Hash is dropped.
def extraction_entries(value)
  list = value.is_a?(Hash) ? [value] : Array(value)
  list.select { |entry| entry.is_a?(Hash) }
end
363
+
364
# Indexes the turn's typed relations against +anchor+ (a stored fact keyed by
# :id or "id", or nil). Does nothing for an empty edge list; an indexing
# failure is logged via log_skip instead of raised.
def index_typed_edges(edges, anchor)
  typed = Array(edges)
  return if typed.empty?

  anchor_id = anchor ? (anchor[:id] || anchor["id"]) : anchor
  index_fact_graph(anchor_id, [], typed: typed)
rescue StandardError => e
  log_skip(e)
end
372
+
373
+ # Insert through the injection-defense floor; swallow refusals so one
374
+ # bad fact never aborts the whole extraction batch (mirrors the
375
+ # default extractor, which silently skips dups).
376
def guarded_insert(text:, kind:, entities:, session_id:, valid_from:, id: nil)
  # Blank text is never stored.
  return nil if text.to_s.strip.empty?

  fact = {
    text: text, kind: normalize_kind(kind), entities: Array(entities),
    source_session_id: session_id, confidence: 1.0, valid_from: valid_from, id: id
  }
  insert_fact(**fact)
rescue Store::ThreatDetectedError, Store::BudgetExceededError => e
  # A refused fact is logged and reported as nil so the batch continues.
  log_skip(e)
  nil
end
387
+
388
# Finds the live fact a supersede spec refers to.
#
# +spec+ Hash from the extractor: "id" is matched as an id PREFIX; otherwise
#        "match" is matched as a substring of the fact text.
# Returns the first matching live row, or nil when neither key is usable or
# nothing matches.
#
# Both values are model-generated, so LIKE metacharacters (% _ \) in them
# are escaped: a "match" such as "100%" must mean the literal text, not
# "100" followed by anything.
def resolve_supersede_target(spec)
  live = live_dataset

  id = spec["id"].to_s
  return live.where(Sequel.like(:id, "#{live.escape_like(id)}%")).first unless id.empty?

  match = spec["match"].to_s
  return nil if match.empty?

  live.where(Sequel.like(:text, "%#{live.escape_like(match)}%")).first
end
397
+
398
+ # ---- low-level fact ops ----
399
+
400
# Writes one fact row and returns it in the presented shape.
#
# text:/kind:        the fact and its kind; both are checked by
#                    enforce_guards! before anything is written.
# entities:          entity tags, stored as JSON (NULL when empty).
# source_session_id: provenance.
# confidence:        stored as given.
# valid_from:        defaults to the insert time when blank.
# id:                caller-chosen id; a fresh UUID when nil.
# edges:             typed edges passed through to the graph index.
#
# Raises whatever enforce_guards! raises, and any error from the INSERT
# itself. A failure while indexing the graph is only logged: the fact is
# already stored at that point and a graph hiccup must not abort the write.
# The rescue is deliberately scoped to the graph step, so a failed INSERT
# (for example a collision on a caller-supplied id) always surfaces instead
# of being mistaken for a graph problem because a row with that id exists.
def insert_fact(text:, kind:, entities: [], source_session_id: nil,
                confidence: 1.0, valid_from: nil, id: nil, edges: [])
  enforce_guards!(kind, text)
  now = Time.now.utc.iso8601
  id ||= SecureRandom.uuid

  @db[TABLE].insert(
    id: id,
    text: text,
    kind: kind,
    entities_json: entities.empty? ? nil : JSON.generate(entities),
    source_session_id: source_session_id,
    confidence: confidence,
    valid_from: (valid_from.to_s.empty? ? now : valid_from),
    valid_to: nil,
    superseded_by: nil,
    embedding: maybe_embed(text),
    created_at: now,
    updated_at: now
  )

  # Graph-lite: entity nodes + co-occurrence/typed edges for this fact.
  unless entities.empty? && Array(edges).empty?
    begin
      index_fact_graph(id, entities, typed: edges)
    rescue StandardError => e
      log_skip(e) # fact stored, only graph indexing tripped
    end
  end

  present(@db[TABLE].where(id: id).first)
end
430
+
431
# Marks the fact +old_id+ as superseded by +new_id+: closes its validity
# window and records the successor. Returns the result of the UPDATE.
#
# One timestamp is taken and used for both columns, so valid_to and
# updated_at can never disagree across a second boundary.
def retire!(old_id, new_id)
  stamp = Time.now.utc.iso8601
  @db[TABLE].where(id: old_id).update(
    valid_to: stamp,
    superseded_by: new_id,
    updated_at: stamp
  )
end
438
+
439
# Dataset of facts that have not been retired (valid_to is NULL).
def live_dataset
  facts = @db[TABLE]
  facts.where(valid_to: nil)
end
442
+
443
+ # Jaccard near-dup check against the live set (Deduplicator threshold,
444
+ # no second LLM call): id of the first live near-dup, nil when none.
445
+ # +exclude_id+ skips the row being superseded so a rephrased
446
+ # replacement never matches its own retirement target (#157).
447
def duplicate_of(text, exclude_id: nil)
  incoming = word_set(text)
  return nil if incoming.empty?

  candidates = live_dataset
  candidates = candidates.exclude(id: exclude_id) if exclude_id

  candidates.select_map(%i[id text]).each do |fact_id, fact_text|
    existing = word_set(fact_text)
    next if existing.empty?

    # Jaccard similarity: shared words over all distinct words.
    shared = (existing & incoming).size
    total = (existing | incoming).size
    return fact_id if shared.to_f / total >= Deduplicator::SIMILARITY_THRESHOLD
  end

  nil
end
461
+
462
# Lowercased set of the words in +str+ (nil is treated as "").
#
# Words are runs of Unicode word characters. Ruby's \w / \W are ASCII-only,
# so splitting on \W+ cuts accented words apart ("café" -> "caf") and leaves
# CJK text with no words at all; [[:word:]] is the Unicode-aware class and
# yields the same tokens as \w+ for plain ASCII input.
def word_set(str)
  str.to_s.downcase.scan(/[[:word:]]+/).to_set
end
465
+
466
+ # ---- guards (ThreatScanner + char-budget, same floor as Store) ----
467
+
468
# Write-time floor for a fact: raises Store::ThreatDetectedError (carrying the
# scanner's finding) when ThreatScanner flags the text, otherwise defers to
# the character-budget check.
def enforce_guards!(kind, text)
  if (finding = ThreatScanner.scan(text))
    raise Store::ThreatDetectedError, finding
  end

  enforce_char_budget!(kind, text)
end
474
+
475
# Raises Store::BudgetExceededError when storing +text+ would push the live
# character total of its budget group past that group's limit.
def enforce_char_budget!(kind, text)
  user_fact = kind == USER_KIND
  group = user_fact ? "user" : "memory"

  # INGEST cap, decoupled from the injection budget: `memory_char_limit`
  # bounds only what `retrieve` packs into the prompt, while storing is gated
  # by `memory_ingest_char_limit` (nil => unbounded). User facts keep their
  # own (small) profile budget.
  limit = user_fact ? @config.memory_user_char_limit : @config.memory_ingest_char_limit
  return unless limit&.positive?

  current = current_chars(group)
  requested = text.to_s.length
  over_budget = current + requested > limit
  return unless over_budget

  raise Store::BudgetExceededError.new(
    group: group, limit: limit, current: current, requested: requested
  )
end
493
+
494
+ # Budget is metered over LIVE facts only — superseded rows don't count
495
+ # against the injection budget since they're never injected.
496
def current_chars(group)
  # Only live rows are summed; "user" selects USER_KIND facts and any other
  # group selects everything else.
  scoped =
    if group == "user"
      live_dataset.where(kind: USER_KIND)
    else
      live_dataset.exclude(kind: USER_KIND)
    end
  scoped.sum(Sequel.function(:length, :text)).to_i
end
501
+
502
+ # ---- LLM ----
503
+
504
# Runs the extraction prompt for +turn+ through the auxiliary client and
# returns the parsed reply. Any failure is logged via log_skip and reported
# as nil. +session_id+ is accepted but not read by this method.
def call_llm(session_id:, turn:)
  client = aux_client
  system_msg = { role: "system", content: SqliteExtractionPrompt::SYSTEM }
  user_msg = {
    role: "user",
    content: SqliteExtractionPrompt.user_message(
      now: Time.now.utc.iso8601, live_facts: live_facts_for_prompt, turn: turn
    )
  }

  reply = client.call(task: :compression, messages: [system_msg, user_msg])
  parse_json(reply&.content)
rescue StandardError => e
  log_skip(e)
  nil
end
519
+
520
# The 60 newest live facts, reduced to what the extraction prompt shows:
# an 8-character id prefix, the kind and the text.
def live_facts_for_prompt
  newest = live_dataset.order(Sequel.desc(:created_at)).limit(60).all
  newest.map do |fact|
    { id: fact[:id].slice(0, 8), kind: fact[:kind], text: fact[:text] }
  end
end
525
+
526
+ # The aux model may wrap JSON in prose or a fenced block; extract the
527
+ # outermost object and parse leniently.
528
def parse_json(content)
  # Coerce first: a non-String reply must not raise on the slice below.
  text = content.to_s
  return nil if text.strip.empty?

  # Greedy match from the first "{" to the last "}" pulls the outermost
  # object out of surrounding prose or a fenced block.
  parsed = JSON.parse(text[/\{.*\}/m] || text)

  # Only a JSON object is a usable extraction result; valid JSON of any other
  # type (array, number, string) is reported as nil, like unparsable input.
  parsed.is_a?(Hash) ? parsed : nil
rescue JSON::ParserError
  nil
end
536
+
537
# Auxiliary LLM client for this backend, built from @config on first use and
# reused afterwards.
def aux_client
  return @aux_client if @aux_client

  @aux_client = LLM::AuxiliaryClient.new(config: @config)
end
540
+
541
# Transcript of the last 6 messages of +session_id+, one "ROLE: content" line
# per user/assistant message that has content. Returns "" if anything fails.
def recent_turn_text(session_id)
  history = Session::Store.new(db: @db).recent(session_id, count: 6)
  spoken = history.select do |msg|
    !msg.content.to_s.empty? && %w[user assistant].include?(msg.role)
  end
  spoken.map { |msg| "#{msg.role.upcase}: #{msg.content}" }.join("\n")
rescue StandardError
  ""
end
552
+
553
+ # ---- embeddings (best-effort) ----
554
+
555
+ # Vector mode is opt-in (`memory.sqlite.vector: true`) AND requires
556
+ # RubyLLM.embed to be wired. Off by default → FTS5-only hybrid.
557
def vector?
  return @vector unless @vector.nil?

  # `defined?` yields nil when RubyLLM is not loaded. Folding that into the
  # memoized value would store nil, which the guard above treats as "not
  # computed yet", so the answer is pinned to a strict true/false here.
  @vector =
    if @config.dig("memory", "sqlite", "vector") == true && defined?(RubyLLM)
      RubyLLM.respond_to?(:embed)
    else
      false
    end
end
563
+
564
+ # Graph-lite 1-hop blend is ON by default; `memory.sqlite.graph: false`
565
+ # disables it (FTS-only recall) — used to A/B the graph signal.
566
def graph?
  if @graph.nil?
    # Anything except an explicit `false` leaves the graph signal enabled.
    setting = @config.dig("memory", "sqlite", "graph")
    @graph = !(setting == false)
  end
  @graph
end
571
+
572
# Encoded embedding for +text+ to store with a fact, or nil when vector mode
# is off or no embedding could be produced.
def maybe_embed(text)
  return nil unless vector?

  vector = embed(text)
  vector ? encode_embedding(vector) : vector
end
578
+
579
# Embedding for +text+ via RubyLLM.embed, or nil when vector mode is off or
# the call fails. A result that responds to #vectors is unwrapped; anything
# else is returned as-is.
def embed(text)
  return nil unless vector?

  result = RubyLLM.embed(text.to_s)
  return result unless result.respond_to?(:vectors)

  result.vectors
rescue StandardError
  nil
end
587
+
588
# Serializes a vector of floats into a binary string using the "e*" pack
# format (little-endian single-precision floats).
def encode_embedding(vec)
  floats = vec
  floats.pack("e*")
end
591
+
592
+ def decode_embedding(blob)
593
+ blob && blob.to_s.unpack("e*")
594
+ end
595
+
596
# Cosine similarity of two numeric vectors. Returns 0.0 when either vector is
# empty, the sizes differ, or either has zero magnitude.
def cosine(a, b)
  return 0.0 if a.empty? || b.empty? || a.size != b.size

  magnitude = ->(vec) { Math.sqrt(vec.sum { |x| x * x }) }
  dot = a.zip(b).sum { |x, y| x * y }
  mag_a = magnitude.call(a)
  mag_b = magnitude.call(b)
  return 0.0 if mag_a.zero? || mag_b.zero?

  dot / (mag_a * mag_b)
end
604
+
605
+ # ---- helpers ----
606
+
607
# Maps an incoming kind onto this backend's vocabulary. Blank becomes
# USER_KIND; legacy/default-backend kinds are translated; every other kind
# passes through unchanged.
def normalize_kind(kind)
  name = kind.to_s
  return USER_KIND if name.empty?

  # Only the legacy names need translating.
  legacy = {
    "project_context" => "project",
    "technical_decision" => "fact",
    "task_state" => "fact",
    "tool_result" => "fact"
  }
  legacy.fetch(name, name)
end
621
+
622
# Decodes the stored entities JSON. A missing value or unparsable JSON both
# yield an empty list.
def parse_entities(json)
  return [] unless json

  JSON.parse(json)
rescue JSON::ParserError
  []
end
627
+
628
+ # Shape a row like the default backend's memories row so downstream
629
+ # (PromptAssembler, CLI) sees the same {id:, kind:, content:} contract.
630
def present(row)
  # The stored `text` column is exposed as `content`; key order is kept stable.
  shaped = { id: row[:id], kind: row[:kind], content: row[:text] }
  %i[confidence source_session_id].each { |col| shaped[col] = row[col] }
  shaped[:entities] = parse_entities(row[:entities_json])
  %i[valid_from valid_to superseded_by created_at].each { |col| shaped[col] = row[col] }
  shaped
end
644
+
645
# Emits a "memory.sqlite.skip" warning naming the error's class. A failure
# while logging is swallowed: logging must never block the write/extract path.
def log_skip(error)
  logger = Rubino.logger
  logger.warn(event: "memory.sqlite.skip", error: error.class.name)
rescue StandardError
  nil
end
650
+ end
651
+ end
652
+ end
653
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module Rubino
  module Memory
    # Name => class registry of pluggable memory backends. The backend to use
    # is named by the `memory.backend` config key; DEFAULT_NAME is the key
    # looked up instead when that name is blank or not registered.
    module Backends
      DEFAULT_NAME = "default"

      @registry = {}

      # Adds +klass+ to the registry under its #backend_name (stringified).
      def self.register(klass)
        @registry[klass.backend_name.to_s] = klass
      end

      # Names of every registered backend.
      def self.names
        @registry.keys
      end

      # True when a backend is registered under +name+.
      def self.registered?(name)
        @registry.key?(name.to_s)
      end

      # The class registered under +name+, or nil.
      def self.fetch(name)
        @registry[name.to_s]
      end

      # Instantiates the configured backend. An unset or unknown
      # `memory.backend` falls back to DEFAULT_NAME so a stale config never
      # breaks the interaction; raises Error when neither is registered.
      def self.build(config: nil)
        cfg = config || Rubino.configuration
        name = cfg.dig("memory", "backend").to_s
        klass = @registry.fetch(name) { @registry[DEFAULT_NAME] }
        raise Error, "no memory backend registered (looked for #{name.inspect})" unless klass

        klass.new(config: cfg)
      end

      # Empties the registry. For tests.
      def self.reset!
        @registry = {}
      end
    end
  end
end
+ end