rubino-agent 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +11 -2
  3. data/AGENTS.md +1 -1
  4. data/CHANGELOG.md +137 -1
  5. data/CONTRIBUTING.md +10 -1
  6. data/README.md +14 -5
  7. data/Rakefile +31 -0
  8. data/docs/agents.md +42 -23
  9. data/docs/architecture.md +2 -2
  10. data/docs/commands.md +28 -1
  11. data/docs/configuration.md +20 -23
  12. data/docs/getting-started.md +5 -3
  13. data/docs/security.md +16 -5
  14. data/docs/troubleshooting.md +1 -1
  15. data/exe/rubino +16 -2
  16. data/install.sh +715 -54
  17. data/lib/rubino/active_agent.rb +73 -0
  18. data/lib/rubino/agent/action_claim_guard.rb +881 -0
  19. data/lib/rubino/agent/agent_registry.rb +5 -2
  20. data/lib/rubino/agent/definition.rb +1 -9
  21. data/lib/rubino/agent/fallback_chain.rb +0 -6
  22. data/lib/rubino/agent/iteration_budget.rb +109 -3
  23. data/lib/rubino/agent/loop.rb +476 -20
  24. data/lib/rubino/agent/model_call_runner.rb +81 -3
  25. data/lib/rubino/agent/prompts/build.txt +22 -5
  26. data/lib/rubino/agent/response_validator.rb +8 -0
  27. data/lib/rubino/agent/runner.rb +133 -8
  28. data/lib/rubino/agent/tool_executor.rb +166 -14
  29. data/lib/rubino/agent/truncation_continuation.rb +4 -1
  30. data/lib/rubino/api/server.rb +19 -0
  31. data/lib/rubino/boot/config_guard.rb +71 -0
  32. data/lib/rubino/cli/chat/completion_builder.rb +42 -6
  33. data/lib/rubino/cli/chat/idle_card_host.rb +7 -1
  34. data/lib/rubino/cli/chat/session_resolver.rb +87 -21
  35. data/lib/rubino/cli/chat_command.rb +1189 -50
  36. data/lib/rubino/cli/commands.rb +281 -1
  37. data/lib/rubino/cli/config_command.rb +68 -8
  38. data/lib/rubino/cli/doctor_command.rb +204 -12
  39. data/lib/rubino/cli/jobs_command.rb +12 -0
  40. data/lib/rubino/cli/memory_command.rb +53 -20
  41. data/lib/rubino/cli/onboarding_wizard.rb +79 -6
  42. data/lib/rubino/cli/session_command.rb +172 -18
  43. data/lib/rubino/cli/setup_command.rb +131 -8
  44. data/lib/rubino/cli/skills_command.rb +67 -20
  45. data/lib/rubino/cli/trust_gate.rb +16 -7
  46. data/lib/rubino/commands/built_ins.rb +2 -0
  47. data/lib/rubino/commands/command.rb +12 -2
  48. data/lib/rubino/commands/executor.rb +149 -12
  49. data/lib/rubino/commands/handlers/agent_switch.rb +100 -0
  50. data/lib/rubino/commands/handlers/agents.rb +133 -38
  51. data/lib/rubino/commands/handlers/config.rb +4 -1
  52. data/lib/rubino/commands/handlers/help.rb +113 -14
  53. data/lib/rubino/commands/handlers/memory.rb +15 -5
  54. data/lib/rubino/commands/handlers/sessions.rb +26 -3
  55. data/lib/rubino/commands/handlers/status.rb +9 -4
  56. data/lib/rubino/commands/loader.rb +12 -0
  57. data/lib/rubino/config/configuration.rb +86 -24
  58. data/lib/rubino/config/defaults.rb +140 -33
  59. data/lib/rubino/config/loader.rb +62 -12
  60. data/lib/rubino/config/validator.rb +341 -0
  61. data/lib/rubino/config/writer.rb +123 -31
  62. data/lib/rubino/context/compressor.rb +184 -22
  63. data/lib/rubino/context/message_boundary.rb +27 -1
  64. data/lib/rubino/context/project_languages.rb +90 -0
  65. data/lib/rubino/context/prompt_assembler.rb +104 -21
  66. data/lib/rubino/context/summary_builder.rb +45 -4
  67. data/lib/rubino/context/token_budget.rb +36 -11
  68. data/lib/rubino/context/token_estimate.rb +45 -0
  69. data/lib/rubino/context/tool_result_pruner.rb +81 -0
  70. data/lib/rubino/database/connection.rb +154 -3
  71. data/lib/rubino/database/migrations/001_create_initial_schema.rb +314 -40
  72. data/lib/rubino/database/migrator.rb +98 -5
  73. data/lib/rubino/documents/cap_exceeded.rb +13 -0
  74. data/lib/rubino/documents/converters/csv.rb +4 -3
  75. data/lib/rubino/documents/converters/docx.rb +29 -5
  76. data/lib/rubino/documents/converters/html.rb +5 -1
  77. data/lib/rubino/documents/converters/json.rb +2 -1
  78. data/lib/rubino/documents/converters/pdf.rb +11 -2
  79. data/lib/rubino/documents/converters/plain.rb +2 -1
  80. data/lib/rubino/documents/converters/pptx.rb +11 -2
  81. data/lib/rubino/documents/converters/xlsx.rb +35 -4
  82. data/lib/rubino/documents/converters/xml.rb +2 -1
  83. data/lib/rubino/documents/limits.rb +210 -0
  84. data/lib/rubino/documents.rb +10 -3
  85. data/lib/rubino/errors.rb +36 -5
  86. data/lib/rubino/interaction/cancel_token.rb +19 -3
  87. data/lib/rubino/interaction/events.rb +13 -0
  88. data/lib/rubino/interaction/lifecycle.rb +99 -13
  89. data/lib/rubino/interaction/polishing.rb +176 -0
  90. data/lib/rubino/jobs/cron_job_repository.rb +5 -8
  91. data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +11 -0
  92. data/lib/rubino/jobs/handlers/distill_skill_job.rb +65 -9
  93. data/lib/rubino/jobs/queue.rb +63 -8
  94. data/lib/rubino/jobs/runner.rb +24 -6
  95. data/lib/rubino/jobs/worker.rb +0 -4
  96. data/lib/rubino/llm/adapter_response.rb +47 -4
  97. data/lib/rubino/llm/credential_check.rb +15 -16
  98. data/lib/rubino/llm/error_classifier.rb +89 -1
  99. data/lib/rubino/llm/inline_think_filter.rb +69 -12
  100. data/lib/rubino/llm/request.rb +30 -3
  101. data/lib/rubino/llm/ruby_llm_adapter.rb +394 -46
  102. data/lib/rubino/llm/tool_bridge.rb +113 -9
  103. data/lib/rubino/mcp/manager.rb +18 -1
  104. data/lib/rubino/mcp/mcp_tool_wrapper.rb +14 -3
  105. data/lib/rubino/memory/aux_retry.rb +107 -0
  106. data/lib/rubino/memory/backends/sqlite.rb +73 -44
  107. data/lib/rubino/memory/backends.rb +23 -7
  108. data/lib/rubino/memory/salience_gate.rb +103 -0
  109. data/lib/rubino/memory/sqlite_extraction.rb +70 -0
  110. data/lib/rubino/memory/sqlite_extraction_prompt.rb +11 -0
  111. data/lib/rubino/memory/store.rb +33 -5
  112. data/lib/rubino/memory/threat_scanner.rb +52 -0
  113. data/lib/rubino/output/cost.rb +52 -0
  114. data/lib/rubino/output/headless_block_latch.rb +53 -0
  115. data/lib/rubino/output/result_serializer.rb +222 -0
  116. data/lib/rubino/output/turn_recorder.rb +77 -0
  117. data/lib/rubino/security/approval_policy.rb +227 -32
  118. data/lib/rubino/security/command_allowlist.rb +79 -4
  119. data/lib/rubino/security/doom_loop_detector.rb +21 -2
  120. data/lib/rubino/security/hardline_guard.rb +189 -16
  121. data/lib/rubino/security/pattern_matcher.rb +28 -5
  122. data/lib/rubino/security/prefix_deriver.rb +25 -6
  123. data/lib/rubino/security/readonly_commands.rb +145 -5
  124. data/lib/rubino/security/secret_path.rb +134 -0
  125. data/lib/rubino/security/url_safety.rb +255 -0
  126. data/lib/rubino/session/repository.rb +212 -11
  127. data/lib/rubino/session/store.rb +139 -14
  128. data/lib/rubino/skills/installer.rb +116 -32
  129. data/lib/rubino/skills/prompt_index.rb +2 -2
  130. data/lib/rubino/skills/registry.rb +42 -1
  131. data/lib/rubino/skills/skill.rb +63 -2
  132. data/lib/rubino/skills/skill_tool.rb +16 -5
  133. data/lib/rubino/tools/background_tasks.rb +122 -9
  134. data/lib/rubino/tools/base.rb +204 -3
  135. data/lib/rubino/tools/edit_tool.rb +73 -18
  136. data/lib/rubino/tools/glob_tool.rb +48 -9
  137. data/lib/rubino/tools/grep_tool.rb +103 -9
  138. data/lib/rubino/tools/multi_edit_tool.rb +64 -9
  139. data/lib/rubino/tools/patch_tool.rb +5 -0
  140. data/lib/rubino/tools/read_attachment_tool.rb +3 -1
  141. data/lib/rubino/tools/read_tool.rb +33 -15
  142. data/lib/rubino/tools/read_tracker.rb +153 -35
  143. data/lib/rubino/tools/registry.rb +113 -12
  144. data/lib/rubino/tools/result.rb +9 -1
  145. data/lib/rubino/tools/ruby_tool.rb +0 -0
  146. data/lib/rubino/tools/shell_registry.rb +70 -0
  147. data/lib/rubino/tools/shell_tool.rb +40 -1
  148. data/lib/rubino/tools/summarize_file_tool.rb +6 -0
  149. data/lib/rubino/tools/task_stop_tool.rb +10 -16
  150. data/lib/rubino/tools/task_tool.rb +36 -8
  151. data/lib/rubino/tools/vision_tool.rb +5 -0
  152. data/lib/rubino/tools/webfetch_tool.rb +39 -7
  153. data/lib/rubino/tools/websearch_tool.rb +92 -30
  154. data/lib/rubino/tools/write_tool.rb +23 -4
  155. data/lib/rubino/ui/api.rb +10 -1
  156. data/lib/rubino/ui/base.rb +11 -0
  157. data/lib/rubino/ui/bottom_composer.rb +382 -74
  158. data/lib/rubino/ui/cli.rb +515 -83
  159. data/lib/rubino/ui/completion_menu.rb +11 -7
  160. data/lib/rubino/ui/headless_trace.rb +63 -0
  161. data/lib/rubino/ui/live_region.rb +70 -7
  162. data/lib/rubino/ui/markdown_renderer.rb +142 -7
  163. data/lib/rubino/ui/notifier.rb +0 -2
  164. data/lib/rubino/ui/null.rb +52 -5
  165. data/lib/rubino/ui/paste_store.rb +16 -2
  166. data/lib/rubino/ui/queued_indicators.rb +6 -1
  167. data/lib/rubino/ui/status_bar.rb +61 -7
  168. data/lib/rubino/ui/streaming_markdown.rb +59 -6
  169. data/lib/rubino/ui/subagent_view.rb +15 -1
  170. data/lib/rubino/ui/tool_label.rb +52 -0
  171. data/lib/rubino/update_check.rb +39 -4
  172. data/lib/rubino/util/atomic_file.rb +117 -0
  173. data/lib/rubino/util/ignore_rules.rb +120 -0
  174. data/lib/rubino/util/output.rb +229 -12
  175. data/lib/rubino/util/secrets_mask.rb +70 -7
  176. data/lib/rubino/util/spill_store.rb +153 -0
  177. data/lib/rubino/version.rb +1 -1
  178. data/lib/rubino/workspace.rb +9 -1
  179. data/lib/rubino.rb +191 -7
  180. data/rubino-agent.gemspec +1 -0
  181. data/skills/ruby-expert/SKILL.md +1 -0
  182. metadata +41 -12
  183. data/lib/rubino/agent/router.rb +0 -65
  184. data/lib/rubino/database/migrations/002_create_runs.rb +0 -45
  185. data/lib/rubino/database/migrations/003_create_skill_states.rb +0 -15
  186. data/lib/rubino/database/migrations/004_create_cron_jobs.rb +0 -36
  187. data/lib/rubino/database/migrations/005_create_oauth_connections.rb +0 -27
  188. data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +0 -34
  189. data/lib/rubino/database/migrations/007_create_messages_fts.rb +0 -59
  190. data/lib/rubino/database/migrations/008_create_memory_facts.rb +0 -75
  191. data/lib/rubino/database/migrations/009_create_memory_graph.rb +0 -55
  192. data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +0 -20
@@ -4,7 +4,7 @@ module Rubino
4
4
  module Agent
5
5
  # The core agent loop that handles LLM calls and tool execution cycles.
6
6
  # Runs until the LLM produces a final text response or budget is exhausted.
7
- class Loop
7
+ class Loop # rubocop:disable Metrics/ClassLength
8
8
  # Nudge issued on the final, toolless model call when the iteration/budget
9
9
  # ceiling is hit. Mirrors the reference handle_max_iterations summary request
10
10
  # — ask the model to wrap up in prose
@@ -81,7 +81,7 @@ module Rubino
81
81
  end
82
82
 
83
83
  # Runs the agent loop, returning the final assistant response content.
84
- def run(messages:, tools:)
84
+ def run(messages:, tools:) # rubocop:disable Metrics/PerceivedComplexity,Metrics/CyclomaticComplexity
85
85
  # Stash the resolved toolset so #streaming? can decide, per run, whether
86
86
  # this turn might block on a human (clarify/approval). When it might, we
87
87
  # run NON-STREAMING so the LLM HTTP request completes and CLOSES before
@@ -91,6 +91,18 @@ module Rubino
91
91
  iteration = 0
92
92
  turn_started_at = monotonic_now
93
93
 
94
+ # Reflect-guard against fabricated "done" (the #1 trust-killer): a
95
+ # toolless turn whose prose claims an action it never carried out. Built
96
+ # once per turn from the toolset actually on offer; counts its own
97
+ # corrective re-prompts so it can stop honestly at the cap.
98
+ @action_guard = ActionClaimGuard.new(exposed_tool_names: @turn_tools.map { |t| tool_name_of(t) })
99
+ @reflection_count = 0
100
+ # The user request driving this turn, captured from the OPENING transcript
101
+ # (before any guard reflection note is appended) — the guard consults it
102
+ # to skip challenging a NO-ACTION (plan/explain/"don't run tools") turn the
103
+ # user explicitly asked for (#353a).
104
+ @turn_user_request = originating_user_request(messages)
105
+
94
106
  # If a previous turn rotated to a fallback, restore the primary backend
95
107
  # so this turn gets a fresh attempt with the preferred model
96
108
  # (conversation_loop.py:427). No-op when we never left the primary.
@@ -103,7 +115,30 @@ module Rubino
103
115
  # locals) so the sink closure can update them.
104
116
  @tool_count = 0
105
117
  @denied_count = 0
106
- token_total = 0
118
+ # Of the tools that RAN, how many were MUTATING (edit/write/patch). Lets
119
+ # the pessimistic-summary reconciliation (#381) say "N tool calls (M edits
120
+ # — review uncommitted changes)" so a developer is pointed at real,
121
+ # possibly-uncommitted disk changes when the model claims it did nothing.
122
+ @edit_count = 0
123
+ # Round-trips ruby_llm ran INSIDE a single streaming ask() this turn
124
+ # (#355a). ruby_llm drives the whole model↔tool loop within one
125
+ # chat.ask, so the outer `iteration` counter above stays at 1 for the
126
+ # entire streaming turn and never re-consults the budget between the
127
+ # intermediate round-trips. The adapter calls #note_stream_round_trip
128
+ # once per round-trip (via on_round_trip), and #stream_budget_exhausted?
129
+ # reads this count so ToolBridge can Halt the in-ask loop once the
130
+ # iteration/time budget is spent. Reset per turn.
131
+ @stream_round_trips = 0
132
+ # Accumulates the content streamed to the screen this turn so that an
133
+ # interrupt mid-stream can persist EXACTLY what the user saw, marked
134
+ # interrupted (#338b). Reset per turn — a one-shot CancelToken plus a
135
+ # fresh buffer means a stale partial can never attach to a later turn.
136
+ @interrupt_partial = +""
137
+ # True once any denial this turn was a headless fail-closed block ("needs
138
+ # approval but no interactive session", #260) — lets the binding guard
139
+ # point at `--yolo` (F2) instead of "approve it" in the honest message.
140
+ @noninteractive_block = false
141
+ token_total = 0
107
142
 
108
143
  loop do
109
144
  iteration += 1
@@ -120,8 +155,15 @@ module Rubino
120
155
 
121
156
  unless @budget.can_continue?(iteration)
122
157
  @ui.warning("Iteration budget exhausted (#{iteration} turns)")
123
- return summarize_on_budget_exhausted(messages, iteration,
124
- turn_started_at, token_total)
158
+ outcome = handle_budget_exhausted(messages, iteration,
159
+ turn_started_at, token_total)
160
+ # :continue → the user (interactively) granted more budget; the
161
+ # iteration cap was raised and we re-enter the SAME turn with full
162
+ # context (no re-summary, no truncation). Anything else is the final
163
+ # assistant text (force-summary / abort).
164
+ next if outcome == :continue
165
+
166
+ return outcome
125
167
  end
126
168
 
127
169
  @event_bus.emit(Interaction::Events::MODEL_CALL_STARTED, iteration: iteration)
@@ -134,20 +176,51 @@ module Rubino
134
176
  response = call_model(messages, tools, iteration)
135
177
  rescue Rubino::Interrupted
136
178
  # The streaming callback (or the per-iteration check above)
137
- # observed cancellation. Close any open stream box on the UI
138
- # (commits the partial answer streamed so far) and bail out — the
139
- # standardized `⎿ interrupted` marker is appended once by the Runner's
140
- # rescue, right after this kept partial. Lifecycle will not persist a
141
- # turn that never completed, but the user already saw the partial.
179
+ # observed cancellation. Persist EXACTLY the partial that was shown
180
+ # on screen flagged interrupted in metadata so storage matches
181
+ # the screen and the transcript stays truthful & resumable (#338b).
182
+ # Without this, the on-screen `⎿ interrupted` partial was absent from
183
+ # the messages table and resume/compaction/memory diverged from what
184
+ # the user saw. Then close any open stream box (commits the partial
185
+ # answer streamed so far) and bail out — the standardized
186
+ # `⎿ interrupted` marker is appended once by the Runner's rescue,
187
+ # right after this kept partial. The upstream stream is already
188
+ # cancelled: raising out of the per-chunk callback unwinds Faraday's
189
+ # net-http read loop, which closes the socket (no drain) — verified
190
+ # against ruby_llm 1.x's Streaming#stream_response, where the block
191
+ # we raise from runs inside the on_data handler.
192
+ persist_interrupted_partial
142
193
  @ui.stream_end if streaming?
143
194
  raise
144
195
  end
145
196
  @event_bus.emit(Interaction::Events::MODEL_CALL_FINISHED,
146
197
  tokens: response.total_tokens,
198
+ input_tokens: response.input_tokens,
199
+ output_tokens: response.output_tokens,
200
+ stop_reason: response.stop_reason,
201
+ model_id: response.model_id,
147
202
  has_tool_calls: response.has_tool_calls?)
148
203
 
149
204
  token_total += response.total_tokens.to_i
150
205
 
206
+ # #355a: the streaming round-trip loop was cut short mid-flight because
207
+ # this turn's iteration/time budget was spent (ToolBridge returned
208
+ # Tool::Halt). ruby_llm already added a valid trailing tool message, so
209
+ # the history is well-formed — hand off to the same budget-exhausted
210
+ # summary the outer-loop cap uses. `iteration` is still 1 for a
211
+ # streaming turn, so pass the round-trip count as the iteration reached.
212
+ if response.halted?
213
+ outcome = handle_budget_exhausted(messages, @stream_round_trips,
214
+ turn_started_at, token_total)
215
+ # :continue → budget extended; the next ask() picks up the
216
+ # well-formed post-Halt history (ruby_llm already appended the
217
+ # trailing tool message) and resumes the in-ask round-trip loop
218
+ # against the now-larger budget. No tool_bridge change needed.
219
+ next if outcome == :continue
220
+
221
+ return outcome
222
+ end
223
+
151
224
  if response.interrupted?
152
225
  # The upstream stream was cut before a clean completion (no
153
226
  # finish_reason / [DONE]); `response` carries only a buffered partial
@@ -168,10 +241,35 @@ module Rubino
168
241
  end
169
242
 
170
243
  if response.text_only?
171
- persist_assistant_message(response)
172
- finalize_stream(response)
244
+ # Fabricated-"done" gate: the structured tool-call channel is the
245
+ # ONLY thing that advances state. If this toolless turn's prose
246
+ # asserts an action against a tool we expose (or claims a `cd` we
247
+ # cannot do), DON'T let that reach the user as a completed answer.
248
+ guard = guard_text_only_turn(response, messages)
249
+ # A corrective user message was appended; loop again so the model
250
+ # either calls the tool or owns up. iteration/token_total carry on.
251
+ next if guard == :reflected
252
+
253
+ # cd: the claim can never be true, so we replaced the fabricated
254
+ # final answer with an honest message (how to actually change the
255
+ # workspace). Surface that, not the model's no-op claim.
256
+ final = guard.is_a?(String) ? guard : response.content
257
+
258
+ persist_final_text(response, final)
259
+ finalize_stream_text(response, final)
173
260
  emit_turn_summary(turn_started_at, token_total)
174
- return response.content
261
+
262
+ # The ANSWER returned to the caller is the LAST text block only
263
+ # (#core-F1): on a streaming turn whose final round-trip used a tool,
264
+ # `response.content` is every text block of the turn concatenated
265
+ # (pre-tool narration + post-tool answer, no delimiter), which a
266
+ # headless `OUT=$(rubino prompt …)` would capture as one run-on string.
267
+ # The full text was already streamed live and persisted via #final
268
+ # above (transcript/render keep the narration, #261); the value we
269
+ # HAND BACK is the post-final-tool answer in isolation. A guard
270
+ # replacement is a synthesized string with no narration to strip, so it
271
+ # passes through unchanged.
272
+ return guard.is_a?(String) ? guard : response.final_text_block
175
273
  end
176
274
 
177
275
  if response.has_tool_calls?
@@ -272,7 +370,7 @@ module Rubino
272
370
  # parks) AND the toolset contains a tool that can trigger the gate:
273
371
  # - `question` → @ui.ask (clarify) — always blocks when called.
274
372
  # - any risky tool under manual approvals → @ui.confirm — blocks.
275
- # - `shell` when require_confirmation_for_shell is on → confirm.
373
+ # - `shell` under confirm_policy: confirm_all → confirm.
276
374
  # Memoised per run; the toolset is fixed for the turn.
277
375
  def interactive_turn?
278
376
  return @interactive_turn unless @interactive_turn.nil?
@@ -307,6 +405,87 @@ module Rubino
307
405
  tool.respond_to?(:name) ? tool.name.to_s : tool.to_s
308
406
  end
309
407
 
408
+ # Budget exhausted (#399). In INTERACTIVE mode, ask the human what to do
409
+ # before ending the turn with a force-summary: continue (grant more
410
+ # budget), summarize now (today's behaviour), or abort. Returns:
411
+ # :continue — the cap was raised via IterationBudget#extend!; the caller
412
+ # re-enters the SAME turn with FULL context (no re-summary,
413
+ # no truncation).
414
+ # String — the final assistant text (force-summary, or the honest
415
+ # abort note).
416
+ #
417
+ # HEADLESS GUARANTEE: @ui.select returns nil on UI::Null / UI::Base /
418
+ # no-TTY (see UI::CLI#select's interactive_terminal? gate), and a nil/
419
+ # unrecognised choice falls straight through to force-summarize — so the
420
+ # API/headless path is byte-identical to before this change. The prompt is
421
+ # also skipped entirely when agent.budget_extension_prompt is false.
422
+ def handle_budget_exhausted(messages, iteration, turn_started_at, token_total)
423
+ case budget_extension_choice(iteration)
424
+ when :continue
425
+ step = @config.agent_budget_extension_step
426
+ new_cap = @budget.extend!(step)
427
+ @event_bus.emit(Interaction::Events::BUDGET_EXTENDED,
428
+ iteration: iteration, granted: step, new_cap: new_cap)
429
+ @ui.note("Continuing — granted +#{step} tool iterations") if @ui.respond_to?(:note)
430
+ :continue
431
+ when :abort
432
+ abort_on_budget_exhausted(iteration, turn_started_at, token_total)
433
+ else
434
+ # :summarize, nil (headless / cancelled), or prompt disabled → today's
435
+ # force-summarize, unchanged.
436
+ force_summarize_budget_exhausted(messages, iteration, turn_started_at, token_total)
437
+ end
438
+ end
439
+
440
+ # Returns the user's choice at the cap, or nil to fall through to
441
+ # force-summarize. nil whenever the prompt is disabled by config OR the UI
442
+ # can't prompt a human (@ui.select → nil on Null/Base/no-TTY) — the latter
443
+ # is the headless guarantee, requiring zero special-casing here.
444
+ #
445
+ # #403: also nil when extending wouldn't help — i.e. a NON-extendable rail
446
+ # (the TIME limit OR the max_turns outer rail), not the soft iteration
447
+ # ceiling, is what's exhausted. extend! only raises the soft ceiling, so
448
+ # prompting "Continue (+N)" against either rail grants a no-op and the next
449
+ # pass re-exhausts on the same rail → infinite re-prompt. Only offer the
450
+ # prompt when the budget says extending can actually help.
451
+ def budget_extension_choice(iteration)
452
+ return nil unless @config.agent_budget_extension_prompt?
453
+ return nil unless @budget.extendable?(iteration)
454
+
455
+ step = @config.agent_budget_extension_step
456
+ @ui.select(
457
+ "Reached #{iteration} tool iterations",
458
+ [["Continue (+#{step})", :continue],
459
+ ["Summarize now", :summarize],
460
+ ["Abort", :abort]]
461
+ )
462
+ end
463
+
464
+ # :abort — the user asked to stop here. End the turn honestly with a short
465
+ # note rather than a force-summary (no extra model call). The ledger note
466
+ # keeps it truthful about how much ran.
467
+ def abort_on_budget_exhausted(iteration, turn_started_at, token_total)
468
+ note = "Stopped at user request after #{iteration} tool iteration" \
469
+ "#{"s" if iteration != 1} (#{tool_count_label})."
470
+ persist_user_message_note(note)
471
+ @ui.stream({ type: :content, text: note, message_id: 0 })
472
+ @ui.stream_end
473
+ emit_turn_summary(turn_started_at, token_total)
474
+ note
475
+ end
476
+
477
+ # Persists a harness-authored final assistant note (the abort message).
478
+ # A plain assistant row so --resume / audit keep the truthful ending.
479
+ def persist_user_message_note(note)
480
+ with_db_retries do
481
+ @message_store.create(
482
+ session_id: @session[:id],
483
+ role: "assistant",
484
+ content: note
485
+ )
486
+ end
487
+ end
488
+
310
489
  # Budget exhausted: instead of ending the turn with nothing, issue ONE
311
490
  # final model call with the tools stripped, nudging the model to summarise
312
491
  # what it did and what remains. The summary still runs through the normal
@@ -314,7 +493,7 @@ module Rubino
314
493
  # becomes the turn's final assistant content. Because tools are empty AND
315
494
  # this is the loop's terminal action, the summary can never re-enter the
316
495
  # tool loop. Ports conversation_loop.py:4296 / handle_max_iterations.
317
- def summarize_on_budget_exhausted(messages, iteration, turn_started_at, token_total)
496
+ def force_summarize_budget_exhausted(messages, iteration, turn_started_at, token_total)
318
497
  persist_user_message(MAX_ITERATIONS_SUMMARY_NUDGE)
319
498
  messages << { role: "user", content: MAX_ITERATIONS_SUMMARY_NUDGE }
320
499
 
@@ -323,13 +502,121 @@ module Rubino
323
502
  response = call_model(messages, [], iteration)
324
503
  @event_bus.emit(Interaction::Events::MODEL_CALL_FINISHED,
325
504
  tokens: response.total_tokens,
505
+ input_tokens: response.input_tokens,
506
+ output_tokens: response.output_tokens,
507
+ stop_reason: :max_iterations,
508
+ model_id: response.model_id,
326
509
  has_tool_calls: response.has_tool_calls?)
327
510
  token_total += response.total_tokens.to_i
328
511
 
329
- persist_assistant_message(response)
330
- finalize_stream(response)
512
+ # PESSIMISTIC-fabrication gate (#381): this forced summary ran AFTER real
513
+ # tool calls this turn. If the model writes it pessimistically — "I did
514
+ # nothing, read no files, made no edits" — while the ledger shows tools
515
+ # DID run, the user must learn work that happened did not vanish. The
516
+ # ledger (@tool_count / @edit_count), not the narration, is the authority
517
+ # on side-effects.
518
+ #
519
+ # The truthful harness note is HARNESS DIAGNOSTIC, not model answer, so it
520
+ # is routed to STDERR (via #warning) — NOT appended into the returned text
521
+ # answer, which would pollute `--output-format text` stdout, the
522
+ # clean-stdout contract (#418, mirroring the #372 / created-skills
523
+ # routing). nil ⇒ summary already truthful (or no tools ran) → no note.
524
+ note = @action_guard.pessimistic_summary_note(
525
+ content: response.content,
526
+ tool_count: @tool_count,
527
+ edit_count: @edit_count
528
+ )
529
+ emit_harness_note(note) if note
530
+
531
+ final = response.content
532
+ persist_final_text(response, final)
533
+ # Reset the live-region geometry before the force-summary's final commit
534
+ # repaint (#421): this terminal summary runs after a fresh thinking-row
535
+ # phase (#thinking_started above) and a streamed block, which leave the
536
+ # composer's recorded row geometry out of step with the physical rows.
537
+ # Without the reset the closing #stream_end walks a stale row count and
538
+ # the WHOLE summary block repaints twice. Same geometry-reset seam the
539
+ # interrupt finalize (#421) / Ctrl+L (#395) / resize (#401) use; guarded
540
+ # so non-CLI UIs (Null/API/Base) are untouched.
541
+ @ui.reset_finalize_geometry if @ui.respond_to?(:reset_finalize_geometry)
542
+ finalize_stream_text(response, final)
331
543
  emit_turn_summary(turn_started_at, token_total)
332
- response.content
544
+ final
545
+ end
546
+
547
+ # Surface the #381 reconcile note as a HARNESS diagnostic off the answer
548
+ # stream: a #warning (stderr in the CLI; latched + echoed to stderr by the
549
+ # headless one-shot adapter, #260) plus an event-bus signal so the JSON /
550
+ # SSE consumers can carry it as metadata. Never written into the text
551
+ # answer that reaches `--output-format text` stdout (#418).
552
+ def emit_harness_note(note)
553
+ @ui.warning(note) if @ui.respond_to?(:warning)
554
+ @event_bus&.emit(Interaction::Events::HARNESS_NOTE, note: note)
555
+ rescue StandardError => e
556
+ Rubino.logger&.warn(event: "loop.harness_note_failed", error: e.message)
557
+ end
558
+
559
+ # The fabricated-"done" gate for a TEXT-ONLY turn (#r5 F1 / MF-3 / B1).
560
+ # Investigation: MiniMax-M3 via /anthropic DOES return structured tool_use
561
+ # blocks and rubino parses them correctly (verified with RUBYLLM_DEBUG) —
562
+ # the failure is not an XML-in-text leak, it's the model genuinely
563
+ # narrating an action ("Running the suite now.", "Saved to hello.py")
564
+ # while issuing ZERO tool calls, so a fake success reaches the user. Since
565
+ # the structured channel is the only thing that advances state, a toolless
566
+ # turn that asserts such an action is a claim with nothing behind it.
567
+ #
568
+ # Returns:
569
+ # :reflected — a corrective user message was appended to `messages`; the
570
+ # Loop must re-enter (the model now either calls the tool or
571
+ # says it can't). Capped at MAX_REFLECTIONS.
572
+ # String — an honest replacement for the final answer. The cd case
573
+ # (rubino has no cd tool); the BINDING terminal override
574
+ # (G1: reflection budget spent, model still fabricating a
575
+ # mutation); and the denied/blocked-but-claims case (F1/F2:
576
+ # a fabricated success-narration or diff after a tool was
577
+ # blocked) all return their honest replacement text here.
578
+ # nil — nothing to do; surface the model's text as-is.
579
+ def guard_text_only_turn(response, messages)
580
+ # The reflection budget is spent → the guard must be BINDING this turn:
581
+ # replace a still-fabricated answer rather than ask for one more turn.
582
+ terminal = @reflection_count >= ActionClaimGuard::MAX_REFLECTIONS
583
+ verdict = @action_guard.evaluate(
584
+ content: response.content,
585
+ tool_count: @tool_count,
586
+ denied_count: @denied_count,
587
+ noninteractive: @noninteractive_block,
588
+ terminal: terminal,
589
+ user_request: @turn_user_request
590
+ )
591
+ return nil if verdict.nil?
592
+
593
+ kind, payload = verdict
594
+ # cd / blocked / terminal-replace all REPLACE the final answer with the
595
+ # honest deterministic text (payload) — the guard's verdict overrides the
596
+ # model's fabrication on this terminal turn.
597
+ return payload if %i[cd blocked replace].include?(kind)
598
+
599
+ # :reflect — re-prompt once, under the cap. The reflection is appended as
600
+ # a USER message at the same safe ordering boundary the steering injection
601
+ # uses (after the cancel check, no open tool_use pair).
602
+ note = @action_guard.reflection_message(payload, prior_reflections: @reflection_count)
603
+ @reflection_count += 1
604
+ # The fabricated text already streamed to the UI on the streaming path;
605
+ # close that box so the corrective re-prompt's answer renders cleanly
606
+ # beneath it (the kept partial stays visible, like an interrupt).
607
+ @ui.stream_end if streaming?
608
+ persist_assistant_message(response)
609
+ messages << build_assistant_tool_use_message(response)
610
+ persist_user_message(note)
611
+ messages << { role: "user", content: note }
612
+ @ui.note("checking that claim — no tool call was issued") if @ui.respond_to?(:note)
613
+ :reflected
614
+ end
615
+
616
+ # The last user message in the OPENING transcript (no guard notes appended
617
+ # yet at this point), as a plain string. Defensive "" when there is none.
618
+ def originating_user_request(messages)
619
+ (Array(messages).reverse.find { |msg| msg[:role].to_s == "user" } || {}).fetch(:content, "").to_s
333
620
  end
334
621
 
335
622
  # Builds the per-call LLM::Request and runs it through the ModelCallRunner,
@@ -347,14 +634,39 @@ module Rubino
347
634
  messages: messages,
348
635
  tools: tools,
349
636
  image_paths: image_paths,
350
- stream: streaming?
637
+ stream: streaming?,
638
+ # Round-trip hooks (#355 #351). ruby_llm runs the WHOLE model↔tool loop
639
+ # inside one streaming ask(); these let the Loop observe and bound that
640
+ # inner loop. on_intermediate_message persists each intermediate
641
+ # assistant(tool_use) row so the streaming transcript matches the
642
+ # non-streaming one (#351); on_round_trip counts round-trips so the
643
+ # budget can be consulted mid-loop; budget_exhausted is the predicate
644
+ # ToolBridge consults to Halt once the budget is spent (#355a).
645
+ on_intermediate_message: method(:persist_intermediate_assistant),
646
+ on_round_trip: method(:note_stream_round_trip),
647
+ budget_exhausted: method(:stream_budget_exhausted?)
351
648
  )
352
649
 
353
650
  # Single boundary entry (normalize_response seam).
354
651
  # The adapter dispatches stream-vs-chat off request.stream internally;
355
652
  # streaming yields chunks to the block, non-streaming returns in one shot.
356
653
  # The runner forwards this block straight through on each attempt.
654
+ #
655
+ # Interrupt path (#338): every content delta is also accumulated into
656
+ # @interrupt_partial so that if the user cancels mid-stream — and the
657
+ # adapter raises Rubino::Interrupted before returning a response — the
658
+ # Loop still has the exact text that was shown on screen to PERSIST as an
659
+ # interrupted partial (storage matches the screen, transcript stays
660
+ # truthful & resumable). And once the cancel token has flipped, a late
661
+ # chunk that escaped the per-chunk poll (arriving in the window between
662
+ # the flag flip and the adapter tearing down the socket) is DROPPED here
663
+ # — it is neither rendered nor accumulated, so no late token can bleed
664
+ # into the next turn (Gemini's turnCancelledRef pattern, belt-and-
665
+ # suspenders on top of the socket abort the raise already triggers).
357
666
  stream_chunk = lambda do |chunk|
667
+ next if @cancel_token&.cancelled?
668
+
669
+ @interrupt_partial << chunk[:text].to_s if chunk.is_a?(Hash) && chunk[:type] == :content
358
670
  @ui.stream(chunk)
359
671
  @event_bus.emit(Interaction::Events::MODEL_STREAM, chunk: chunk)
360
672
  end
@@ -388,6 +700,37 @@ module Rubino
388
700
  )
389
701
  end
390
702
 
703
# Stores the turn's final assistant text.
#
# If the guard left the answer alone (+final+ is the same object as, or equal
# to, response.content) this simply delegates to #persist_assistant_message.
# If the guard swapped in a replacement, the replacement is what gets written
# so --resume / audit see the truthful turn rather than the model's no-op
# claim. Token accounting still comes from +response+.
#
# @param response [#content, #input_tokens, #output_tokens] the model response
# @param final [Object] the text that should stand as the turn's answer
# @return [Object] whatever the delegated persist / store call returns
def persist_final_text(response, final)
  model_text = response.content
  untouched = final.equal?(model_text) || final == model_text
  return persist_assistant_message(response) if untouched

  with_db_retries do
    # Only record input_tokens when the provider actually reported some.
    metadata = {}
    metadata[:input_tokens] = response.input_tokens if response.input_tokens.to_i.positive?

    @message_store.create(
      session_id: @session[:id],
      role: "assistant",
      content: final,
      token_count: response.output_tokens,
      metadata: metadata
    )
  end
end
721
+
722
# Renders the turn's final text.
#
# When +final+ is the model's own content (same object or equal) rendering is
# handed to #finalize_stream unchanged. When the guard replaced the answer:
# on the streaming path the fabricated line is already on screen, so that box
# is closed first and the honest correction is emitted as a fresh block; on
# the non-streaming path only the honest text is rendered.
#
# @param response [#content] the model response
# @param final [Object] the text that should be shown as the answer
# @return [Object] the result of #finalize_stream or of the closing stream_end
def finalize_stream_text(response, final)
  model_text = response.content
  return finalize_stream(response) if final.equal?(model_text) || final == model_text

  # Close the box that already holds the streamed (fabricated) text.
  @ui.stream_end if streaming?

  correction = { type: :content, text: final.to_s, message_id: 0 }
  @ui.stream(correction)
  @ui.stream_end
end
733
+
391
734
  def finalize_stream(response)
392
735
  if streaming?
393
736
  @ui.stream_end
@@ -434,15 +777,23 @@ module Rubino
434
777
  # "0 run · 1 denied" so the deny outcome is unambiguous (#83).
435
778
  if result.respond_to?(:denied?) && result.denied?
436
779
  @denied_count += 1
780
+ # A headless fail-closed block carries the distinctive noninteractive
781
+ # denial output; remember it so the binding guard's honest message can
782
+ # name `--yolo` rather than "approve interactively" (F2).
783
+ @noninteractive_block = true if result.output.to_s.include?("no interactive session")
437
784
  else
438
785
  @tool_count += 1
786
+ # Track mutating tool calls separately so the pessimistic-summary
787
+ # reconciliation (#381) can point the user at uncommitted disk changes.
788
+ @edit_count += 1 if ActionClaimGuard::MUTATING_TOOLS.include?(name.to_s)
439
789
  end
440
790
  persist_tool_result(
441
791
  role: "tool",
442
792
  content: result.output,
443
793
  tool_call_id: call_id,
444
794
  name: name,
445
- arguments: arguments
795
+ arguments: arguments,
796
+ result: result
446
797
  )
447
798
  end
448
799
 
@@ -488,6 +839,102 @@ module Rubino
488
839
  @session_repo ||= Session::Repository.new
489
840
  end
490
841
 
842
# Persists the assistant text streamed so far when the user interrupts
# mid-turn (#338b).
#
# The row is written against THIS session and flagged `interrupted: true` in
# metadata so resume / audit / compaction can tell a cut-off turn from a
# completed one and never mistake the truncated buffer for a finished answer.
# After the row is stored the session's message count is incremented.
#
# No-op when nothing (or only whitespace) was streamed — e.g. an interrupt
# during "thinking" before the first content token — because there is no
# partial to keep.
#
# Never raises StandardError: any failure is logged and swallowed so the
# Interrupted that triggered this call can keep propagating and the turn
# unwinds cleanly.
#
# @return [Object, nil] nil when there was nothing to persist; otherwise the
#   result of the last call made (not meaningful to callers)
def persist_interrupted_partial
  partial = @interrupt_partial.to_s
  return if partial.strip.empty?

  with_db_retries do
    @message_store.create(
      session_id: @session[:id],
      role: "assistant",
      content: partial,
      metadata: { interrupted: true }
    )
  end
  session_repo.increment_message_count!(@session[:id])
rescue StandardError => e
  # Persisting the partial must never mask the interrupt itself. The logger
  # call is nil-safe (same as the intermediate-row persistence handler): with
  # an unset logger a plain `.warn` would raise NoMethodError from inside this
  # handler and replace the interrupt with an unrelated error.
  Rubino.logger&.warn(event: "loop.interrupt.persist_failed", error: e.message)
end
868
+
869
# #351: stores an INTERMEDIATE assistant(tool_use) message produced by
# ruby_llm inside a single streaming ask().
#
# On the non-streaming path the Loop writes this row itself (via
# #persist_assistant_message before #execute_tool_calls). On the streaming
# path ruby_llm drives the whole model↔tool loop internally, and without this
# hook resume / repair_tool_pairs / compaction would see tool(result) rows
# with no matching assistant(tool_use) — which strict providers reject on the
# next turn. The row written here has the same shape as the non-streaming
# one: tool_calls and input_tokens live in metadata.
#
# Idempotency: the adapter only invokes this for assistant messages carrying
# tool_calls, never for the final text turn (the Loop's text path persists
# that). Token totals are deliberately NOT accumulated here — the streaming
# build_response already sums every round-trip's usage into the single
# response the loop counts once (#355b), so adding them again would
# double-bill.
#
# Orphan avoidance (#355a + #351): on_round_trip fires just before this, so
# if the budget is now exhausted every tool of this round-trip will be Halted
# by ToolBridge and no tool(result) row will follow. Writing the
# assistant(tool_use) row anyway would leave an orphan for repair_tool_pairs
# to strip, so the voided round-trip is skipped entirely.
#
# Never raises StandardError: a persistence hiccup is logged and swallowed so
# it cannot abort the live tool loop.
#
# @param msg [Hash] normalized message with :content, :tool_calls,
#   :input_tokens and :output_tokens
# @return [Object, nil] nil when skipped; otherwise the store/logger result
def persist_intermediate_assistant(msg)
  return if stream_budget_exhausted?

  metadata = {}
  calls = msg[:tool_calls] || []
  metadata[:tool_calls] = calls unless calls.empty?
  prompt_tokens = msg[:input_tokens].to_i
  metadata[:input_tokens] = prompt_tokens if prompt_tokens.positive?

  with_db_retries do
    @message_store.create(
      session_id: @session[:id],
      role: "assistant",
      content: msg[:content],
      token_count: msg[:output_tokens],
      metadata: metadata
    )
  end
rescue StandardError => e
  # Log and carry on — the model is mid-way through its tool loop.
  Rubino.logger&.warn(event: "loop.intermediate.persist_failed", error: e.message)
end
917
+
918
# #355a: records one more round-trip that ruby_llm ran inside the streaming
# ask(). Invoked by the adapter (on_round_trip) for each assistant(tool_use)
# message.
#
# @return [Integer] the updated round-trip count
def note_stream_round_trip
  @stream_round_trips = @stream_round_trips + 1
end
923
+
924
# #355a: predicate ToolBridge consults BEFORE each mid-stream tool dispatch.
#
# Returns true once the per-turn budget can no longer accommodate the
# round-trips ruby_llm has already produced; the bridge then returns
# Tool::Halt so the in-ask loop stops gracefully and control comes back to
# the Loop for the budget-exhausted summary. Treating each streamed
# round-trip as an iteration maps the in-ask loop onto the same budget the
# non-streaming path spends one iteration at a time.
#
# With zero recorded round-trips the budget is not consulted at all and the
# answer is always false.
#
# @return [Boolean]
def stream_budget_exhausted?
  round_trips = @stream_round_trips
  !(round_trips.zero? || @budget.can_continue?(round_trips))
end
937
+
491
938
  def persist_assistant_message(response)
492
939
  # Stash tool_calls under metadata so --resume can rebuild the
493
940
  # assistant(toolUse) → tool(result) pair the provider expects. Without
@@ -520,6 +967,15 @@ module Rubino
520
967
  # Old rows that pre-date this field hydrate with empty metadata; the
521
968
  # replay path falls back to printing just the name.
522
969
  metadata = result[:arguments] ? { arguments: result[:arguments] } : {}
970
+ # Persist the OUTCOME (status + error_code) so --resume replay renders
971
+ # the SAME glyph the live session showed — a denied/failed tool replays
972
+ # with the red ✗, not a blanket green ✓ (the replay path used to wrap
973
+ # every stored row as Result.success). Old rows hydrate without these
974
+ # keys; the replay path then infers the outcome from the output text.
975
+ if (res = result[:result])
976
+ metadata[:status] = res.status.to_s if res.respond_to?(:status) && res.status
977
+ metadata[:error_code] = res.error_code.to_s if res.respond_to?(:error_code) && res.error_code
978
+ end
523
979
 
524
980
  with_db_retries do
525
981
  @message_store.create(