rubino-agent 0.5.2.1 → 0.5.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.dockerignore +15 -0
  3. data/CHANGELOG.md +56 -0
  4. data/Dockerfile +56 -0
  5. data/agent.md +112 -0
  6. data/docs/design/bg-shell-pty-port.md +88 -0
  7. data/docs/design/bg-shell-review-refinements.md +65 -0
  8. data/docs/design/bg-shell-ux.md +130 -0
  9. data/docs/tools.md +3 -12
  10. data/lib/rubino/agent/iteration_budget.rb +13 -0
  11. data/lib/rubino/agent/loop.rb +43 -5
  12. data/lib/rubino/agent/prompts/build.txt +3 -5
  13. data/lib/rubino/agent/prompts/memory_guidance.txt +5 -0
  14. data/lib/rubino/agent/prompts/tool_use_enforcement.txt +4 -0
  15. data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt +9 -0
  16. data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt +48 -0
  17. data/lib/rubino/agent/runner.rb +55 -12
  18. data/lib/rubino/agent/tool_executor.rb +1 -1
  19. data/lib/rubino/cli/chat/idle_card_host.rb +6 -1
  20. data/lib/rubino/cli/chat_command.rb +119 -17
  21. data/lib/rubino/cli/commands.rb +5 -0
  22. data/lib/rubino/commands/handlers/agents.rb +27 -18
  23. data/lib/rubino/commands/handlers/status.rb +6 -3
  24. data/lib/rubino/config/configuration.rb +25 -8
  25. data/lib/rubino/config/defaults.rb +15 -13
  26. data/lib/rubino/context/prompt_assembler.rb +89 -1
  27. data/lib/rubino/context/summary_builder.rb +0 -22
  28. data/lib/rubino/interaction/events.rb +2 -2
  29. data/lib/rubino/interaction/lifecycle.rb +54 -20
  30. data/lib/rubino/llm/ruby_llm_adapter.rb +178 -20
  31. data/lib/rubino/security/redactor.rb +1 -1
  32. data/lib/rubino/session/message.rb +12 -0
  33. data/lib/rubino/tools/background_tasks.rb +107 -12
  34. data/lib/rubino/tools/base.rb +1 -1
  35. data/lib/rubino/tools/read_attachment_tool.rb +52 -54
  36. data/lib/rubino/tools/registry.rb +21 -72
  37. data/lib/rubino/tools/shell_entry_adapter.rb +97 -0
  38. data/lib/rubino/tools/shell_input_tool.rb +1 -1
  39. data/lib/rubino/tools/shell_kill_tool.rb +4 -4
  40. data/lib/rubino/tools/shell_registry.rb +178 -38
  41. data/lib/rubino/tools/shell_tool.rb +45 -5
  42. data/lib/rubino/tools/task_result_tool.rb +4 -1
  43. data/lib/rubino/tools/task_tool.rb +74 -11
  44. data/lib/rubino/tools/vision_tool.rb +1 -1
  45. data/lib/rubino/ui/agent_menu.rb +8 -2
  46. data/lib/rubino/ui/api.rb +11 -0
  47. data/lib/rubino/ui/bottom_composer.rb +24 -11
  48. data/lib/rubino/ui/cli.rb +254 -15
  49. data/lib/rubino/ui/markdown_renderer.rb +4 -1
  50. data/lib/rubino/ui/stdout_proxy.rb +25 -10
  51. data/lib/rubino/ui/streaming_markdown.rb +67 -12
  52. data/lib/rubino/ui/subagent_cards.rb +8 -7
  53. data/lib/rubino/ui/tool_args_stream.rb +143 -0
  54. data/lib/rubino/update_check.rb +10 -2
  55. data/lib/rubino/version.rb +1 -1
  56. metadata +14 -6
  57. data/AGENTS.md +0 -97
  58. data/docs/agents.md +0 -216
  59. data/lib/rubino/jobs/handlers/summarize_session_job.rb +0 -21
  60. data/lib/rubino/tools/summarize_file_tool.rb +0 -194
@@ -23,10 +23,13 @@ module Rubino
23
23
  # instead of ending the turn with nothing. Carries the trusted-harness marker
24
24
  # (#75) so it reads as runtime control, not as suspect user input.
25
25
  MAX_ITERATIONS_SUMMARY_NUDGE =
26
- "#{HARNESS_CONTROL_MARKER} You've reached the maximum number of " \
27
- "tool-calling iterations allowed. " \
28
- "Please provide a final response summarizing what you've found and " \
29
- "accomplished so far, without calling any more tools.".freeze
26
+ "#{HARNESS_CONTROL_MARKER} You've done a long run of tool calls this " \
27
+ "turn and hit this turn's tool-call checkpoint. Without calling any " \
28
+ "more tools, give the user a brief, constructive summary: what you " \
29
+ "accomplished and what's left. This is a per-turn checkpoint, NOT a " \
30
+ "hard limit on the work — do NOT tell the user to start a new session, " \
31
+ "and do NOT claim you are unable to continue or improve things. They " \
32
+ "can simply reply and you'll pick up right where you left off.".freeze
30
33
 
31
34
  # Framing for turn-start background notices (#148): tells the model the
32
35
  # notices are secondary to the user message that follows them.
@@ -119,6 +122,15 @@ module Rubino
119
122
  @tool_executor.on_result = method(:handle_tool_result) if @tool_executor.respond_to?(:on_result=)
120
123
  end
121
124
 
125
+ # How the LAST turn terminated, read back by the caller AFTER #run returns
126
+ # (mirrors how Lifecycle exposes #active_session). :completed on a normal
127
+ # answer; :max_iterations / :max_time when the turn was force-summarized at
128
+ # the tool/turn ceiling or the wall-clock net; :aborted on a user abort;
129
+ # :stream_incomplete when a truncated stream was handed back as the answer.
130
+ # The subagent-completion path reads this so a truncated run is reported
131
+ # PARTIAL instead of a false "completed" (#core-F1 honesty).
132
+ attr_reader :stop_reason
133
+
122
134
  # Runs the agent loop, returning the final assistant response content.
123
135
  def run(messages:, tools:) # rubocop:disable Metrics/PerceivedComplexity,Metrics/CyclomaticComplexity
124
136
  # Stash the resolved toolset so #streaming? can decide, per run, whether
@@ -191,6 +203,10 @@ module Rubino
191
203
  # most once per turn, only after a real block, and reset here so a fresh
192
204
  # turn never inherits a prior turn's reminder.
193
205
  @blocked_reminder_emitted = false
206
+ # Terminal outcome of THIS turn, read back via #stop_reason once #run
207
+ # returns. Optimistic default — every early return below that ISN'T a
208
+ # clean answer overwrites it (force-summary, abort, truncated stream).
209
+ @stop_reason = :completed
194
210
  token_total = 0
195
211
 
196
212
  loop do
@@ -313,6 +329,7 @@ module Rubino
313
329
  end
314
330
  # Continuations exhausted — hand back the recovered partial as the
315
331
  # (truncated) final answer: truthful and resumable, not a hard failure.
332
+ @stop_reason = :stream_incomplete
316
333
  emit_turn_summary(turn_started_at, token_total)
317
334
  return response.content
318
335
  end
@@ -577,6 +594,7 @@ module Rubino
577
594
  # note rather than a force-summary (no extra model call). The ledger note
578
595
  # keeps it truthful about how much ran.
579
596
  def abort_on_budget_exhausted(iteration, turn_started_at, token_total)
597
+ @stop_reason = :aborted
580
598
  note = "Stopped at user request after #{iteration} tool iteration" \
581
599
  "#{"s" if iteration != 1} (#{tool_count_label})."
582
600
  persist_user_message_note(note)
@@ -626,6 +644,10 @@ module Rubino
626
644
  end
627
645
 
628
646
  def force_summarize_budget_exhausted(messages, iteration, turn_started_at, token_total)
647
+ # Record WHICH rail forced the summary so a background subagent's
648
+ # completion can be reported PARTIAL with the real reason (time vs
649
+ # iterations) instead of a misleading "completed" (#core-F1).
650
+ @stop_reason = @budget.limiting_factor(iteration) == :time ? :max_time : :max_iterations
629
651
  nudge = force_summary_nudge
630
652
  persist_user_message(nudge)
631
653
  messages << { role: "user", content: nudge }
@@ -890,11 +912,16 @@ module Rubino
890
912
  # Providers like Bedrock require this message to appear in the conversation
891
913
  # history between the user prompt and the tool result(s).
892
914
  def build_assistant_tool_use_message(response)
893
- {
915
+ msg = {
894
916
  role: "assistant",
895
917
  content: response.content || "",
896
918
  tool_calls: response.tool_calls
897
919
  }
920
+ # Carry reasoning on the in-turn (non-streaming) assistant(tool_use) too,
921
+ # so load_history replays it and the prefix stays KV-cache-stable (#608b).
922
+ reasoning = response.respond_to?(:thinking) ? response.thinking : nil
923
+ msg[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
924
+ msg
898
925
  end
899
926
 
900
927
  # Called once per executed tool by the ToolExecutor's on_result sink, on
@@ -1058,6 +1085,10 @@ module Rubino
1058
1085
  metadata = tool_calls.empty? ? {} : { tool_calls: tool_calls }
1059
1086
  input_tokens = msg[:input_tokens].to_i
1060
1087
  metadata[:input_tokens] = input_tokens if input_tokens.positive?
1088
+ # Keep the reasoning with the assistant(tool_use) row so the next turn
1089
+ # replays it and the KV-cache prefix stays byte-stable (#608b) — this is
1090
+ # the row that diverged from the server cache when reasoning was dropped.
1091
+ metadata[:reasoning] = msg[:reasoning] if msg[:reasoning] && !msg[:reasoning].to_s.empty?
1061
1092
 
1062
1093
  with_db_retries do
1063
1094
  @message_store.create(
@@ -1108,6 +1139,13 @@ module Rubino
1108
1139
  # they see tool result messages with no matching toolUse upstream.
1109
1140
  metadata = response.has_tool_calls? ? { tool_calls: response.tool_calls } : {}
1110
1141
 
1142
+ # Persist the reasoning so later turns can replay it (Hermes parity,
1143
+ # #608b): the local KV cache holds this turn's reasoning tokens, so a
1144
+ # later replay that omits them busts the prefix and re-prefills the whole
1145
+ # context. Session::Message#to_context re-emits it as wire reasoning_content.
1146
+ reasoning = response.respond_to?(:thinking) ? response.thinking : nil
1147
+ metadata[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
1148
+
1111
1149
  # Record the REAL context size the provider saw for this response:
1112
1150
  # input_tokens covers the whole assembled prompt (system prompt +
1113
1151
  # history + tools), which no local chars/4 estimate can reproduce
@@ -33,11 +33,9 @@ assume or default to one.
33
33
  - Edit a file with `edit`/`multi_edit`/`patch`, never with `sed`/`awk`.
34
34
  - Search with `grep` or `glob`, never with raw `find` or shell pipelines.
35
35
  - Write a new file with `write`. Don't `echo > file` from the shell.
36
- - To get the gist of a LARGE document (converted PDF, log, transcript —
37
- more than a few hundred lines), use `summarize_file`, not `read`. It
38
- map-reduces the file in a separate context and returns only the summary,
39
- so the raw text never fills this conversation. Reach for `read` (with
40
- offset/limit) or `grep` only when you need exact lines, not an overview.
36
+ - For a LARGE file (converted PDF, log, transcript — more than a few hundred
37
+ lines), don't `read` it whole and flood this conversation: `grep` it to find
38
+ the relevant part, then `read` that span with offset/limit to page through it.
41
39
  - Tool output may be COMPRESSED to save context — it is lossless to YOU: a
42
40
  `# … N lines elided — read <path> offset=.. limit=..` pointer in a file read
43
41
  means that exact body is one targeted `read` away, verbatim (so issue that
@@ -0,0 +1,5 @@
1
+ # Memory discipline
2
+ You have persistent memory across sessions. Save durable facts using the memory tool: user preferences, environment details, tool quirks, and stable conventions. Memory is injected into every turn, so keep it compact and focused on facts that will still matter later.
3
+ Prioritize what reduces future user steering — the most valuable memory is one that prevents the user from having to correct or remind you again. User preferences and recurring corrections matter more than procedural task details.
4
+ Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO state to memory. Specifically: do not record PR numbers, issue numbers, commit SHAs, 'fixed bug X', 'submitted PR Y', 'Phase N done', file counts, or any artifact that will be stale in 7 days. If a fact will be stale in a week, it does not belong in memory. If you've discovered a reusable way to do something, save it as a skill, not a memory.
5
+ Write memories as declarative facts, not instructions to yourself. 'User prefers concise responses' ✓ — 'Always respond concisely' ✗. 'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. Imperative phrasing gets re-read as a directive in later sessions and can cause repeated work or override the user's current request. Procedures and workflows belong in skills, not memory.
@@ -0,0 +1,4 @@
1
+ # Tool-use enforcement
2
+ You MUST use your tools to take action — do not describe what you would do or plan to do without actually doing it. When you say you will perform an action (e.g. 'I will run the tests', 'Let me check the file', 'I will create the project'), you MUST immediately make the corresponding tool call in the same response. Never end your turn with a promise of future action — execute it now.
3
+ Keep working until the task is actually complete. Do not stop with a summary of what you plan to do next time. If you have tools available that can accomplish the task, use them instead of telling the user what you would do.
4
+ Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user. Responses that only describe intentions without acting are not acceptable.
@@ -0,0 +1,9 @@
1
+ # Google model operational directives
2
+ Follow these operational rules strictly:
3
+ - **Absolute paths:** Always construct and use absolute file paths for all file system operations. Combine the project root with relative paths.
4
+ - **Verify first:** Use read/grep to check file contents and project structure before making changes. Never guess at file contents.
5
+ - **Dependency checks:** Never assume a library is available. Check package.json, requirements.txt, Cargo.toml, Gemfile, etc. before importing.
6
+ - **Conciseness:** Keep explanatory text brief — a few sentences, not paragraphs. Focus on actions and results over narration.
7
+ - **Parallel tool calls:** When you need to perform multiple independent operations (e.g. reading several files), make all the tool calls in a single response rather than sequentially.
8
+ - **Non-interactive commands:** Use flags like -y, --yes, --non-interactive to prevent CLI tools from hanging on prompts.
9
+ - **Keep going:** Work autonomously until the task is fully resolved. Don't stop with a plan — execute it.
@@ -0,0 +1,48 @@
1
+ # Execution discipline
2
+ <tool_persistence>
3
+ - Use tools whenever they improve correctness, completeness, or grounding.
4
+ - Do not stop early when another tool call would materially improve the result.
5
+ - If a tool returns empty or partial results, retry with a different query or strategy before giving up.
6
+ - Keep calling tools until: (1) the task is complete, AND (2) you have verified the result.
7
+ </tool_persistence>
8
+
9
+ <mandatory_tool_use>
10
+ NEVER answer these from memory or mental computation — ALWAYS use a tool:
11
+ - Arithmetic, math, calculations → use the shell or a code tool
12
+ - Hashes, encodings, checksums → use the shell (e.g. sha256sum, base64)
13
+ - Current time, date, timezone → use the shell (e.g. date)
14
+ - System state: OS, CPU, memory, disk, ports, processes → use the shell
15
+ - File contents, sizes, line counts → use read, grep, or the shell
16
+ - Git history, branches, diffs → use the shell
17
+ - Current facts (weather, news, versions) → use web_search
18
+ Your memory and user profile describe the USER, not the system you are running on. The execution environment may differ from what the user profile says about their personal setup.
19
+ </mandatory_tool_use>
20
+
21
+ <act_dont_ask>
22
+ When a question has an obvious default interpretation, act on it immediately instead of asking for clarification. Examples:
23
+ - 'Is port 443 open?' → check THIS machine (don't ask 'open where?')
24
+ - 'What OS am I running?' → check the live system (don't use user profile)
25
+ - 'What time is it?' → run `date` (don't guess)
26
+ Only ask for clarification when the ambiguity genuinely changes what tool you would call.
27
+ </act_dont_ask>
28
+
29
+ <prerequisite_checks>
30
+ - Before taking an action, check whether prerequisite discovery, lookup, or context-gathering steps are needed.
31
+ - Do not skip prerequisite steps just because the final action seems obvious.
32
+ - If a task depends on output from a prior step, resolve that dependency first.
33
+ </prerequisite_checks>
34
+
35
+ <verification>
36
+ Before finalizing your response:
37
+ - Correctness: does the output satisfy every stated requirement?
38
+ - Grounding: are factual claims backed by tool outputs or provided context?
39
+ - Formatting: does the output match the requested format or schema?
40
+ - Safety: if the next step has side effects (file writes, commands, API calls), confirm scope before executing.
41
+ </verification>
42
+
43
+ <missing_context>
44
+ - If required context is missing, do NOT guess or hallucinate an answer.
45
+ - Use the appropriate lookup tool when missing information is retrievable (grep, web_search, read, etc.).
46
+ - Ask a clarifying question only when the information cannot be retrieved by tools.
47
+ - If you must proceed with incomplete information, label assumptions explicitly.
48
+ </missing_context>
@@ -13,7 +13,8 @@ module Rubino
13
13
 
14
14
  def initialize(session_id: nil, model_override: nil, provider_override: nil,
15
15
  max_turns: nil, ignore_rules: false, ui: nil, agent_definition: nil,
16
- event_bus: nil, announce_session: true, session_source: "cli")
16
+ event_bus: nil, announce_session: true, session_source: "cli",
17
+ interactive: false)
17
18
  @ui = ui || Rubino.ui
18
19
  # An in-chat rewind/fork builds a runner on the child session but has its
19
20
  # own purpose-built "┄ rewound to message N — editing ┄" marker, so the
@@ -40,6 +41,11 @@ module Rubino
40
41
  # not the user's own conversations) while staying resumable by explicit
41
42
  # id. Like Claude Code hiding its Task subagent sessions from the picker.
42
43
  @session_source = session_source
44
+ # True only for the interactive REPL, where more in-process turns follow
45
+ # this one. Lifecycle uses it to keep automatic memory extraction OFF the
46
+ # live KV-cache slot between turns (#608c) — a headless one-shot, which
47
+ # exits after its single turn, leaves it false and extracts normally.
48
+ @interactive = interactive
43
49
  # Pre-instantiate so cancel! is meaningful between turns and during the
44
50
  # window between Signal.trap install and run() — a too-early Ctrl+C
45
51
  # used to land on a nil token and silently no-op, then the next run
@@ -123,18 +129,19 @@ module Rubino
123
129
  cancel_token: @cancel_token,
124
130
  model_override: @explicit_model_override,
125
131
  provider_override: @provider_override,
132
+ interactive: @interactive,
126
133
  # The SOFT iteration ceiling (where the budget-extension prompt fires)
127
- # vs the HARD max_turns outer rail. For the main agent @max_turns is the
128
- # `--max-turns N` override, which intentionally sets the soft ceiling.
129
- # A SUBAGENT, though, gets @max_turns = definition.max_turns (= config
130
- # agent.max_turns, 90) — passing THAT as the soft ceiling made soft ==
131
- # hard, so #extendable? was always false and a subagent could NEVER
132
- # surface a budget request (#571) — it just force-summarized. Subagents
133
- # therefore pass nil so the soft ceiling falls back to config
134
- # agent.max_tool_iterations (25) < the 90 hard rail, exactly like the
135
- # main agent so a subagent at 25 iterations parks and asks for budget
136
- # via the dropdown (#574), extendable up to the 90 outer rail.
137
- max_tool_iterations: @session_source == "subagent" ? nil : @max_turns,
134
+ # vs the HARD max_turns outer rail (config agent.max_turns, applied
135
+ # inside IterationBudget). @max_turns carries the per-run soft cap on
136
+ # BOTH paths:
137
+ # - MAIN agent: the `--max-turns N` override (nil → config default).
138
+ # - SUBAGENT: definition.max_turns — e.g. explore=20, general=50,
139
+ # BELOW the 90 hard rail — so the child both HONORS its per-agent
140
+ # cap (#571: it used to be dropped entirely) AND can surface the
141
+ # #574 budget-park at that cap, extendable up to the 90 outer rail.
142
+ # A subagent that sets no max_turns falls back to config agent.max_turns
143
+ # (soft == hard) and simply hard-stops there, like the main agent.
144
+ max_tool_iterations: @max_turns,
138
145
  polishing: @polishing
139
146
  )
140
147
 
@@ -150,10 +157,19 @@ module Rubino
150
157
  # counterpart to the manual /compact swap (chat_command rebuilds the
151
158
  # runner on result[:compact_into]).
152
159
  @session = lifecycle.active_session
160
+ # Post-turn state, read by the subagent-completion path (task_tool) so a
161
+ # force-summarized/truncated child is reported PARTIAL, not "completed".
162
+ @last_stop_reason = lifecycle.last_stop_reason
153
163
 
154
164
  response
155
165
  end
156
166
 
167
+ # How this runner's LAST turn terminated (Agent::Loop#stop_reason),
168
+ # threaded up via Lifecycle. nil until a turn has run. Read by the `task`
169
+ # tool after a subagent's #run! to distinguish a real completion from a
170
+ # budget-/time-truncated partial.
171
+ attr_reader :last_stop_reason
172
+
157
173
  # Pins the agent Definition this runner threads into every subsequent turn
158
174
  # (the sticky `/agent <name>` / Tab-cycle switch). Lifecycle reads
159
175
  # @agent_definition fresh on each #run!, so swapping it here takes effect
@@ -219,6 +235,32 @@ module Rubino
219
235
  model_id
220
236
  end
221
237
 
238
+ # Aligns a RESUMED session's stored model with the model the adapter will
239
+ # actually use this run (#model-resume). Lifecycle builds the adapter from
240
+ # `@explicit_model_override || @session[:model]`, and the CLI ALWAYS passes
241
+ # a boot override (explicit `-m`, else `model.default` from config) — so on
242
+ # resume the override, NOT the model this session happened to last use, is
243
+ # what generates. The session row, the footer/statusbar, the token-budget
244
+ # context window and `/status` all read `session[:model]`, so without this
245
+ # they showed the STALE pinned model (e.g. the old default) while the agent
246
+ # was really running the new one: changing `model.default` looked ignored
247
+ # even though generation honored it. Re-point the row to the effective
248
+ # model so every surface tells the truth and a config change takes visible
249
+ # effect. No-op when there is no explicit override (then the session model
250
+ # IS what the adapter uses) or it already matches.
251
+ def sync_resumed_session_model!(session)
252
+ return unless @explicit_model_override
253
+ return if session[:model] == @explicit_model_override
254
+
255
+ session[:model] = @explicit_model_override
256
+ session[:provider] = @provider_override ||
257
+ LLM::ProviderResolver.resolve(@explicit_model_override,
258
+ explicit_provider: @config.dig("model", "provider"))
259
+ return unless @session_repo.persisted?(session[:id])
260
+
261
+ @session_repo.update(session[:id], model: session[:model], provider: session[:provider])
262
+ end
263
+
222
264
  # Marks the current session ended (#100). Called from the CLI on a clean
223
265
  # REPL teardown (and best-effort on terminal close) so a session stops
224
266
  # showing as "active" forever and cleanup/list/--continue can tell a
@@ -437,6 +479,7 @@ module Rubino
437
479
  # sees us as the live owner and forks rather than interleaving.
438
480
  session[:persisted] = true
439
481
  session[:owner_pid] = Process.pid
482
+ sync_resumed_session_model!(session)
440
483
  @ui.status("Resuming session: #{session[:id][0..7]}...") if @announce_session
441
484
  session
442
485
  else
@@ -187,7 +187,7 @@ module Rubino
187
187
  # Mirror the chunk onto the bus so the API/SSE stream isn't silent
188
188
  # during a long tool call: the Recorder maps TOOL_PROGRESS to a
189
189
  # `tool.progress` event, which resets the idle watchdog. Without
190
- # this a busy tool (summarize_file: ~30 sequential aux-LLM calls,
190
+ # this a busy tool (a long shell stream, or an aux-LLM-backed tool,
191
191
  # no run-events) is killed at the 300s idle timeout. Throttled so a
192
192
  # chatty tool (shell streaming thousands of stdout lines) doesn't
193
193
  # write a DB row + SSE frame per line — one heartbeat per interval
@@ -44,13 +44,18 @@ module Rubino
44
44
  # between child events. Repaints go through the composer's render mutex, so
45
45
  # they never race the keystroke handler. Exits as soon as no child is live
46
46
  # (it clears the region one last time) or when killed on teardown.
47
- def start_ticker(composer)
47
+ # +on_tick+ (optional) runs once per tick after the card repaint — used by
48
+ # the attach view to live-tail a focused shell's new output on the SAME
49
+ # 1 Hz cadence and through the same render mutex (composer#print_above) the
50
+ # cards use, so it never races the keystroke handler.
51
+ def start_ticker(composer, &on_tick)
48
52
  Thread.new do
49
53
  loop do
50
54
  sleep(IDLE_CARD_TICK)
51
55
  break unless composer.equal?(UI::BottomComposer.current)
52
56
 
53
57
  paint
58
+ on_tick&.call
54
59
  break unless children_live?
55
60
  end
56
61
  rescue StandardError => e
@@ -124,6 +124,18 @@ module Rubino
124
124
  # must never break the idle prompt, so it falls back to "nothing pending"
125
125
  # and the manual slash paths still work.
126
126
  def auto_resolve_pending_subagent_request(_runner = nil)
127
+ # Defer while the subagent PICKER is open (#586): the picker and this
128
+ # blocking approval/budget modal compete for the same stdin. A child
129
+ # hammering tool calls re-hits its budget gate every few ticks, so
130
+ # auto-firing the `wants +budget` modal here would suspend the open picker
131
+ # and swallow the ↓/Enter the user meant to ATTACH with — worse, the
132
+ # picker's ↓+Enter gesture could land on the modal's destructive
133
+ # "Summarize now". The request stays pending and auto-presents on the very
134
+ # next idle tick once the picker closes: deferred a few seconds, never
135
+ # lost — it still appears like a permission, in arrival order. (@composer
136
+ # is nil on the piped/one-shot paths, which have no picker.)
137
+ return false if @composer&.agent_menu_open?
138
+
127
139
  agents_request_handler.auto_resolve_pending
128
140
  rescue StandardError => e
129
141
  # Resilience floor: a hiccup in the auto-open must never crash the idle
@@ -276,7 +288,7 @@ module Rubino
276
288
  text, image_paths = Chat::ImageInbox.resolve_oneshot(query, opt(:image))
277
289
  requested_session_id = session_resolver.resolve_session_id
278
290
  runner = build_runner(session_id: requested_session_id, ui: ui,
279
- announce_session: announce_session)
291
+ announce_session: announce_session, interactive: false)
280
292
  warn_if_resume_forked(requested_session_id, runner)
281
293
  note_if_resuming_compacted_parent(runner)
282
294
  recorder = Output::TurnRecorder.new.attach!
@@ -1576,7 +1588,7 @@ module Rubino
1576
1588
  composer.reset_input
1577
1589
  seed_draft(composer, draft)
1578
1590
  idle_cards.paint
1579
- ticker = idle_cards.children_live? ? idle_cards.start_ticker(composer) : nil
1591
+ ticker = idle_cards.children_live? ? idle_cards.start_ticker(composer) { tail_attached_shell(composer) } : nil
1580
1592
 
1581
1593
  # SIGINT trap as a FALLBACK only (BH-2 / #551): the dependable idle Ctrl+C
1582
1594
  # path is now the in-band \x03 byte (on_idle_interrupt above), because
@@ -1632,6 +1644,7 @@ module Rubino
1632
1644
  # suspend THIS composer and restore it after), so it does not race the
1633
1645
  # reader. Resolves one request, then `next` so the cards repaint and the
1634
1646
  # loop re-checks for the next pending request before reading input.
1647
+ # (It defers itself while the picker is open — see the hook, #586.)
1635
1648
  if auto_resolve_pending_subagent_request(runner)
1636
1649
  idle_cards.paint
1637
1650
  next
@@ -1940,7 +1953,11 @@ module Rubino
1940
1953
  # cards stay visible and their elapsed time advances until the turn ends.
1941
1954
  # Killed in the ensure below.
1942
1955
  idle_cards.paint
1943
- card_ticker = idle_cards.children_live? ? idle_cards.start_ticker(composer) : nil
1956
+ card_ticker = if idle_cards.children_live?
1957
+ idle_cards.start_ticker(composer) do
1958
+ tail_attached_shell(composer)
1959
+ end
1960
+ end
1944
1961
 
1945
1962
  # If this turn's prompt came off the input queue (interrupt-by-default
1946
1963
  # Enter, Alt+Enter, or "/queued" during the previous turn), commit it now
@@ -1967,6 +1984,11 @@ module Rubino
1967
1984
  # Only thread the paste expansions when a placeholder was actually
1968
1985
  # collected, so a normal turn's runner.run signature is unchanged.
1969
1986
  run_kwargs[:paste_expansions] = paste_expansions unless paste_expansions.empty?
1987
+ # Drive the live `ctx ~Xk/…` gauge during THIS turn (#608e): hand the UI a
1988
+ # cheap render lambda (base ctx captured once + the in-flight token
1989
+ # estimate) so its ticker repaints the bar ~1/s as the model generates,
1990
+ # instead of the bar sitting frozen until the turn ends. Cleared in ensure.
1991
+ ui.live_status_provider = live_status_meter(runner) if ui.respond_to?(:live_status_provider=)
1970
1992
  oneshot = one_shot_agent_definition(agent_name)
1971
1993
  if oneshot && runner.respond_to?(:run_with_agent)
1972
1994
  runner.run_with_agent(oneshot, prompt, **run_kwargs)
@@ -2001,6 +2023,8 @@ module Rubino
2001
2023
  # time the runner returns, so the facet has already landed in the
2002
2024
  # footer and the engine thread must not outlive the turn.
2003
2025
  ui.turn_finished if ui.respond_to?(:turn_finished)
2026
+ # Stop driving the live ctx gauge; the reconcile below sets the exact bar.
2027
+ ui.live_status_provider = nil if ui.respond_to?(:live_status_provider=)
2004
2028
  # Stop the during-turn panel ticker before tearing the composer down, so
2005
2029
  # it can't repaint over the next idle prompt (the idle read starts its
2006
2030
  # own ticker). Idempotent if it already exited on its own (no live child).
@@ -2033,17 +2057,42 @@ module Rubino
2033
2057
  session = runner.session
2034
2058
  budget = Context::TokenBudget.new(model_id: session[:model], config: Rubino.configuration)
2035
2059
  messages = ::Rubino::Session::Store.new.for_session(session[:id])
2060
+ render_status_bar(session, budget, context_tokens(messages, budget))
2061
+ rescue StandardError
2062
+ nil
2063
+ end
2064
+
2065
+ # A cheap, DB-free render lambda for the LIVE ctx gauge (#608e): captures the
2066
+ # base (persisted) token count ONCE here on the main thread, then maps an
2067
+ # in-flight token estimate → a bar line with NO further DB reads, so the UI
2068
+ # ticker can call it ~1/s from its thread without re-querying the session.
2069
+ # The base omits this turn's not-yet-persisted generation; the +extra+
2070
+ # estimate covers it, and #ensure reconciles to the exact bar at turn end.
2071
+ # nil (no live gauge) when the bar is disabled or on any failure.
2072
+ def live_status_meter(runner)
2073
+ return nil unless runner && Rubino.configuration.display_statusbar?
2074
+
2075
+ session = runner.session
2076
+ budget = Context::TokenBudget.new(model_id: session[:model], config: Rubino.configuration)
2077
+ base = context_tokens(::Rubino::Session::Store.new.for_session(session[:id]), budget)
2078
+ ->(extra) { render_status_bar(session, budget, base + extra.to_i) }
2079
+ rescue StandardError
2080
+ nil
2081
+ end
2082
+
2083
+ # Renders the model + context-saturation bar for +tokens+ against the
2084
+ # session's window. Shared by the turn-boundary bar (#build_status_line) and
2085
+ # the live gauge (#live_status_meter) so both read one format (#608e).
2086
+ def render_status_bar(session, budget, tokens)
2036
2087
  UI::StatusBar.render(
2037
2088
  chips: { mode: Rubino::Modes.current, agent: status_agent_chip,
2038
2089
  branch: @branch_short_id,
2039
2090
  skill: Rubino::ActiveSkill.current },
2040
2091
  model: session[:model] || model_name,
2041
- tokens: context_tokens(messages, budget),
2092
+ tokens: tokens,
2042
2093
  window: budget.available_tokens,
2043
2094
  pastel: pastel
2044
2095
  )
2045
- rescue StandardError
2046
- nil
2047
2096
  end
2048
2097
 
2049
2098
  # The status-bar agent chip (#320): the active primary agent name, but
@@ -2966,7 +3015,7 @@ module Rubino
2966
3015
  # Builds an Agent::Runner with this invocation's shared flag overrides —
2967
3016
  # only the session and UI vary per call site (one-shot, interactive boot,
2968
3017
  # /sessions resume, /new).
2969
- def build_runner(session_id:, ui:, announce_session: true)
3018
+ def build_runner(session_id:, ui:, announce_session: true, interactive: true)
2970
3019
  Agent::Runner.new(
2971
3020
  session_id: session_id,
2972
3021
  model_override: model_name,
@@ -2974,7 +3023,11 @@ module Rubino
2974
3023
  max_turns: max_turns_override,
2975
3024
  ignore_rules: opt(:ignore_rules) || false,
2976
3025
  ui: ui,
2977
- announce_session: announce_session
3026
+ announce_session: announce_session,
3027
+ # build_runner is the interactive-REPL builder; only setup_oneshot
3028
+ # overrides this to false (a headless one-shot exits after one turn).
3029
+ # Drives Lifecycle's single-slot KV-cache gate (#608c).
3030
+ interactive: interactive
2978
3031
  )
2979
3032
  end
2980
3033
 
@@ -3008,7 +3061,7 @@ module Rubino
3008
3061
  # and what it said.
3009
3062
  def attach_agent_view(id, ui)
3010
3063
  entry = Tools::BackgroundTasks.instance.find(id)
3011
- return ui.error("no background subagent with id #{id}") unless entry
3064
+ return ui.error("no background task with id #{id}") unless entry
3012
3065
 
3013
3066
  @attached_id = id
3014
3067
  # Focus the composer on this sub (tmux-style unified render): only frames
@@ -3030,13 +3083,57 @@ module Rubino
3030
3083
  # the focus gate) hands the bottom region to the sub; detach refocuses
3031
3084
  # main and the cards return.
3032
3085
  composer&.set_cards([])
3033
- ui.info(pastel.cyan("▶ attached to #{id} · #{entry.subagent}") +
3034
- pastel.dim(" type to steer · to switch subagents · ← to go back"))
3035
- session_resolver.replay_messages(ui, snapshot)
3086
+ # The attach BODY differs by kind — the one polymorphic seam: a subagent
3087
+ # replays its session transcript (and its per-sub CLI keeps painting live
3088
+ # through the focus gate); a shell has no transcript, so it shows its
3089
+ # captured OUTPUT and the user types straight to its stdin.
3090
+ if entry.shell?
3091
+ ui.info(pastel.cyan("▶ attached to #{id} · shell") +
3092
+ pastel.dim(" — type to send input · ↓ to switch · ← to go back"))
3093
+ paint_shell_tail(composer, entry, full: true)
3094
+ else
3095
+ ui.info(pastel.cyan("▶ attached to #{id} · #{entry.subagent}") +
3096
+ pastel.dim(" — type to steer · ↓ to switch subagents · ← to go back"))
3097
+ session_resolver.replay_messages(ui, snapshot)
3098
+ end
3099
+ end
3100
+ # No watcher: a subagent's OWN per-sub CLI paints its ongoing activity live
3101
+ # through the focus gate. A shell has none, so its NEW output is rendered
3102
+ # after each input (see #handle_attached_input) — continuous auto-tailing
3103
+ # is a later refinement.
3104
+ end
3105
+
3106
+ # Paint a focused shell's NEW output into the attached view, through the SAME
3107
+ # focus-gated, render-mutex-safe seam subagent live frames use
3108
+ # (composer#print_above with the shell's origin) — so it is safe to call both
3109
+ # from the keystroke handler AND the 1 Hz idle ticker thread. A private,
3110
+ # mutex-guarded cursor tracks bytes already shown so it NEVER advances the
3111
+ # shared read_offset the model's shell_output reads. full: ⇒ from the start
3112
+ # (on attach); otherwise only bytes added since the last paint.
3113
+ def paint_shell_tail(composer, entry, full: false)
3114
+ return unless composer && entry
3115
+
3116
+ @attached_shell_mutex ||= Mutex.new
3117
+ text = @attached_shell_mutex.synchronize do
3118
+ buf = entry.output_all.to_s
3119
+ @attached_shell_cursor = 0 if full || @attached_shell_cursor.nil?
3120
+ slice = buf.byteslice(@attached_shell_cursor..) || ""
3121
+ @attached_shell_cursor = buf.bytesize
3122
+ slice
3036
3123
  end
3037
- # No watcher: the sub's OWN per-sub CLI now paints its ongoing activity
3038
- # live through the focus gate (it commits with this sub's origin), so the
3039
- # attached view stays live without a polling ticker.
3124
+ return if text.strip.empty?
3125
+
3126
+ composer.print_above(text.chomp, origin: @attached_id)
3127
+ end
3128
+
3129
+ # The idle ticker's per-tick hook (#start_ticker): live-tail the focused
3130
+ # shell's output, if one is attached. A no-op while on a subagent (its own
3131
+ # per-sub CLI streams) or the main view.
3132
+ def tail_attached_shell(composer)
3133
+ return unless @attached_id
3134
+
3135
+ entry = Tools::BackgroundTasks.instance.find(@attached_id)
3136
+ paint_shell_tail(composer, entry) if entry&.shell?
3040
3137
  end
3041
3138
 
3042
3139
  # Leave the agent-view and return to the main session: clear the screen,
@@ -3154,9 +3251,14 @@ module Rubino
3154
3251
  result = cmd_executor.try_execute(input)
3155
3252
  attach_agent_view(result[:attach_agent], ui) if result.is_a?(Hash) && result[:attach_agent]
3156
3253
  else
3157
- # Plain text is a steer note folded into the child's context at its next
3158
- # turn boundary (a child parked on an approval folds it once it resumes).
3254
+ # Plain text is the worker's input: a subagent folds it as a steer note at
3255
+ # its next turn boundary; a shell writes it to stdin. For a shell, surface
3256
+ # the output that input produced so the attached view stays useful.
3159
3257
  agents_request_handler.steer_agent(id, input)
3258
+ if entry.shell?
3259
+ sleep 0.2 # let the shell consume the line + emit its response
3260
+ paint_shell_tail(UI::BottomComposer.current, entry)
3261
+ end
3160
3262
  end
3161
3263
  end
3162
3264
 
@@ -601,6 +601,11 @@ module Rubino
601
601
  ui.warning("gem update failed. If this is a permission error, re-run the installer or try `gem update --user-install #{Rubino::UpdateCheck::GEM_NAME}`.")
602
602
  return
603
603
  end
604
+ # The subprocess installed the new gem into this process's gem paths,
605
+ # but our in-memory spec list predates it — refresh so the version
606
+ # query below sees what `gem update` just wrote (else we'd report the
607
+ # pre-update version and claim "already up to date").
608
+ Gem.refresh
604
609
  new_v = Rubino::UpdateCheck.installed_gem_version(Rubino::UpdateCheck::GEM_NAME)
605
610
  if new_v && Gem::Version.new(new_v) > Gem::Version.new(current)
606
611
  ui.info("rubino is now on v#{new_v} (was v#{current}).")