RubyGems - rubino-agent - Versions diffs - 0.5.2.1 → 0.5.2.2 - Mend

rubino-agent 0.5.2.1 → 0.5.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

checksums.yaml +4 -4
data/.dockerignore +15 -0
data/CHANGELOG.md +56 -0
data/Dockerfile +56 -0
data/agent.md +112 -0
data/docs/design/bg-shell-pty-port.md +88 -0
data/docs/design/bg-shell-review-refinements.md +65 -0
data/docs/design/bg-shell-ux.md +130 -0
data/docs/tools.md +3 -12
data/lib/rubino/agent/iteration_budget.rb +13 -0
data/lib/rubino/agent/loop.rb +43 -5
data/lib/rubino/agent/prompts/build.txt +3 -5
data/lib/rubino/agent/prompts/memory_guidance.txt +5 -0
data/lib/rubino/agent/prompts/tool_use_enforcement.txt +4 -0
data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt +9 -0
data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt +48 -0
data/lib/rubino/agent/runner.rb +55 -12
data/lib/rubino/agent/tool_executor.rb +1 -1
data/lib/rubino/cli/chat/idle_card_host.rb +6 -1
data/lib/rubino/cli/chat_command.rb +119 -17
data/lib/rubino/cli/commands.rb +5 -0
data/lib/rubino/commands/handlers/agents.rb +27 -18
data/lib/rubino/commands/handlers/status.rb +6 -3
data/lib/rubino/config/configuration.rb +25 -8
data/lib/rubino/config/defaults.rb +15 -13
data/lib/rubino/context/prompt_assembler.rb +89 -1
data/lib/rubino/context/summary_builder.rb +0 -22
data/lib/rubino/interaction/events.rb +2 -2
data/lib/rubino/interaction/lifecycle.rb +54 -20
data/lib/rubino/llm/ruby_llm_adapter.rb +178 -20
data/lib/rubino/security/redactor.rb +1 -1
data/lib/rubino/session/message.rb +12 -0
data/lib/rubino/tools/background_tasks.rb +107 -12
data/lib/rubino/tools/base.rb +1 -1
data/lib/rubino/tools/read_attachment_tool.rb +52 -54
data/lib/rubino/tools/registry.rb +21 -72
data/lib/rubino/tools/shell_entry_adapter.rb +97 -0
data/lib/rubino/tools/shell_input_tool.rb +1 -1
data/lib/rubino/tools/shell_kill_tool.rb +4 -4
data/lib/rubino/tools/shell_registry.rb +178 -38
data/lib/rubino/tools/shell_tool.rb +45 -5
data/lib/rubino/tools/task_result_tool.rb +4 -1
data/lib/rubino/tools/task_tool.rb +74 -11
data/lib/rubino/tools/vision_tool.rb +1 -1
data/lib/rubino/ui/agent_menu.rb +8 -2
data/lib/rubino/ui/api.rb +11 -0
data/lib/rubino/ui/bottom_composer.rb +24 -11
data/lib/rubino/ui/cli.rb +254 -15
data/lib/rubino/ui/markdown_renderer.rb +4 -1
data/lib/rubino/ui/stdout_proxy.rb +25 -10
data/lib/rubino/ui/streaming_markdown.rb +67 -12
data/lib/rubino/ui/subagent_cards.rb +8 -7
data/lib/rubino/ui/tool_args_stream.rb +143 -0
data/lib/rubino/update_check.rb +10 -2
data/lib/rubino/version.rb +1 -1
metadata +14 -6
data/AGENTS.md +0 -97
data/docs/agents.md +0 -216
data/lib/rubino/jobs/handlers/summarize_session_job.rb +0 -21
data/lib/rubino/tools/summarize_file_tool.rb +0 -194

data/lib/rubino/agent/loop.rb CHANGED Viewed

@@ -23,10 +23,13 @@ module Rubino
       # instead of ending the turn with nothing. Carries the trusted-harness marker
       # (#75) so it reads as runtime control, not as suspect user input.
       MAX_ITERATIONS_SUMMARY_NUDGE =
-        "#{HARNESS_CONTROL_MARKER} You've reached the maximum number of " \
-        "tool-calling iterations allowed. " \
-        "Please provide a final response summarizing what you've found and " \
-        "accomplished so far, without calling any more tools.".freeze
+        "#{HARNESS_CONTROL_MARKER} You've done a long run of tool calls this " \
+        "turn and hit this turn's tool-call checkpoint. Without calling any " \
+        "more tools, give the user a brief, constructive summary: what you " \
+        "accomplished and what's left. This is a per-turn checkpoint, NOT a " \
+        "hard limit on the work — do NOT tell the user to start a new session, " \
+        "and do NOT claim you are unable to continue or improve things. They " \
+        "can simply reply and you'll pick up right where you left off.".freeze
       # Framing for turn-start background notices (#148): tells the model the
       # notices are secondary to the user message that follows them.
@@ -119,6 +122,15 @@ module Rubino
         @tool_executor.on_result = method(:handle_tool_result) if @tool_executor.respond_to?(:on_result=)
       end
+      # How the LAST turn terminated, read back by the caller AFTER #run returns
+      # (mirrors how Lifecycle exposes #active_session). :completed on a normal
+      # answer; :max_iterations / :max_time when the turn was force-summarized at
+      # the tool/turn ceiling or the wall-clock net; :aborted on a user abort;
+      # :stream_incomplete when a truncated stream was handed back as the answer.
+      # The subagent-completion path reads this so a truncated run is reported
+      # PARTIAL instead of a false "completed" (#core-F1 honesty).
+      attr_reader :stop_reason
       # Runs the agent loop, returning the final assistant response content.
       def run(messages:, tools:) # rubocop:disable Metrics/PerceivedComplexity,Metrics/CyclomaticComplexity
         # Stash the resolved toolset so #streaming? can decide, per run, whether
@@ -191,6 +203,10 @@ module Rubino
         # most once per turn, only after a real block, and reset here so a fresh
         # turn never inherits a prior turn's reminder.
         @blocked_reminder_emitted = false
+        # Terminal outcome of THIS turn, read back via #stop_reason once #run
+        # returns. Optimistic default — every early return below that ISN'T a
+        # clean answer overwrites it (force-summary, abort, truncated stream).
+        @stop_reason = :completed
         token_total = 0
         loop do
@@ -313,6 +329,7 @@ module Rubino
             end
             # Continuations exhausted — hand back the recovered partial as the
             # (truncated) final answer: truthful and resumable, not a hard failure.
+            @stop_reason = :stream_incomplete
             emit_turn_summary(turn_started_at, token_total)
             return response.content
           end
@@ -577,6 +594,7 @@ module Rubino
       # note rather than a force-summary (no extra model call). The ledger note
       # keeps it truthful about how much ran.
       def abort_on_budget_exhausted(iteration, turn_started_at, token_total)
+        @stop_reason = :aborted
         note = "Stopped at user request after #{iteration} tool iteration" \
                "#{"s" if iteration != 1} (#{tool_count_label})."
         persist_user_message_note(note)
@@ -626,6 +644,10 @@ module Rubino
       end
       def force_summarize_budget_exhausted(messages, iteration, turn_started_at, token_total)
+        # Record WHICH rail forced the summary so a background subagent's
+        # completion can be reported PARTIAL with the real reason (time vs
+        # iterations) instead of a misleading "completed" (#core-F1).
+        @stop_reason = @budget.limiting_factor(iteration) == :time ? :max_time : :max_iterations
         nudge = force_summary_nudge
         persist_user_message(nudge)
         messages << { role: "user", content: nudge }
@@ -890,11 +912,16 @@ module Rubino
       # Providers like Bedrock require this message to appear in the conversation
       # history between the user prompt and the tool result(s).
       def build_assistant_tool_use_message(response)
-        {
+        msg = {
           role: "assistant",
           content: response.content || "",
           tool_calls: response.tool_calls
         }
+        # Carry reasoning on the in-turn (non-streaming) assistant(tool_use) too,
+        # so load_history replays it and the prefix stays KV-cache-stable (#608b).
+        reasoning = response.respond_to?(:thinking) ? response.thinking : nil
+        msg[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
+        msg
       end
       # Called once per executed tool by the ToolExecutor's on_result sink, on
@@ -1058,6 +1085,10 @@ module Rubino
         metadata = tool_calls.empty? ? {} : { tool_calls: tool_calls }
         input_tokens = msg[:input_tokens].to_i
         metadata[:input_tokens] = input_tokens if input_tokens.positive?
+        # Keep the reasoning with the assistant(tool_use) row so the next turn
+        # replays it and the KV-cache prefix stays byte-stable (#608b) — this is
+        # the row that diverged from the server cache when reasoning was dropped.
+        metadata[:reasoning] = msg[:reasoning] if msg[:reasoning] && !msg[:reasoning].to_s.empty?
         with_db_retries do
           @message_store.create(
@@ -1108,6 +1139,13 @@ module Rubino
         # they see tool result messages with no matching toolUse upstream.
         metadata = response.has_tool_calls? ? { tool_calls: response.tool_calls } : {}
+        # Persist the reasoning so later turns can replay it (Hermes parity,
+        # #608b): the local KV cache holds this turn's reasoning tokens, so a
+        # later replay that omits them busts the prefix and re-prefills the whole
+        # context. Session::Message#to_context re-emits it as wire reasoning_content.
+        reasoning = response.respond_to?(:thinking) ? response.thinking : nil
+        metadata[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
         # Record the REAL context size the provider saw for this response:
         # input_tokens covers the whole assembled prompt (system prompt +
         # history + tools), which no local chars/4 estimate can reproduce

data/lib/rubino/agent/prompts/build.txt CHANGED Viewed

@@ -33,11 +33,9 @@ assume or default to one.
   - Edit a file with `edit`/`multi_edit`/`patch`, never with `sed`/`awk`.
   - Search with `grep` or `glob`, never with raw `find` or shell pipelines.
   - Write a new file with `write`. Don't `echo > file` from the shell.
-- To get the gist of a LARGE document (converted PDF, log, transcript —
-  more than a few hundred lines), use `summarize_file`, not `read`. It
-  map-reduces the file in a separate context and returns only the summary,
-  so the raw text never fills this conversation. Reach for `read` (with
-  offset/limit) or `grep` only when you need exact lines, not an overview.
+- For a LARGE file (converted PDF, log, transcript — more than a few hundred
+  lines), don't `read` it whole and flood this conversation: `grep` it to find
+  the relevant part, then `read` that span with offset/limit to page through it.
 - Tool output may be COMPRESSED to save context — it is lossless to YOU: a
   `# … N lines elided — read <path> offset=.. limit=..` pointer in a file read
   means that exact body is one targeted `read` away, verbatim (so issue that

data/lib/rubino/agent/prompts/memory_guidance.txt ADDED Viewed

@@ -0,0 +1,5 @@
+# Memory discipline
+You have persistent memory across sessions. Save durable facts using the memory tool: user preferences, environment details, tool quirks, and stable conventions. Memory is injected into every turn, so keep it compact and focused on facts that will still matter later.
+Prioritize what reduces future user steering — the most valuable memory is one that prevents the user from having to correct or remind you again. User preferences and recurring corrections matter more than procedural task details.
+Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO state to memory. Specifically: do not record PR numbers, issue numbers, commit SHAs, 'fixed bug X', 'submitted PR Y', 'Phase N done', file counts, or any artifact that will be stale in 7 days. If a fact will be stale in a week, it does not belong in memory. If you've discovered a reusable way to do something, save it as a skill, not a memory.
+Write memories as declarative facts, not instructions to yourself. 'User prefers concise responses' ✓ — 'Always respond concisely' ✗. 'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. Imperative phrasing gets re-read as a directive in later sessions and can cause repeated work or override the user's current request. Procedures and workflows belong in skills, not memory.

data/lib/rubino/agent/prompts/tool_use_enforcement.txt ADDED Viewed

@@ -0,0 +1,4 @@
+# Tool-use enforcement
+You MUST use your tools to take action — do not describe what you would do or plan to do without actually doing it. When you say you will perform an action (e.g. 'I will run the tests', 'Let me check the file', 'I will create the project'), you MUST immediately make the corresponding tool call in the same response. Never end your turn with a promise of future action — execute it now.
+Keep working until the task is actually complete. Do not stop with a summary of what you plan to do next time. If you have tools available that can accomplish the task, use them instead of telling the user what you would do.
+Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user. Responses that only describe intentions without acting are not acceptable.

data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt ADDED Viewed

@@ -0,0 +1,9 @@
+# Google model operational directives
+Follow these operational rules strictly:
+- **Absolute paths:** Always construct and use absolute file paths for all file system operations. Combine the project root with relative paths.
+- **Verify first:** Use read/grep to check file contents and project structure before making changes. Never guess at file contents.
+- **Dependency checks:** Never assume a library is available. Check package.json, requirements.txt, Cargo.toml, Gemfile, etc. before importing.
+- **Conciseness:** Keep explanatory text brief — a few sentences, not paragraphs. Focus on actions and results over narration.
+- **Parallel tool calls:** When you need to perform multiple independent operations (e.g. reading several files), make all the tool calls in a single response rather than sequentially.
+- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive to prevent CLI tools from hanging on prompts.
+- **Keep going:** Work autonomously until the task is fully resolved. Don't stop with a plan — execute it.

data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt ADDED Viewed

@@ -0,0 +1,48 @@
+# Execution discipline
+<tool_persistence>
+- Use tools whenever they improve correctness, completeness, or grounding.
+- Do not stop early when another tool call would materially improve the result.
+- If a tool returns empty or partial results, retry with a different query or strategy before giving up.
+- Keep calling tools until: (1) the task is complete, AND (2) you have verified the result.
+</tool_persistence>
+<mandatory_tool_use>
+NEVER answer these from memory or mental computation — ALWAYS use a tool:
+- Arithmetic, math, calculations → use the shell or a code tool
+- Hashes, encodings, checksums → use the shell (e.g. sha256sum, base64)
+- Current time, date, timezone → use the shell (e.g. date)
+- System state: OS, CPU, memory, disk, ports, processes → use the shell
+- File contents, sizes, line counts → use read, grep, or the shell
+- Git history, branches, diffs → use the shell
+- Current facts (weather, news, versions) → use web_search
+Your memory and user profile describe the USER, not the system you are running on. The execution environment may differ from what the user profile says about their personal setup.
+</mandatory_tool_use>
+<act_dont_ask>
+When a question has an obvious default interpretation, act on it immediately instead of asking for clarification. Examples:
+- 'Is port 443 open?' → check THIS machine (don't ask 'open where?')
+- 'What OS am I running?' → check the live system (don't use user profile)
+- 'What time is it?' → run `date` (don't guess)
+Only ask for clarification when the ambiguity genuinely changes what tool you would call.
+</act_dont_ask>
+<prerequisite_checks>
+- Before taking an action, check whether prerequisite discovery, lookup, or context-gathering steps are needed.
+- Do not skip prerequisite steps just because the final action seems obvious.
+- If a task depends on output from a prior step, resolve that dependency first.
+</prerequisite_checks>
+<verification>
+Before finalizing your response:
+- Correctness: does the output satisfy every stated requirement?
+- Grounding: are factual claims backed by tool outputs or provided context?
+- Formatting: does the output match the requested format or schema?
+- Safety: if the next step has side effects (file writes, commands, API calls), confirm scope before executing.
+</verification>
+<missing_context>
+- If required context is missing, do NOT guess or hallucinate an answer.
+- Use the appropriate lookup tool when missing information is retrievable (grep, web_search, read, etc.).
+- Ask a clarifying question only when the information cannot be retrieved by tools.
+- If you must proceed with incomplete information, label assumptions explicitly.
+</missing_context>

data/lib/rubino/agent/runner.rb CHANGED Viewed

@@ -13,7 +13,8 @@ module Rubino
       def initialize(session_id: nil, model_override: nil, provider_override: nil,
                      max_turns: nil, ignore_rules: false, ui: nil, agent_definition: nil,
-                     event_bus: nil, announce_session: true, session_source: "cli")
+                     event_bus: nil, announce_session: true, session_source: "cli",
+                     interactive: false)
         @ui = ui || Rubino.ui
         # An in-chat rewind/fork builds a runner on the child session but has its
         # own purpose-built "┄ rewound to message N — editing ┄" marker, so the
@@ -40,6 +41,11 @@ module Rubino
         # not the user's own conversations) while staying resumable by explicit
         # id. Like Claude Code hiding its Task subagent sessions from the picker.
         @session_source = session_source
+        # True only for the interactive REPL, where more in-process turns follow
+        # this one. Lifecycle uses it to keep automatic memory extraction OFF the
+        # live KV-cache slot between turns (#608c) — a headless one-shot, which
+        # exits after its single turn, leaves it false and extracts normally.
+        @interactive = interactive
         # Pre-instantiate so cancel! is meaningful between turns and during the
         # window between Signal.trap install and run() — a too-early Ctrl+C
         # used to land on a nil token and silently no-op, then the next run
@@ -123,18 +129,19 @@ module Rubino
           cancel_token: @cancel_token,
           model_override: @explicit_model_override,
           provider_override: @provider_override,
+          interactive: @interactive,
           # The SOFT iteration ceiling (where the budget-extension prompt fires)
-          # vs the HARD max_turns outer rail. For the main agent @max_turns is the
-          # `--max-turns N` override, which intentionally sets the soft ceiling.
-          # A SUBAGENT, though, gets @max_turns = definition.max_turns (= config
-          # agent.max_turns, 90) — passing THAT as the soft ceiling made soft ==
-          # hard, so #extendable? was always false and a subagent could NEVER
-          # surface a budget request (#571) — it just force-summarized. Subagents
-          # therefore pass nil so the soft ceiling falls back to config
-          # agent.max_tool_iterations (25) < the 90 hard rail, exactly like the
-          # main agent — so a subagent at 25 iterations parks and asks for budget
-          # via the dropdown (#574), extendable up to the 90 outer rail.
-          max_tool_iterations: @session_source == "subagent" ? nil : @max_turns,
+          # vs the HARD max_turns outer rail (config agent.max_turns, applied
+          # inside IterationBudget). @max_turns carries the per-run soft cap on
+          # BOTH paths:
+          #   - MAIN agent: the `--max-turns N` override (nil ⇒ config default).
+          #   - SUBAGENT:   definition.max_turns — e.g. explore=20, general=50,
+          #     BELOW the 90 hard rail — so the child both HONORS its per-agent
+          #     cap (#571: it used to be dropped entirely) AND can surface the
+          #     #574 budget-park at that cap, extendable up to the 90 outer rail.
+          # A subagent that sets no max_turns falls back to config agent.max_turns
+          # (soft == hard) and simply hard-stops there, like the main agent.
+          max_tool_iterations: @max_turns,
           polishing: @polishing
         )
@@ -150,10 +157,19 @@ module Rubino
         # counterpart to the manual /compact swap (chat_command rebuilds the
         # runner on result[:compact_into]).
         @session = lifecycle.active_session
+        # Post-turn state, read by the subagent-completion path (task_tool) so a
+        # force-summarized/truncated child is reported PARTIAL, not "completed".
+        @last_stop_reason = lifecycle.last_stop_reason
         response
       end
+      # How this runner's LAST turn terminated (Agent::Loop#stop_reason),
+      # threaded up via Lifecycle. nil until a turn has run. Read by the `task`
+      # tool after a subagent's #run! to distinguish a real completion from a
+      # budget-/time-truncated partial.
+      attr_reader :last_stop_reason
       # Pins the agent Definition this runner threads into every subsequent turn
       # (the sticky `/agent <name>` / Tab-cycle switch). Lifecycle reads
       # @agent_definition fresh on each #run!, so swapping it here takes effect
@@ -219,6 +235,32 @@ module Rubino
         model_id
       end
+      # Aligns a RESUMED session's stored model with the model the adapter will
+      # actually use this run (#model-resume). Lifecycle builds the adapter from
+      # `@explicit_model_override || @session[:model]`, and the CLI ALWAYS passes
+      # a boot override (explicit `-m`, else `model.default` from config) — so on
+      # resume the override, NOT the model this session happened to last use, is
+      # what generates. The session row, the footer/statusbar, the token-budget
+      # context window and `/status` all read `session[:model]`, so without this
+      # they showed the STALE pinned model (e.g. the old default) while the agent
+      # was really running the new one: changing `model.default` looked ignored
+      # even though generation honored it. Re-point the row to the effective
+      # model so every surface tells the truth and a config change takes visible
+      # effect. No-op when there is no explicit override (then the session model
+      # IS what the adapter uses) or it already matches.
+      def sync_resumed_session_model!(session)
+        return unless @explicit_model_override
+        return if session[:model] == @explicit_model_override
+        session[:model]    = @explicit_model_override
+        session[:provider] = @provider_override ||
+                             LLM::ProviderResolver.resolve(@explicit_model_override,
+                                                           explicit_provider: @config.dig("model", "provider"))
+        return unless @session_repo.persisted?(session[:id])
+        @session_repo.update(session[:id], model: session[:model], provider: session[:provider])
+      end
       # Marks the current session ended (#100). Called from the CLI on a clean
       # REPL teardown (and best-effort on terminal close) so a session stops
       # showing as "active" forever and cleanup/list/--continue can tell a
@@ -437,6 +479,7 @@ module Rubino
           # sees us as the live owner and forks rather than interleaving.
           session[:persisted] = true
           session[:owner_pid] = Process.pid
+          sync_resumed_session_model!(session)
           @ui.status("Resuming session: #{session[:id][0..7]}...") if @announce_session
           session
         else

data/lib/rubino/agent/tool_executor.rb CHANGED Viewed

@@ -187,7 +187,7 @@ module Rubino
             # Mirror the chunk onto the bus so the API/SSE stream isn't silent
             # during a long tool call: the Recorder maps TOOL_PROGRESS to a
             # `tool.progress` event, which resets the idle watchdog. Without
-            # this a busy tool (summarize_file: ~30 sequential aux-LLM calls,
+            # this a busy tool (a long shell stream, or an aux-LLM-backed tool,
             # no run-events) is killed at the 300s idle timeout. Throttled so a
             # chatty tool (shell streaming thousands of stdout lines) doesn't
             # write a DB row + SSE frame per line — one heartbeat per interval

data/lib/rubino/cli/chat/idle_card_host.rb CHANGED Viewed

@@ -44,13 +44,18 @@ module Rubino
         # between child events. Repaints go through the composer's render mutex, so
         # they never race the keystroke handler. Exits as soon as no child is live
         # (it clears the region one last time) or when killed on teardown.
-        def start_ticker(composer)
+        # +on_tick+ (optional) runs once per tick after the card repaint — used by
+        # the attach view to live-tail a focused shell's new output on the SAME
+        # 1 Hz cadence and through the same render mutex (composer#print_above) the
+        # cards use, so it never races the keystroke handler.
+        def start_ticker(composer, &on_tick)
           Thread.new do
             loop do
               sleep(IDLE_CARD_TICK)
               break unless composer.equal?(UI::BottomComposer.current)
               paint
+              on_tick&.call
               break unless children_live?
             end
           rescue StandardError => e

data/lib/rubino/cli/chat_command.rb CHANGED Viewed

@@ -124,6 +124,18 @@ module Rubino
       # must never break the idle prompt, so it falls back to "nothing pending"
       # and the manual slash paths still work.
       def auto_resolve_pending_subagent_request(_runner = nil)
+        # Defer while the subagent PICKER is open (#586): the picker and this
+        # blocking approval/budget modal compete for the same stdin. A child
+        # hammering tool calls re-hits its budget gate every few ticks, so
+        # auto-firing the `wants +budget` modal here would suspend the open picker
+        # and swallow the ↓/Enter the user meant to ATTACH with — worse, the
+        # picker's ↓+Enter gesture could land on the modal's destructive
+        # "Summarize now". The request stays pending and auto-presents on the very
+        # next idle tick once the picker closes: deferred a few seconds, never
+        # lost — it still appears like a permission, in arrival order. (@composer
+        # is nil on the piped/one-shot paths, which have no picker.)
+        return false if @composer&.agent_menu_open?
         agents_request_handler.auto_resolve_pending
       rescue StandardError => e
         # Resilience floor: a hiccup in the auto-open must never crash the idle
@@ -276,7 +288,7 @@ module Rubino
         text, image_paths = Chat::ImageInbox.resolve_oneshot(query, opt(:image))
         requested_session_id = session_resolver.resolve_session_id
         runner = build_runner(session_id: requested_session_id, ui: ui,
-                              announce_session: announce_session)
+                              announce_session: announce_session, interactive: false)
         warn_if_resume_forked(requested_session_id, runner)
         note_if_resuming_compacted_parent(runner)
         recorder = Output::TurnRecorder.new.attach!
@@ -1576,7 +1588,7 @@ module Rubino
         composer.reset_input
         seed_draft(composer, draft)
         idle_cards.paint
-        ticker = idle_cards.children_live? ? idle_cards.start_ticker(composer) : nil
+        ticker = idle_cards.children_live? ? idle_cards.start_ticker(composer) { tail_attached_shell(composer) } : nil
         # SIGINT trap as a FALLBACK only (BH-2 / #551): the dependable idle Ctrl+C
         # path is now the in-band \x03 byte (on_idle_interrupt above), because
@@ -1632,6 +1644,7 @@ module Rubino
           # suspend THIS composer and restore it after), so it does not race the
           # reader. Resolves one request, then `next` so the cards repaint and the
           # loop re-checks for the next pending request before reading input.
+          # (It defers itself while the picker is open — see the hook, #586.)
           if auto_resolve_pending_subagent_request(runner)
             idle_cards.paint
             next
@@ -1940,7 +1953,11 @@ module Rubino
         # cards stay visible and their elapsed time advances until the turn ends.
         # Killed in the ensure below.
         idle_cards.paint
-        card_ticker = idle_cards.children_live? ? idle_cards.start_ticker(composer) : nil
+        card_ticker = if idle_cards.children_live?
+                        idle_cards.start_ticker(composer) do
+                          tail_attached_shell(composer)
+                        end
+                      end
         # If this turn's prompt came off the input queue (interrupt-by-default
         # Enter, Alt+Enter, or "/queued" during the previous turn), commit it now
@@ -1967,6 +1984,11 @@ module Rubino
         # Only thread the paste expansions when a placeholder was actually
         # collected, so a normal turn's runner.run signature is unchanged.
         run_kwargs[:paste_expansions] = paste_expansions unless paste_expansions.empty?
+        # Drive the live `ctx ~Xk/…` gauge during THIS turn (#608e): hand the UI a
+        # cheap render lambda (base ctx captured once + the in-flight token
+        # estimate) so its ticker repaints the bar ~1/s as the model generates,
+        # instead of the bar sitting frozen until the turn ends. Cleared in ensure.
+        ui.live_status_provider = live_status_meter(runner) if ui.respond_to?(:live_status_provider=)
         oneshot = one_shot_agent_definition(agent_name)
         if oneshot && runner.respond_to?(:run_with_agent)
           runner.run_with_agent(oneshot, prompt, **run_kwargs)
@@ -2001,6 +2023,8 @@ module Rubino
         # time the runner returns, so the facet has already landed in the
         # footer and the engine thread must not outlive the turn.
         ui.turn_finished if ui.respond_to?(:turn_finished)
+        # Stop driving the live ctx gauge; the reconcile below sets the exact bar.
+        ui.live_status_provider = nil if ui.respond_to?(:live_status_provider=)
         # Stop the during-turn panel ticker before tearing the composer down, so
         # it can't repaint over the next idle prompt (the idle read starts its
         # own ticker). Idempotent if it already exited on its own (no live child).
@@ -2033,17 +2057,42 @@ module Rubino
         session  = runner.session
         budget   = Context::TokenBudget.new(model_id: session[:model], config: Rubino.configuration)
         messages = ::Rubino::Session::Store.new.for_session(session[:id])
+        render_status_bar(session, budget, context_tokens(messages, budget))
+      rescue StandardError
+        nil
+      end
+      # A cheap, DB-free render lambda for the LIVE ctx gauge (#608e): captures the
+      # base (persisted) token count ONCE here on the main thread, then maps an
+      # in-flight token estimate → a bar line with NO further DB reads, so the UI
+      # ticker can call it ~1/s from its thread without re-querying the session.
+      # The base omits this turn's not-yet-persisted generation; the +extra+
+      # estimate covers it, and #ensure reconciles to the exact bar at turn end.
+      # nil (no live gauge) when the bar is disabled or on any failure.
+      def live_status_meter(runner)
+        return nil unless runner && Rubino.configuration.display_statusbar?
+        session = runner.session
+        budget  = Context::TokenBudget.new(model_id: session[:model], config: Rubino.configuration)
+        base    = context_tokens(::Rubino::Session::Store.new.for_session(session[:id]), budget)
+        ->(extra) { render_status_bar(session, budget, base + extra.to_i) }
+      rescue StandardError
+        nil
+      end
+      # Renders the model + context-saturation bar for +tokens+ against the
+      # session's window. Shared by the turn-boundary bar (#build_status_line) and
+      # the live gauge (#live_status_meter) so both read one format (#608e).
+      def render_status_bar(session, budget, tokens)
         UI::StatusBar.render(
           chips: { mode: Rubino::Modes.current, agent: status_agent_chip,
                    branch: @branch_short_id,
                    skill: Rubino::ActiveSkill.current },
           model: session[:model] || model_name,
-          tokens: context_tokens(messages, budget),
+          tokens: tokens,
           window: budget.available_tokens,
           pastel: pastel
         )
-      rescue StandardError
-        nil
       end
       # The status-bar agent chip (#320): the active primary agent name, but
@@ -2966,7 +3015,7 @@ module Rubino
       # Builds an Agent::Runner with this invocation's shared flag overrides —
       # only the session and UI vary per call site (one-shot, interactive boot,
       # /sessions resume, /new).
-      def build_runner(session_id:, ui:, announce_session: true)
+      def build_runner(session_id:, ui:, announce_session: true, interactive: true)
         Agent::Runner.new(
           session_id: session_id,
           model_override: model_name,
@@ -2974,7 +3023,11 @@ module Rubino
           max_turns: max_turns_override,
           ignore_rules: opt(:ignore_rules) || false,
           ui: ui,
-          announce_session: announce_session
+          announce_session: announce_session,
+          # build_runner is the interactive-REPL builder; only setup_oneshot
+          # overrides this to false (a headless one-shot exits after one turn).
+          # Drives Lifecycle's single-slot KV-cache gate (#608c).
+          interactive: interactive
         )
       end
@@ -3008,7 +3061,7 @@ module Rubino
       # and what it said.
       def attach_agent_view(id, ui)
         entry = Tools::BackgroundTasks.instance.find(id)
-        return ui.error("no background subagent with id #{id}") unless entry
+        return ui.error("no background task with id #{id}") unless entry
         @attached_id = id
         # Focus the composer on this sub (tmux-style unified render): only frames
@@ -3030,13 +3083,57 @@ module Rubino
           # the focus gate) hands the bottom region to the sub; detach refocuses
           # main and the cards return.
           composer&.set_cards([])
-          ui.info(pastel.cyan("▶ attached to #{id} · #{entry.subagent}") +
-                  pastel.dim(" — type to steer · ↓ to switch subagents · ← to go back"))
-          session_resolver.replay_messages(ui, snapshot)
+          # The attach BODY differs by kind — the one polymorphic seam: a subagent
+          # replays its session transcript (and its per-sub CLI keeps painting live
+          # through the focus gate); a shell has no transcript, so it shows its
+          # captured OUTPUT and the user types straight to its stdin.
+          if entry.shell?
+            ui.info(pastel.cyan("▶ attached to #{id} · shell") +
+                    pastel.dim(" — type to send input · ↓ to switch · ← to go back"))
+            paint_shell_tail(composer, entry, full: true)
+          else
+            ui.info(pastel.cyan("▶ attached to #{id} · #{entry.subagent}") +
+                    pastel.dim(" — type to steer · ↓ to switch subagents · ← to go back"))
+            session_resolver.replay_messages(ui, snapshot)
+          end
+        end
+        # No watcher: a subagent's OWN per-sub CLI paints its ongoing activity live
+        # through the focus gate. A shell has none, so its NEW output is rendered
+        # after each input (see #handle_attached_input) — continuous auto-tailing
+        # is a later refinement.
+      end
+      # Paint a focused shell's NEW output into the attached view, through the SAME
+      # focus-gated, render-mutex-safe seam subagent live frames use
+      # (composer#print_above with the shell's origin) — so it is safe to call both
+      # from the keystroke handler AND the 1 Hz idle ticker thread. A private,
+      # mutex-guarded cursor tracks bytes already shown so it NEVER advances the
+      # shared read_offset the model's shell_output reads. full: ⇒ from the start
+      # (on attach); otherwise only bytes added since the last paint.
+      def paint_shell_tail(composer, entry, full: false)
+        return unless composer && entry
+        @attached_shell_mutex ||= Mutex.new
+        text = @attached_shell_mutex.synchronize do
+          buf = entry.output_all.to_s
+          @attached_shell_cursor = 0 if full || @attached_shell_cursor.nil?
+          slice = buf.byteslice(@attached_shell_cursor..) || ""
+          @attached_shell_cursor = buf.bytesize
+          slice
         end
-        # No watcher: the sub's OWN per-sub CLI now paints its ongoing activity
-        # live through the focus gate (it commits with this sub's origin), so the
-        # attached view stays live without a polling ticker.
+        return if text.strip.empty?
+        composer.print_above(text.chomp, origin: @attached_id)
+      end
+      # The idle ticker's per-tick hook (#start_ticker): live-tail the focused
+      # shell's output, if one is attached. A no-op while on a subagent (its own
+      # per-sub CLI streams) or the main view.
+      def tail_attached_shell(composer)
+        return unless @attached_id
+        entry = Tools::BackgroundTasks.instance.find(@attached_id)
+        paint_shell_tail(composer, entry) if entry&.shell?
       end
       # Leave the agent-view and return to the main session: clear the screen,
@@ -3154,9 +3251,14 @@ module Rubino
           result = cmd_executor.try_execute(input)
           attach_agent_view(result[:attach_agent], ui) if result.is_a?(Hash) && result[:attach_agent]
         else
-          # Plain text is a steer note folded into the child's context at its next
-          # turn boundary (a child parked on an approval folds it once it resumes).
+          # Plain text → the worker's input: a subagent folds it as a steer note at
+          # its next turn boundary; a shell writes it to stdin. For a shell, surface
+          # the output that input produced so the attached view stays useful.
           agents_request_handler.steer_agent(id, input)
+          if entry.shell?
+            sleep 0.2 # let the shell consume the line + emit its response
+            paint_shell_tail(UI::BottomComposer.current, entry)
+          end
         end
       end

data/lib/rubino/cli/commands.rb CHANGED Viewed

@@ -601,6 +601,11 @@ module Rubino
             ui.warning("gem update failed. If this is a permission error, re-run the installer or try `gem update --user-install #{Rubino::UpdateCheck::GEM_NAME}`.")
             return
           end
+          # The subprocess installed the new gem into this process's gem paths,
+          # but our in-memory spec list predates it — refresh so the version
+          # query below sees what `gem update` just wrote (else we'd report the
+          # pre-update version and claim "already up to date").
+          Gem.refresh
           new_v = Rubino::UpdateCheck.installed_gem_version(Rubino::UpdateCheck::GEM_NAME)
           if new_v && Gem::Version.new(new_v) > Gem::Version.new(current)
             ui.info("rubino is now on v#{new_v} (was v#{current}).")