RubyGems - rubino-agent - Versions diffs - 0.5.1 → 0.5.2.2 - Mend

rubino-agent 0.5.1 → 0.5.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

checksums.yaml +4 -4
data/.dockerignore +15 -0
data/CHANGELOG.md +127 -0
data/Dockerfile +56 -0
data/agent.md +112 -0
data/docs/api/v1.md +2 -0
data/docs/commands.md +3 -6
data/docs/configuration.md +13 -6
data/docs/design/bg-shell-pty-port.md +88 -0
data/docs/design/bg-shell-review-refinements.md +65 -0
data/docs/design/bg-shell-ux.md +130 -0
data/docs/oauth-providers.md +21 -0
data/docs/tools.md +3 -12
data/lib/rubino/agent/iteration_budget.rb +13 -0
data/lib/rubino/agent/loop.rb +43 -5
data/lib/rubino/agent/prompts/build.txt +10 -5
data/lib/rubino/agent/prompts/memory_guidance.txt +5 -0
data/lib/rubino/agent/prompts/tool_use_enforcement.txt +4 -0
data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt +9 -0
data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt +48 -0
data/lib/rubino/agent/runner.rb +55 -12
data/lib/rubino/agent/tool_executor.rb +1 -1
data/lib/rubino/api/operations/tasks/stop_operation.rb +0 -3
data/lib/rubino/attachments/classify.rb +0 -1
data/lib/rubino/cli/chat/completion_builder.rb +0 -8
data/lib/rubino/cli/chat/idle_card_host.rb +6 -1
data/lib/rubino/cli/chat_command.rb +324 -171
data/lib/rubino/cli/commands.rb +5 -0
data/lib/rubino/commands/built_ins.rb +0 -1
data/lib/rubino/commands/executor.rb +1 -7
data/lib/rubino/commands/handlers/agents.rb +55 -265
data/lib/rubino/commands/handlers/status.rb +6 -3
data/lib/rubino/compression/line_skeleton.rb +1 -1
data/lib/rubino/compression/python_code_skeleton.rb +1 -1
data/lib/rubino/compression/ruby_code_skeleton.rb +1 -1
data/lib/rubino/compression/tree_sitter_code_skeleton.rb +1 -1
data/lib/rubino/config/configuration.rb +47 -18
data/lib/rubino/config/defaults.rb +57 -33
data/lib/rubino/context/prompt_assembler.rb +89 -1
data/lib/rubino/context/summary_builder.rb +0 -22
data/lib/rubino/context/token_budget.rb +0 -5
data/lib/rubino/errors.rb +2 -2
data/lib/rubino/interaction/events.rb +2 -2
data/lib/rubino/interaction/lifecycle.rb +54 -20
data/lib/rubino/llm/anthropic_role_merge.rb +75 -0
data/lib/rubino/llm/error_classifier.rb +34 -1
data/lib/rubino/llm/fake_provider.rb +0 -4
data/lib/rubino/llm/ruby_llm_adapter.rb +222 -59
data/lib/rubino/llm/stream_tool_call_recovery.rb +91 -0
data/lib/rubino/llm/tool_call_recovery.rb +177 -0
data/lib/rubino/memory/sqlite_extraction_prompt.rb +0 -2
data/lib/rubino/memory/store.rb +0 -19
data/lib/rubino/security/pattern_matcher.rb +0 -2
data/lib/rubino/security/redactor.rb +1 -1
data/lib/rubino/security/secret_path.rb +16 -4
data/lib/rubino/session/message.rb +12 -0
data/lib/rubino/skills/registry.rb +16 -2
data/lib/rubino/tools/background_tasks.rb +132 -228
data/lib/rubino/tools/base.rb +1 -17
data/lib/rubino/tools/grep_tool.rb +13 -1
data/lib/rubino/tools/question_tool.rb +3 -4
data/lib/rubino/tools/read_attachment_tool.rb +52 -54
data/lib/rubino/tools/registry.rb +21 -72
data/lib/rubino/tools/shell_entry_adapter.rb +97 -0
data/lib/rubino/tools/shell_input_tool.rb +1 -1
data/lib/rubino/tools/shell_kill_tool.rb +4 -4
data/lib/rubino/tools/shell_registry.rb +178 -38
data/lib/rubino/tools/shell_tool.rb +45 -5
data/lib/rubino/tools/steer_tool.rb +3 -4
data/lib/rubino/tools/task_result_tool.rb +4 -1
data/lib/rubino/tools/task_stop_tool.rb +5 -7
data/lib/rubino/tools/task_tool.rb +81 -35
data/lib/rubino/tools/vision_tool.rb +1 -1
data/lib/rubino/tools/write_tool.rb +22 -2
data/lib/rubino/ui/agent_menu.rb +8 -4
data/lib/rubino/ui/api.rb +11 -0
data/lib/rubino/ui/bottom_composer.rb +240 -374
data/lib/rubino/ui/cli.rb +381 -155
data/lib/rubino/ui/input_history.rb +0 -5
data/lib/rubino/ui/live_region.rb +18 -1
data/lib/rubino/ui/markdown_renderer.rb +51 -4
data/lib/rubino/ui/markdown_repair.rb +114 -0
data/lib/rubino/ui/notifier.rb +4 -10
data/lib/rubino/ui/stdout_proxy.rb +25 -10
data/lib/rubino/ui/streaming_markdown.rb +79 -12
data/lib/rubino/ui/subagent_cards.rb +18 -44
data/lib/rubino/ui/tool_args_stream.rb +143 -0
data/lib/rubino/update_check.rb +10 -2
data/lib/rubino/util/ignore_rules.rb +18 -2
data/lib/rubino/util/secrets_mask.rb +0 -9
data/lib/rubino/version.rb +1 -1
data/lib/rubino.rb +33 -7
data/rubino-agent.gemspec +1 -0
metadata +31 -5
data/AGENTS.md +0 -97
data/docs/agents.md +0 -224
data/lib/rubino/jobs/handlers/summarize_session_job.rb +0 -21
data/lib/rubino/tools/summarize_file_tool.rb +0 -194

data/docs/design/bg-shell-ux.md ADDED Viewed

@@ -0,0 +1,130 @@
+# Background shells as first-class background work (see / focus / stop)
+Status: DESIGN (no implementation yet) · Branch: `feat/bg-shell-ux`
+## Goal
+Give a background **shell** the same user-facing affordances a background **subagent**
+already has:
+1. **See** it — a card + a picker row, at a glance.
+2. **Focus** it — attach to a clear, live view of what it's doing.
+3. **Stop** it — `/stop <id>` from the UI.
+Today a background shell lives ONLY in `ShellRegistry`, so it is invisible to every
+user surface. The model can read/tail/kill it via tools (`shell_output`,
+`shell_tail`, `shell_kill`), but the human has no card, no picker entry, no attach,
+no `/stop`.
+## The central reuse lever (why this is mostly DRY, not new UI)
+Three UI surfaces and the control handlers all read **one source of truth**:
+- `UI::CLI#set_subagent_cards` → `BackgroundTasks.instance.running` (`cli.rb:930`)
+- `UI::AgentMenu` picker entries default → `BackgroundTasks.instance.running` (`agent_menu.rb:21`)
+- `BottomComposer` card host → `BackgroundTasks.instance.running` (`bottom_composer.rb:1639`)
+- `/agents`, `/stop`, `auto_resolve_pending` → `BackgroundTasks` lookups
+None of these inspect `subagent`/`runner` to decide whether to show a row — they
+filter purely on `live_status?` (`LIVE_STATUSES = %i[running needs_approval stopping]`).
+**So: anything in `BackgroundTasks#running` automatically gets a card, a picker row,
+and `/stop`.** The whole feature reduces to *register the shell as a `BackgroundTasks`
+entry* + a few thin, kind-aware branches.
+## Architecture
+Add a `kind: :subagent | :shell` discriminator to `BackgroundTasks::Entry`
+(`background_tasks.rb:60`). A background shell gets BOTH:
+- its existing `ShellRegistry::Entry` (process group, output ring, kill, stdin) — unchanged;
+- a NEW linked `BackgroundTasks::Entry` (`kind: :shell`) that carries the SAME `bg_*`
+  id, so the card/picker/stop surfaces light up and `/stop bg_x` already matches
+  `shell_kill`'s id.
+The two entries are bridged 1:1 by id. `ShellRegistry` stays the process owner;
+`BackgroundTasks` becomes the *presentation + control* layer (as it already is for subagents).
+```
+ShellRegistry::Entry  ──(same bg_ id)──  BackgroundTasks::Entry(kind: :shell)
+  pgid, pipes, buffer                      status, card, picker row, /stop
+  read_new / write_input / kill            attach view, completion notice
+```
+### Reuse AS-IS (the shared seams — no shell-specific code)
+1. `BackgroundTasks#running` + `live_status?` / `LIVE_STATUSES` — the liveness oracle
+   that auto-drives cards + picker + composer.
+2. `UI::SubagentCards` row rendering — reads only plain struct fields
+   (`id, status, tool_count, started_at, prompt`); map `prompt`→command.
+3. `UI::AgentMenu` row rendering — reads only `id, subagent, status, budget_request`.
+4. `InputQueue#push_notice` → idle `coalesced_resume` (#561) — shells ALREADY ride
+   this (`shell_registry.rb:372`).
+5. `render_agent_output_tail` / `watch_loop` (`agents.rb:300-328`) — an existing
+   kind-agnostic byte-tail renderer, perfect for the shell attach view.
+6. `stop_entry` (`background_tasks.rb:456`) as the single stop entry-point, dispatched by kind.
+### Thin shell adapters (the only new code — kept minimal)
+1. **Bridge (register + sync).** In `shell_tool.rb#spawn_background` (`:382`), after
+   `ShellRegistry.spawn`, `reserve` a `kind: :shell` `BackgroundTasks` entry with the
+   same id. In `ShellRegistry#notify_completion` (`:357`), flip the linked entry to
+   `:completed`/`:failed` via `complete` (so the card/picker drop it). Status for a
+   shell is DERIVED (`ShellRegistry#status` from `wait_thr`); the bridge keeps the
+   stored `BackgroundTasks` status in sync — single sync point at completion + an
+   optional poll for the live `tool_count`/activity proxy (bytes/lines).
+2. **Attach branch.** In `chat_command.rb#attach_agent_view` (`:3009`), branch on
+   `kind == :shell`: `entry.messages` is empty (no session), so skip session replay
+   and instead render the captured buffer + a polling `read_new` live-tail (reuse the
+   `watch_loop` shape). Attached plain text → `ShellRegistry.write_input` (stdin),
+   not `steer_agent`.
+3. **Stop branch.** In `stop_entry` (`:456`), branch on `kind == :shell`:
+   `Process.kill` the pgid (reuse `ShellKillTool`'s SIGTERM → grace → SIGKILL body,
+   extracted to a shared `ShellRegistry#signal_group`) instead of `runner.cancel!`.
+### Kind-aware copy (cosmetic, one helper)
+`AgentMenu` header/hints ("subagents", "Enter attaches"), `SubagentCards` glyph
+wording, and `Agents` copy ("No background subagents") hardcode "subagent". Introduce
+ONE `entry_kind_label(entry)` → "subagent"/"shell" used by the picker header + card +
+list copy, so a shell row reads right without forking the renderers.
+## Lifecycle & the two-lifetime rule
+A shell has TWO decoupled lifetimes, by design:
+- The `BackgroundTasks` entry goes **terminal** (drops from `running`/cards/picker) the
+  moment the shell exits — so the UI stops showing a dead shell as live.
+- The `ShellRegistry` entry stays **retired** (RETIRED_TTL) so `shell_output` can still
+  fetch the final output for the model.
+Keep them decoupled: completion flips the BackgroundTasks status; retirement is
+ShellRegistry-only.
+## Open decisions (need your call)
+- **D1 — id namespace.** Recommend the shell's `BackgroundTasks` entry **keep its `bg_*`
+  id** (so `/stop bg_x` == `shell_kill bg_x`, one id the user sees everywhere). (Alt:
+  give it `sa_*` — rejected, splits the id space.)
+- **D2 — attach interactivity (scope).** v1 attach = **read-only live tail**; OR v1
+  also routes attached plain-text to the shell's **stdin** (interactive bg process).
+  stdin-steer is a nice win but more surface to test.
+- **D3 — steer/probe on a shell.** Disable for `kind: :shell` (a shell has no model to
+  probe / no steer queue), OR repurpose steer→stdin (ties to D2).
+## Proposed slices (incremental, each independently testable)
+- **Slice 1 — SEE + STOP.** `kind` discriminator + bridge (register/sync) + `stop_entry`
+  shell branch + kind-aware label. Outcome: a bg shell shows a card + picker row and
+  `/stop bg_x` kills it. (Biggest value, smallest surface — pure reuse + 2 thin branches.)
+- **Slice 2 — FOCUS.** `attach_agent_view` shell branch: clear + buffer + polling tail.
+  Outcome: Enter on a shell row attaches to a live output view; `←`/`/back` returns.
+- **Slice 3 — stdin (optional, D2/D3).** Attached plain-text → `shell_input`.
+Each slice: clean-code, DRY (reuse the named seams), spec'd, verified in the QA
+container with a real bg shell (tmux: card visible, `/stop` kills, attach tails live).
+## Non-goals (v1)
+Reworking `ShellRegistry`'s process model; per-shell resource limits; persisting shell
+output to a session Store (shells stay buffer-backed, not transcript-backed).

data/docs/oauth-providers.md CHANGED Viewed

@@ -1,5 +1,26 @@
 # OAuth provider connectors
+> **Status: NOT WIRED END-TO-END (WIP).** The pieces below exist and the HTTP
+> surface works — the `/v1/oauth/...` API endpoints perform the PKCE flow and
+> store **encrypted** tokens in the `oauth_connections` table. But the subsystem
+> is **API-only and not yet consumed**:
+> - **No tool uses the stored tokens.** Nothing reads `ConnectionRepository`
+>   outside the API operations — there is no `GithubTool`/`GoogleTool` etc. that
+>   pulls a connection's token to call a provider, so a connected account is not
+>   actually actionable by the agent yet.
+> - **No CLI surface.** There is no `rubino oauth` command; the connect/callback
+>   flow needs a browser redirect, so it lives only on the API. The CLI treats
+>   `RUBINO_ENCRYPTION_KEY` as optional (`doctor`: "only needed for the
+>   API/OAuth server").
+> - **Token sharing, when consumption lands:** tokens are not "passed" between
+>   CLI and API — both read the **same SQLite DB** (same `RUBINO_HOME`) and
+>   decrypt with the **same `RUBINO_ENCRYPTION_KEY`**. So wiring CLI consumption
+>   = read `ConnectionRepository` + require the key on the CLI too.
+>
+> Open design question (issue #590): finish the native subsystem, or deprecate
+> it and delegate third-party connections to an MCP server (which does its own
+> OAuth and holds its own tokens). Don't depend on native OAuth in production yet.
 Built-in OAuth integration lets users connect third-party accounts (GitHub, Google, etc.) so tools running inside rubino can act on their behalf.
 ## Design

data/docs/tools.md CHANGED Viewed

@@ -1,10 +1,10 @@
 # Tools Reference
-rubino ships **29 built-in tools** plus dynamic MCP tools (started at boot when `mcp.servers` is configured — see [mcp.md](mcp.md); being server-dependent they are excluded from the drift-checked list below) and custom user-defined tools. Each tool is gated by a `tools.<key>` config flag (opt-out: absent key = enabled, only an explicit `false` disables) and the approval model. The count and list below are drift-checked against the live registry by `spec/docs/tools_doc_drift_spec.rb`.
+rubino ships **28 built-in tools** plus dynamic MCP tools (started at boot when `mcp.servers` is configured — see [mcp.md](mcp.md); being server-dependent they are excluded from the drift-checked list below) and custom user-defined tools. Each tool is gated by a `tools.<key>` config flag (opt-out: absent key = enabled, only an explicit `false` disables) and the approval model. The count and list below are drift-checked against the live registry by `spec/docs/tools_doc_drift_spec.rb`.
-The full list (registration order): `read`, `summarize_file`, `write`, `edit`, `multi_edit`, `grep`, `glob`, `shell`, `shell_output`, `shell_tail`, `shell_input`, `shell_kill`, `ruby`, `apply_patch`, `webfetch`, `websearch`, `question`, `todowrite`, `memory`, `session_search`, `attach_file`, `read_attachment`, `vision`, `skill`, `task`, `task_result`, `task_stop`, `steer`, `probe`.
+The full list (registration order): `read`, `write`, `edit`, `multi_edit`, `grep`, `glob`, `shell`, `shell_output`, `shell_tail`, `shell_input`, `shell_kill`, `ruby`, `apply_patch`, `webfetch`, `websearch`, `question`, `todowrite`, `memory`, `session_search`, `attach_file`, `read_attachment`, `vision`, `skill`, `task`, `task_result`, `task_stop`, `steer`, `probe`.
-Several tools share one config gate, so `rubino tools` shows **24 rows** (config groups), not 29: `webfetch` + `websearch` share `tools.web`, and the whole delegation family (`task`, `task_result`, `task_stop`, `steer`, `probe`) rides on `tools.task` — disabling delegation disables them all.
+Several tools share one config gate, so `rubino tools` shows **23 rows** (config groups), not 28: `webfetch` + `websearch` share `tools.web`, and the whole delegation family (`task`, `task_result`, `task_stop`, `steer`, `probe`) rides on `tools.task` — disabling delegation disables them all.
 ## How tools are gated
@@ -45,15 +45,6 @@ Risk: low
 Parameters: file_path, offset, limit
 ```
-### summarize_file
-Summarize a large text file WITHOUT loading it into the conversation. The file is map-reduced by a separate summarization model; only the final summary returns, so the raw bytes never enter context. Prefer this over `read` for big documents.
-```
-Risk: low
-Parameters: file_path, focus, max_words
-```
 ### write
 Write content to a file, overwriting any existing content. Creates parent directories if needed. Use `edit`/`multi_edit` to modify an existing file in place.

data/lib/rubino/agent/iteration_budget.rb CHANGED Viewed

@@ -37,6 +37,19 @@ module Rubino
         within_iteration_limit?(iteration) && within_time_limit?
       end
+      # Which rail is blocking the turn RIGHT NOW, so a force-summarized turn can
+      # report WHY it stopped (honest subagent-completion reporting, not a false
+      # "completed"). :iterations when the tool/turn ceiling is spent, :time when
+      # the wall-clock safety-net is, nil when the turn could still continue.
+      # Mirrors #can_continue?'s conjunction — the iteration ceiling is checked
+      # first, matching the order the loop exhausts them.
+      def limiting_factor(iteration)
+        return :iterations unless within_iteration_limit?(iteration)
+        return :time unless within_time_limit?
+        nil
+      end
       # True ONLY when offering the interactive Continue extension would actually
       # help: the SOFT iteration ceiling (@max_tool_iterations) is what's
       # exhausted, and neither non-extendable rail is the blocker (#403).

data/lib/rubino/agent/loop.rb CHANGED Viewed

@@ -23,10 +23,13 @@ module Rubino
       # instead of ending the turn with nothing. Carries the trusted-harness marker
       # (#75) so it reads as runtime control, not as suspect user input.
       MAX_ITERATIONS_SUMMARY_NUDGE =
-        "#{HARNESS_CONTROL_MARKER} You've reached the maximum number of " \
-        "tool-calling iterations allowed. " \
-        "Please provide a final response summarizing what you've found and " \
-        "accomplished so far, without calling any more tools.".freeze
+        "#{HARNESS_CONTROL_MARKER} You've done a long run of tool calls this " \
+        "turn and hit this turn's tool-call checkpoint. Without calling any " \
+        "more tools, give the user a brief, constructive summary: what you " \
+        "accomplished and what's left. This is a per-turn checkpoint, NOT a " \
+        "hard limit on the work — do NOT tell the user to start a new session, " \
+        "and do NOT claim you are unable to continue or improve things. They " \
+        "can simply reply and you'll pick up right where you left off.".freeze
       # Framing for turn-start background notices (#148): tells the model the
       # notices are secondary to the user message that follows them.
@@ -119,6 +122,15 @@ module Rubino
         @tool_executor.on_result = method(:handle_tool_result) if @tool_executor.respond_to?(:on_result=)
       end
+      # How the LAST turn terminated, read back by the caller AFTER #run returns
+      # (mirrors how Lifecycle exposes #active_session). :completed on a normal
+      # answer; :max_iterations / :max_time when the turn was force-summarized at
+      # the tool/turn ceiling or the wall-clock net; :aborted on a user abort;
+      # :stream_incomplete when a truncated stream was handed back as the answer.
+      # The subagent-completion path reads this so a truncated run is reported
+      # PARTIAL instead of a false "completed" (#core-F1 honesty).
+      attr_reader :stop_reason
       # Runs the agent loop, returning the final assistant response content.
       def run(messages:, tools:) # rubocop:disable Metrics/PerceivedComplexity,Metrics/CyclomaticComplexity
         # Stash the resolved toolset so #streaming? can decide, per run, whether
@@ -191,6 +203,10 @@ module Rubino
         # most once per turn, only after a real block, and reset here so a fresh
         # turn never inherits a prior turn's reminder.
         @blocked_reminder_emitted = false
+        # Terminal outcome of THIS turn, read back via #stop_reason once #run
+        # returns. Optimistic default — every early return below that ISN'T a
+        # clean answer overwrites it (force-summary, abort, truncated stream).
+        @stop_reason = :completed
         token_total = 0
         loop do
@@ -313,6 +329,7 @@ module Rubino
             end
             # Continuations exhausted — hand back the recovered partial as the
             # (truncated) final answer: truthful and resumable, not a hard failure.
+            @stop_reason = :stream_incomplete
             emit_turn_summary(turn_started_at, token_total)
             return response.content
           end
@@ -577,6 +594,7 @@ module Rubino
       # note rather than a force-summary (no extra model call). The ledger note
       # keeps it truthful about how much ran.
       def abort_on_budget_exhausted(iteration, turn_started_at, token_total)
+        @stop_reason = :aborted
         note = "Stopped at user request after #{iteration} tool iteration" \
                "#{"s" if iteration != 1} (#{tool_count_label})."
         persist_user_message_note(note)
@@ -626,6 +644,10 @@ module Rubino
       end
       def force_summarize_budget_exhausted(messages, iteration, turn_started_at, token_total)
+        # Record WHICH rail forced the summary so a background subagent's
+        # completion can be reported PARTIAL with the real reason (time vs
+        # iterations) instead of a misleading "completed" (#core-F1).
+        @stop_reason = @budget.limiting_factor(iteration) == :time ? :max_time : :max_iterations
         nudge = force_summary_nudge
         persist_user_message(nudge)
         messages << { role: "user", content: nudge }
@@ -890,11 +912,16 @@ module Rubino
       # Providers like Bedrock require this message to appear in the conversation
       # history between the user prompt and the tool result(s).
       def build_assistant_tool_use_message(response)
-        {
+        msg = {
           role: "assistant",
           content: response.content || "",
           tool_calls: response.tool_calls
         }
+        # Carry reasoning on the in-turn (non-streaming) assistant(tool_use) too,
+        # so load_history replays it and the prefix stays KV-cache-stable (#608b).
+        reasoning = response.respond_to?(:thinking) ? response.thinking : nil
+        msg[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
+        msg
       end
       # Called once per executed tool by the ToolExecutor's on_result sink, on
@@ -1058,6 +1085,10 @@ module Rubino
         metadata = tool_calls.empty? ? {} : { tool_calls: tool_calls }
         input_tokens = msg[:input_tokens].to_i
         metadata[:input_tokens] = input_tokens if input_tokens.positive?
+        # Keep the reasoning with the assistant(tool_use) row so the next turn
+        # replays it and the KV-cache prefix stays byte-stable (#608b) — this is
+        # the row that diverged from the server cache when reasoning was dropped.
+        metadata[:reasoning] = msg[:reasoning] if msg[:reasoning] && !msg[:reasoning].to_s.empty?
         with_db_retries do
           @message_store.create(
@@ -1108,6 +1139,13 @@ module Rubino
         # they see tool result messages with no matching toolUse upstream.
         metadata = response.has_tool_calls? ? { tool_calls: response.tool_calls } : {}
+        # Persist the reasoning so later turns can replay it (Hermes parity,
+        # #608b): the local KV cache holds this turn's reasoning tokens, so a
+        # later replay that omits them busts the prefix and re-prefills the whole
+        # context. Session::Message#to_context re-emits it as wire reasoning_content.
+        reasoning = response.respond_to?(:thinking) ? response.thinking : nil
+        metadata[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
         # Record the REAL context size the provider saw for this response:
         # input_tokens covers the whole assembled prompt (system prompt +
         # history + tools), which no local chars/4 estimate can reproduce

data/lib/rubino/agent/prompts/build.txt CHANGED Viewed

@@ -33,11 +33,16 @@ assume or default to one.
   - Edit a file with `edit`/`multi_edit`/`apply_patch`, never with `sed`/`awk`.
   - Search with `grep` or `glob`, never with raw `find` or shell pipelines.
   - Write a new file with `write`. Don't `echo > file` from the shell.
-- To get the gist of a LARGE document (converted PDF, log, transcript —
-  more than a few hundred lines), use `summarize_file`, not `read`. It
-  map-reduces the file in a separate context and returns only the summary,
-  so the raw text never fills this conversation. Reach for `read` (with
-  offset/limit) or `grep` only when you need exact lines, not an overview.
+- For a LARGE file (converted PDF, log, transcript — more than a few hundred
+  lines), don't `read` it whole and flood this conversation: `grep` it to find
+  the relevant part, then `read` that span with offset/limit to page through it.
+- Tool output may be COMPRESSED to save context — it is lossless to YOU: a
+  `# … N lines elided — read <path> offset=.. limit=..` pointer in a file read
+  means that exact body is one targeted `read` away, verbatim (so issue that
+  read before editing it). `[… N lines hidden by log compression …]` in command
+  output means only passing/info noise was dropped — every error/failure and the
+  final summary are kept. `{"_elided": N}` / `"<elided N chars>"` mark trimmed
+  JSON. These markers are NOT part of the file; never match or edit against them.
 - The `ruby` tool runs sandboxed Ruby for quick computation/scripting —
   reach for it when Ruby fits the project. Otherwise use `shell` for the
   host's binaries and the project's own toolchain (its interpreter, package

data/lib/rubino/agent/prompts/memory_guidance.txt ADDED Viewed

@@ -0,0 +1,5 @@
+# Memory discipline
+You have persistent memory across sessions. Save durable facts using the memory tool: user preferences, environment details, tool quirks, and stable conventions. Memory is injected into every turn, so keep it compact and focused on facts that will still matter later.
+Prioritize what reduces future user steering — the most valuable memory is one that prevents the user from having to correct or remind you again. User preferences and recurring corrections matter more than procedural task details.
+Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO state to memory. Specifically: do not record PR numbers, issue numbers, commit SHAs, 'fixed bug X', 'submitted PR Y', 'Phase N done', file counts, or any artifact that will be stale in 7 days. If a fact will be stale in a week, it does not belong in memory. If you've discovered a reusable way to do something, save it as a skill, not a memory.
+Write memories as declarative facts, not instructions to yourself. 'User prefers concise responses' ✓ — 'Always respond concisely' ✗. 'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. Imperative phrasing gets re-read as a directive in later sessions and can cause repeated work or override the user's current request. Procedures and workflows belong in skills, not memory.

data/lib/rubino/agent/prompts/tool_use_enforcement.txt ADDED Viewed

@@ -0,0 +1,4 @@
+# Tool-use enforcement
+You MUST use your tools to take action — do not describe what you would do or plan to do without actually doing it. When you say you will perform an action (e.g. 'I will run the tests', 'Let me check the file', 'I will create the project'), you MUST immediately make the corresponding tool call in the same response. Never end your turn with a promise of future action — execute it now.
+Keep working until the task is actually complete. Do not stop with a summary of what you plan to do next time. If you have tools available that can accomplish the task, use them instead of telling the user what you would do.
+Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user. Responses that only describe intentions without acting are not acceptable.

data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt ADDED Viewed

@@ -0,0 +1,9 @@
+# Google model operational directives
+Follow these operational rules strictly:
+- **Absolute paths:** Always construct and use absolute file paths for all file system operations. Combine the project root with relative paths.
+- **Verify first:** Use read/grep to check file contents and project structure before making changes. Never guess at file contents.
+- **Dependency checks:** Never assume a library is available. Check package.json, requirements.txt, Cargo.toml, Gemfile, etc. before importing.
+- **Conciseness:** Keep explanatory text brief — a few sentences, not paragraphs. Focus on actions and results over narration.
+- **Parallel tool calls:** When you need to perform multiple independent operations (e.g. reading several files), make all the tool calls in a single response rather than sequentially.
+- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive to prevent CLI tools from hanging on prompts.
+- **Keep going:** Work autonomously until the task is fully resolved. Don't stop with a plan — execute it.

data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt ADDED Viewed

@@ -0,0 +1,48 @@
+# Execution discipline
+<tool_persistence>
+- Use tools whenever they improve correctness, completeness, or grounding.
+- Do not stop early when another tool call would materially improve the result.
+- If a tool returns empty or partial results, retry with a different query or strategy before giving up.
+- Keep calling tools until: (1) the task is complete, AND (2) you have verified the result.
+</tool_persistence>
+<mandatory_tool_use>
+NEVER answer these from memory or mental computation — ALWAYS use a tool:
+- Arithmetic, math, calculations → use the shell or a code tool
+- Hashes, encodings, checksums → use the shell (e.g. sha256sum, base64)
+- Current time, date, timezone → use the shell (e.g. date)
+- System state: OS, CPU, memory, disk, ports, processes → use the shell
+- File contents, sizes, line counts → use read, grep, or the shell
+- Git history, branches, diffs → use the shell
+- Current facts (weather, news, versions) → use websearch
+Your memory and user profile describe the USER, not the system you are running on. The execution environment may differ from what the user profile says about their personal setup.
+</mandatory_tool_use>
+<act_dont_ask>
+When a question has an obvious default interpretation, act on it immediately instead of asking for clarification. Examples:
+- 'Is port 443 open?' → check THIS machine (don't ask 'open where?')
+- 'What OS am I running?' → check the live system (don't use user profile)
+- 'What time is it?' → run `date` (don't guess)
+Only ask for clarification when the ambiguity genuinely changes what tool you would call.
+</act_dont_ask>
+<prerequisite_checks>
+- Before taking an action, check whether prerequisite discovery, lookup, or context-gathering steps are needed.
+- Do not skip prerequisite steps just because the final action seems obvious.
+- If a task depends on output from a prior step, resolve that dependency first.
+</prerequisite_checks>
+<verification>
+Before finalizing your response:
+- Correctness: does the output satisfy every stated requirement?
+- Grounding: are factual claims backed by tool outputs or provided context?
+- Formatting: does the output match the requested format or schema?
+- Safety: if the next step has side effects (file writes, commands, API calls), confirm scope before executing.
+</verification>
+<missing_context>
+- If required context is missing, do NOT guess or hallucinate an answer.
+- Use the appropriate lookup tool when missing information is retrievable (grep, websearch, read, etc.).
+- Ask a clarifying question only when the information cannot be retrieved by tools.
+- If you must proceed with incomplete information, label assumptions explicitly.
+</missing_context>

data/lib/rubino/agent/runner.rb CHANGED Viewed

@@ -13,7 +13,8 @@ module Rubino
       def initialize(session_id: nil, model_override: nil, provider_override: nil,
                      max_turns: nil, ignore_rules: false, ui: nil, agent_definition: nil,
-                     event_bus: nil, announce_session: true, session_source: "cli")
+                     event_bus: nil, announce_session: true, session_source: "cli",
+                     interactive: false)
         @ui = ui || Rubino.ui
         # An in-chat rewind/fork builds a runner on the child session but has its
         # own purpose-built "┄ rewound to message N — editing ┄" marker, so the
@@ -40,6 +41,11 @@ module Rubino
         # not the user's own conversations) while staying resumable by explicit
         # id. Like Claude Code hiding its Task subagent sessions from the picker.
         @session_source = session_source
+        # True only for the interactive REPL, where more in-process turns follow
+        # this one. Lifecycle uses it to keep automatic memory extraction OFF the
+        # live KV-cache slot between turns (#608c) — a headless one-shot, which
+        # exits after its single turn, leaves it false and extracts normally.
+        @interactive = interactive
         # Pre-instantiate so cancel! is meaningful between turns and during the
         # window between Signal.trap install and run() — a too-early Ctrl+C
         # used to land on a nil token and silently no-op, then the next run
@@ -123,18 +129,19 @@ module Rubino
           cancel_token: @cancel_token,
           model_override: @explicit_model_override,
           provider_override: @provider_override,
+          interactive: @interactive,
           # The SOFT iteration ceiling (where the budget-extension prompt fires)
-          # vs the HARD max_turns outer rail. For the main agent @max_turns is the
-          # `--max-turns N` override, which intentionally sets the soft ceiling.
-          # A SUBAGENT, though, gets @max_turns = definition.max_turns (= config
-          # agent.max_turns, 90) — passing THAT as the soft ceiling made soft ==
-          # hard, so #extendable? was always false and a subagent could NEVER
-          # surface a budget request (#571) — it just force-summarized. Subagents
-          # therefore pass nil so the soft ceiling falls back to config
-          # agent.max_tool_iterations (25) < the 90 hard rail, exactly like the
-          # main agent — so a subagent at 25 iterations parks and asks for budget
-          # via the dropdown (#574), extendable up to the 90 outer rail.
-          max_tool_iterations: @session_source == "subagent" ? nil : @max_turns,
+          # vs the HARD max_turns outer rail (config agent.max_turns, applied
+          # inside IterationBudget). @max_turns carries the per-run soft cap on
+          # BOTH paths:
+          #   - MAIN agent: the `--max-turns N` override (nil ⇒ config default).
+          #   - SUBAGENT:   definition.max_turns — e.g. explore=20, general=50,
+          #     BELOW the 90 hard rail — so the child both HONORS its per-agent
+          #     cap (#571: it used to be dropped entirely) AND can surface the
+          #     #574 budget-park at that cap, extendable up to the 90 outer rail.
+          # A subagent that sets no max_turns falls back to config agent.max_turns
+          # (soft == hard) and simply hard-stops there, like the main agent.
+          max_tool_iterations: @max_turns,
           polishing: @polishing
         )
@@ -150,10 +157,19 @@ module Rubino
         # counterpart to the manual /compact swap (chat_command rebuilds the
         # runner on result[:compact_into]).
         @session = lifecycle.active_session
+        # Post-turn state, read by the subagent-completion path (task_tool) so a
+        # force-summarized/truncated child is reported PARTIAL, not "completed".
+        @last_stop_reason = lifecycle.last_stop_reason
         response
       end
+      # How this runner's LAST turn terminated (Agent::Loop#stop_reason),
+      # threaded up via Lifecycle. nil until a turn has run. Read by the `task`
+      # tool after a subagent's #run! to distinguish a real completion from a
+      # budget-/time-truncated partial.
+      attr_reader :last_stop_reason
       # Pins the agent Definition this runner threads into every subsequent turn
       # (the sticky `/agent <name>` / Tab-cycle switch). Lifecycle reads
       # @agent_definition fresh on each #run!, so swapping it here takes effect
@@ -219,6 +235,32 @@ module Rubino
         model_id
       end
+      # Aligns a RESUMED session's stored model with the model the adapter will
+      # actually use this run (#model-resume). Lifecycle builds the adapter from
+      # `@explicit_model_override || @session[:model]`, and the CLI ALWAYS passes
+      # a boot override (explicit `-m`, else `model.default` from config) — so on
+      # resume the override, NOT the model this session happened to last use, is
+      # what generates. The session row, the footer/statusbar, the token-budget
+      # context window and `/status` all read `session[:model]`, so without this
+      # they showed the STALE pinned model (e.g. the old default) while the agent
+      # was really running the new one: changing `model.default` looked ignored
+      # even though generation honored it. Re-point the row to the effective
+      # model so every surface tells the truth and a config change takes visible
+      # effect. No-op when there is no explicit override (then the session model
+      # IS what the adapter uses) or it already matches.
+      def sync_resumed_session_model!(session)
+        return unless @explicit_model_override # no override: nothing to align
+        return if session[:model] == @explicit_model_override # stored model already matches the override
+        session[:model]    = @explicit_model_override # update the in-memory session hash first
+        session[:provider] = @provider_override ||
+                             LLM::ProviderResolver.resolve(@explicit_model_override,
+                                                           explicit_provider: @config.dig("model", "provider"))
+        return unless @session_repo.persisted?(session[:id]) # only write when the repo reports the id as persisted
+        @session_repo.update(session[:id], model: session[:model], provider: session[:provider])
+      end
       # Marks the current session ended (#100). Called from the CLI on a clean
       # REPL teardown (and best-effort on terminal close) so a session stops
       # showing as "active" forever and cleanup/list/--continue can tell a
@@ -437,6 +479,7 @@ module Rubino
           # sees us as the live owner and forks rather than interleaving.
           session[:persisted] = true
           session[:owner_pid] = Process.pid
+          sync_resumed_session_model!(session)
           @ui.status("Resuming session: #{session[:id][0..7]}...") if @announce_session
           session
         else

data/lib/rubino/agent/tool_executor.rb CHANGED Viewed

@@ -187,7 +187,7 @@ module Rubino
             # Mirror the chunk onto the bus so the API/SSE stream isn't silent
             # during a long tool call: the Recorder maps TOOL_PROGRESS to a
             # `tool.progress` event, which resets the idle watchdog. Without
-            # this a busy tool (summarize_file: ~30 sequential aux-LLM calls,
+            # this a busy tool (a long shell stream, or an aux-LLM-backed tool,
             # no run-events) is killed at the 300s idle timeout. Throttled so a
             # chatty tool (shell streaming thousands of stdout lines) doesn't
             # write a DB row + SSE frame per line — one heartbeat per interval

data/lib/rubino/api/operations/tasks/stop_operation.rb CHANGED Viewed

@@ -31,9 +31,6 @@ module Rubino
             raise ConflictError, "task #{id} already #{entry.status} — nothing to stop" unless entry.status == :running
             entry.runner&.cancel!
-            # Stop-cascade (S5a): wake any descendant parked on a blocking
-            # ask_parent so the whole subtree unwinds at once.
-            @registry.cancel_descendant_ask_gates(id)
             [202, Serializer.detail(entry)]
           end
         end

data/lib/rubino/attachments/classify.rb CHANGED Viewed

@@ -33,7 +33,6 @@ module Rubino
         application/x-7z-compressed application/x-rar-compressed application/vnd.rar
         application/x-bzip2 application/x-xz
       ].freeze
-      IMAGE_EXTS = %w[.png .jpg .jpeg .gif .webp .bmp .tiff .tif].freeze
       # Leading magic bytes per recognised image/document MIME (WebP is
       # special-cased: RIFF container + WEBP tag). Marcel lets the file NAME

data/lib/rubino/cli/chat/completion_builder.rb CHANGED Viewed

@@ -62,7 +62,6 @@ module Rubino
           #   * /agents (alias /tasks) — the live subagent ids, then the
           #     steer/probe/--stop subcommand grammar, so the comm surface is
           #     discoverable from the composer (#39).
-          #   * /reply — the ids of children blocked waiting on the human.
           #   * /mcp — the configured server names (+ reload), then on/off for a
           #     named server (#182), same grammar shape as /agents.
           #   * /mode, /reasoning, /think — the closed enums (#185), via the
@@ -95,7 +94,6 @@ module Rubino
             "agents" => ->(args) { agents_arg_candidates(args) },
             "tasks" => ->(args) { agents_arg_candidates(args) },
             "agent" => ->(args) { args.empty? ? primary_agent_names : [] },
-            "reply" => ->(args) { args.empty? ? blocked_subagent_ids : [] },
             "mcp" => ->(args) { mcp_arg_candidates(args) },
             "mode" => ->(args) { args.empty? ? Rubino::Modes::ALL.map(&:to_s) : [] },
             "model" => ->(args) { args.empty? ? model_arg_candidates : [] },
@@ -148,12 +146,6 @@ module Rubino
           end
         end
-        # Children parked on an ask_parent waiting for the human — the ids /reply
-        # answers.
-        def blocked_subagent_ids
-          Tools::BackgroundTasks.instance.awaiting_human.map(&:id)
-        end
         # The /model candidates: the registry's model ids for the provider the
         # next turn would route through. Resolved lazily on each dropdown open so
         # a /model or /config provider switch is reflected immediately.

data/lib/rubino/cli/chat/idle_card_host.rb CHANGED Viewed

@@ -44,13 +44,18 @@ module Rubino
         # between child events. Repaints go through the composer's render mutex, so
         # they never race the keystroke handler. Exits as soon as no child is live
         # (it clears the region one last time) or when killed on teardown.
-        def start_ticker(composer)
+        # +on_tick+ (optional) runs once per tick after the card repaint — used by
+        # the attach view to live-tail a focused shell's new output on the SAME
+        # 1 Hz cadence and through the same render mutex (composer#print_above) the
+        # cards use, so it never races the keystroke handler.
+        def start_ticker(composer, &on_tick)
           Thread.new do
             loop do
               sleep(IDLE_CARD_TICK)
               break unless composer.equal?(UI::BottomComposer.current)
               paint
+              on_tick&.call
               break unless children_live?
             end
           rescue StandardError => e