rubino-agent 0.5.1 → 0.5.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.dockerignore +15 -0
  3. data/CHANGELOG.md +127 -0
  4. data/Dockerfile +56 -0
  5. data/agent.md +112 -0
  6. data/docs/api/v1.md +2 -0
  7. data/docs/commands.md +3 -6
  8. data/docs/configuration.md +13 -6
  9. data/docs/design/bg-shell-pty-port.md +88 -0
  10. data/docs/design/bg-shell-review-refinements.md +65 -0
  11. data/docs/design/bg-shell-ux.md +130 -0
  12. data/docs/oauth-providers.md +21 -0
  13. data/docs/tools.md +3 -12
  14. data/lib/rubino/agent/iteration_budget.rb +13 -0
  15. data/lib/rubino/agent/loop.rb +43 -5
  16. data/lib/rubino/agent/prompts/build.txt +10 -5
  17. data/lib/rubino/agent/prompts/memory_guidance.txt +5 -0
  18. data/lib/rubino/agent/prompts/tool_use_enforcement.txt +4 -0
  19. data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt +9 -0
  20. data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt +48 -0
  21. data/lib/rubino/agent/runner.rb +55 -12
  22. data/lib/rubino/agent/tool_executor.rb +1 -1
  23. data/lib/rubino/api/operations/tasks/stop_operation.rb +0 -3
  24. data/lib/rubino/attachments/classify.rb +0 -1
  25. data/lib/rubino/cli/chat/completion_builder.rb +0 -8
  26. data/lib/rubino/cli/chat/idle_card_host.rb +6 -1
  27. data/lib/rubino/cli/chat_command.rb +324 -171
  28. data/lib/rubino/cli/commands.rb +5 -0
  29. data/lib/rubino/commands/built_ins.rb +0 -1
  30. data/lib/rubino/commands/executor.rb +1 -7
  31. data/lib/rubino/commands/handlers/agents.rb +55 -265
  32. data/lib/rubino/commands/handlers/status.rb +6 -3
  33. data/lib/rubino/compression/line_skeleton.rb +1 -1
  34. data/lib/rubino/compression/python_code_skeleton.rb +1 -1
  35. data/lib/rubino/compression/ruby_code_skeleton.rb +1 -1
  36. data/lib/rubino/compression/tree_sitter_code_skeleton.rb +1 -1
  37. data/lib/rubino/config/configuration.rb +47 -18
  38. data/lib/rubino/config/defaults.rb +57 -33
  39. data/lib/rubino/context/prompt_assembler.rb +89 -1
  40. data/lib/rubino/context/summary_builder.rb +0 -22
  41. data/lib/rubino/context/token_budget.rb +0 -5
  42. data/lib/rubino/errors.rb +2 -2
  43. data/lib/rubino/interaction/events.rb +2 -2
  44. data/lib/rubino/interaction/lifecycle.rb +54 -20
  45. data/lib/rubino/llm/anthropic_role_merge.rb +75 -0
  46. data/lib/rubino/llm/error_classifier.rb +34 -1
  47. data/lib/rubino/llm/fake_provider.rb +0 -4
  48. data/lib/rubino/llm/ruby_llm_adapter.rb +222 -59
  49. data/lib/rubino/llm/stream_tool_call_recovery.rb +91 -0
  50. data/lib/rubino/llm/tool_call_recovery.rb +177 -0
  51. data/lib/rubino/memory/sqlite_extraction_prompt.rb +0 -2
  52. data/lib/rubino/memory/store.rb +0 -19
  53. data/lib/rubino/security/pattern_matcher.rb +0 -2
  54. data/lib/rubino/security/redactor.rb +1 -1
  55. data/lib/rubino/security/secret_path.rb +16 -4
  56. data/lib/rubino/session/message.rb +12 -0
  57. data/lib/rubino/skills/registry.rb +16 -2
  58. data/lib/rubino/tools/background_tasks.rb +132 -228
  59. data/lib/rubino/tools/base.rb +1 -17
  60. data/lib/rubino/tools/grep_tool.rb +13 -1
  61. data/lib/rubino/tools/question_tool.rb +3 -4
  62. data/lib/rubino/tools/read_attachment_tool.rb +52 -54
  63. data/lib/rubino/tools/registry.rb +21 -72
  64. data/lib/rubino/tools/shell_entry_adapter.rb +97 -0
  65. data/lib/rubino/tools/shell_input_tool.rb +1 -1
  66. data/lib/rubino/tools/shell_kill_tool.rb +4 -4
  67. data/lib/rubino/tools/shell_registry.rb +178 -38
  68. data/lib/rubino/tools/shell_tool.rb +45 -5
  69. data/lib/rubino/tools/steer_tool.rb +3 -4
  70. data/lib/rubino/tools/task_result_tool.rb +4 -1
  71. data/lib/rubino/tools/task_stop_tool.rb +5 -7
  72. data/lib/rubino/tools/task_tool.rb +81 -35
  73. data/lib/rubino/tools/vision_tool.rb +1 -1
  74. data/lib/rubino/tools/write_tool.rb +22 -2
  75. data/lib/rubino/ui/agent_menu.rb +8 -4
  76. data/lib/rubino/ui/api.rb +11 -0
  77. data/lib/rubino/ui/bottom_composer.rb +240 -374
  78. data/lib/rubino/ui/cli.rb +381 -155
  79. data/lib/rubino/ui/input_history.rb +0 -5
  80. data/lib/rubino/ui/live_region.rb +18 -1
  81. data/lib/rubino/ui/markdown_renderer.rb +51 -4
  82. data/lib/rubino/ui/markdown_repair.rb +114 -0
  83. data/lib/rubino/ui/notifier.rb +4 -10
  84. data/lib/rubino/ui/stdout_proxy.rb +25 -10
  85. data/lib/rubino/ui/streaming_markdown.rb +79 -12
  86. data/lib/rubino/ui/subagent_cards.rb +18 -44
  87. data/lib/rubino/ui/tool_args_stream.rb +143 -0
  88. data/lib/rubino/update_check.rb +10 -2
  89. data/lib/rubino/util/ignore_rules.rb +18 -2
  90. data/lib/rubino/util/secrets_mask.rb +0 -9
  91. data/lib/rubino/version.rb +1 -1
  92. data/lib/rubino.rb +33 -7
  93. data/rubino-agent.gemspec +1 -0
  94. metadata +31 -5
  95. data/AGENTS.md +0 -97
  96. data/docs/agents.md +0 -224
  97. data/lib/rubino/jobs/handlers/summarize_session_job.rb +0 -21
  98. data/lib/rubino/tools/summarize_file_tool.rb +0 -194
@@ -0,0 +1,130 @@
1
+ # Background shells as first-class background work (see / focus / stop)
2
+
3
+ Status: DESIGN (no implementation yet) · Branch: `feat/bg-shell-ux`
4
+
5
+ ## Goal
6
+
7
+ Give a background **shell** the same user-facing affordances a background **subagent**
8
+ already has:
9
+
10
+ 1. **See** it — a card + a picker row, at a glance.
11
+ 2. **Focus** it — attach to a clear, live view of what it's doing.
12
+ 3. **Stop** it — `/stop <id>` from the UI.
13
+
14
+ Today a background shell lives ONLY in `ShellRegistry`, so it is invisible to every
15
+ user surface. The model can read/tail/kill it via tools (`shell_output`,
16
+ `shell_tail`, `shell_kill`), but the human has no card, no picker entry, no attach,
17
+ no `/stop`.
18
+
19
+ ## The central reuse lever (why this is mostly DRY, not new UI)
20
+
21
+ Three UI surfaces and the control handlers all read **one source of truth**:
22
+
23
+ - `UI::CLI#set_subagent_cards` → `BackgroundTasks.instance.running` (`cli.rb:930`)
24
+ - `UI::AgentMenu` picker entries default → `BackgroundTasks.instance.running` (`agent_menu.rb:21`)
25
+ - `BottomComposer` card host → `BackgroundTasks.instance.running` (`bottom_composer.rb:1639`)
26
+ - `/agents`, `/stop`, `auto_resolve_pending` → `BackgroundTasks` lookups
27
+
28
+ None of these inspect `subagent`/`runner` to decide whether to show a row — they
29
+ filter purely on `live_status?` (`LIVE_STATUSES = %i[running needs_approval stopping]`).
30
+
31
+ **So: anything in `BackgroundTasks#running` automatically gets a card, a picker row,
32
+ and `/stop`.** The whole feature reduces to *register the shell as a `BackgroundTasks`
33
+ entry* + a few thin, kind-aware branches.
34
+
35
+ ## Architecture
36
+
37
+ Add a `kind: :subagent | :shell` discriminator to `BackgroundTasks::Entry`
38
+ (`background_tasks.rb:60`). A background shell gets BOTH:
39
+
40
+ - its existing `ShellRegistry::Entry` (process group, output ring, kill, stdin) — unchanged;
41
+ - a NEW linked `BackgroundTasks::Entry` (`kind: :shell`) that carries the SAME `bg_*`
42
+ id, so the card/picker/stop surfaces light up and `/stop bg_x` already matches
43
+ `shell_kill`'s id.
44
+
45
+ The two entries are bridged 1:1 by id. `ShellRegistry` stays the process owner;
46
+ `BackgroundTasks` becomes the *presentation + control* layer (as it already is for subagents).
47
+
48
+ ```
49
+ ShellRegistry::Entry ──(same bg_ id)── BackgroundTasks::Entry(kind: :shell)
50
+   pgid, pipes, buffer                  status, card, picker row, /stop
51
+   read_new / write_input / kill        attach view, completion notice
52
+ ```
53
+
54
+ ### Reuse AS-IS (the shared seams — no shell-specific code)
55
+
56
+ 1. `BackgroundTasks#running` + `live_status?` / `LIVE_STATUSES` — the liveness oracle
57
+ that auto-drives cards + picker + composer.
58
+ 2. `UI::SubagentCards` row rendering — reads only plain struct fields
59
+ (`id, status, tool_count, started_at, prompt`); map `prompt`→command.
60
+ 3. `UI::AgentMenu` row rendering — reads only `id, subagent, status, budget_request`.
61
+ 4. `InputQueue#push_notice` → idle `coalesced_resume` (#561) — shells ALREADY ride
62
+ this (`shell_registry.rb:372`).
63
+ 5. `render_agent_output_tail` / `watch_loop` (`agents.rb:300-328`) — an existing
64
+ kind-agnostic byte-tail renderer, perfect for the shell attach view.
65
+ 6. `stop_entry` (`background_tasks.rb:456`) as the single stop entry-point, dispatched by kind.
66
+
67
+ ### Thin shell adapters (the only new code — kept minimal)
68
+
69
+ 1. **Bridge (register + sync).** In `shell_tool.rb#spawn_background` (`:382`), after
70
+ `ShellRegistry.spawn`, `reserve` a `kind: :shell` `BackgroundTasks` entry with the
71
+ same id. In `ShellRegistry#notify_completion` (`:357`), flip the linked entry to
72
+ `:completed`/`:failed` via `complete` (so the card/picker drop it). Status for a
73
+ shell is DERIVED (`ShellRegistry#status` from `wait_thr`); the bridge keeps the
74
+ stored `BackgroundTasks` status in sync — single sync point at completion + an
75
+ optional poll for the live `tool_count`/activity proxy (bytes/lines).
76
+ 2. **Attach branch.** In `chat_command.rb#attach_agent_view` (`:3009`), branch on
77
+ `kind == :shell`: `entry.messages` is empty (no session), so skip session replay
78
+ and instead render the captured buffer + a polling `read_new` live-tail (reuse the
79
+ `watch_loop` shape). Attached plain text → `ShellRegistry.write_input` (stdin),
80
+ not `steer_agent`.
81
+ 3. **Stop branch.** In `stop_entry` (`:456`), branch on `kind == :shell`:
82
+ `Process.kill` the pgid (reuse `ShellKillTool`'s SIGTERM → grace → SIGKILL body,
83
+ extracted to a shared `ShellRegistry#signal_group`) instead of `runner.cancel!`.
84
+
85
+ ### Kind-aware copy (cosmetic, one helper)
86
+
87
+ `AgentMenu` header/hints ("subagents", "Enter attaches"), `SubagentCards` glyph
88
+ wording, and `Agents` copy ("No background subagents") hardcode "subagent". Introduce
89
+ ONE `entry_kind_label(entry)` → "subagent"/"shell" used by the picker header + card +
90
+ list copy, so a shell row reads right without forking the renderers.
91
+
92
+ ## Lifecycle & the two-lifetime rule
93
+
94
+ A shell has TWO decoupled lifetimes, by design:
95
+
96
+ - The `BackgroundTasks` entry goes **terminal** (drops from `running`/cards/picker) the
97
+ moment the shell exits — so the UI stops showing a dead shell as live.
98
+ - The `ShellRegistry` entry stays **retired** (RETIRED_TTL) so `shell_output` can still
99
+ fetch the final output for the model.
100
+
101
+ Keep them decoupled: completion flips the BackgroundTasks status; retirement is
102
+ ShellRegistry-only.
103
+
104
+ ## Open decisions (need your call)
105
+
106
+ - **D1 — id namespace.** Recommend the shell's `BackgroundTasks` entry **keep its `bg_*`
107
+ id** (so `/stop bg_x` == `shell_kill bg_x`, one id the user sees everywhere). (Alt:
108
+ give it `sa_*` — rejected, splits the id space.)
109
+ - **D2 — attach interactivity (scope).** v1 attach = **read-only live tail**; OR v1
110
+ also routes attached plain-text to the shell's **stdin** (interactive bg process).
111
+ stdin-steer is a nice win but more surface to test.
112
+ - **D3 — steer/probe on a shell.** Disable for `kind: :shell` (a shell has no model to
113
+ probe / no steer queue), OR repurpose steer→stdin (ties to D2).
114
+
115
+ ## Proposed slices (incremental, each independently testable)
116
+
117
+ - **Slice 1 — SEE + STOP.** `kind` discriminator + bridge (register/sync) + `stop_entry`
118
+ shell branch + kind-aware label. Outcome: a bg shell shows a card + picker row and
119
+ `/stop bg_x` kills it. (Biggest value, smallest surface — pure reuse + 2 thin branches.)
120
+ - **Slice 2 — FOCUS.** `attach_agent_view` shell branch: clear + buffer + polling tail.
121
+ Outcome: Enter on a shell row attaches to a live output view; `←`/`/back` returns.
122
+ - **Slice 3 — stdin (optional, D2/D3).** Attached plain-text → `shell_input`.
123
+
124
+ Each slice: clean-code, DRY (reuse the named seams), spec'd, verified in the QA
125
+ container with a real bg shell (tmux: card visible, `/stop` kills, attach tails live).
126
+
127
+ ## Non-goals (v1)
128
+
129
+ Reworking `ShellRegistry`'s process model; per-shell resource limits; persisting shell
130
+ output to a session Store (shells stay buffer-backed, not transcript-backed).
@@ -1,5 +1,26 @@
1
1
  # OAuth provider connectors
2
2
 
3
+ > **Status: NOT WIRED END-TO-END (WIP).** The pieces below exist and the HTTP
4
+ > surface works — the `/v1/oauth/...` API endpoints perform the PKCE flow and
5
+ > store **encrypted** tokens in the `oauth_connections` table. But the subsystem
6
+ > is **API-only and not yet consumed**:
7
+ > - **No tool uses the stored tokens.** Nothing reads `ConnectionRepository`
8
+ > outside the API operations — there is no `GithubTool`/`GoogleTool` etc. that
9
+ > pulls a connection's token to call a provider, so a connected account is not
10
+ > actually actionable by the agent yet.
11
+ > - **No CLI surface.** There is no `rubino oauth` command; the connect/callback
12
+ > flow needs a browser redirect, so it lives only on the API. The CLI treats
13
+ > `RUBINO_ENCRYPTION_KEY` as optional (`doctor`: "only needed for the
14
+ > API/OAuth server").
15
+ > - **Token sharing, when consumption lands:** tokens are not "passed" between
16
+ > CLI and API — both read the **same SQLite DB** (same `RUBINO_HOME`) and
17
+ > decrypt with the **same `RUBINO_ENCRYPTION_KEY`**. So wiring CLI consumption
18
+ > = read `ConnectionRepository` + require the key on the CLI too.
19
+ >
20
+ > Open design question (issue #590): finish the native subsystem, or deprecate
21
+ > it and delegate third-party connections to an MCP server (which does its own
22
+ > OAuth and holds its own tokens). Don't depend on native OAuth in production yet.
23
+
3
24
  Built-in OAuth integration lets users connect third-party accounts (GitHub, Google, etc.) so tools running inside rubino can act on their behalf.
4
25
 
5
26
  ## Design
data/docs/tools.md CHANGED
@@ -1,10 +1,10 @@
1
1
  # Tools Reference
2
2
 
3
- rubino ships **29 built-in tools** plus dynamic MCP tools (started at boot when `mcp.servers` is configured — see [mcp.md](mcp.md); being server-dependent they are excluded from the drift-checked list below) and custom user-defined tools. Each tool is gated by a `tools.<key>` config flag (opt-out: absent key = enabled, only an explicit `false` disables) and the approval model. The count and list below are drift-checked against the live registry by `spec/docs/tools_doc_drift_spec.rb`.
3
+ rubino ships **28 built-in tools** plus dynamic MCP tools (started at boot when `mcp.servers` is configured — see [mcp.md](mcp.md); being server-dependent they are excluded from the drift-checked list below) and custom user-defined tools. Each tool is gated by a `tools.<key>` config flag (opt-out: absent key = enabled, only an explicit `false` disables) and the approval model. The count and list below are drift-checked against the live registry by `spec/docs/tools_doc_drift_spec.rb`.
4
4
 
5
- The full list (registration order): `read`, `summarize_file`, `write`, `edit`, `multi_edit`, `grep`, `glob`, `shell`, `shell_output`, `shell_tail`, `shell_input`, `shell_kill`, `ruby`, `apply_patch`, `webfetch`, `websearch`, `question`, `todowrite`, `memory`, `session_search`, `attach_file`, `read_attachment`, `vision`, `skill`, `task`, `task_result`, `task_stop`, `steer`, `probe`.
5
+ The full list (registration order): `read`, `write`, `edit`, `multi_edit`, `grep`, `glob`, `shell`, `shell_output`, `shell_tail`, `shell_input`, `shell_kill`, `ruby`, `apply_patch`, `webfetch`, `websearch`, `question`, `todowrite`, `memory`, `session_search`, `attach_file`, `read_attachment`, `vision`, `skill`, `task`, `task_result`, `task_stop`, `steer`, `probe`.
6
6
 
7
- Several tools share one config gate, so `rubino tools` shows **24 rows** (config groups), not 29: `webfetch` + `websearch` share `tools.web`, and the whole delegation family (`task`, `task_result`, `task_stop`, `steer`, `probe`) rides on `tools.task` — disabling delegation disables them all.
7
+ Several tools share one config gate, so `rubino tools` shows **23 rows** (config groups), not 28: `webfetch` + `websearch` share `tools.web`, and the whole delegation family (`task`, `task_result`, `task_stop`, `steer`, `probe`) rides on `tools.task` — disabling delegation disables them all.
8
8
 
9
9
  ## How tools are gated
10
10
 
@@ -45,15 +45,6 @@ Risk: low
45
45
  Parameters: file_path, offset, limit
46
46
  ```
47
47
 
48
- ### summarize_file
49
-
50
- Summarize a large text file WITHOUT loading it into the conversation. The file is map-reduced by a separate summarization model; only the final summary returns, so the raw bytes never enter context. Prefer this over `read` for big documents.
51
-
52
- ```
53
- Risk: low
54
- Parameters: file_path, focus, max_words
55
- ```
56
-
57
48
  ### write
58
49
 
59
50
  Write content to a file, overwriting any existing content. Creates parent directories if needed. Use `edit`/`multi_edit` to modify an existing file in place.
@@ -37,6 +37,19 @@ module Rubino
37
37
  within_iteration_limit?(iteration) && within_time_limit?
38
38
  end
39
39
 
40
+ # Which rail is blocking the turn RIGHT NOW, so a force-summarized turn can
41
+ # report WHY it stopped (honest subagent-completion reporting, not a false
42
+ # "completed"). :iterations when the tool/turn ceiling is spent, :time when
43
+ # the wall-clock safety-net is, nil when the turn could still continue.
44
+ # Mirrors #can_continue?'s conjunction — the iteration ceiling is checked
45
+ # first, matching the order the loop exhausts them.
46
+ def limiting_factor(iteration)
47
+ return :iterations unless within_iteration_limit?(iteration)
48
+ return :time unless within_time_limit?
49
+
50
+ nil
51
+ end
52
+
40
53
  # True ONLY when offering the interactive Continue extension would actually
41
54
  # help: the SOFT iteration ceiling (@max_tool_iterations) is what's
42
55
  # exhausted, and neither non-extendable rail is the blocker (#403).
@@ -23,10 +23,13 @@ module Rubino
23
23
  # instead of ending the turn with nothing. Carries the trusted-harness marker
24
24
  # (#75) so it reads as runtime control, not as suspect user input.
25
25
  MAX_ITERATIONS_SUMMARY_NUDGE =
26
- "#{HARNESS_CONTROL_MARKER} You've reached the maximum number of " \
27
- "tool-calling iterations allowed. " \
28
- "Please provide a final response summarizing what you've found and " \
29
- "accomplished so far, without calling any more tools.".freeze
26
+ "#{HARNESS_CONTROL_MARKER} You've done a long run of tool calls this " \
27
+ "turn and hit this turn's tool-call checkpoint. Without calling any " \
28
+ "more tools, give the user a brief, constructive summary: what you " \
29
+ "accomplished and what's left. This is a per-turn checkpoint, NOT a " \
30
+ "hard limit on the work — do NOT tell the user to start a new session, " \
31
+ "and do NOT claim you are unable to continue or improve things. They " \
32
+ "can simply reply and you'll pick up right where you left off.".freeze
30
33
 
31
34
  # Framing for turn-start background notices (#148): tells the model the
32
35
  # notices are secondary to the user message that follows them.
@@ -119,6 +122,15 @@ module Rubino
119
122
  @tool_executor.on_result = method(:handle_tool_result) if @tool_executor.respond_to?(:on_result=)
120
123
  end
121
124
 
125
+ # How the LAST turn terminated, read back by the caller AFTER #run returns
126
+ # (mirrors how Lifecycle exposes #active_session). :completed on a normal
127
+ # answer; :max_iterations / :max_time when the turn was force-summarized at
128
+ # the tool/turn ceiling or the wall-clock net; :aborted on a user abort;
129
+ # :stream_incomplete when a truncated stream was handed back as the answer.
130
+ # The subagent-completion path reads this so a truncated run is reported
131
+ # PARTIAL instead of a false "completed" (#core-F1 honesty).
132
+ attr_reader :stop_reason
133
+
122
134
  # Runs the agent loop, returning the final assistant response content.
123
135
  def run(messages:, tools:) # rubocop:disable Metrics/PerceivedComplexity,Metrics/CyclomaticComplexity
124
136
  # Stash the resolved toolset so #streaming? can decide, per run, whether
@@ -191,6 +203,10 @@ module Rubino
191
203
  # most once per turn, only after a real block, and reset here so a fresh
192
204
  # turn never inherits a prior turn's reminder.
193
205
  @blocked_reminder_emitted = false
206
+ # Terminal outcome of THIS turn, read back via #stop_reason once #run
207
+ # returns. Optimistic default — every early return below that ISN'T a
208
+ # clean answer overwrites it (force-summary, abort, truncated stream).
209
+ @stop_reason = :completed
194
210
  token_total = 0
195
211
 
196
212
  loop do
@@ -313,6 +329,7 @@ module Rubino
313
329
  end
314
330
  # Continuations exhausted — hand back the recovered partial as the
315
331
  # (truncated) final answer: truthful and resumable, not a hard failure.
332
+ @stop_reason = :stream_incomplete
316
333
  emit_turn_summary(turn_started_at, token_total)
317
334
  return response.content
318
335
  end
@@ -577,6 +594,7 @@ module Rubino
577
594
  # note rather than a force-summary (no extra model call). The ledger note
578
595
  # keeps it truthful about how much ran.
579
596
  def abort_on_budget_exhausted(iteration, turn_started_at, token_total)
597
+ @stop_reason = :aborted
580
598
  note = "Stopped at user request after #{iteration} tool iteration" \
581
599
  "#{"s" if iteration != 1} (#{tool_count_label})."
582
600
  persist_user_message_note(note)
@@ -626,6 +644,10 @@ module Rubino
626
644
  end
627
645
 
628
646
  def force_summarize_budget_exhausted(messages, iteration, turn_started_at, token_total)
647
+ # Record WHICH rail forced the summary so a background subagent's
648
+ # completion can be reported PARTIAL with the real reason (time vs
649
+ # iterations) instead of a misleading "completed" (#core-F1).
650
+ @stop_reason = @budget.limiting_factor(iteration) == :time ? :max_time : :max_iterations
629
651
  nudge = force_summary_nudge
630
652
  persist_user_message(nudge)
631
653
  messages << { role: "user", content: nudge }
@@ -890,11 +912,16 @@ module Rubino
890
912
  # Providers like Bedrock require this message to appear in the conversation
891
913
  # history between the user prompt and the tool result(s).
892
914
  def build_assistant_tool_use_message(response)
893
- {
915
+ msg = {
894
916
  role: "assistant",
895
917
  content: response.content || "",
896
918
  tool_calls: response.tool_calls
897
919
  }
920
+ # Carry reasoning on the in-turn (non-streaming) assistant(tool_use) too,
921
+ # so load_history replays it and the prefix stays KV-cache-stable (#608b).
922
+ reasoning = response.respond_to?(:thinking) ? response.thinking : nil
923
+ msg[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
924
+ msg
898
925
  end
899
926
 
900
927
  # Called once per executed tool by the ToolExecutor's on_result sink, on
@@ -1058,6 +1085,10 @@ module Rubino
1058
1085
  metadata = tool_calls.empty? ? {} : { tool_calls: tool_calls }
1059
1086
  input_tokens = msg[:input_tokens].to_i
1060
1087
  metadata[:input_tokens] = input_tokens if input_tokens.positive?
1088
+ # Keep the reasoning with the assistant(tool_use) row so the next turn
1089
+ # replays it and the KV-cache prefix stays byte-stable (#608b) — this is
1090
+ # the row that diverged from the server cache when reasoning was dropped.
1091
+ metadata[:reasoning] = msg[:reasoning] if msg[:reasoning] && !msg[:reasoning].to_s.empty?
1061
1092
 
1062
1093
  with_db_retries do
1063
1094
  @message_store.create(
@@ -1108,6 +1139,13 @@ module Rubino
1108
1139
  # they see tool result messages with no matching toolUse upstream.
1109
1140
  metadata = response.has_tool_calls? ? { tool_calls: response.tool_calls } : {}
1110
1141
 
1142
+ # Persist the reasoning so later turns can replay it (Hermes parity,
1143
+ # #608b): the local KV cache holds this turn's reasoning tokens, so a
1144
+ # later replay that omits them busts the prefix and re-prefills the whole
1145
+ # context. Session::Message#to_context re-emits it as wire reasoning_content.
1146
+ reasoning = response.respond_to?(:thinking) ? response.thinking : nil
1147
+ metadata[:reasoning] = reasoning if reasoning && !reasoning.to_s.empty?
1148
+
1111
1149
  # Record the REAL context size the provider saw for this response:
1112
1150
  # input_tokens covers the whole assembled prompt (system prompt +
1113
1151
  # history + tools), which no local chars/4 estimate can reproduce
@@ -33,11 +33,16 @@ assume or default to one.
33
33
  - Edit a file with `edit`/`multi_edit`/`patch`, never with `sed`/`awk`.
34
34
  - Search with `grep` or `glob`, never with raw `find` or shell pipelines.
35
35
  - Write a new file with `write`. Don't `echo > file` from the shell.
36
- - To get the gist of a LARGE document (converted PDF, log, transcript —
37
- more than a few hundred lines), use `summarize_file`, not `read`. It
38
- map-reduces the file in a separate context and returns only the summary,
39
- so the raw text never fills this conversation. Reach for `read` (with
40
- offset/limit) or `grep` only when you need exact lines, not an overview.
36
+ - For a LARGE file (converted PDF, log, transcript — more than a few hundred
37
+ lines), don't `read` it whole and flood this conversation: `grep` it to find
38
+ the relevant part, then `read` that span with offset/limit to page through it.
39
+ - Tool output may be COMPRESSED to save context — it is lossless to YOU: a
40
+ `# N lines elided read <path> offset=.. limit=..` pointer in a file read
41
+ means that exact body is one targeted `read` away, verbatim (so issue that
42
+ read before editing it). `[… N lines hidden by log compression …]` in command
43
+ output means only passing/info noise was dropped — every error/failure and the
44
+ final summary are kept. `{"_elided": N}` / `"<elided N chars>"` mark trimmed
45
+ JSON. These markers are NOT part of the file; never match or edit against them.
41
46
  - The `ruby` tool runs sandboxed Ruby for quick computation/scripting —
42
47
  reach for it when Ruby fits the project. Otherwise use `shell` for the
43
48
  host's binaries and the project's own toolchain (its interpreter, package
@@ -0,0 +1,5 @@
1
+ # Memory discipline
2
+ You have persistent memory across sessions. Save durable facts using the memory tool: user preferences, environment details, tool quirks, and stable conventions. Memory is injected into every turn, so keep it compact and focused on facts that will still matter later.
3
+ Prioritize what reduces future user steering — the most valuable memory is one that prevents the user from having to correct or remind you again. User preferences and recurring corrections matter more than procedural task details.
4
+ Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO state to memory. Specifically: do not record PR numbers, issue numbers, commit SHAs, 'fixed bug X', 'submitted PR Y', 'Phase N done', file counts, or any artifact that will be stale in 7 days. If a fact will be stale in a week, it does not belong in memory. If you've discovered a reusable way to do something, save it as a skill, not a memory.
5
+ Write memories as declarative facts, not instructions to yourself. 'User prefers concise responses' ✓ — 'Always respond concisely' ✗. 'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. Imperative phrasing gets re-read as a directive in later sessions and can cause repeated work or override the user's current request. Procedures and workflows belong in skills, not memory.
@@ -0,0 +1,4 @@
1
+ # Tool-use enforcement
2
+ You MUST use your tools to take action — do not describe what you would do or plan to do without actually doing it. When you say you will perform an action (e.g. 'I will run the tests', 'Let me check the file', 'I will create the project'), you MUST immediately make the corresponding tool call in the same response. Never end your turn with a promise of future action — execute it now.
3
+ Keep working until the task is actually complete. Do not stop with a summary of what you plan to do next time. If you have tools available that can accomplish the task, use them instead of telling the user what you would do.
4
+ Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user. Responses that only describe intentions without acting are not acceptable.
@@ -0,0 +1,9 @@
1
+ # Google model operational directives
2
+ Follow these operational rules strictly:
3
+ - **Absolute paths:** Always construct and use absolute file paths for all file system operations. Combine the project root with relative paths.
4
+ - **Verify first:** Use read/grep to check file contents and project structure before making changes. Never guess at file contents.
5
+ - **Dependency checks:** Never assume a library is available. Check package.json, requirements.txt, Cargo.toml, Gemfile, etc. before importing.
6
+ - **Conciseness:** Keep explanatory text brief — a few sentences, not paragraphs. Focus on actions and results over narration.
7
+ - **Parallel tool calls:** When you need to perform multiple independent operations (e.g. reading several files), make all the tool calls in a single response rather than sequentially.
8
+ - **Non-interactive commands:** Use flags like -y, --yes, --non-interactive to prevent CLI tools from hanging on prompts.
9
+ - **Keep going:** Work autonomously until the task is fully resolved. Don't stop with a plan — execute it.
@@ -0,0 +1,48 @@
1
+ # Execution discipline
2
+ <tool_persistence>
3
+ - Use tools whenever they improve correctness, completeness, or grounding.
4
+ - Do not stop early when another tool call would materially improve the result.
5
+ - If a tool returns empty or partial results, retry with a different query or strategy before giving up.
6
+ - Keep calling tools until: (1) the task is complete, AND (2) you have verified the result.
7
+ </tool_persistence>
8
+
9
+ <mandatory_tool_use>
10
+ NEVER answer these from memory or mental computation — ALWAYS use a tool:
11
+ - Arithmetic, math, calculations → use the shell or a code tool
12
+ - Hashes, encodings, checksums → use the shell (e.g. sha256sum, base64)
13
+ - Current time, date, timezone → use the shell (e.g. date)
14
+ - System state: OS, CPU, memory, disk, ports, processes → use the shell
15
+ - File contents, sizes, line counts → use read, grep, or the shell
16
+ - Git history, branches, diffs → use the shell
17
+ - Current facts (weather, news, versions) → use websearch
18
+ Your memory and user profile describe the USER, not the system you are running on. The execution environment may differ from what the user profile says about their personal setup.
19
+ </mandatory_tool_use>
20
+
21
+ <act_dont_ask>
22
+ When a question has an obvious default interpretation, act on it immediately instead of asking for clarification. Examples:
23
+ - 'Is port 443 open?' → check THIS machine (don't ask 'open where?')
24
+ - 'What OS am I running?' → check the live system (don't use user profile)
25
+ - 'What time is it?' → run `date` (don't guess)
26
+ Only ask for clarification when the ambiguity genuinely changes what tool you would call.
27
+ </act_dont_ask>
28
+
29
+ <prerequisite_checks>
30
+ - Before taking an action, check whether prerequisite discovery, lookup, or context-gathering steps are needed.
31
+ - Do not skip prerequisite steps just because the final action seems obvious.
32
+ - If a task depends on output from a prior step, resolve that dependency first.
33
+ </prerequisite_checks>
34
+
35
+ <verification>
36
+ Before finalizing your response:
37
+ - Correctness: does the output satisfy every stated requirement?
38
+ - Grounding: are factual claims backed by tool outputs or provided context?
39
+ - Formatting: does the output match the requested format or schema?
40
+ - Safety: if the next step has side effects (file writes, commands, API calls), confirm scope before executing.
41
+ </verification>
42
+
43
+ <missing_context>
44
+ - If required context is missing, do NOT guess or hallucinate an answer.
45
+ - Use the appropriate lookup tool when missing information is retrievable (grep, websearch, read, etc.).
46
+ - Ask a clarifying question only when the information cannot be retrieved by tools.
47
+ - If you must proceed with incomplete information, label assumptions explicitly.
48
+ </missing_context>
@@ -13,7 +13,8 @@ module Rubino
13
13
 
14
14
  def initialize(session_id: nil, model_override: nil, provider_override: nil,
15
15
  max_turns: nil, ignore_rules: false, ui: nil, agent_definition: nil,
16
- event_bus: nil, announce_session: true, session_source: "cli")
16
+ event_bus: nil, announce_session: true, session_source: "cli",
17
+ interactive: false)
17
18
  @ui = ui || Rubino.ui
18
19
  # An in-chat rewind/fork builds a runner on the child session but has its
19
20
  # own purpose-built "┄ rewound to message N — editing ┄" marker, so the
@@ -40,6 +41,11 @@ module Rubino
40
41
  # not the user's own conversations) while staying resumable by explicit
41
42
  # id. Like Claude Code hiding its Task subagent sessions from the picker.
42
43
  @session_source = session_source
44
+ # True only for the interactive REPL, where more in-process turns follow
45
+ # this one. Lifecycle uses it to keep automatic memory extraction OFF the
46
+ # live KV-cache slot between turns (#608c) — a headless one-shot, which
47
+ # exits after its single turn, leaves it false and extracts normally.
48
+ @interactive = interactive
43
49
  # Pre-instantiate so cancel! is meaningful between turns and during the
44
50
  # window between Signal.trap install and run() — a too-early Ctrl+C
45
51
  # used to land on a nil token and silently no-op, then the next run
@@ -123,18 +129,19 @@ module Rubino
123
129
  cancel_token: @cancel_token,
124
130
  model_override: @explicit_model_override,
125
131
  provider_override: @provider_override,
132
+ interactive: @interactive,
126
133
  # The SOFT iteration ceiling (where the budget-extension prompt fires)
127
- # vs the HARD max_turns outer rail. For the main agent @max_turns is the
128
- # `--max-turns N` override, which intentionally sets the soft ceiling.
129
- # A SUBAGENT, though, gets @max_turns = definition.max_turns (= config
130
- # agent.max_turns, 90) — passing THAT as the soft ceiling made soft ==
131
- # hard, so #extendable? was always false and a subagent could NEVER
132
- # surface a budget request (#571) — it just force-summarized. Subagents
133
- # therefore pass nil so the soft ceiling falls back to config
134
- # agent.max_tool_iterations (25) < the 90 hard rail, exactly like the
135
- # main agent so a subagent at 25 iterations parks and asks for budget
136
- # via the dropdown (#574), extendable up to the 90 outer rail.
137
- max_tool_iterations: @session_source == "subagent" ? nil : @max_turns,
134
+ # vs the HARD max_turns outer rail (config agent.max_turns, applied
135
+ # inside IterationBudget). @max_turns carries the per-run soft cap on
136
+ # BOTH paths:
137
+ # - MAIN agent: the `--max-turns N` override (nil → config default).
138
+ # - SUBAGENT: definition.max_turns — e.g. explore=20, general=50,
139
+ # BELOW the 90 hard rail — so the child both HONORS its per-agent
140
+ # cap (#571: it used to be dropped entirely) AND can surface the
141
+ # #574 budget-park at that cap, extendable up to the 90 outer rail.
142
+ # A subagent that sets no max_turns falls back to config agent.max_turns
143
+ # (soft == hard) and simply hard-stops there, like the main agent.
144
+ max_tool_iterations: @max_turns,
138
145
  polishing: @polishing
139
146
  )
140
147
 
@@ -150,10 +157,19 @@ module Rubino
150
157
  # counterpart to the manual /compact swap (chat_command rebuilds the
151
158
  # runner on result[:compact_into]).
152
159
  @session = lifecycle.active_session
160
+ # Post-turn state, read by the subagent-completion path (task_tool) so a
161
+ # force-summarized/truncated child is reported PARTIAL, not "completed".
162
+ @last_stop_reason = lifecycle.last_stop_reason
153
163
 
154
164
  response
155
165
  end
156
166
 
167
+ # How this runner's LAST turn terminated (Agent::Loop#stop_reason),
168
+ # threaded up via Lifecycle. nil until a turn has run. Read by the `task`
169
+ # tool after a subagent's #run! to distinguish a real completion from a
170
+ # budget-/time-truncated partial.
171
+ attr_reader :last_stop_reason
172
+
157
173
  # Pins the agent Definition this runner threads into every subsequent turn
158
174
  # (the sticky `/agent <name>` / Tab-cycle switch). Lifecycle reads
159
175
  # @agent_definition fresh on each #run!, so swapping it here takes effect
@@ -219,6 +235,32 @@ module Rubino
219
235
  model_id
220
236
  end
221
237
 
238
+ # Aligns a RESUMED session's stored model with the model the adapter will
239
+ # actually use this run (#model-resume). Lifecycle builds the adapter from
240
+ # `@explicit_model_override || @session[:model]`, and the CLI ALWAYS passes
241
+ # a boot override (explicit `-m`, else `model.default` from config) — so on
242
+ # resume the override, NOT the model this session happened to last use, is
243
+ # what generates. The session row, the footer/statusbar, the token-budget
244
+ # context window and `/status` all read `session[:model]`, so without this
245
+ # they showed the STALE pinned model (e.g. the old default) while the agent
246
+ # was really running the new one: changing `model.default` looked ignored
247
+ # even though generation honored it. Re-point the row to the effective
248
+ # model so every surface tells the truth and a config change takes visible
249
+ # effect. No-op when there is no explicit override (then the session model
250
+ # IS what the adapter uses) or it already matches.
251
+ def sync_resumed_session_model!(session)
252
+ return unless @explicit_model_override
253
+ return if session[:model] == @explicit_model_override
254
+
255
+ session[:model] = @explicit_model_override
256
+ session[:provider] = @provider_override ||
257
+ LLM::ProviderResolver.resolve(@explicit_model_override,
258
+ explicit_provider: @config.dig("model", "provider"))
259
+ return unless @session_repo.persisted?(session[:id])
260
+
261
+ @session_repo.update(session[:id], model: session[:model], provider: session[:provider])
262
+ end
263
+
222
264
  # Marks the current session ended (#100). Called from the CLI on a clean
223
265
  # REPL teardown (and best-effort on terminal close) so a session stops
224
266
  # showing as "active" forever and cleanup/list/--continue can tell a
@@ -437,6 +479,7 @@ module Rubino
437
479
  # sees us as the live owner and forks rather than interleaving.
438
480
  session[:persisted] = true
439
481
  session[:owner_pid] = Process.pid
482
+ sync_resumed_session_model!(session)
440
483
  @ui.status("Resuming session: #{session[:id][0..7]}...") if @announce_session
441
484
  session
442
485
  else
@@ -187,7 +187,7 @@ module Rubino
187
187
  # Mirror the chunk onto the bus so the API/SSE stream isn't silent
188
188
  # during a long tool call: the Recorder maps TOOL_PROGRESS to a
189
189
  # `tool.progress` event, which resets the idle watchdog. Without
190
- # this a busy tool (summarize_file: ~30 sequential aux-LLM calls,
190
+ # this a busy tool (a long shell stream, or an aux-LLM-backed tool,
191
191
  # no run-events) is killed at the 300s idle timeout. Throttled so a
192
192
  # chatty tool (shell streaming thousands of stdout lines) doesn't
193
193
  # write a DB row + SSE frame per line — one heartbeat per interval
@@ -31,9 +31,6 @@ module Rubino
31
31
  raise ConflictError, "task #{id} already #{entry.status} — nothing to stop" unless entry.status == :running
32
32
 
33
33
  entry.runner&.cancel!
34
- # Stop-cascade (S5a): wake any descendant parked on a blocking
35
- # ask_parent so the whole subtree unwinds at once.
36
- @registry.cancel_descendant_ask_gates(id)
37
34
  [202, Serializer.detail(entry)]
38
35
  end
39
36
  end
@@ -33,7 +33,6 @@ module Rubino
33
33
  application/x-7z-compressed application/x-rar-compressed application/vnd.rar
34
34
  application/x-bzip2 application/x-xz
35
35
  ].freeze
36
- IMAGE_EXTS = %w[.png .jpg .jpeg .gif .webp .bmp .tiff .tif].freeze
37
36
 
38
37
  # Leading magic bytes per recognised image/document MIME (WebP is
39
38
  # special-cased: RIFF container + WEBP tag). Marcel lets the file NAME
@@ -62,7 +62,6 @@ module Rubino
62
62
  # * /agents (alias /tasks) — the live subagent ids, then the
63
63
  # steer/probe/--stop subcommand grammar, so the comm surface is
64
64
  # discoverable from the composer (#39).
65
- # * /reply — the ids of children blocked waiting on the human.
66
65
  # * /mcp — the configured server names (+ reload), then on/off for a
67
66
  # named server (#182), same grammar shape as /agents.
68
67
  # * /mode, /reasoning, /think — the closed enums (#185), via the
@@ -95,7 +94,6 @@ module Rubino
95
94
  "agents" => ->(args) { agents_arg_candidates(args) },
96
95
  "tasks" => ->(args) { agents_arg_candidates(args) },
97
96
  "agent" => ->(args) { args.empty? ? primary_agent_names : [] },
98
- "reply" => ->(args) { args.empty? ? blocked_subagent_ids : [] },
99
97
  "mcp" => ->(args) { mcp_arg_candidates(args) },
100
98
  "mode" => ->(args) { args.empty? ? Rubino::Modes::ALL.map(&:to_s) : [] },
101
99
  "model" => ->(args) { args.empty? ? model_arg_candidates : [] },
@@ -148,12 +146,6 @@ module Rubino
148
146
  end
149
147
  end
150
148
 
151
- # Children parked on an ask_parent waiting for the human — the ids /reply
152
- # answers.
153
- def blocked_subagent_ids
154
- Tools::BackgroundTasks.instance.awaiting_human.map(&:id)
155
- end
156
-
157
149
  # The /model candidates: the registry's model ids for the provider the
158
150
  # next turn would route through. Resolved lazily on each dropdown open so
159
151
  # a /model or /config provider switch is reflected immediately.
@@ -44,13 +44,18 @@ module Rubino
44
44
  # between child events. Repaints go through the composer's render mutex, so
45
45
  # they never race the keystroke handler. Exits as soon as no child is live
46
46
  # (it clears the region one last time) or when killed on teardown.
47
- def start_ticker(composer)
47
+ # +on_tick+ (optional) runs once per tick after the card repaint — used by
48
+ # the attach view to live-tail a focused shell's new output on the SAME
49
+ # 1 Hz cadence and through the same render mutex (composer#print_above) the
50
+ # cards use, so it never races the keystroke handler.
51
+ def start_ticker(composer, &on_tick)
48
52
  Thread.new do
49
53
  loop do
50
54
  sleep(IDLE_CARD_TICK)
51
55
  break unless composer.equal?(UI::BottomComposer.current)
52
56
 
53
57
  paint
58
+ on_tick&.call
54
59
  break unless children_live?
55
60
  end
56
61
  rescue StandardError => e