RubyGems - rubino-agent - Versions diffs - 0.3.0 → 0.5.0 - Mend

rubino-agent 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (196)

checksums.yaml +4 -4
data/.rubocop_todo.yml +11 -2
data/AGENTS.md +1 -1
data/CHANGELOG.md +172 -5
data/CONTRIBUTING.md +10 -1
data/README.md +14 -5
data/Rakefile +31 -0
data/docs/agents.md +42 -23
data/docs/architecture.md +2 -2
data/docs/commands.md +35 -3
data/docs/configuration.md +20 -23
data/docs/getting-started.md +5 -3
data/docs/security.md +16 -5
data/docs/skills.md +31 -0
data/docs/troubleshooting.md +1 -1
data/exe/rubino +16 -2
data/install.sh +721 -59
data/lib/rubino/active_agent.rb +73 -0
data/lib/rubino/agent/action_claim_guard.rb +881 -0
data/lib/rubino/agent/agent_registry.rb +5 -2
data/lib/rubino/agent/definition.rb +1 -9
data/lib/rubino/agent/fallback_chain.rb +0 -6
data/lib/rubino/agent/iteration_budget.rb +109 -3
data/lib/rubino/agent/loop.rb +476 -20
data/lib/rubino/agent/model_call_runner.rb +81 -3
data/lib/rubino/agent/prompts/build.txt +22 -5
data/lib/rubino/agent/response_validator.rb +8 -0
data/lib/rubino/agent/runner.rb +133 -8
data/lib/rubino/agent/tool_executor.rb +166 -14
data/lib/rubino/agent/truncation_continuation.rb +4 -1
data/lib/rubino/api/server.rb +19 -0
data/lib/rubino/attachments/classify.rb +35 -17
data/lib/rubino/boot/config_guard.rb +71 -0
data/lib/rubino/cli/chat/completion_builder.rb +42 -6
data/lib/rubino/cli/chat/idle_card_host.rb +7 -1
data/lib/rubino/cli/chat/session_resolver.rb +87 -21
data/lib/rubino/cli/chat_command.rb +1189 -50
data/lib/rubino/cli/commands.rb +282 -2
data/lib/rubino/cli/config_command.rb +68 -8
data/lib/rubino/cli/doctor_command.rb +204 -12
data/lib/rubino/cli/jobs_command.rb +12 -0
data/lib/rubino/cli/memory_command.rb +53 -20
data/lib/rubino/cli/onboarding_wizard.rb +79 -6
data/lib/rubino/cli/session_command.rb +172 -18
data/lib/rubino/cli/setup_command.rb +131 -8
data/lib/rubino/cli/skills_command.rb +183 -9
data/lib/rubino/cli/trust_gate.rb +16 -7
data/lib/rubino/commands/built_ins.rb +2 -0
data/lib/rubino/commands/command.rb +12 -2
data/lib/rubino/commands/executor.rb +149 -12
data/lib/rubino/commands/handlers/agent_switch.rb +100 -0
data/lib/rubino/commands/handlers/agents.rb +156 -41
data/lib/rubino/commands/handlers/config.rb +4 -1
data/lib/rubino/commands/handlers/help.rb +113 -14
data/lib/rubino/commands/handlers/memory.rb +15 -5
data/lib/rubino/commands/handlers/sessions.rb +26 -3
data/lib/rubino/commands/handlers/status.rb +9 -4
data/lib/rubino/commands/loader.rb +12 -0
data/lib/rubino/config/configuration.rb +86 -24
data/lib/rubino/config/defaults.rb +140 -33
data/lib/rubino/config/loader.rb +62 -12
data/lib/rubino/config/validator.rb +341 -0
data/lib/rubino/config/writer.rb +123 -31
data/lib/rubino/context/compressor.rb +184 -22
data/lib/rubino/context/environment_inspector.rb +2 -2
data/lib/rubino/context/file_discovery.rb +2 -2
data/lib/rubino/context/message_boundary.rb +27 -1
data/lib/rubino/context/project_languages.rb +90 -0
data/lib/rubino/context/prompt_assembler.rb +105 -22
data/lib/rubino/context/summary_builder.rb +45 -4
data/lib/rubino/context/token_budget.rb +36 -11
data/lib/rubino/context/token_estimate.rb +45 -0
data/lib/rubino/context/tool_result_pruner.rb +81 -0
data/lib/rubino/database/connection.rb +154 -3
data/lib/rubino/database/migrations/001_create_initial_schema.rb +314 -40
data/lib/rubino/database/migrator.rb +98 -5
data/lib/rubino/documents/cap_exceeded.rb +13 -0
data/lib/rubino/documents/converters/csv.rb +4 -3
data/lib/rubino/documents/converters/docx.rb +29 -5
data/lib/rubino/documents/converters/html.rb +5 -1
data/lib/rubino/documents/converters/json.rb +2 -1
data/lib/rubino/documents/converters/pdf.rb +11 -2
data/lib/rubino/documents/converters/plain.rb +2 -1
data/lib/rubino/documents/converters/pptx.rb +11 -2
data/lib/rubino/documents/converters/xlsx.rb +35 -4
data/lib/rubino/documents/converters/xml.rb +2 -1
data/lib/rubino/documents/limits.rb +210 -0
data/lib/rubino/documents.rb +10 -3
data/lib/rubino/errors.rb +36 -5
data/lib/rubino/interaction/cancel_token.rb +19 -3
data/lib/rubino/interaction/events.rb +13 -0
data/lib/rubino/interaction/lifecycle.rb +99 -13
data/lib/rubino/interaction/polishing.rb +176 -0
data/lib/rubino/jobs/cron_job_repository.rb +5 -8
data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +11 -0
data/lib/rubino/jobs/handlers/distill_skill_job.rb +65 -9
data/lib/rubino/jobs/queue.rb +63 -8
data/lib/rubino/jobs/runner.rb +24 -6
data/lib/rubino/jobs/worker.rb +0 -4
data/lib/rubino/llm/adapter_response.rb +47 -4
data/lib/rubino/llm/credential_check.rb +15 -16
data/lib/rubino/llm/error_classifier.rb +89 -1
data/lib/rubino/llm/inline_think_filter.rb +69 -12
data/lib/rubino/llm/request.rb +30 -3
data/lib/rubino/llm/ruby_llm_adapter.rb +394 -46
data/lib/rubino/llm/tool_bridge.rb +113 -9
data/lib/rubino/mcp/manager.rb +18 -1
data/lib/rubino/mcp/mcp_tool_wrapper.rb +14 -3
data/lib/rubino/memory/aux_retry.rb +107 -0
data/lib/rubino/memory/backends/sqlite.rb +73 -44
data/lib/rubino/memory/backends.rb +23 -7
data/lib/rubino/memory/salience_gate.rb +103 -0
data/lib/rubino/memory/sqlite_extraction.rb +70 -0
data/lib/rubino/memory/sqlite_extraction_prompt.rb +11 -0
data/lib/rubino/memory/store.rb +33 -5
data/lib/rubino/memory/threat_scanner.rb +52 -0
data/lib/rubino/output/cost.rb +52 -0
data/lib/rubino/output/headless_block_latch.rb +53 -0
data/lib/rubino/output/result_serializer.rb +222 -0
data/lib/rubino/output/turn_recorder.rb +77 -0
data/lib/rubino/security/approval_policy.rb +227 -32
data/lib/rubino/security/command_allowlist.rb +79 -4
data/lib/rubino/security/doom_loop_detector.rb +21 -2
data/lib/rubino/security/hardline_guard.rb +189 -16
data/lib/rubino/security/pattern_matcher.rb +28 -5
data/lib/rubino/security/prefix_deriver.rb +25 -6
data/lib/rubino/security/readonly_commands.rb +145 -5
data/lib/rubino/security/secret_path.rb +134 -0
data/lib/rubino/security/url_safety.rb +255 -0
data/lib/rubino/session/repository.rb +212 -11
data/lib/rubino/session/store.rb +139 -14
data/lib/rubino/skills/installer.rb +230 -0
data/lib/rubino/skills/prompt_index.rb +2 -2
data/lib/rubino/skills/registry.rb +52 -1
data/lib/rubino/skills/skill.rb +64 -3
data/lib/rubino/skills/skill_tool.rb +16 -5
data/lib/rubino/tools/background_tasks.rb +157 -13
data/lib/rubino/tools/base.rb +204 -3
data/lib/rubino/tools/edit_tool.rb +73 -18
data/lib/rubino/tools/glob_tool.rb +48 -9
data/lib/rubino/tools/grep_tool.rb +103 -9
data/lib/rubino/tools/multi_edit_tool.rb +64 -9
data/lib/rubino/tools/patch_tool.rb +5 -0
data/lib/rubino/tools/read_attachment_tool.rb +3 -1
data/lib/rubino/tools/read_tool.rb +33 -15
data/lib/rubino/tools/read_tracker.rb +153 -35
data/lib/rubino/tools/registry.rb +113 -12
data/lib/rubino/tools/result.rb +9 -1
data/lib/rubino/tools/ruby_tool.rb +0 -0
data/lib/rubino/tools/shell_registry.rb +70 -0
data/lib/rubino/tools/shell_tool.rb +40 -1
data/lib/rubino/tools/summarize_file_tool.rb +6 -0
data/lib/rubino/tools/task_stop_tool.rb +10 -16
data/lib/rubino/tools/task_tool.rb +36 -8
data/lib/rubino/tools/vision_tool.rb +5 -0
data/lib/rubino/tools/webfetch_tool.rb +39 -7
data/lib/rubino/tools/websearch_tool.rb +92 -30
data/lib/rubino/tools/write_tool.rb +23 -4
data/lib/rubino/ui/api.rb +10 -1
data/lib/rubino/ui/base.rb +11 -0
data/lib/rubino/ui/bottom_composer.rb +382 -74
data/lib/rubino/ui/cli.rb +515 -83
data/lib/rubino/ui/completion_menu.rb +11 -7
data/lib/rubino/ui/headless_trace.rb +63 -0
data/lib/rubino/ui/live_region.rb +70 -7
data/lib/rubino/ui/markdown_renderer.rb +142 -7
data/lib/rubino/ui/notifier.rb +0 -2
data/lib/rubino/ui/null.rb +52 -5
data/lib/rubino/ui/paste_store.rb +16 -2
data/lib/rubino/ui/queued_indicators.rb +6 -1
data/lib/rubino/ui/status_bar.rb +61 -7
data/lib/rubino/ui/streaming_markdown.rb +59 -6
data/lib/rubino/ui/subagent_view.rb +29 -4
data/lib/rubino/ui/tool_label.rb +52 -0
data/lib/rubino/update_check.rb +39 -4
data/lib/rubino/util/atomic_file.rb +117 -0
data/lib/rubino/util/ignore_rules.rb +120 -0
data/lib/rubino/util/output.rb +229 -12
data/lib/rubino/util/secrets_mask.rb +70 -7
data/lib/rubino/util/spill_store.rb +153 -0
data/lib/rubino/version.rb +1 -1
data/lib/rubino/workspace.rb +9 -1
data/lib/rubino.rb +191 -7
data/rubino-agent.gemspec +1 -0
data/skills/ruby-expert/SKILL.md +1 -0
metadata +42 -12
data/lib/rubino/agent/router.rb +0 -65
data/lib/rubino/database/migrations/002_create_runs.rb +0 -45
data/lib/rubino/database/migrations/003_create_skill_states.rb +0 -15
data/lib/rubino/database/migrations/004_create_cron_jobs.rb +0 -36
data/lib/rubino/database/migrations/005_create_oauth_connections.rb +0 -27
data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +0 -34
data/lib/rubino/database/migrations/007_create_messages_fts.rb +0 -59
data/lib/rubino/database/migrations/008_create_memory_facts.rb +0 -75
data/lib/rubino/database/migrations/009_create_memory_graph.rb +0 -55
data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +0 -20

data/lib/rubino/tools/background_tasks.rb CHANGED

@@ -38,8 +38,11 @@ module Rubino
       # #record_tool_started / #record_tool_finished) under the registry mutex
       # and read by the parent renderer (UI::SubagentCards) and
       # the /agents drill-in. activity_log is a bounded ring of the last few
-      # `✓ verb · hint` lines for the live drill-in; nothing is persisted (it
-      # dies with the process, like the rest of the registry).
+      # `✓ verb · hint` lines for the live drill-in; output_tail is the bounded
+      # line buffer of the CURRENTLY RUNNING tool's streamed output (fed by
+      # #record_tool_output, wiped at #record_tool_finished) that the drill-in's
+      # output: block tails (#5). Nothing is persisted (it dies with the
+      # process, like the rest of the registry).
       #
       # approval_gate / approval_question / approval_command are the
       # Option-2 approval-surfacing state: when a background child's tool needs
@@ -49,7 +52,7 @@ module Rubino
       Entry = Struct.new(
         :id, :subagent, :prompt, :status, :result, :error,
         :thread, :runner, :started_at, :finished_at,
-        :last_activity, :tool_count, :activity_log,
+        :last_activity, :tool_count, :activity_log, :output_tail,
         :approval_gate, :approval_id, :approval_question, :approval_command,
         # Parent->child steer (the `/agents <id> steer "..."` note). Wired into
         # the child Loop as its Interaction::InputQueue (the SAME turn-boundary
@@ -94,6 +97,26 @@ module Rubino
       # How many recent activity lines the drill-in shows (the live `recent:` ring).
       ACTIVITY_LOG_MAX = 6
+      # Bounds for the live output tail (#5): how many COMPLETE lines the
+      # drill-in's output: block shows (the buffer keeps one extra slot for the
+      # in-flight partial line), and the byte cap per buffered line so a
+      # newline-free stream can't grow a line unbounded.
+      OUTPUT_TAIL_MAX      = 6
+      OUTPUT_TAIL_LINE_MAX = 200
+      # Prefix #deliver_answer stamps on the steer-queue COPY of an answer it has
+      # ALREADY delivered to the child via its ask gate (the dual-path delivery:
+      # gate for a blocking ask, steer-queue for a non-blocking one). When the
+      # child resumes via the gate and finishes WITHOUT another turn boundary, the
+      # still-queued copy is drained by #complete and would surface as an
+      # "undelivered steer note" — but the answer WAS delivered via the gate, so
+      # reporting it undelivered is a false alarm (the /reply happy-path
+      # regression from the H5 fix #457). The completion-notice paths filter notes
+      # carrying this prefix OUT of the undelivered report for exactly that
+      # reason; a genuine `/agents <id> steer "..."` note never carries it, so the
+      # deliver-or-report-undelivered invariant for real steer notes is intact.
+      ANSWER_NOTE_PREFIX = "[parent answer] "
       class << self
         def instance
           @instance ||= new
@@ -190,6 +213,19 @@ module Rubino
       # see a consistent snapshot. A failure landing on a :stopping entry is a
       # USER-REQUESTED stop unwinding (Interrupted at the next checkpoint), so
       # it is recorded as :stopped — distinct from a genuine :failed (#108/#13).
+      #
+      # H5 — closes the drain↔complete race. The final drain of the child's
+      # steer_queue happens HERE, under the SAME registry mutex that flips the
+      # status to terminal, and #steer refuses to push onto a terminal entry
+      # under that SAME mutex. So a steer/answer arriving concurrently is
+      # serialised against this finalize: it is EITHER pushed before the status
+      # flips (and drained right here into the returned `undelivered` notes) OR
+      # rejected by #steer (which then honestly reports not-delivered). The
+      # earlier shape — drain (InputQueue lock) then complete (registry lock),
+      # two locks with a gap — let an answer land on a now-dead queue: dropped,
+      # omitted from `undelivered`, yet reported delivered. Returns the notes
+      # that were still queued at finalize time (never delivered to the child),
+      # so the caller can surface them as undelivered.
       def complete(entry, status:, result: nil, error: nil)
         @mutex.synchronize do
           status            = :stopped if entry.status == :stopping && status == :failed
@@ -197,6 +233,10 @@ module Rubino
           entry.result      = result
           entry.error       = error
           entry.finished_at = Time.now
+          # Drain UNDER the mutex: anything still here is undelivered (the child
+          # has no further turn to fold it in), and once status is terminal no
+          # new note can arrive — #steer rejects it.
+          entry.steer_queue&.drain || []
         end
       end
@@ -219,7 +259,8 @@ module Rubino
       # Records a child tool FINISHING: appends a terse line to the bounded
       # activity ring the live drill-in (#71) tails. Keeps the last
       # ACTIVITY_LOG_MAX entries so the ring never grows unbounded for a
-      # read-heavy child.
+      # read-heavy child. Also wipes the live output tail — it belongs to the
+      # tool that just finished, so the drill-in's output: block clears (#5).
       def record_tool_finished(id, line)
         @mutex.synchronize do
           entry = @entries[id]
@@ -228,6 +269,26 @@ module Rubino
           log = (entry.activity_log ||= [])
           log << line.to_s
           log.shift while log.size > ACTIVITY_LOG_MAX
+          entry.output_tail = nil
+        end
+      end
+      # Records a streamed chunk of the CURRENTLY RUNNING tool's output (#5):
+      # splits on newlines into a bounded line buffer whose LAST slot carries
+      # the in-flight partial line, so the /agents drill-in can tail it live.
+      # Called from UI::SubagentView#tool_chunk on the CHILD thread, so it MUST
+      # take the mutex like the other record_* writers. No-op for an unknown id.
+      def record_tool_output(id, chunk)
+        @mutex.synchronize do
+          entry = @entries[id]
+          return unless entry
+          tail = (entry.output_tail ||= [""])
+          chunk.to_s.each_line do |line|
+            tail[-1] = "#{tail[-1]}#{line.chomp}"[0, OUTPUT_TAIL_LINE_MAX]
+            tail << "" if line.end_with?("\n")
+          end
+          tail.shift while tail.size > OUTPUT_TAIL_MAX + 1
         end
       end
@@ -267,17 +328,28 @@ module Rubino
       # affordance). Pushes the text onto the child's steering queue, which the
       # child Loop drains at its next iteration boundary (Loop#inject_steered_input)
       # — between turns, never between a tool_use and its results. Best-effort:
-      # returns false (and pushes nothing) when the entry is gone or has no queue
-      # (e.g. a finished child), true when the note was queued.
+      # returns false (and pushes nothing) when the entry is gone, has no queue,
+      # or has ALREADY reached a terminal state (the child finished — there is no
+      # more turn to fold the note into); true when the note was queued.
+      #
+      # H5 — the push happens UNDER the registry mutex, gated on a non-terminal
+      # status, so it is serialised against #complete (which flips the status to
+      # terminal AND drains the queue under that SAME mutex). Either this push
+      # wins the lock first (the note is queued and will be drained — by the
+      # child at its next turn, or by #complete into the undelivered report) or
+      # #complete wins first (status is terminal and this returns false). There
+      # is no window in which a note is pushed onto a queue nobody will drain yet
+      # reported delivered. Pushing inside the mutex is safe: InputQueue#push has
+      # its own lock and never calls back into the registry, so no lock cycle.
       def steer(id, text)
-        queue = @mutex.synchronize do
+        @mutex.synchronize do
           entry = @entries[id]
-          entry&.steer_queue
-        end
-        return false unless queue
+          return false unless entry&.steer_queue
+          return false if terminal_status?(entry.status)
-        queue.push(text)
-        true
+          entry.steer_queue.push(text)
+          true
+        end
       end
       # Records a BILLED live probe against a child (S3): bumps probe_count and
@@ -352,8 +424,22 @@ module Rubino
         entry = find(id)
         return false unless entry&.ask_gate
+        # H5 — #steer is the SINGLE race-free liveness oracle here: it pushes the
+        # answer onto the steer_queue under the registry mutex IFF the child is
+        # still non-terminal, returning false the instant the child has finished
+        # (atomic against #complete, which flips the status and drains the queue
+        # under that same mutex). So we steer FIRST and let its honest result
+        # decide everything:
+        #   false ⇒ the child already finished; neither path can reach it. Do NOT
+        #           decide the gate (a no-op for a child that will never await
+        #           it) and do NOT clear the ask — report not-delivered.
+        #   true  ⇒ the child is live and the answer is queued; a BLOCKING ask
+        #           additionally needs its gate decided so the parked child wakes
+        #           with the answer as its tool result. Then clear the blocked
+        #           state and report delivered.
+        return false unless steer(entry.id, "#{ANSWER_NOTE_PREFIX}#{answer}")
         entry.ask_gate.decide(entry.ask_id, answer)
-        steer(entry.id, "[parent answer] #{answer}")
         end_ask(entry.id)
         true
       end
@@ -451,6 +537,54 @@ module Rubino
         descendants_of(id).each { |e| e.ask_gate&.cancel! }
       end
+      # The ONE per-entry stop body, shared by every stop path (the human
+      # /agents <id> --stop, the model-callable task_stop, and the
+      # parent-teardown #cancel_all below). Marks the stop so the unwind records
+      # as :stopped (not ✗ failed) and the list shows ◌ stopping, then wakes the
+      # entry no matter HOW it is blocked: a child parked on its OWN approval or
+      # ask gate (cancel those → Interrupted → clean unwind), any descendant
+      # parked on a blocking ask (the stop-cascade), and the runner's CancelToken
+      # for a child between checkpoints. Idempotent and safe on an already-stopped
+      # or never-blocked entry (each cancel! is one-shot; request_stop no-ops on a
+      # non-live status), so #cancel_all can call it across the whole registry.
+      def stop_entry(entry)
+        return unless entry
+        request_stop(entry.id)
+        entry.approval_gate&.cancel!
+        entry.ask_gate&.cancel!
+        cancel_descendant_ask_gates(entry.id)
+        entry.runner&.cancel!
+      end
+      # Structured-concurrency teardown seam: cancel EVERY live subagent so the
+      # process never leaves a child parked. The required fix for the parent-death
+      # deadlock (#XXX) — when the PARENT dies/interrupts (REPL break, HUP/TERM,
+      # clean quit, an aborted turn) a child blocked on ask_parent(blocking:true)
+      # otherwise stays parked on its gate for the full ask_parent_timeout (~900s)
+      # because nothing cancels its gate; the per-id stop paths only fire on an
+      # explicit /agents --stop or task_stop. Calling this from each parent-death
+      # edge wakes every blocked child SYNCHRONOUSLY (cancel! pushes its sentinel;
+      # the gate's await observes it within one WAKE_TICK) so each unwinds via the
+      # existing `rescue Rubino::Interrupted` with the clean "parent question was
+      # cancelled" message instead of hanging to the bound. No-op when there are no
+      # live children, and idempotent (#stop_entry is), so it is safe to invoke
+      # from a teardown `ensure` and from a signal trap. Snapshots #running first
+      # (outside the per-entry work) so we don't hold the registry mutex across the
+      # gate/runner cancels.
+      def cancel_all
+        running.each { |entry| stop_entry(entry) }
+        # Logical cancel alone (above) only flips cancel tokens and trusts each
+        # child THREAD to observe the token and reap its own shell within a wake
+        # tick — but on parent-DEATH the process exits before the thread reaches
+        # that checkpoint, so any shell a child spawned (its own pgid) reparents
+        # to init as an orphan (MED-2). Reap the tracked shell process groups
+        # SYNCHRONOUSLY here so the same parent-death edges that call cancel_all
+        # (clean quit, HUP/TERM trap, REPL break) leave no surviving shell.
+        ShellRegistry.instance.kill_all_groups
+      end
+      alias shutdown! cancel_all
       # True iff `child_id`'s direct owner is `parent_id` (the ownership predicate
       # later slices' steer/probe/answer_child AUTHORIZATION checks will build on).
       def owned_by?(parent_id, child_id)
@@ -508,6 +642,16 @@ module Rubino
         %i[running needs_approval blocked_on_human blocked_on_parent stopping].include?(status)
       end
+      # A child has reached a TERMINAL state once #complete has run: its worker
+      # thread is done, its steer_queue has been drained, and it has no further
+      # turn to fold a steer note into. #steer rejects pushes onto a terminal
+      # entry (H5) so an answer arriving after finalize is reported undelivered
+      # rather than dropped-but-reported-delivered. :cancelled is included for
+      # the API surface, which records cancellation via #complete too.
+      def terminal_status?(status)
+        %i[completed failed stopped cancelled].include?(status)
+      end
       def running_count
         @entries.values.count { |e| live_status?(e.status) }
       end

data/lib/rubino/tools/base.rb CHANGED

@@ -24,6 +24,14 @@ module Rubino
       # tool with no streamable output (read, edit, glob) just ignores it.
       attr_accessor :stream_chunk
+      # Optional render hint the ToolExecutor forwards to the UI alongside each
+      # streamed chunk (and the end-of-call body). :diff makes the CLI colorize
+      # +/-/@@ lines AND show the full hunks instead of collapsing to the 3-line
+      # preview — so "show me the diff" surfaces the real diff, not a snippet.
+      # Default nil ⇒ :plain. Set it from #call once the command/content kind is
+      # known; the streaming lambda reads it live.
+      attr_accessor :stream_kind
       # Convenience guard so tools don't sprinkle nil-checks at every emit.
       def emit_chunk(text)
         return if text.nil? || text.to_s.empty?
@@ -90,6 +98,26 @@ module Rubino
       protected
+      # Resolves a model-supplied path to an absolute one, anchoring a RELATIVE
+      # path at the workspace primary root (terminal.cwd || launch cwd) instead
+      # of the process cwd.
+      #
+      # `File.expand_path(rel)` anchors at Dir.pwd, but the agent's "current
+      # directory" — the dir the @-picker, shell/test and sandbox all agree on
+      # — is Workspace.primary_root, which is terminal.cwd when configured (e.g.
+      # bin/dev / the QA harness point it at a workspace subdir while the process
+      # launches from the parent). When the two diverge, a relative `shopkit/
+      # cart.py` resolved one directory too shallow and 404'd, forcing an
+      # ls→glob→re-read detour (r6 F3). Anchoring at primary_root fixes that
+      # while an ABSOLUTE path (or a ~ path) passes straight through unchanged,
+      # so the workspace guard downstream still sees the real target.
+      def expand_workspace_path(path)
+        str = path.to_s
+        return File.expand_path(str) if str.start_with?(File::SEPARATOR, "~")
+        File.expand_path(str, workspace_root)
+      end
       # Filesystem sandbox for write/edit/delete operations.
       #
       # Defaults to Dir.pwd, overridable via terminal.cwd in config. Mutating
@@ -191,6 +219,152 @@ module Rubino
           "Set tools.workspace_strict=false in config.yml to disable this check."
       end
+      # Typed "outside workspace" error gate, retained for the AUX-LLM read
+      # tools (summarize_file, vision) ONLY. Those route the raw file bytes
+      # through a third-party auxiliary model, so an out-of-workspace read would
+      # EXFILTRATE a sibling-repo secret / ~/.ssh file — a stronger threat than
+      # the in-process read/grep/glob, which were relaxed to broad in #406. A
+      # `path` is outside iff within_workspace? is false (strict mode on) and it
+      # isn't under the agent home; strict mode off never fires.
+      def outside_workspace?(expanded)
+        return false unless workspace_strict?
+        return false if within_workspace?(expanded)
+        # The agent's OWN home dir (~/.rubino) holds pastes, attachments and
+        # session files the agent explicitly points the model at — legitimate
+        # reads even though they sit outside the project workspace.
+        return false if under_agent_home?(expanded)
+        true
+      end
+      def outside_workspace_message(path)
+        roots = workspace_roots
+        roots_list = roots.length == 1 ? roots.first : roots.join(", ")
+        { output: "Error: '#{path}' is outside your workspace roots (#{roots_list}) — " \
+                  "it is NOT missing, you are not allowed to access it here. " \
+                  "Run `/add-dir #{File.dirname(File.expand_path(path.to_s))}` to include its folder, " \
+                  "or relaunch in that directory. Do not try to create or overwrite it.",
+          error_code: :outside_workspace }
+      end
+      # UNIFIED SECRET-PATH PREDICATE (#446). One "is this a secret/credential
+      # path?" question used by BOTH the read side (read/grep/glob) and the
+      # write side (write/edit/multi_edit/apply_patch). Previously the read
+      # denylist (#406) was a NARROW subset (.env*/.envrc + agent-home) and the
+      # write denylist (#413) the SUPERSET; the maintainer decision is that
+      # reading OR writing a secret both require EXPLICIT user approval, applied
+      # to the SAME set. So there is now ONE set — the (wider) write set — and
+      # ONE predicate: #secret_path_category. The approval gate lives in
+      # Security::ApprovalPolicy#decide (returns :ask for a secret target), which
+      # gives us the existing flow for free: interactive → approval dropdown
+      # auto-opens; approved → the tool proceeds; denied → refused; headless (no
+      # human) → fails CLOSED via ToolExecutor's :noninteractive floor. The tools
+      # therefore NO LONGER self-refuse a secret in #call — an approved read of
+      # your .env must actually return its bytes, and an approved write must
+      # actually write. The predicate is still consulted directly in ONE place:
+      # GrepTool post-filters its RESULTS through it so an include-glob
+      # (`include: "*.env"`) over a directory can't leak a secret the per-target
+      # gate never saw (F2).
+      #
+      # DELIBERATE DIVERGENCE FROM HERMES: Hermes' file_safety.get_read_block_error
+      # FLAT-DENIES reading project .env* (model-facing deny, no human in the
+      # loop, defense-in-depth only). rubino instead routes the read through an
+      # explicit user APPROVAL gate (ask, not deny) so the agent CAN read/update
+      # your .env when you say yes — stricter than Claude Code's default
+      # (ungated reads) and aider, more content-aware than Codex's OS-sandbox.
+      #
+      # Matches (by BASENAME, in any directory):
+      #   - project credential files: .env, .env.* (.env.local/.production), .envrc
+      #   - shell/credential dotfiles: .netrc, .pgpass, .npmrc, .pypirc,
+      #     .git-credentials, .bashrc, .zshrc, .profile, .bash_profile, .zprofile
+      # Matches (by absolute PATH / PREFIX):
+      #   - ~/.ssh, ~/.aws, ~/.gnupg, ~/.kube, ~/.docker, ~/.azure,
+      #     ~/.config/gh, ~/.config/gcloud  (the whole tree)
+      #   - /etc/sudoers, /etc/sudoers.d/*, /etc/passwd, /etc/shadow, /etc/systemd/*
+      #   - anything UNDER the agent home (~/.rubino) that holds auth/secrets:
+      #     the home .env, the sqlite DB, any *oauth* file, an mcp-tokens/ dir,
+      #     and *.key / *.pem material.
+      # Returns the matched category string (truthy) or nil when the path is not
+      # a secret. (Non-predicate: the truthy return carries the category string
+      # the approval question / block message interpolates.)
+      #
+      # The UNIFIED predicate (delegates to the single source of truth,
+      # Security::SecretPath.category). Returns the matched-secret category
+      # string (truthy) for a secret/credential path, or nil for a normal file.
+      def secret_path_category(expanded)
+        Security::SecretPath.category(expanded)
+      end
+      # Denial body for a secret hit that the GrepTool post-filter strips out of
+      # an include-glob result set (F2): the directory grep wasn't itself a
+      # secret target, so the per-call approval gate never saw it — we refuse the
+      # leaking RESULTS here instead. error_code stays :secret_denied for parity
+      # with the read side.
+      def secret_filtered_block_message(path, category)
+        { output: "Error: refusing to return secret content from '#{path}' — it is a #{category}. " \
+                  "The search matched a credential file via an include-glob; secrets are not " \
+                  "returned without explicit user approval. Ask the user, or read the file " \
+                  "directly (which prompts for approval).",
+          error_code: :secret_denied }
+      end
+      # True when +expanded+ resolves under the Rubino home directory. Symlinks
+      # are resolved on both sides so a link can't be used to claim home-ness.
+      def under_agent_home?(expanded)
+        home = Rubino.home_path
+        return false if home.nil? || home.to_s.empty?
+        home_real   = (File.realpath(home) if File.exist?(home)) || File.expand_path(home)
+        target_real = canonical_path(expanded)
+        return false unless target_real
+        target_real == home_real || target_real.start_with?("#{home_real}#{File::SEPARATOR}")
+      rescue StandardError => e
+        # Fail closed (treat as NOT under home) on any resolution error — but log
+        # it: this predicate gates a security-relevant decision, so a swallowed
+        # error that mis-resolves home-ness must at least leave a trace.
+        Rubino.logger&.warn(event: "tools.under_agent_home_failed",
+                            error: e.message, error_class: e.class.name)
+        false
+      end
+      # Reads a file and scrubs a stray non-UTF-8 byte (e.g. a Latin-1 `é` in a
+      # legacy/EU source) to the replacement char. Shared by EditTool and
+      # MultiEditTool so a single bad byte doesn't raise "invalid byte sequence
+      # in UTF-8" out of the include?/scan/sub that follow and leave the file
+      # uneditable. Lossy on the offending byte, graceful for everything else.
+      #
+      # IMPORTANT (#326): this is for MODEL CONTEXT only — NEVER feed the
+      # scrubbed buffer to a File.write, because `scrub` rewrites every
+      # non-UTF-8 byte on UNTOUCHED lines to U+FFFD, so a one-line ASCII edit
+      # would lossily corrupt the whole file. Use #read_for_edit for the
+      # read-modify-write path.
+      def read_scrubbed(path)
+        content = File.read(path)
+        content.valid_encoding? ? content : content.scrub
+      end
+      # Reads a file for the edit/multi_edit READ-MODIFY-WRITE path (#326).
+      #
+      # Returns the raw bytes as BINARY (ASCII-8BIT) so the literal
+      # include?/scan/sub/gsub run byte-wise and every byte OUTSIDE the matched
+      # span is preserved exactly — a Latin-1 `André` on an untouched line is
+      # written back byte-identical even when the file isn't valid UTF-8. The
+      # model-supplied old_string/new_string are likewise compared/spliced as
+      # bytes (see #to_match_bytes), so a UTF-8 needle still matches its UTF-8
+      # bytes in the file. Valid-UTF-8 files behave exactly as before.
+      def read_for_edit(path)
+        File.binread(path)
+      end
+      # Forces a model-supplied string to the SAME binary encoding the on-disk
+      # content carries in #read_for_edit, so include?/scan/sub compare raw
+      # bytes (a UTF-8 `é` needle matches its two on-disk bytes). dup so we
+      # never mutate the caller's frozen literal.
+      def to_match_bytes(str)
+        str.to_s.dup.force_encoding(Encoding::BINARY)
+      end
       # Read-before-edit gate shared by EditTool and MultiEditTool. Refuses the
       # write when the model never read this file in the current session, or
       # read it but the file changed on disk since. Returns nil (proceed) or an
@@ -208,15 +382,42 @@ module Rubino
                    error_code: :stale_read }
         end
+        # Fresh? matches on EITHER unchanged mtime OR unchanged content hash, so
+        # the agent's own write (refreshed via note_write), a no-op touch, a
+        # CRLF normalisation, or a linter rewrite to identical bytes does NOT
+        # trip this guard (r5 B2). Only a genuine content change does.
+        return nil if @read_tracker.fresh?(expanded)
         stashed = @read_tracker.mtime_at_read(expanded)
         current = File.mtime(expanded)
-        return nil if stashed.nil? || current <= stashed
         { output: "Error: #{display_path} changed on disk since the last read " \
-                  "(read at #{stashed.utc.iso8601}, now #{current.utc.iso8601}). " \
+                  "(read at #{stashed&.utc&.iso8601}, now #{current.utc.iso8601}). " \
                   "Re-read the file before editing so the #{verb} reflect the current contents.",
           error_code: :stale_read }
       end
+      # Read-before-overwrite gate for WriteTool on an EXISTING file (r5 MF-2).
+      #
+      # Blocks a blind `write` in two situations: the tracker has never seen
+      # this path in the session, or it has seen it but no longer considers it
+      # fresh (the file changed on disk since the read). Callers only invoke
+      # this for files that already exist.
+      #
+      # expanded     - path handed to the read tracker's seen?/fresh? checks.
+      # display_path - path as it should appear in the message to the model.
+      #
+      # Returns nil to let the write proceed (including when no tracker is
+      # configured), otherwise a Hash { output:, error_code: :unread_overwrite }.
+      def overwrite_guard_error(expanded, display_path)
+        tracker = @read_tracker
+        return nil unless tracker
+        if tracker.seen?(expanded)
+          # Seen and still fresh: overwriting is safe.
+          return nil if tracker.fresh?(expanded)
+          message = "Error: #{display_path} changed on disk since you last read it — " \
+                    "re-read it before overwriting so you don't clobber newer content."
+        else
+          message = "Error: refusing to overwrite existing file #{display_path} — " \
+                    "you have not read it this session, so a blind write would clobber its " \
+                    "current contents. Read it first (then use `edit`/`multi_edit` for a " \
+                    "targeted change, or `write` the full intended content)."
+        end
+        { output: message, error_code: :unread_overwrite }
+      end
     end
   end
 end

data/lib/rubino/tools/edit_tool.rb CHANGED Viewed

@@ -46,12 +46,21 @@ module Rubino
       end
       def call(arguments)
-        file_path = arguments["file_path"] || arguments[:file_path]
-        old_string = arguments["old_string"] || arguments[:old_string]
-        new_string = arguments["new_string"] || arguments[:new_string]
-        replace_all = arguments["replace_all"] || arguments[:replace_all] || false
+        file_path, old_string, new_string, replace_all = parse_args(arguments)
+        # Input guards (#329a/b): reject an empty needle (a literal sub/gsub on
+        # "" matches at every char boundary and would corrupt the file under
+        # replace_all) and a no-op old==new (reporting "1 replacement" misleads
+        # the model — multi_edit already rejects it, so match that).
+        if (guard = guard_args(old_string, new_string))
+          return guard
+        end
-        expanded = File.expand_path(file_path)
+        expanded = expand_workspace_path(file_path)
+        # SECRET/credential edits (#446) are no longer HARD-refused here — they
+        # are gated UPSTREAM by Security::ApprovalPolicy#decide (→ :ask): an
+        # APPROVED edit of your .env actually applies, a denied/headless one
+        # never reaches #call. The workspace sandbox below is unchanged.
         return workspace_violation_message(file_path) unless within_workspace?(expanded)
         return "Error: File not found: #{file_path}" unless File.exist?(expanded)
@@ -60,30 +69,40 @@ module Rubino
           return gate
         end
-        content = File.read(expanded)
-        unless content.include?(old_string)
+        # Read the RAW bytes (binary) for the read-modify-write so non-UTF-8
+        # bytes on untouched lines are preserved verbatim on write (#326); the
+        # model-supplied needle/replacement are matched/spliced as bytes too.
+        content    = read_for_edit(expanded)
+        old_bytes  = to_match_bytes(old_string)
+        new_bytes  = to_match_bytes(new_string)
+        unless content.include?(old_bytes)
+          # The model's mental model of the file was wrong (hallucinated text).
+          # Flag a recovery so its next read of this path bypasses dedup and
+          # returns FRESH bytes instead of a stale "[DUPLICATE READ]" nudge
+          # (r5 B3).
+          @read_tracker&.note_edit_failure(expanded)
           return "Error: old_string not found in file content. " \
                  "Make sure the text matches exactly including whitespace."
         end
         # Count occurrences
-        count = content.scan(old_string).size
+        count = content.scan(old_bytes).size
         if count > 1 && !replace_all
           return "Error: Found #{count} matches for old_string. " \
                  "Provide more surrounding context to make it unique, " \
                  "or set replace_all: true to replace all occurrences."
         end
-        # Perform replacement — use block form so new_string is treated as a
-        # literal string, not a pattern (avoids \0, \1, \& interpolation bugs).
-        new_content = if replace_all
-                        content.gsub(old_string) { new_string }
-                      else
-                        content.sub(old_string) { new_string }
-                      end
-        File.write(expanded, new_content)
+        new_content = replace_literal(content, old_bytes, new_bytes, replace_all)
+        # Crash-safe write: temp-in-same-dir + fsync + atomic rename, so a
+        # SIGINT/crash mid-flush can't destroy the user's existing file content
+        # (this is a read-modify-write of an existing file — HIGH-1).
+        Util::AtomicFile.write_atomic(expanded, new_content)
+        # Refresh-on-own-write: the bytes we just wrote are now authoritative,
+        # so the very next edit to this file passes the read-gate instead of
+        # "changed on disk since last read" (r5 B2).
+        @read_tracker&.note_write(expanded, new_content)
         replaced_count = replace_all ? count : 1
         added   = new_string.to_s.lines.size
@@ -93,10 +112,46 @@ module Rubino
                    "+#{added * replaced_count} −#{removed * replaced_count}",
           body: build_diff_preview(old_string, new_string, replaced_count),
           body_kind: :diff }
+      rescue StandardError => e
+        # Mirror WriteTool: a read-only/permission-denied target (Errno::EACCES)
+        # or any other filesystem error returns a clean, uniform message rather
+        # than leaking a raw exception/backtrace to the model.
+        "Error editing #{file_path}: #{e.message}"
       end
       private
+      # Validates the needle/replacement pair before any file I/O (#329a/b).
+      #
+      # Yields an error string when old_string is nil or empty, or when
+      # old_string and new_string are equal (a no-op edit); yields nil when the
+      # pair is usable. Lives outside #call so that method stays under the
+      # length gate.
+      def guard_args(old_string, new_string)
+        if old_string.nil? || old_string.empty?
+          "Error: old_string is empty. Provide the exact existing text to replace " \
+            "(use the write tool to create or fully replace a file)."
+        elsif old_string == new_string
+          "Error: old_string and new_string are identical — nothing to change."
+        end
+      end
+      # Extracts [file_path, old_string, new_string, replace_all] from the tool
+      # arguments. Each value is looked up under its String key first, then its
+      # Symbol key; replace_all falls back to false when neither is truthy.
+      # Centralised here so #call stays under the complexity gate.
+      def parse_args(arguments)
+        lookup = ->(key) { arguments[key] || arguments[key.to_sym] }
+        file_path, old_string, new_string, replace_all =
+          %w[file_path old_string new_string replace_all].map(&lookup)
+        [file_path, old_string, new_string, replace_all || false]
+      end
+      # Substitutes old_string with new_string in content: every occurrence
+      # when replace_all is truthy, only the first otherwise. The replacement
+      # is supplied through a block so it is inserted verbatim — sequences
+      # such as \0, \1 or \& in the new text are not treated as back-references.
+      # Returns the new String; content itself is not modified.
+      def replace_literal(content, old_string, new_string, replace_all)
+        return content.gsub(old_string) { new_string } if replace_all
+        content.sub(old_string) { new_string }
+      end
       # Inline diff shown between the `tool · edit` and `done · edit` headers.
       # Not a real unified diff — just `- old` then `+ new` so the user can
       # see at a glance what the model is changing without scrolling back to