RubyGems - rubino-agent - Versions diffs - 0.5.1 → 0.5.2.2 - Mend

rubino-agent 0.5.1 → 0.5.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

checksums.yaml +4 -4
data/.dockerignore +15 -0
data/CHANGELOG.md +127 -0
data/Dockerfile +56 -0
data/agent.md +112 -0
data/docs/api/v1.md +2 -0
data/docs/commands.md +3 -6
data/docs/configuration.md +13 -6
data/docs/design/bg-shell-pty-port.md +88 -0
data/docs/design/bg-shell-review-refinements.md +65 -0
data/docs/design/bg-shell-ux.md +130 -0
data/docs/oauth-providers.md +21 -0
data/docs/tools.md +3 -12
data/lib/rubino/agent/iteration_budget.rb +13 -0
data/lib/rubino/agent/loop.rb +43 -5
data/lib/rubino/agent/prompts/build.txt +10 -5
data/lib/rubino/agent/prompts/memory_guidance.txt +5 -0
data/lib/rubino/agent/prompts/tool_use_enforcement.txt +4 -0
data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt +9 -0
data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt +48 -0
data/lib/rubino/agent/runner.rb +55 -12
data/lib/rubino/agent/tool_executor.rb +1 -1
data/lib/rubino/api/operations/tasks/stop_operation.rb +0 -3
data/lib/rubino/attachments/classify.rb +0 -1
data/lib/rubino/cli/chat/completion_builder.rb +0 -8
data/lib/rubino/cli/chat/idle_card_host.rb +6 -1
data/lib/rubino/cli/chat_command.rb +324 -171
data/lib/rubino/cli/commands.rb +5 -0
data/lib/rubino/commands/built_ins.rb +0 -1
data/lib/rubino/commands/executor.rb +1 -7
data/lib/rubino/commands/handlers/agents.rb +55 -265
data/lib/rubino/commands/handlers/status.rb +6 -3
data/lib/rubino/compression/line_skeleton.rb +1 -1
data/lib/rubino/compression/python_code_skeleton.rb +1 -1
data/lib/rubino/compression/ruby_code_skeleton.rb +1 -1
data/lib/rubino/compression/tree_sitter_code_skeleton.rb +1 -1
data/lib/rubino/config/configuration.rb +47 -18
data/lib/rubino/config/defaults.rb +57 -33
data/lib/rubino/context/prompt_assembler.rb +89 -1
data/lib/rubino/context/summary_builder.rb +0 -22
data/lib/rubino/context/token_budget.rb +0 -5
data/lib/rubino/errors.rb +2 -2
data/lib/rubino/interaction/events.rb +2 -2
data/lib/rubino/interaction/lifecycle.rb +54 -20
data/lib/rubino/llm/anthropic_role_merge.rb +75 -0
data/lib/rubino/llm/error_classifier.rb +34 -1
data/lib/rubino/llm/fake_provider.rb +0 -4
data/lib/rubino/llm/ruby_llm_adapter.rb +222 -59
data/lib/rubino/llm/stream_tool_call_recovery.rb +91 -0
data/lib/rubino/llm/tool_call_recovery.rb +177 -0
data/lib/rubino/memory/sqlite_extraction_prompt.rb +0 -2
data/lib/rubino/memory/store.rb +0 -19
data/lib/rubino/security/pattern_matcher.rb +0 -2
data/lib/rubino/security/redactor.rb +1 -1
data/lib/rubino/security/secret_path.rb +16 -4
data/lib/rubino/session/message.rb +12 -0
data/lib/rubino/skills/registry.rb +16 -2
data/lib/rubino/tools/background_tasks.rb +132 -228
data/lib/rubino/tools/base.rb +1 -17
data/lib/rubino/tools/grep_tool.rb +13 -1
data/lib/rubino/tools/question_tool.rb +3 -4
data/lib/rubino/tools/read_attachment_tool.rb +52 -54
data/lib/rubino/tools/registry.rb +21 -72
data/lib/rubino/tools/shell_entry_adapter.rb +97 -0
data/lib/rubino/tools/shell_input_tool.rb +1 -1
data/lib/rubino/tools/shell_kill_tool.rb +4 -4
data/lib/rubino/tools/shell_registry.rb +178 -38
data/lib/rubino/tools/shell_tool.rb +45 -5
data/lib/rubino/tools/steer_tool.rb +3 -4
data/lib/rubino/tools/task_result_tool.rb +4 -1
data/lib/rubino/tools/task_stop_tool.rb +5 -7
data/lib/rubino/tools/task_tool.rb +81 -35
data/lib/rubino/tools/vision_tool.rb +1 -1
data/lib/rubino/tools/write_tool.rb +22 -2
data/lib/rubino/ui/agent_menu.rb +8 -4
data/lib/rubino/ui/api.rb +11 -0
data/lib/rubino/ui/bottom_composer.rb +240 -374
data/lib/rubino/ui/cli.rb +381 -155
data/lib/rubino/ui/input_history.rb +0 -5
data/lib/rubino/ui/live_region.rb +18 -1
data/lib/rubino/ui/markdown_renderer.rb +51 -4
data/lib/rubino/ui/markdown_repair.rb +114 -0
data/lib/rubino/ui/notifier.rb +4 -10
data/lib/rubino/ui/stdout_proxy.rb +25 -10
data/lib/rubino/ui/streaming_markdown.rb +79 -12
data/lib/rubino/ui/subagent_cards.rb +18 -44
data/lib/rubino/ui/tool_args_stream.rb +143 -0
data/lib/rubino/update_check.rb +10 -2
data/lib/rubino/util/ignore_rules.rb +18 -2
data/lib/rubino/util/secrets_mask.rb +0 -9
data/lib/rubino/version.rb +1 -1
data/lib/rubino.rb +33 -7
data/rubino-agent.gemspec +1 -0
metadata +31 -5
data/AGENTS.md +0 -97
data/docs/agents.md +0 -224
data/lib/rubino/jobs/handlers/summarize_session_job.rb +0 -21
data/lib/rubino/tools/summarize_file_tool.rb +0 -194

data/lib/rubino/llm/tool_call_recovery.rb ADDED Viewed

@@ -0,0 +1,177 @@
+# frozen_string_literal: true
+require "json"
+module Rubino
+  module LLM
+    # Recovers tool calls that a model LEAKED AS TEXT into its assistant
+    # content — instead of returning them in the structured tool_calls field —
+    # and strips the leaked markup from the visible/saved text.
+    #
+    # WHY: some models are trained to emit tool calls as markup (XML/JSON in
+    # tags) that a server-side parser is supposed to convert to structured
+    # calls. When that conversion fails (e.g. MiniMax's Anthropic-compatible
+    # shim), the raw markup + channel tokens leak into the text: the tool never
+    # runs (the model "describes" instead of "does") and the junk poisons the
+    # saved history so the model mimics its own broken format next turn.
+    #
+    # This mirrors the vLLM / SGLang per-model tool-call parsers and OpenHands'
+    # fn_call_converter: parse the markup back into {name, arguments} and run it.
+    # It covers the THREE format-families that account for ~80% of open models:
+    #
+    #   A) JSON-in-<tool_call>     — Hermes, Qwen2.5/Qwen3
+    #   B) XML invoke/parameter    — MiniMax-M2/M3, Qwen3-Coder
+    #   C) [TOOL_CALLS] JSON-array — Mistral / Mixtral
+    #
+    # Conventions copied from those parsers: peel reasoning <think> FIRST; use a
+    # two-branch "closed | unterminated-to-EOF" match so a missing close tag is
+    # still recovered.
+    module ToolCallRecovery
+      # {text:} is the content with all recovered markup removed (what the user
+      # sees and what gets saved); {calls:} is the list of recovered tool calls,
+      # each {name:, arguments:} with arguments a Hash.
+      Recovered = Struct.new(:text, :calls, keyword_init: true)
+      # MiniMax-M3 prefixes this literal channel/namespace marker on EVERY tag
+      # of a leaked tool call (a garbled render of its turn delimiters). Strip it
+      # everywhere so the inner <tool_call>/<invoke> structure is parseable, and
+      # so it never shows/poisons even when no call is recovered.
+      MINIMAX_NS = "]<]minimax[>["
+      # Reasoning blocks some models leak into content. Peeled before extraction
+      # (mirrors the upstream reasoning-parser layer) so a tool call mentioned
+      # INSIDE reasoning never fires and the scratchpad never shows.
+      THINK_BLOCK = %r{<(think|thinking|reasoning|thought)\b[^>]*>.*?</\1>}im # closed blocks only: the matching </tag> is required
+      # Family B — one tool call: <invoke name="fn"> … </invoke> (closed, or
+      # unterminated to EOF). The body holds the parameters.
+      #
+      # TOLERANT to MiniMax-M3's GARBLED leak: M3's namespace special token
+      # `]<]minimax[>[` (id 200058) carries the literal chars ] < [ > which
+      # collide with XML delimiters, so the gateway routinely mis-segments the
+      # tag and drops `name=`, leaving forms like `<invoke">shell">` or
+      # `invoke name="shell">` (documented: llama.cpp #24523, mlx-lm #1145). The
+      # canonical vLLM/SGLang parsers hard-require `<invoke name="` and recover
+      # NONE of these. So we eat any garbled punctuation between `invoke` and the
+      # first identifier-like token, and capture that token as the tool name —
+      # recovering the name from the well-formed AND every garbled variant.
+      INVOKE = %r{
+        <?invoke                       # optional leading < (M3 drops it too)
+        [^A-Za-z0-9_]*                 # garbled punctuation: ">, ", stray brackets
+        (?:name\s*=\s*)?               # the name= attribute, when it survives
+        ["']?\s*([A-Za-z_][\w.-]*)\s*["']?  # the tool name (bareword identifier)
+        \s*>                           # close of the opening tag
+        (.*?)(?:</invoke>|\z)          # body up to </invoke> or EOF
+      }imx
+      # Family B parameters, two dialects inside an <invoke> body:
+      #   <parameter name="key">value</parameter>   (MiniMax-M2)
+      #   <key>value</key>                            (bare element = param name)
+      PARAM_NAMED = %r{<parameter\s+name="([^"]+)"\s*>(.*?)(?:</parameter>|\z)}im # name must be double-quoted; an unclosed value runs to the end of the body
+      PARAM_BARE  = %r{<([a-zA-Z_][\w-]*)\s*>(.*?)</\1>}im # closed form only: needs the matching </key>
+      # Family A — JSON in <tool_call> … </tool_call> (closed | unterminated).
+      TOOL_CALL_JSON = %r{<tool_call>\s*(\{.*?\})\s*(?:</tool_call>|\z)}im
+      # Family C — Mistral: [TOOL_CALLS] then a JSON array of calls.
+      TOOL_CALLS_ARRAY = /\[TOOL_CALLS\]\s*(\[.*\])/im # greedy: the capture runs to the LAST "]" in the text
+      # Bare wrappers left over after the inner calls are extracted, removed so
+      # no orphan tags remain in the cleaned text.
+      ORPHAN_WRAPPERS = %r{</?(?:tool_call|minimax:tool_call|invoke|tool_calls)\b[^>]*>}im
+      module_function
+      def recover(content) # => Recovered(text:, calls:); content goes through to_s, so nil behaves like ""
+        text = content.to_s
+        return Recovered.new(text: text, calls: []) if text.empty?
+        text = text.gsub(MINIMAX_NS, "") # dropped unconditionally, even when no call is recovered
+        text = text.gsub(THINK_BLOCK, "") # peel reasoning first so a call inside it is never extracted
+        calls = []
+        text = extract_invoke!(text, calls) # B: XML <invoke> calls
+        text = extract_tool_call_json!(text, calls) # A: JSON inside <tool_call>
+        text = extract_tool_calls_array!(text, calls) if calls.empty? # C: [TOOL_CALLS] array, tried only when A and B found nothing
+        text = text.gsub(ORPHAN_WRAPPERS, "") unless calls.empty? # wrapper tags are left alone when nothing was recovered
+        Recovered.new(text: text.strip, calls: calls)
+      end
+      # --- family B: <invoke name="fn"><param…></invoke> -------------------
+      def extract_invoke!(text, calls) # appends to +calls+ and returns a NEW string; +text+ itself is not mutated
+        text.gsub(INVOKE) do
+          name = Regexp.last_match(1) # capture 1: the tool name
+          body = Regexp.last_match(2).to_s # capture 2: everything up to </invoke> or EOF
+          calls << { name: name, arguments: parse_invoke_params(body) }
+          "" # the whole matched call is removed from the text
+        end
+      end
+      def parse_invoke_params(body) # => Hash of param name (String) => stripped value passed through coerce
+        args = {}
+        body.scan(PARAM_NAMED) { |k, v| args[k] = coerce(v.strip) }
+        # Bare child elements as params, but only outside the <parameter …> ones
+        # already consumed (and never the <parameter> tag itself).
+        body.gsub(PARAM_NAMED, "").scan(PARAM_BARE) do |k, v|
+          next if k.casecmp("parameter").zero?
+          args[k] = coerce(v.strip) # assigned after the named pass, so a bare element wins on a key collision
+        end
+        args
+      end
+      # --- family A: <tool_call>{json}</tool_call> -------------------------
+      def extract_tool_call_json!(text, calls) # appends to +calls+ and returns a NEW string; +text+ itself is not mutated
+        text.gsub(TOOL_CALL_JSON) do
+          json = Regexp.last_match(1)
+          obj  = safe_json(json) # nil when the captured span is not valid JSON
+          if obj.is_a?(Hash) && obj["name"]
+            calls << { name: obj["name"], arguments: normalize_args(obj["arguments"]) }
+            ""
+          else
+            Regexp.last_match(0) # leave untouched if not a real call
+          end
+        end
+      end
+      # --- family C: [TOOL_CALLS][{...}] ----------------------------------
+      def extract_tool_calls_array!(text, calls) # appends to +calls+ and returns a NEW string; +text+ itself is not mutated
+        text.gsub(TOOL_CALLS_ARRAY) do
+          arr = safe_json(Regexp.last_match(1))
+          if arr.is_a?(Array)
+            arr.each do |c|
+              next unless c.is_a?(Hash) && c["name"] # skip entries that are not objects with a "name"
+              calls << { name: c["name"], arguments: normalize_args(c["arguments"]) }
+            end
+            "" # marker + array are removed whenever the JSON is an Array, even if no entry qualified
+          else
+            Regexp.last_match(0) # not a JSON array: leave the original text in place
+          end
+        end
+      end
+      # --- helpers ---------------------------------------------------------
+      def normalize_args(args)
+        case args
+        when Hash   then args
+        when String then safe_json(args).is_a?(Hash) ? safe_json(args) : { "value" => args }
+        else {}
+        end
+      end
+      # A leaked XML parameter value is always a string on the wire; keep it a
+      # string (the tool schema coerces). Identity pass-through: nothing is unwrapped here.
+      def coerce(value)
+        value
+      end
+      def safe_json(str) # => the parsed JSON value, or nil when parsing fails
+        JSON.parse(str)
+      rescue JSON::ParserError, TypeError # malformed JSON, or a TypeError raised by JSON.parse
+        nil
+      end
+    end
+  end
+end

data/lib/rubino/memory/sqlite_extraction_prompt.rb CHANGED Viewed

@@ -9,8 +9,6 @@ module Rubino
     # facts to `supersede`. The doctrine ("durable declarative facts, not
     # imperatives, not stale artifacts") is lifted from the reference MEMORY_GUIDANCE.
     module SqliteExtractionPrompt
-      KINDS = %w[user_profile preference project fact env].freeze
       SYSTEM = <<~PROMPT
         You maintain a long-term memory of durable facts about the user and their project.
         You will see the latest conversation turn and the facts already in memory.

data/lib/rubino/memory/store.rb CHANGED Viewed

@@ -156,25 +156,6 @@ module Rubino
           .all
       end
-      # Returns all memories within the character limit
-      def within_limit(char_limit:)
-        memories = @db[:memories]
-                   .order(Sequel.desc(:confidence), Sequel.desc(:updated_at))
-                   .all
-        selected = []
-        total_chars = 0
-        memories.each do |m|
-          break if total_chars + m[:content].length > char_limit
-          selected << m
-          total_chars += m[:content].length
-        end
-        selected
-      end
       # Returns the total count of stored memories
       def count
         @db[:memories].count

data/lib/rubino/security/pattern_matcher.rb CHANGED Viewed

@@ -14,8 +14,6 @@ module Rubino
     #
     # Actions: "allow", "ask", "deny"
     class PatternMatcher
-      ACTIONS = %w[allow ask deny].freeze
       def initialize(rules: {})
         @rules = parse_rules(rules)
       end

data/lib/rubino/security/redactor.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module Rubino
     # `grep` match content (both with code_file:true to skip the ENV/JSON
     # assignment patterns that false-positive on source), `shell`/background
     # shell output (full patterns — `cat .env` / `printenv` leak keys), and
-    # the `summarize_file` chunks shipped to the auxiliary model.
+    # converted-document content from `read_attachment` before it enters context.
     #
     # Short tokens (< 18 chars) are fully masked; longer ones preserve the
     # first 6 and last 4 characters for debuggability — matching Hermes'

data/lib/rubino/security/secret_path.rb CHANGED Viewed

@@ -138,12 +138,24 @@ module Rubino
       # directories (~/.ssh, ~/.aws). Mirrors Hermes' write-deny exact-path +
       # prefix split, applied here to the READ gate.
       def home_credential_path?(target)
-        home = File.expand_path("~")
+        home = resolved_root(File.expand_path("~"))
         return true if BLOCKED_HOME_CREDENTIAL_FILES.any? { |rel| target == File.join(home, rel) }
         BLOCKED_HOME_CREDENTIAL_DIRS.any? { |rel| under_path?(target, File.join(home, rel)) }
       end
+      # Symlink-resolves a comparison ROOT through the SAME #canonical_path used
+      # on +target+, so the two sides match even when a system symlink sits on
+      # the path. Without this, macOS' symlinks defeat the match: `/etc` →
+      # `/private/etc` makes `/etc/sudoers` (and a non-existent `/etc/shadow`)
+      # resolve past SYSTEM_PATHS, and a `$TMPDIR`/$HOME under `/var` →
+      # `/private/var` slips the home credential dirs. Using canonical_path (not
+      # bare realpath) resolves the existing ancestor of a NON-existent root too,
+      # so `/etc/shadow` still classifies on a host where it doesn't exist.
+      def resolved_root(path)
+        canonical_path(path) || path # falls back to the raw path when canonical_path returns nil/false
+      end
       # Resolved Rubino home dir, for the mcp-tokens/ subtree match above.
       def canonical_home
         home = Rubino.home_path
@@ -188,14 +200,14 @@ module Rubino
       # Absolute-path / prefix matches (SSH keys, cloud creds, /etc system
       # files), compared against the symlink-resolved target.
       def denied_path_category(target, base)
-        home = File.expand_path("~")
+        home = resolved_root(File.expand_path("~"))
         HOME_PREFIXES.each do |rel|
           return "credential directory (~/#{rel})" if under_path?(target, File.join(home, rel))
         end
-        return "system file (#{base})" if SYSTEM_PATHS.include?(target)
+        return "system file (#{base})" if SYSTEM_PATHS.any? { |p| target == resolved_root(p) }
         SYSTEM_PREFIXES.each do |prefix|
-          return "system path (#{prefix})" if under_path?(target, prefix)
+          return "system path (#{prefix})" if under_path?(target, resolved_root(prefix))
         end
         nil
       end

data/lib/rubino/session/message.rb CHANGED Viewed

@@ -65,6 +65,18 @@ module Rubino
         # Surface assistant tool_calls (persisted as metadata) so the adapter
         # can rebuild the toolUse block expected by strict providers on resume.
         msg[:tool_calls] = @metadata[:tool_calls] if @metadata.is_a?(Hash) && @metadata[:tool_calls]
+        # Replay the assistant's reasoning on every later turn (Hermes
+        # conversation_loop.py:940 "pass reasoning back to the API for ALL
+        # assistant messages"). The local server's KV cache, after generating a
+        # turn, holds the reasoning tokens; a replay that OMITS them diverges
+        # from that cache at the point the reasoning was generated, forcing a
+        # full re-prefill of the whole context every turn. Re-emitting the stored
+        # reasoning keeps the prompt prefix byte-stable so the server reuses the
+        # cache (verified: same assistant row got a KV hit WITH reasoning, a miss
+        # WITHOUT). The adapter rebuilds it into the wire `reasoning_content`.
+        if @role == "assistant" && @metadata.is_a?(Hash) && (reasoning = @metadata[:reasoning])
+          msg[:reasoning] = reasoning
+        end
         # #583: re-derive the error flag from the persisted outcome so a
         # denied/errored tool result replays to the model marked as an error
         # (is_error) on the next turn, exactly as it was sent live — never as a

data/lib/rubino/skills/registry.rb CHANGED Viewed

@@ -256,13 +256,27 @@ module Rubino
       def project_local_path?(path)
         return false if path.to_s.start_with?("~", "/")
-        expanded = File.expand_path(path.to_s)
-        root = File.expand_path(Workspace.primary_root)
+        # Symlink-resolve BOTH sides: a cwd-relative skill path expands through
+        # Dir.pwd (realpath — macOS `/var/...` → `/private/var/...`), but
+        # primary_root may be unresolved. Comparing the two raw made a symlinked
+        # workspace look NON-project-local, so the trust gate failed to drop an
+        # untrusted repo's `.rubino/skills` — loading hostile project skills in
+        # an untrusted dir. Resolving both makes the prefix check hold.
+        expanded = canonical_dir(File.expand_path(path.to_s))
+        root = canonical_dir(File.expand_path(Workspace.primary_root))
         expanded == root || expanded.start_with?("#{root}#{File::SEPARATOR}")
       rescue StandardError
         # Conservative: if we can't tell, treat as project-local and drop it.
         true
       end
+      # Realpath-resolved directory, falling back to the literal path when it
+      # isn't on disk, so the prefix comparison above survives a symlinked root.
+      def canonical_dir(path)
+        (File.realpath(path) if File.exist?(path)) || path # the `if` yields nil for a missing path, so `|| path` applies
+      rescue StandardError # any StandardError raised above also falls back to the literal path
+        path
+      end
     end
   end
 end