rubino-agent 0.5.1 → 0.5.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.dockerignore +15 -0
  3. data/CHANGELOG.md +127 -0
  4. data/Dockerfile +56 -0
  5. data/agent.md +112 -0
  6. data/docs/api/v1.md +2 -0
  7. data/docs/commands.md +3 -6
  8. data/docs/configuration.md +13 -6
  9. data/docs/design/bg-shell-pty-port.md +88 -0
  10. data/docs/design/bg-shell-review-refinements.md +65 -0
  11. data/docs/design/bg-shell-ux.md +130 -0
  12. data/docs/oauth-providers.md +21 -0
  13. data/docs/tools.md +3 -12
  14. data/lib/rubino/agent/iteration_budget.rb +13 -0
  15. data/lib/rubino/agent/loop.rb +43 -5
  16. data/lib/rubino/agent/prompts/build.txt +10 -5
  17. data/lib/rubino/agent/prompts/memory_guidance.txt +5 -0
  18. data/lib/rubino/agent/prompts/tool_use_enforcement.txt +4 -0
  19. data/lib/rubino/agent/prompts/tool_use_enforcement_google.txt +9 -0
  20. data/lib/rubino/agent/prompts/tool_use_enforcement_openai.txt +48 -0
  21. data/lib/rubino/agent/runner.rb +55 -12
  22. data/lib/rubino/agent/tool_executor.rb +1 -1
  23. data/lib/rubino/api/operations/tasks/stop_operation.rb +0 -3
  24. data/lib/rubino/attachments/classify.rb +0 -1
  25. data/lib/rubino/cli/chat/completion_builder.rb +0 -8
  26. data/lib/rubino/cli/chat/idle_card_host.rb +6 -1
  27. data/lib/rubino/cli/chat_command.rb +324 -171
  28. data/lib/rubino/cli/commands.rb +5 -0
  29. data/lib/rubino/commands/built_ins.rb +0 -1
  30. data/lib/rubino/commands/executor.rb +1 -7
  31. data/lib/rubino/commands/handlers/agents.rb +55 -265
  32. data/lib/rubino/commands/handlers/status.rb +6 -3
  33. data/lib/rubino/compression/line_skeleton.rb +1 -1
  34. data/lib/rubino/compression/python_code_skeleton.rb +1 -1
  35. data/lib/rubino/compression/ruby_code_skeleton.rb +1 -1
  36. data/lib/rubino/compression/tree_sitter_code_skeleton.rb +1 -1
  37. data/lib/rubino/config/configuration.rb +47 -18
  38. data/lib/rubino/config/defaults.rb +57 -33
  39. data/lib/rubino/context/prompt_assembler.rb +89 -1
  40. data/lib/rubino/context/summary_builder.rb +0 -22
  41. data/lib/rubino/context/token_budget.rb +0 -5
  42. data/lib/rubino/errors.rb +2 -2
  43. data/lib/rubino/interaction/events.rb +2 -2
  44. data/lib/rubino/interaction/lifecycle.rb +54 -20
  45. data/lib/rubino/llm/anthropic_role_merge.rb +75 -0
  46. data/lib/rubino/llm/error_classifier.rb +34 -1
  47. data/lib/rubino/llm/fake_provider.rb +0 -4
  48. data/lib/rubino/llm/ruby_llm_adapter.rb +222 -59
  49. data/lib/rubino/llm/stream_tool_call_recovery.rb +91 -0
  50. data/lib/rubino/llm/tool_call_recovery.rb +177 -0
  51. data/lib/rubino/memory/sqlite_extraction_prompt.rb +0 -2
  52. data/lib/rubino/memory/store.rb +0 -19
  53. data/lib/rubino/security/pattern_matcher.rb +0 -2
  54. data/lib/rubino/security/redactor.rb +1 -1
  55. data/lib/rubino/security/secret_path.rb +16 -4
  56. data/lib/rubino/session/message.rb +12 -0
  57. data/lib/rubino/skills/registry.rb +16 -2
  58. data/lib/rubino/tools/background_tasks.rb +132 -228
  59. data/lib/rubino/tools/base.rb +1 -17
  60. data/lib/rubino/tools/grep_tool.rb +13 -1
  61. data/lib/rubino/tools/question_tool.rb +3 -4
  62. data/lib/rubino/tools/read_attachment_tool.rb +52 -54
  63. data/lib/rubino/tools/registry.rb +21 -72
  64. data/lib/rubino/tools/shell_entry_adapter.rb +97 -0
  65. data/lib/rubino/tools/shell_input_tool.rb +1 -1
  66. data/lib/rubino/tools/shell_kill_tool.rb +4 -4
  67. data/lib/rubino/tools/shell_registry.rb +178 -38
  68. data/lib/rubino/tools/shell_tool.rb +45 -5
  69. data/lib/rubino/tools/steer_tool.rb +3 -4
  70. data/lib/rubino/tools/task_result_tool.rb +4 -1
  71. data/lib/rubino/tools/task_stop_tool.rb +5 -7
  72. data/lib/rubino/tools/task_tool.rb +81 -35
  73. data/lib/rubino/tools/vision_tool.rb +1 -1
  74. data/lib/rubino/tools/write_tool.rb +22 -2
  75. data/lib/rubino/ui/agent_menu.rb +8 -4
  76. data/lib/rubino/ui/api.rb +11 -0
  77. data/lib/rubino/ui/bottom_composer.rb +240 -374
  78. data/lib/rubino/ui/cli.rb +381 -155
  79. data/lib/rubino/ui/input_history.rb +0 -5
  80. data/lib/rubino/ui/live_region.rb +18 -1
  81. data/lib/rubino/ui/markdown_renderer.rb +51 -4
  82. data/lib/rubino/ui/markdown_repair.rb +114 -0
  83. data/lib/rubino/ui/notifier.rb +4 -10
  84. data/lib/rubino/ui/stdout_proxy.rb +25 -10
  85. data/lib/rubino/ui/streaming_markdown.rb +79 -12
  86. data/lib/rubino/ui/subagent_cards.rb +18 -44
  87. data/lib/rubino/ui/tool_args_stream.rb +143 -0
  88. data/lib/rubino/update_check.rb +10 -2
  89. data/lib/rubino/util/ignore_rules.rb +18 -2
  90. data/lib/rubino/util/secrets_mask.rb +0 -9
  91. data/lib/rubino/version.rb +1 -1
  92. data/lib/rubino.rb +33 -7
  93. data/rubino-agent.gemspec +1 -0
  94. metadata +31 -5
  95. data/AGENTS.md +0 -97
  96. data/docs/agents.md +0 -224
  97. data/lib/rubino/jobs/handlers/summarize_session_job.rb +0 -21
  98. data/lib/rubino/tools/summarize_file_tool.rb +0 -194
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Rubino
  module LLM
    # Recovers tool calls that a model LEAKED AS TEXT into its assistant
    # content — instead of returning them in the structured tool_calls field —
    # and strips the leaked markup from the visible/saved text.
    #
    # WHY: some models are trained to emit tool calls as markup (XML/JSON in
    # tags) that a server-side parser is supposed to convert to structured
    # calls. When that conversion fails (e.g. MiniMax's Anthropic-compatible
    # shim), the raw markup + channel tokens leak into the text: the tool never
    # runs (the model "describes" instead of "does") and the junk poisons the
    # saved history so the model mimics its own broken format next turn.
    #
    # This mirrors the vLLM / SGLang per-model tool-call parsers and OpenHands'
    # fn_call_converter: parse the markup back into {name, arguments} and run it.
    # It covers the THREE format-families that account for ~80% of open models:
    #
    #   A) JSON-in-<tool_call>     — Hermes, Qwen2.5/Qwen3
    #   B) XML invoke/parameter    — MiniMax-M2/M3, Qwen3-Coder
    #   C) [TOOL_CALLS] JSON-array — Mistral / Mixtral
    #
    # Conventions copied from those parsers: peel reasoning <think> FIRST; use a
    # two-branch "closed | unterminated-to-EOF" match so a missing close tag is
    # still recovered.
    #
    # Every extractor follows one rule: markup is removed from the text ONLY
    # when at least one tool call was actually recovered from it. Markup that
    # does not decode to a call is left in place rather than silently dropped.
    module ToolCallRecovery
      # {text:} is the content with all recovered markup removed (what the user
      # sees and what gets saved); {calls:} is the list of recovered tool calls,
      # each {name:, arguments:} with arguments a Hash.
      Recovered = Struct.new(:text, :calls, keyword_init: true)

      # MiniMax-M3 prefixes this literal channel/namespace marker on EVERY tag
      # of a leaked tool call (a garbled render of its turn delimiters). Strip it
      # everywhere so the inner <tool_call>/<invoke> structure is parseable, and
      # so it never shows/poisons even when no call is recovered.
      MINIMAX_NS = "]<]minimax[>["

      # Reasoning blocks some models leak into content. Peeled before extraction
      # (mirrors the upstream reasoning-parser layer) so a tool call mentioned
      # INSIDE reasoning never fires and the scratchpad never shows.
      THINK_BLOCK = %r{<(think|thinking|reasoning|thought)\b[^>]*>.*?</\1>}im

      # Family B — one tool call: <invoke name="fn"> … </invoke> (closed, or
      # unterminated to EOF). The body holds the parameters.
      #
      # TOLERANT to MiniMax-M3's GARBLED leak: M3's namespace special token
      # `]<]minimax[>[` (id 200058) carries the literal chars ] < [ > which
      # collide with XML delimiters, so the gateway routinely mis-segments the
      # tag and drops `name=`, leaving forms like `<invoke">shell">` or
      # `invoke name="shell">` (documented: llama.cpp #24523, mlx-lm #1145). The
      # canonical vLLM/SGLang parsers hard-require `<invoke name="` and recover
      # NONE of these. So we eat any garbled punctuation between `invoke` and the
      # first identifier-like token, and capture that token as the tool name —
      # recovering the name from the well-formed AND every garbled variant.
      INVOKE = %r{
        <?invoke                             # optional leading < (M3 drops it too)
        [^A-Za-z0-9_]*                       # garbled punctuation: ">, ", stray brackets
        (?:name\s*=\s*)?                     # the name= attribute, when it survives
        ["']?\s*([A-Za-z_][\w.-]*)\s*["']?   # the tool name (bareword identifier)
        \s*>                                 # close of the opening tag
        (.*?)(?:</invoke>|\z)                # body up to </invoke> or EOF
      }imx

      # Family B parameters, two dialects inside an <invoke> body:
      #   <parameter name="key">value</parameter>   (MiniMax-M2)
      #   <key>value</key>                          (bare element = param name)
      PARAM_NAMED = %r{<parameter\s+name="([^"]+)"\s*>(.*?)(?:</parameter>|\z)}im
      PARAM_BARE = %r{<([a-zA-Z_][\w-]*)\s*>(.*?)</\1>}im

      # Family A — JSON in <tool_call> … </tool_call> (closed | unterminated).
      TOOL_CALL_JSON = %r{<tool_call>\s*(\{.*?\})\s*(?:</tool_call>|\z)}im

      # Family C — Mistral: [TOOL_CALLS] then a JSON array of calls.
      TOOL_CALLS_ARRAY = /\[TOOL_CALLS\]\s*(\[.*\])/im

      # Bare wrappers left over after the inner calls are extracted, removed so
      # no orphan tags remain in the cleaned text.
      ORPHAN_WRAPPERS = %r{</?(?:tool_call|minimax:tool_call|invoke|tool_calls)\b[^>]*>}im

      module_function

      # Entry point: cleans +content+ and recovers any leaked tool calls.
      #
      # Order matters: the MiniMax namespace marker and reasoning blocks are
      # removed first, then family B and A are extracted; family C is tried
      # only when neither of those produced a call. Orphan wrapper tags are
      # swept only when something was recovered, so ordinary prose that merely
      # mentions such a tag is not altered.
      #
      # @param content [#to_s] raw assistant content (nil is treated as "")
      # @return [Recovered] cleaned, stripped text plus the recovered calls
      def recover(content)
        text = content.to_s
        return Recovered.new(text: text, calls: []) if text.empty?

        text = text.gsub(MINIMAX_NS, "")
        text = text.gsub(THINK_BLOCK, "")

        calls = []
        text = extract_invoke!(text, calls)                            # B
        text = extract_tool_call_json!(text, calls)                    # A
        text = extract_tool_calls_array!(text, calls) if calls.empty?  # C

        text = text.gsub(ORPHAN_WRAPPERS, "") unless calls.empty?
        Recovered.new(text: text.strip, calls: calls)
      end

      # --- family B: <invoke name="fn"><param…></invoke> -------------------

      # Appends one call per INVOKE match to +calls+ and returns +text+ with
      # those matches removed. (+text+ itself is not mutated; only +calls+ is.)
      def extract_invoke!(text, calls)
        text.gsub(INVOKE) do
          name = Regexp.last_match(1)
          body = Regexp.last_match(2).to_s
          calls << { name: name, arguments: parse_invoke_params(body) }
          ""
        end
      end

      # Builds the arguments Hash from an <invoke> body, reading both the
      # <parameter name="k"> dialect and the bare <k>v</k> dialect.
      def parse_invoke_params(body)
        args = {}
        body.scan(PARAM_NAMED) { |k, v| args[k] = coerce(v.strip) }
        # Bare child elements as params, but only outside the <parameter …> ones
        # already consumed (and never the <parameter> tag itself).
        body.gsub(PARAM_NAMED, "").scan(PARAM_BARE) do |k, v|
          next if k.casecmp("parameter").zero?

          args[k] = coerce(v.strip)
        end
        args
      end

      # --- family A: <tool_call>{json}</tool_call> -------------------------

      # Appends a call for every <tool_call> whose JSON is an object with a
      # "name"; any other match is left in the text untouched.
      def extract_tool_call_json!(text, calls)
        text.gsub(TOOL_CALL_JSON) do
          whole = Regexp.last_match(0)
          obj = safe_json(Regexp.last_match(1))
          if obj.is_a?(Hash) && obj["name"]
            calls << { name: obj["name"], arguments: normalize_args(obj["arguments"]) }
            ""
          else
            whole # leave untouched if not a real call
          end
        end
      end

      # --- family C: [TOOL_CALLS][{...}] ----------------------------------

      # Appends every named call found in the JSON array that follows
      # [TOOL_CALLS]. The markup is removed only when at least one call was
      # recovered; an unparseable array, or one holding no {"name": …} object,
      # stays in the text (same rule as family A) so content is never dropped
      # without a tool actually being run.
      def extract_tool_calls_array!(text, calls)
        text.gsub(TOOL_CALLS_ARRAY) do
          whole = Regexp.last_match(0)
          parsed = safe_json(Regexp.last_match(1))
          entries = parsed.is_a?(Array) ? parsed.select { |c| c.is_a?(Hash) && c["name"] } : []
          next whole if entries.empty?

          entries.each do |c|
            calls << { name: c["name"], arguments: normalize_args(c["arguments"]) }
          end
          ""
        end
      end

      # --- helpers ---------------------------------------------------------

      # Normalizes a call's "arguments" value to a Hash:
      # a Hash passes through; a String holding a JSON object is decoded; any
      # other String is wrapped as { "value" => string }; everything else
      # (nil, numbers, arrays…) becomes {}.
      def normalize_args(args)
        case args
        when Hash then args
        when String
          parsed = safe_json(args) # parse once, reuse the result
          parsed.is_a?(Hash) ? parsed : { "value" => args }
        else {}
        end
      end

      # A leaked XML parameter value is always a string on the wire, so it is
      # returned unchanged (the tool schema coerces). No unwrapping of JSON
      # scalars is performed here; this is the single hook where such coercion
      # would be added.
      def coerce(value)
        value
      end

      # JSON.parse that yields nil instead of raising on malformed or non-String
      # input.
      def safe_json(str)
        JSON.parse(str)
      rescue JSON::ParserError, TypeError
        nil
      end
    end
  end
end
@@ -9,8 +9,6 @@ module Rubino
9
9
  # facts to `supersede`. The doctrine ("durable declarative facts, not
10
10
  # imperatives, not stale artifacts") is lifted from the reference MEMORY_GUIDANCE.
11
11
  module SqliteExtractionPrompt
12
- KINDS = %w[user_profile preference project fact env].freeze
13
-
14
12
  SYSTEM = <<~PROMPT
15
13
  You maintain a long-term memory of durable facts about the user and their project.
16
14
  You will see the latest conversation turn and the facts already in memory.
@@ -156,25 +156,6 @@ module Rubino
156
156
  .all
157
157
  end
158
158
 
159
- # Returns all memories within the character limit
160
- def within_limit(char_limit:)
161
- memories = @db[:memories]
162
- .order(Sequel.desc(:confidence), Sequel.desc(:updated_at))
163
- .all
164
-
165
- selected = []
166
- total_chars = 0
167
-
168
- memories.each do |m|
169
- break if total_chars + m[:content].length > char_limit
170
-
171
- selected << m
172
- total_chars += m[:content].length
173
- end
174
-
175
- selected
176
- end
177
-
178
159
  # Returns the total count of stored memories
179
160
  def count
180
161
  @db[:memories].count
@@ -14,8 +14,6 @@ module Rubino
14
14
  #
15
15
  # Actions: "allow", "ask", "deny"
16
16
  class PatternMatcher
17
- ACTIONS = %w[allow ask deny].freeze
18
-
19
17
  def initialize(rules: {})
20
18
  @rules = parse_rules(rules)
21
19
  end
@@ -11,7 +11,7 @@ module Rubino
11
11
  # `grep` match content (both with code_file:true to skip the ENV/JSON
12
12
  # assignment patterns that false-positive on source), `shell`/background
13
13
  # shell output (full patterns — `cat .env` / `printenv` leak keys), and
14
- # the `summarize_file` chunks shipped to the auxiliary model.
14
+ # converted-document content from `read_attachment` before it enters context.
15
15
  #
16
16
  # Short tokens (< 18 chars) are fully masked; longer ones preserve the
17
17
  # first 6 and last 4 characters for debuggability — matching Hermes'
@@ -138,12 +138,24 @@ module Rubino
138
138
  # directories (~/.ssh, ~/.aws). Mirrors Hermes' write-deny exact-path +
139
139
  # prefix split, applied here to the READ gate.
140
140
  def home_credential_path?(target)
141
- home = File.expand_path("~")
141
+ home = resolved_root(File.expand_path("~"))
142
142
  return true if BLOCKED_HOME_CREDENTIAL_FILES.any? { |rel| target == File.join(home, rel) }
143
143
 
144
144
  BLOCKED_HOME_CREDENTIAL_DIRS.any? { |rel| under_path?(target, File.join(home, rel)) }
145
145
  end
146
146
 
147
+ # Symlink-resolves a comparison ROOT through the SAME #canonical_path used
148
+ # on +target+, so the two sides match even when a system symlink sits on
149
+ # the path. Without this, macOS' symlinks defeat the match: `/etc` →
150
+ # `/private/etc` makes `/etc/sudoers` (and a non-existent `/etc/shadow`)
151
+ # resolve past SYSTEM_PATHS, and a `$TMPDIR`/$HOME under `/var` →
152
+ # `/private/var` slips the home credential dirs. Using canonical_path (not
153
+ # bare realpath) resolves the existing ancestor of a NON-existent root too,
154
+ # so `/etc/shadow` still classifies on a host where it doesn't exist.
155
def resolved_root(path)
  # Run the comparison root through the same canonicalisation as the target;
  # keep the literal path when canonical_path yields nothing.
  resolved = canonical_path(path)
  resolved || path
end
158
+
147
159
  # Resolved Rubino home dir, for the mcp-tokens/ subtree match above.
148
160
  def canonical_home
149
161
  home = Rubino.home_path
@@ -188,14 +200,14 @@ module Rubino
188
200
  # Absolute-path / prefix matches (SSH keys, cloud creds, /etc system
189
201
  # files), compared against the symlink-resolved target.
190
202
  def denied_path_category(target, base)
191
- home = File.expand_path("~")
203
+ home = resolved_root(File.expand_path("~"))
192
204
  HOME_PREFIXES.each do |rel|
193
205
  return "credential directory (~/#{rel})" if under_path?(target, File.join(home, rel))
194
206
  end
195
- return "system file (#{base})" if SYSTEM_PATHS.include?(target)
207
+ return "system file (#{base})" if SYSTEM_PATHS.any? { |p| target == resolved_root(p) }
196
208
 
197
209
  SYSTEM_PREFIXES.each do |prefix|
198
- return "system path (#{prefix})" if under_path?(target, prefix)
210
+ return "system path (#{prefix})" if under_path?(target, resolved_root(prefix))
199
211
  end
200
212
  nil
201
213
  end
@@ -65,6 +65,18 @@ module Rubino
65
65
  # Surface assistant tool_calls (persisted as metadata) so the adapter
66
66
  # can rebuild the toolUse block expected by strict providers on resume.
67
67
  msg[:tool_calls] = @metadata[:tool_calls] if @metadata.is_a?(Hash) && @metadata[:tool_calls]
68
+ # Replay the assistant's reasoning on every later turn (Hermes
69
+ # conversation_loop.py:940 "pass reasoning back to the API for ALL
70
+ # assistant messages"). The local server's KV cache, after generating a
71
+ # turn, holds the reasoning tokens; a replay that OMITS them diverges
72
+ # from that cache at the point the reasoning was generated, forcing a
73
+ # full re-prefill of the whole context every turn. Re-emitting the stored
74
+ # reasoning keeps the prompt prefix byte-stable so the server reuses the
75
+ # cache (verified: same assistant row got a KV hit WITH reasoning, a miss
76
+ # WITHOUT). The adapter rebuilds it into the wire `reasoning_content`.
77
+ if @role == "assistant" && @metadata.is_a?(Hash) && (reasoning = @metadata[:reasoning])
78
+ msg[:reasoning] = reasoning
79
+ end
68
80
  # #583: re-derive the error flag from the persisted outcome so a
69
81
  # denied/errored tool result replays to the model marked as an error
70
82
  # (is_error) on the next turn, exactly as it was sent live — never as a
@@ -256,13 +256,27 @@ module Rubino
256
256
  def project_local_path?(path)
257
257
  return false if path.to_s.start_with?("~", "/")
258
258
 
259
- expanded = File.expand_path(path.to_s)
260
- root = File.expand_path(Workspace.primary_root)
259
+ # Symlink-resolve BOTH sides: a cwd-relative skill path expands through
260
+ # Dir.pwd (realpath — macOS `/var/...` → `/private/var/...`), but
261
+ # primary_root may be unresolved. Comparing the two raw made a symlinked
262
+ # workspace look NON-project-local, so the trust gate failed to drop an
263
+ # untrusted repo's `.rubino/skills` — loading hostile project skills in
264
+ # an untrusted dir. Resolving both makes the prefix check hold.
265
+ expanded = canonical_dir(File.expand_path(path.to_s))
266
+ root = canonical_dir(File.expand_path(Workspace.primary_root))
261
267
  expanded == root || expanded.start_with?("#{root}#{File::SEPARATOR}")
262
268
  rescue StandardError
263
269
  # Conservative: if we can't tell, treat as project-local and drop it.
264
270
  true
265
271
  end
272
+
273
+ # Realpath-resolved directory, falling back to the literal path when it
274
+ # isn't on disk, so the prefix comparison above survives a symlinked root.
275
# Symlink-resolves +path+ for prefix comparison.
#
# * A path that exists is returned as its File.realpath.
# * A normalised absolute path that does not exist yet has its nearest
#   existing ancestor resolved and the missing tail re-appended, so a
#   not-yet-created path under a symlinked root still compares equal to a
#   resolved root instead of slipping past the prefix check.
# * Anything else (relative or un-normalised and not on disk), and any
#   error while resolving, falls back to the literal +path+.
def canonical_dir(path)
  return File.realpath(path) if File.exist?(path)
  # Only walk ancestors of an already-expanded absolute path; other inputs
  # keep the literal fallback.
  return path unless File.expand_path(path) == path

  missing = []
  probe = path
  until File.exist?(probe)
    parent = File.dirname(probe)
    return path if parent == probe # reached the top without finding anything

    missing.unshift(File.basename(probe))
    probe = parent
  end
  File.join(File.realpath(probe), *missing)
rescue StandardError
  path
end
266
280
  end
267
281
  end
268
282
  end