rubino-agent 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +11 -2
  3. data/AGENTS.md +1 -1
  4. data/CHANGELOG.md +172 -5
  5. data/CONTRIBUTING.md +10 -1
  6. data/README.md +14 -5
  7. data/Rakefile +31 -0
  8. data/docs/agents.md +42 -23
  9. data/docs/architecture.md +2 -2
  10. data/docs/commands.md +35 -3
  11. data/docs/configuration.md +20 -23
  12. data/docs/getting-started.md +5 -3
  13. data/docs/security.md +16 -5
  14. data/docs/skills.md +31 -0
  15. data/docs/troubleshooting.md +1 -1
  16. data/exe/rubino +16 -2
  17. data/install.sh +721 -59
  18. data/lib/rubino/active_agent.rb +73 -0
  19. data/lib/rubino/agent/action_claim_guard.rb +881 -0
  20. data/lib/rubino/agent/agent_registry.rb +5 -2
  21. data/lib/rubino/agent/definition.rb +1 -9
  22. data/lib/rubino/agent/fallback_chain.rb +0 -6
  23. data/lib/rubino/agent/iteration_budget.rb +109 -3
  24. data/lib/rubino/agent/loop.rb +476 -20
  25. data/lib/rubino/agent/model_call_runner.rb +81 -3
  26. data/lib/rubino/agent/prompts/build.txt +22 -5
  27. data/lib/rubino/agent/response_validator.rb +8 -0
  28. data/lib/rubino/agent/runner.rb +133 -8
  29. data/lib/rubino/agent/tool_executor.rb +166 -14
  30. data/lib/rubino/agent/truncation_continuation.rb +4 -1
  31. data/lib/rubino/api/server.rb +19 -0
  32. data/lib/rubino/attachments/classify.rb +35 -17
  33. data/lib/rubino/boot/config_guard.rb +71 -0
  34. data/lib/rubino/cli/chat/completion_builder.rb +42 -6
  35. data/lib/rubino/cli/chat/idle_card_host.rb +7 -1
  36. data/lib/rubino/cli/chat/session_resolver.rb +87 -21
  37. data/lib/rubino/cli/chat_command.rb +1189 -50
  38. data/lib/rubino/cli/commands.rb +282 -2
  39. data/lib/rubino/cli/config_command.rb +68 -8
  40. data/lib/rubino/cli/doctor_command.rb +204 -12
  41. data/lib/rubino/cli/jobs_command.rb +12 -0
  42. data/lib/rubino/cli/memory_command.rb +53 -20
  43. data/lib/rubino/cli/onboarding_wizard.rb +79 -6
  44. data/lib/rubino/cli/session_command.rb +172 -18
  45. data/lib/rubino/cli/setup_command.rb +131 -8
  46. data/lib/rubino/cli/skills_command.rb +183 -9
  47. data/lib/rubino/cli/trust_gate.rb +16 -7
  48. data/lib/rubino/commands/built_ins.rb +2 -0
  49. data/lib/rubino/commands/command.rb +12 -2
  50. data/lib/rubino/commands/executor.rb +149 -12
  51. data/lib/rubino/commands/handlers/agent_switch.rb +100 -0
  52. data/lib/rubino/commands/handlers/agents.rb +156 -41
  53. data/lib/rubino/commands/handlers/config.rb +4 -1
  54. data/lib/rubino/commands/handlers/help.rb +113 -14
  55. data/lib/rubino/commands/handlers/memory.rb +15 -5
  56. data/lib/rubino/commands/handlers/sessions.rb +26 -3
  57. data/lib/rubino/commands/handlers/status.rb +9 -4
  58. data/lib/rubino/commands/loader.rb +12 -0
  59. data/lib/rubino/config/configuration.rb +86 -24
  60. data/lib/rubino/config/defaults.rb +140 -33
  61. data/lib/rubino/config/loader.rb +62 -12
  62. data/lib/rubino/config/validator.rb +341 -0
  63. data/lib/rubino/config/writer.rb +123 -31
  64. data/lib/rubino/context/compressor.rb +184 -22
  65. data/lib/rubino/context/environment_inspector.rb +2 -2
  66. data/lib/rubino/context/file_discovery.rb +2 -2
  67. data/lib/rubino/context/message_boundary.rb +27 -1
  68. data/lib/rubino/context/project_languages.rb +90 -0
  69. data/lib/rubino/context/prompt_assembler.rb +105 -22
  70. data/lib/rubino/context/summary_builder.rb +45 -4
  71. data/lib/rubino/context/token_budget.rb +36 -11
  72. data/lib/rubino/context/token_estimate.rb +45 -0
  73. data/lib/rubino/context/tool_result_pruner.rb +81 -0
  74. data/lib/rubino/database/connection.rb +154 -3
  75. data/lib/rubino/database/migrations/001_create_initial_schema.rb +314 -40
  76. data/lib/rubino/database/migrator.rb +98 -5
  77. data/lib/rubino/documents/cap_exceeded.rb +13 -0
  78. data/lib/rubino/documents/converters/csv.rb +4 -3
  79. data/lib/rubino/documents/converters/docx.rb +29 -5
  80. data/lib/rubino/documents/converters/html.rb +5 -1
  81. data/lib/rubino/documents/converters/json.rb +2 -1
  82. data/lib/rubino/documents/converters/pdf.rb +11 -2
  83. data/lib/rubino/documents/converters/plain.rb +2 -1
  84. data/lib/rubino/documents/converters/pptx.rb +11 -2
  85. data/lib/rubino/documents/converters/xlsx.rb +35 -4
  86. data/lib/rubino/documents/converters/xml.rb +2 -1
  87. data/lib/rubino/documents/limits.rb +210 -0
  88. data/lib/rubino/documents.rb +10 -3
  89. data/lib/rubino/errors.rb +36 -5
  90. data/lib/rubino/interaction/cancel_token.rb +19 -3
  91. data/lib/rubino/interaction/events.rb +13 -0
  92. data/lib/rubino/interaction/lifecycle.rb +99 -13
  93. data/lib/rubino/interaction/polishing.rb +176 -0
  94. data/lib/rubino/jobs/cron_job_repository.rb +5 -8
  95. data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +11 -0
  96. data/lib/rubino/jobs/handlers/distill_skill_job.rb +65 -9
  97. data/lib/rubino/jobs/queue.rb +63 -8
  98. data/lib/rubino/jobs/runner.rb +24 -6
  99. data/lib/rubino/jobs/worker.rb +0 -4
  100. data/lib/rubino/llm/adapter_response.rb +47 -4
  101. data/lib/rubino/llm/credential_check.rb +15 -16
  102. data/lib/rubino/llm/error_classifier.rb +89 -1
  103. data/lib/rubino/llm/inline_think_filter.rb +69 -12
  104. data/lib/rubino/llm/request.rb +30 -3
  105. data/lib/rubino/llm/ruby_llm_adapter.rb +394 -46
  106. data/lib/rubino/llm/tool_bridge.rb +113 -9
  107. data/lib/rubino/mcp/manager.rb +18 -1
  108. data/lib/rubino/mcp/mcp_tool_wrapper.rb +14 -3
  109. data/lib/rubino/memory/aux_retry.rb +107 -0
  110. data/lib/rubino/memory/backends/sqlite.rb +73 -44
  111. data/lib/rubino/memory/backends.rb +23 -7
  112. data/lib/rubino/memory/salience_gate.rb +103 -0
  113. data/lib/rubino/memory/sqlite_extraction.rb +70 -0
  114. data/lib/rubino/memory/sqlite_extraction_prompt.rb +11 -0
  115. data/lib/rubino/memory/store.rb +33 -5
  116. data/lib/rubino/memory/threat_scanner.rb +52 -0
  117. data/lib/rubino/output/cost.rb +52 -0
  118. data/lib/rubino/output/headless_block_latch.rb +53 -0
  119. data/lib/rubino/output/result_serializer.rb +222 -0
  120. data/lib/rubino/output/turn_recorder.rb +77 -0
  121. data/lib/rubino/security/approval_policy.rb +227 -32
  122. data/lib/rubino/security/command_allowlist.rb +79 -4
  123. data/lib/rubino/security/doom_loop_detector.rb +21 -2
  124. data/lib/rubino/security/hardline_guard.rb +189 -16
  125. data/lib/rubino/security/pattern_matcher.rb +28 -5
  126. data/lib/rubino/security/prefix_deriver.rb +25 -6
  127. data/lib/rubino/security/readonly_commands.rb +145 -5
  128. data/lib/rubino/security/secret_path.rb +134 -0
  129. data/lib/rubino/security/url_safety.rb +255 -0
  130. data/lib/rubino/session/repository.rb +212 -11
  131. data/lib/rubino/session/store.rb +139 -14
  132. data/lib/rubino/skills/installer.rb +230 -0
  133. data/lib/rubino/skills/prompt_index.rb +2 -2
  134. data/lib/rubino/skills/registry.rb +52 -1
  135. data/lib/rubino/skills/skill.rb +64 -3
  136. data/lib/rubino/skills/skill_tool.rb +16 -5
  137. data/lib/rubino/tools/background_tasks.rb +157 -13
  138. data/lib/rubino/tools/base.rb +204 -3
  139. data/lib/rubino/tools/edit_tool.rb +73 -18
  140. data/lib/rubino/tools/glob_tool.rb +48 -9
  141. data/lib/rubino/tools/grep_tool.rb +103 -9
  142. data/lib/rubino/tools/multi_edit_tool.rb +64 -9
  143. data/lib/rubino/tools/patch_tool.rb +5 -0
  144. data/lib/rubino/tools/read_attachment_tool.rb +3 -1
  145. data/lib/rubino/tools/read_tool.rb +33 -15
  146. data/lib/rubino/tools/read_tracker.rb +153 -35
  147. data/lib/rubino/tools/registry.rb +113 -12
  148. data/lib/rubino/tools/result.rb +9 -1
  149. data/lib/rubino/tools/ruby_tool.rb +0 -0
  150. data/lib/rubino/tools/shell_registry.rb +70 -0
  151. data/lib/rubino/tools/shell_tool.rb +40 -1
  152. data/lib/rubino/tools/summarize_file_tool.rb +6 -0
  153. data/lib/rubino/tools/task_stop_tool.rb +10 -16
  154. data/lib/rubino/tools/task_tool.rb +36 -8
  155. data/lib/rubino/tools/vision_tool.rb +5 -0
  156. data/lib/rubino/tools/webfetch_tool.rb +39 -7
  157. data/lib/rubino/tools/websearch_tool.rb +92 -30
  158. data/lib/rubino/tools/write_tool.rb +23 -4
  159. data/lib/rubino/ui/api.rb +10 -1
  160. data/lib/rubino/ui/base.rb +11 -0
  161. data/lib/rubino/ui/bottom_composer.rb +382 -74
  162. data/lib/rubino/ui/cli.rb +515 -83
  163. data/lib/rubino/ui/completion_menu.rb +11 -7
  164. data/lib/rubino/ui/headless_trace.rb +63 -0
  165. data/lib/rubino/ui/live_region.rb +70 -7
  166. data/lib/rubino/ui/markdown_renderer.rb +142 -7
  167. data/lib/rubino/ui/notifier.rb +0 -2
  168. data/lib/rubino/ui/null.rb +52 -5
  169. data/lib/rubino/ui/paste_store.rb +16 -2
  170. data/lib/rubino/ui/queued_indicators.rb +6 -1
  171. data/lib/rubino/ui/status_bar.rb +61 -7
  172. data/lib/rubino/ui/streaming_markdown.rb +59 -6
  173. data/lib/rubino/ui/subagent_view.rb +29 -4
  174. data/lib/rubino/ui/tool_label.rb +52 -0
  175. data/lib/rubino/update_check.rb +39 -4
  176. data/lib/rubino/util/atomic_file.rb +117 -0
  177. data/lib/rubino/util/ignore_rules.rb +120 -0
  178. data/lib/rubino/util/output.rb +229 -12
  179. data/lib/rubino/util/secrets_mask.rb +70 -7
  180. data/lib/rubino/util/spill_store.rb +153 -0
  181. data/lib/rubino/version.rb +1 -1
  182. data/lib/rubino/workspace.rb +9 -1
  183. data/lib/rubino.rb +191 -7
  184. data/rubino-agent.gemspec +1 -0
  185. data/skills/ruby-expert/SKILL.md +1 -0
  186. metadata +42 -12
  187. data/lib/rubino/agent/router.rb +0 -65
  188. data/lib/rubino/database/migrations/002_create_runs.rb +0 -45
  189. data/lib/rubino/database/migrations/003_create_skill_states.rb +0 -15
  190. data/lib/rubino/database/migrations/004_create_cron_jobs.rb +0 -36
  191. data/lib/rubino/database/migrations/005_create_oauth_connections.rb +0 -27
  192. data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +0 -34
  193. data/lib/rubino/database/migrations/007_create_messages_fts.rb +0 -59
  194. data/lib/rubino/database/migrations/008_create_memory_facts.rb +0 -75
  195. data/lib/rubino/database/migrations/009_create_memory_graph.rb +0 -55
  196. data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +0 -20
@@ -29,6 +29,11 @@ module Rubino
29
29
  max_results: {
30
30
  type: "integer",
31
31
  description: "Maximum number of results (default: 100)"
32
+ },
33
+ include_ignored: {
34
+ type: "boolean",
35
+ description: "Include files git ignores (.gitignore, build artifacts). " \
36
+ "Default false — results honor .gitignore like grep does."
32
37
  }
33
38
  },
34
39
  required: %w[pattern]
@@ -40,18 +45,20 @@ module Rubino
40
45
  end
41
46
 
42
47
  def call(arguments)
43
- pattern = arguments["pattern"] || arguments[:pattern]
44
- path = arguments["path"] || arguments[:path] || "."
48
+ pattern = arguments["pattern"] || arguments[:pattern]
49
+ path = arguments["path"] || arguments[:path] || "."
45
50
  max_results = arguments["max_results"] || arguments[:max_results] || 100
51
+ include_ignored = arguments["include_ignored"] || arguments[:include_ignored] || false
46
52
 
47
- expanded_path = File.expand_path(path)
48
- return "Error: Directory not found: #{path}" unless File.directory?(expanded_path)
53
+ # Glob is BROAD (#406): it resolves any path like Hermes/Claude/Codex.
54
+ # The read allowlist was never the data-loss boundary (that's on the
55
+ # WRITE path); glob only lists file PATHS (no content), so there is
56
+ # nothing to denylist here — secret protection lives on read/grep.
57
+ expanded_path = File.expand_path(path, workspace_root)
58
+ full_pattern = resolve_pattern(pattern, path, expanded_path)
59
+ return full_pattern if full_pattern.is_a?(String) && full_pattern.start_with?("Error:")
49
60
 
50
- full_pattern = File.join(expanded_path, pattern)
51
- files = Dir.glob(full_pattern)
52
- .select { |f| File.file?(f) }
53
- .sort_by { |f| -File.mtime(f).to_i }
54
- .first(max_results)
61
+ files = matching_files(full_pattern, expanded_path, max_results, include_ignored)
55
62
 
56
63
  if files.empty?
57
64
  "No files matched pattern: #{pattern}"
@@ -64,6 +71,38 @@ module Rubino
64
71
  body_kind: :plain }
65
72
  end
66
73
  end
74
+
75
+ private
76
+
77
# Lists the regular files matched by +full_pattern+, newest first.
#
# Directories are discarded. Unless +include_ignored+ is truthy, every path
# that Util::IgnoreRules#ignored? flags (checked against +expanded_path+) is
# discarded too, which keeps glob consistent with grep's rg path (#375c);
# passing include_ignored: true opts back into the raw set. The remaining
# paths are ordered by modification time, most recent first, and capped at
# +max_results+ entries.
def matching_files(full_pattern, expanded_path, max_results, include_ignored)
  # The rules object is only built when filtering is actually requested.
  rules = Util::IgnoreRules.new unless include_ignored

  candidates = Dir.glob(full_pattern).select { |entry| File.file?(entry) }
  candidates = candidates.reject { |entry| rules.ignored?(entry, expanded_path) } if rules

  newest_first = candidates.sort_by { |entry| -File.mtime(entry).to_i }
  newest_first.first(max_results)
end
89
+
90
# Builds the pattern handed to Dir.glob, or an "Error:" string the caller
# returns to the model verbatim.
#
# * A missing or empty +pattern+ yields "Error: pattern is required" instead
#   of letting File.join raise TypeError on nil (or silently globbing the
#   base directory itself for "").
# * An ABSOLUTE pattern (e.g. `/work/shopkit/cart.py`) already names its
#   target, so it is globbed as-is. Joining it onto the base produced a
#   doubled path (`File.join("/work", "/work/…")` → `/work/work/…`) that
#   matched nothing (r6 F1).
# * A RELATIVE pattern is anchored at +expanded_path+, which the caller
#   resolves against the workspace root rather than Dir.pwd so glob agrees
#   with read/edit (r6 F3). When that base directory does not exist the
#   result is "Error: Directory not found: <path>", quoting the +path+ the
#   model originally supplied.
def resolve_pattern(pattern, path, expanded_path)
  requested = pattern.to_s
  return "Error: pattern is required" if requested.empty?
  return requested if requested.start_with?(File::SEPARATOR)
  return "Error: Directory not found: #{path}" unless File.directory?(expanded_path)

  File.join(expanded_path, requested)
end
67
106
  end
68
107
  end
69
108
  end
@@ -70,7 +70,19 @@ module Rubino
70
70
  before = (ctx || arguments["before"] || arguments[:before] || 0).to_i.clamp(0, 50)
71
71
  after = (ctx || arguments["after"] || arguments[:after] || 0).to_i.clamp(0, 50)
72
72
 
73
- expanded_path = File.expand_path(path)
73
+ expanded_path = expand_workspace_path(path)
74
+ # Search is BROAD (#406): grep resolves any NON-secret path like
75
+ # Hermes/Claude/Codex. A grep whose `path` is a SECRET file directly
76
+ # (#446) is gated UPSTREAM by Security::ApprovalPolicy#decide (→ :ask),
77
+ # exactly like read — so it is NOT refused here; an approved grep of a
78
+ # secret file proceeds, a denied/headless one never reaches #call.
79
+ #
80
+ # F2: a DIRECTORY grep with `include: "*.env"` is NOT a secret target —
81
+ # the gate above can't see it — but rg's --glob OVERRIDES the default
82
+ # hidden-exclusion and would LEAK the matched .env lines. We therefore
83
+ # post-filter the RESULTS (see #filter_secret_hits): any result line that
84
+ # points at a secret file is stripped, so secrets never escape via an
85
+ # include-glob regardless of approval.
74
86
  return "Error: Path not found: #{path}" unless File.exist?(expanded_path)
75
87
 
76
88
  if ripgrep_available?
@@ -86,6 +98,28 @@ module Rubino
86
98
  system("which rg > /dev/null 2>&1")
87
99
  end
88
100
 
101
# True when an rg output line points at a secret/credential file, so the
# caller can strip it from a directory search and an include-glob cannot
# leak a secret (F2).
#
# Line shapes handled (ripgrep's default field separators):
#   * match line    `<file>:<lineno>:<text>`
#   * context line  `<file>-<lineno>-<text>`  (emitted for -A/-B/-C)
#   * `--`          group separator; carries no path and is always kept
#
# The context shape matters: requiring ':' before the line number let the
# lines AROUND a hit in a secret file slip through unfiltered.
#
# A path may itself contain `-<digits>-` (e.g. `logs-2024-01/.env`), so the
# first separator-looking run is not necessarily the real boundary. Every
# candidate boundary is therefore tried and the line is treated as secret
# as soon as ANY candidate prefix classifies as a secret path. This fails
# closed: an ambiguous line is dropped rather than leaked.
#
# When the search root is a single FILE rg omits the path prefix entirely;
# such a bare line yields no candidate and is reported as non-secret (that
# case is the directly-targeted grep, which the caller does not filter).
#
# @param line [String, nil] one raw line of rg output
# @param search_root [String] path rg was given; relative result paths are
#   resolved against it before classification
# @return [Boolean]
def secret_result_line?(line, search_root)
  return false if line.nil?

  # rg echoes file content verbatim; a non-UTF-8 byte would make the regex
  # match below raise, so scrub it first.
  text = line.valid_encoding? ? line : line.scrub
  return false if text.start_with?("--")

  boundary = /:\d+:|-\d+-/
  # A real path prefix cannot plausibly be longer than this (typical
  # PATH_MAX); the bound stops a huge content line from being rescanned.
  path_limit = 4096
  cursor = 0

  while (hit = text.match(boundary, cursor)) && hit.begin(0) <= path_limit
    cursor = hit.begin(0) + 1
    prefix = hit.pre_match
    next if prefix.empty?

    resolved = begin
      prefix.start_with?(File::SEPARATOR) ? prefix : File.expand_path(prefix, search_root)
    rescue ArgumentError
      # e.g. a `~unknownuser` prefix: classify the raw text instead.
      prefix
    end
    return true unless secret_path_category(resolved).nil?
  end

  false
end
122
+
89
123
  def search_with_ripgrep(pattern, path, include_pattern, max_results, before, after)
90
124
  # Build argv array and use Open3 to avoid shell injection — pattern
91
125
  # and path are passed as separate arguments, never interpolated into a
@@ -104,29 +138,75 @@ module Rubino
104
138
  argv += ["-A", after.to_s] if after.positive?
105
139
  argv += [pattern, path]
106
140
 
107
- output = IO.popen(argv, err: %i[child out], &:read)
141
+ # STREAM rg's output line-by-line and STOP after max_results (#375a).
142
+ # `IO.popen(argv).read` buffered the ENTIRE rg output — a pattern that
143
+ # matches a huge file produced +100MB in memory just to `.first(50)` it.
144
+ # Read until we have max_results+1 lines (the +1 detects "there are
145
+ # more"), then close the pipe (SIGPIPE stops rg) so neither memory nor
146
+ # CPU scale with the match count.
147
+ # F2: filter secret hits ONLY for a DIRECTORY search (an include-glob
148
+ # like `*.env` can pull a credential file in). A grep whose path is the
149
+ # secret FILE itself was already approved by the upstream gate, so its
150
+ # own lines must be returned, not stripped.
151
+ filter_secrets = File.directory?(path)
152
+ lines = []
153
+ more_exist = false
154
+ IO.popen(argv, err: %i[child out]) do |io|
155
+ io.each_line do |line|
156
+ # Drop a hit that points at a secret file BEFORE it counts toward the
157
+ # cap, so a result set of only-secrets doesn't crowd out the cap with
158
+ # content we'll never return.
159
+ next if filter_secrets && secret_result_line?(line, path)
160
+
161
+ if lines.size >= max_results
162
+ more_exist = true
163
+ break
164
+ end
165
+ lines << line
166
+ end
167
+ io.close # close early → rg gets SIGPIPE and stops scanning
168
+ end
108
169
  status = $?.exitstatus
170
+ # When WE deliberately close the pipe early after hitting the cap
171
+ # (#391/regression #375), rg is killed mid-scan and exits non-zero —
172
+ # and on some platforms the broken-pipe exit is reported as 1, the SAME
173
+ # code rg uses for a genuine "no matches". The old `status != 1` guard
174
+ # therefore EXCLUDED that case and fell through to the `status == 1`
175
+ # branch, dropping the 50 matches we already collected and reporting
176
+ # "No matches". Whenever we collected matches AND closed early (more_exist),
177
+ # it is unambiguously a success regardless of rg's exit code; a real
178
+ # "no matches" is 0 collected lines and we never closed early, so it
179
+ # still reaches the status==1 branch and reports correctly.
180
+ status = 0 if lines.any? && (more_exist || status != 1)
109
181
 
110
182
  if status == 0
111
- all_lines = output.lines
112
- lines = all_lines.first(max_results)
113
- more = all_lines.size - lines.size
183
+ # We can't cheaply know the exact remaining count once we stop early,
184
+ # so report "more" without an exact number when the cap was hit.
185
+ more = more_exist
114
186
  header = "#{lines.size} match(es) shown" \
115
- "#{" (#{more} more — raise max_results or narrow the pattern)" if more.positive?}"
187
+ "#{" (more — raise max_results or narrow the pattern)" if more}"
116
188
  full = "#{header}:\n\n#{lines.join}"
117
189
  { output: full,
118
- metrics: "#{lines.size} match#{"es" if lines.size != 1}#{"+" if more.positive?}",
190
+ metrics: "#{lines.size} match#{"es" if lines.size != 1}#{"+" if more}",
119
191
  body: Util::Output.preview(full),
120
192
  body_kind: :plain }
121
193
  elsif status == 1
122
194
  "No matches found for pattern: #{pattern}"
123
195
  else
124
- "Error executing search: #{output}"
196
+ "Error executing search: #{lines.join}"
125
197
  end
126
198
  end
127
199
 
128
200
  def search_with_ruby(pattern, path, include_pattern, max_results, before, after)
129
- regex = Regexp.new(pattern)
201
+ # The Ruby fallback is the LIVE path whenever rg isn't on PATH. A bad
202
+ # pattern the model emits (e.g. an unclosed paren) would otherwise
203
+ # raise RegexpError and hand the model a raw exception; return a clean,
204
+ # actionable tool error instead.
205
+ begin
206
+ regex = Regexp.new(pattern)
207
+ rescue RegexpError => e
208
+ return "Error: invalid regex pattern: #{e.message}"
209
+ end
130
210
  results = []
131
211
 
132
212
  # ripgrep accepts a single FILE as well as a directory; mirror that
@@ -134,8 +214,22 @@ module Rubino
134
214
  # `path` is a file we search it directly (include_pattern is moot).
135
215
  files = File.file?(path) ? [path] : Dir.glob(File.join(path, "**", include_pattern || "*"))
136
216
 
217
+ # Honor .gitignore the SAME way the rg path does (#375b): without this
218
+ # the fallback returned a different, larger set (build artifacts,
219
+ # node_modules, ignored secrets) than rg — non-deterministic on whether
220
+ # rg is installed. A single FILE path the model targeted directly is
221
+ # always searched (mirrors rg searching an explicit file argument).
222
+ ignore = Util::IgnoreRules.new
223
+ searching_file = File.file?(path)
224
+
137
225
  files.each do |file|
138
226
  next unless File.file?(file)
227
+ next if !searching_file && ignore.ignored?(file, path)
228
+ # F2: in a DIRECTORY search, never read a secret file's lines into
229
+ # results (an include-glob like `*.env` would otherwise leak it). A
230
+ # single-file grep the model targeted directly is already approved
231
+ # upstream, so it is searched normally.
232
+ next if !searching_file && secret_path_category(file)
139
233
  next if binary_file?(file)
140
234
 
141
235
  begin
@@ -57,7 +57,11 @@ module Rubino
57
57
  return "Error: file_path is required" if file_path.nil? || file_path.to_s.empty?
58
58
  return "Error: edits must be a non-empty array" if !edits.is_a?(Array) || edits.empty?
59
59
 
60
- expanded = File.expand_path(file_path)
60
+ expanded = expand_workspace_path(file_path)
61
+ # SECRET/credential edits (#446) are no longer HARD-refused here — they
62
+ # are gated UPSTREAM by Security::ApprovalPolicy#decide (→ :ask): an
63
+ # APPROVED multi_edit of your .env actually applies, a denied/headless
64
+ # one never reaches #call. The workspace sandbox below is unchanged.
61
65
  return workspace_violation_message(file_path) unless within_workspace?(expanded)
62
66
  return "Error: File not found: #{file_path}" unless File.exist?(expanded)
63
67
 
@@ -65,7 +69,11 @@ module Rubino
65
69
  return gate
66
70
  end
67
71
 
68
- content = File.read(expanded)
72
+ # Read RAW bytes (binary) so the read-modify-write preserves every byte
73
+ # outside the matched spans — a non-UTF-8 byte on an untouched line is
74
+ # written back verbatim (#326). The model-supplied needles/replacements
75
+ # are matched and spliced as bytes too (see Base#to_match_bytes).
76
+ content = read_for_edit(expanded)
69
77
  working = content.dup
70
78
  applied_count = 0
71
79
 
@@ -80,30 +88,77 @@ module Rubino
80
88
  replace_all = edit["replace_all"] || edit[:replace_all] || false
81
89
 
82
90
  return "Error: edit ##{idx + 1} is missing old_string or new_string" if old_s.nil? || new_s.nil?
91
+ # Empty needle would match at every char boundary and corrupt the
92
+ # file under replace_all (#329a) — reject it like a missing string.
93
+ return "Error: edit ##{idx + 1}: old_string is empty" if old_s.empty?
83
94
  return "Error: edit ##{idx + 1}: old_string and new_string are identical" if old_s == new_s
84
- unless working.include?(old_s)
95
+
96
+ old_b = to_match_bytes(old_s)
97
+ new_b = to_match_bytes(new_s)
98
+
99
+ unless working.include?(old_b)
100
+ # Mental model was wrong — let the model's next read of this path
101
+ # bypass dedup and fetch fresh bytes for recovery (r5 B3).
102
+ @read_tracker&.note_edit_failure(expanded)
85
103
  return "Error: edit ##{idx + 1}: old_string not found (check whitespace; " \
86
104
  "remember edits see the result of prior edits)"
87
105
  end
88
106
 
89
- count = working.scan(old_s).size
107
+ count = working.scan(old_b).size
90
108
  if count > 1 && !replace_all
91
109
  return "Error: edit ##{idx + 1}: #{count} matches for old_string. " \
92
110
  "Add surrounding context to disambiguate, or set replace_all: true."
93
111
  end
94
112
 
95
113
  working = if replace_all
96
- working.gsub(old_s) { new_s }
114
+ working.gsub(old_b) { new_b }
97
115
  else
98
- working.sub(old_s) { new_s }
116
+ working.sub(old_b) { new_b }
99
117
  end
100
118
  applied_count += replace_all ? count : 1
101
119
  end
102
120
 
103
- File.write(expanded, working)
104
- "Applied #{edits.size} edit(s), #{applied_count} replacement(s) in #{file_path}"
121
+ # Crash-safe write: temp-in-same-dir + fsync + atomic rename. The tool's
122
+ # description advertises "atomically" make it true on the disk seam too,
123
+ # so a SIGINT/crash mid-flush leaves the ORIGINAL file intact (HIGH-1).
124
+ Util::AtomicFile.write_atomic(expanded, working)
125
+ # Refresh-on-own-write so a follow-up edit to this file isn't refused
126
+ # as "changed on disk since last read" (r5 B2).
127
+ @read_tracker&.note_write(expanded, working)
128
+ { output: "Applied #{edits.size} edit(s), #{applied_count} replacement(s) in #{file_path}",
129
+ metrics: "#{edits.size} edit#{"s" if edits.size != 1} · " \
130
+ "#{applied_count} replacement#{"s" if applied_count != 1}",
131
+ body: build_diff_preview(edits),
132
+ body_kind: :diff }
105
133
  rescue StandardError => e
106
- "Error: #{e.message}"
134
+ # Uniform with WriteTool/EditTool: a read-only target (Errno::EACCES)
135
+ # or any other filesystem error returns a clean message.
136
+ "Error editing #{file_path}: #{e.message}"
137
+ end
138
+
139
+ # Inline diff for the applied result, mirroring EditTool: per edit, the
140
+ # old lines as `-` then the new lines as `+`, edits separated by a blank
141
+ # line. Trimmed to the first MAX_DIFF_LINES so a big batch stays a
142
+ # preview (the edits all still apply).
143
+ MAX_DIFF_LINES = 16
144
+
145
+ private
146
+
147
# Renders the inline diff shown with a successful multi-edit result.
#
# For each edit the old_string lines are emitted with a "- " prefix,
# followed by the new_string lines with a "+ " prefix; consecutive edits
# are separated by one blank line. Keys may be strings or symbols. When the
# result exceeds MAX_DIFF_LINES it is cut to that many lines and a trailer
# stating how many lines were dropped is appended. This only shortens the
# preview text.
#
# @param edits [Array<Hash>] the edits, each with old_string/new_string
# @return [String] the preview, lines joined with "\n"
def build_diff_preview(edits)
  preview = edits.each_with_index.flat_map do |edit, position|
    before = (edit["old_string"] || edit[:old_string]).to_s
    after = (edit["new_string"] || edit[:new_string]).to_s

    gap = position.zero? ? [] : [""]
    removed = before.lines.map { |row| "- #{row.chomp}" }
    added = after.lines.map { |row| "+ #{row.chomp}" }
    gap + removed + added
  end

  overflow = preview.size - MAX_DIFF_LINES
  preview = preview.first(MAX_DIFF_LINES) << " [… #{overflow} more line(s)]" if overflow.positive?
  preview.join("\n")
end
108
163
  end
109
164
  end
@@ -66,6 +66,11 @@ module Rubino
66
66
  hunks.each do |hunk|
67
67
  file_path = File.expand_path(hunk[:file], base_path)
68
68
 
69
+ # SECRET/credential patches (#446) are no longer HARD-refused here —
70
+ # they are gated UPSTREAM by Security::ApprovalPolicy#decide, which
71
+ # scans the patch's target paths and prompts (→ :ask) when ANY hunk
72
+ # touches a secret; an approved apply_patch proceeds, a denied/headless
73
+ # one never reaches #call. The workspace sandbox below is unchanged.
69
74
  unless within_workspace?(file_path)
70
75
  return [nil, workspace_violation_message(hunk[:file]) +
71
76
  " (no changes applied — apply_patch is two-phase)"]
@@ -93,7 +93,9 @@ module Rubino
93
93
  "reads documents and text. Inspect other kinds via the shell."
94
94
  end
95
95
 
96
- markdown = Rubino::Documents.to_markdown(cls.path, mime: cls.mime)
96
+ # Thread the cancel_token so a runaway/bomb conversion is interruptible
97
+ # mid-flight and bounded by the converter's wall-clock/element caps.
98
+ markdown = Rubino::Documents.to_markdown(cls.path, mime: cls.mime, cancel_token: @cancel_token)
97
99
  # No in-process converter (unknown format / optional gem absent): degrade
98
100
  # with the actionable shell-extraction hint, exactly like the preamble.
99
101
  # NEVER raise -- a missing gem must not break the turn.
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "digest"
4
+
3
5
  module Rubino
4
6
  module Tools
5
7
  # Reads a file with `cat -n` style line numbers, offset/limit windowing,
@@ -49,7 +51,12 @@ module Rubino
49
51
 
50
52
  return "Error: file_path is required" if file_path.nil? || file_path.to_s.empty?
51
53
 
52
- expanded = File.expand_path(file_path)
54
+ expanded = expand_workspace_path(file_path)
55
+ # Reads are BROAD (#406): like Hermes/Claude/Codex, read resolves any
56
+ # NON-secret path with no prompt (clone-and-inspect). A SECRET/credential
57
+ # path (#446) is NOT refused here anymore — it is gated UPSTREAM by
58
+ # Security::ApprovalPolicy#decide (→ :ask), so an APPROVED read returns
59
+ # the real bytes while a denied/headless read never reaches #call.
53
60
  return "Error: File not found: #{file_path}" unless File.exist?(expanded)
54
61
  return "Error: Not a regular file: #{file_path}" unless File.file?(expanded)
55
62
 
@@ -64,20 +71,21 @@ module Rubino
64
71
  offset = 1 if offset < 1
65
72
  limit = DEFAULT_LIMIT if limit <= 0
66
73
 
67
- # Stash mtime BEFORE rendering so a slow render on a huge file doesn't
68
- # race with a concurrent writer — we want the mtime the model "saw",
69
- # not the one at end-of-render.
70
- mtime = File.mtime(expanded)
71
- @read_tracker&.register(expanded, mtime)
72
-
73
- # Re-reading the exact same window (same file, offset, limit, unchanged
74
- # mtime) within a turn just re-injects bytes already in context. Return
75
- # a short nudge instead so the conversation doesn't carry the same
76
- # content twice. A real edit bumps mtime, so legitimate re-reads pass.
77
- dup = @read_tracker&.register_window(expanded, offset, limit, mtime)
78
- if dup && dup > 1
74
+ # Stash mtime + content hash BEFORE rendering so a slow render on a huge
75
+ # file doesn't race with a concurrent writer — we want the state the
76
+ # model "saw", not the one at end-of-render. The hash is the single
77
+ # source of truth the edit-gate and dedup both consult.
78
+ mtime = File.mtime(expanded)
79
+ digest = Digest::SHA256.hexdigest(File.binread(expanded))
80
+ @read_tracker&.register(expanded, mtime, digest)
81
+
82
+ # Re-reading the exact same window of UNCHANGED bytes just re-injects
83
+ # content already in context. Skip the work with a nudge but only when
84
+ # the file still hashes the same, the TTL holds, and no edit-failure
85
+ # recovery is pending (those serve fresh content). See ReadTracker.
86
+ if @read_tracker&.duplicate_read?(expanded, offset, limit, digest)
79
87
  return { output: "[DUPLICATE READ] Exact repeat of an earlier read of #{file_path} " \
80
- "(lines #{offset}-#{offset + limit - 1}) this turn — reuse that result " \
88
+ "(lines #{offset}-#{offset + limit - 1}) — reuse that result " \
81
89
  "instead of re-reading.",
82
90
  metrics: "duplicate" }
83
91
  end
@@ -161,12 +169,22 @@ module Rubino
161
169
  last_shown = offset - 1
162
170
  byte_capped = false
163
171
 
164
- File.open(expanded, "r") do |io|
172
+ # Open as UTF-8 regardless of the process locale (#273): under a bare
173
+ # C/POSIX locale the default external encoding is US-ASCII, which would
174
+ # tag every line ASCII and force the scrub below to mangle perfectly
175
+ # valid UTF-8 file content. Pinning UTF-8 reads it correctly.
176
+ File.open(expanded, "r:UTF-8") do |io|
165
177
  io.each_line do |line|
166
178
  total_lines += 1
167
179
  next if total_lines < offset
168
180
  break if total_lines > last_line
169
181
 
182
+ # A single non-UTF-8 byte (e.g. a Latin-1 `é` in a legacy/EU
183
+ # source comment) would otherwise blow up `chomp`/`format` with
184
+ # "invalid byte sequence in UTF-8". Scrub it to the replacement
185
+ # char so the model can still read (and then edit) the file —
186
+ # lossy but graceful, instead of a blind read failure.
187
+ line = line.scrub unless line.valid_encoding?
170
188
  chomped = line.chomp
171
189
  chomped = chomped.byteslice(0, MAX_LINE_WIDTH) + "… [line truncated]" if chomped.bytesize > MAX_LINE_WIDTH
172
190
  out << format("%6d\t%s\n", total_lines, chomped)