ruby_llm-toolbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +49 -0
  3. data/GUIDE.md +598 -0
  4. data/LICENSE +21 -0
  5. data/README.md +412 -0
  6. data/bin/verify_prism_parity +112 -0
  7. data/lib/ruby_llm/toolbox/base.rb +112 -0
  8. data/lib/ruby_llm/toolbox/configuration.rb +148 -0
  9. data/lib/ruby_llm/toolbox/data_path.rb +54 -0
  10. data/lib/ruby_llm/toolbox/process_registry.rb +226 -0
  11. data/lib/ruby_llm/toolbox/process_runner.rb +72 -0
  12. data/lib/ruby_llm/toolbox/ruby_outline.rb +213 -0
  13. data/lib/ruby_llm/toolbox/safe_math.rb +182 -0
  14. data/lib/ruby_llm/toolbox/safety/command_guard.rb +42 -0
  15. data/lib/ruby_llm/toolbox/safety/path_jail.rb +55 -0
  16. data/lib/ruby_llm/toolbox/safety/url_guard.rb +111 -0
  17. data/lib/ruby_llm/toolbox/sandbox/base.rb +151 -0
  18. data/lib/ruby_llm/toolbox/sandbox/bubblewrap.rb +70 -0
  19. data/lib/ruby_llm/toolbox/sandbox/docker.rb +69 -0
  20. data/lib/ruby_llm/toolbox/sandbox/sandbox_exec.rb +75 -0
  21. data/lib/ruby_llm/toolbox/search/brave.rb +64 -0
  22. data/lib/ruby_llm/toolbox/search/searxng.rb +64 -0
  23. data/lib/ruby_llm/toolbox/search/tavily.rb +70 -0
  24. data/lib/ruby_llm/toolbox/text_diff.rb +81 -0
  25. data/lib/ruby_llm/toolbox/toml.rb +409 -0
  26. data/lib/ruby_llm/toolbox/tools/apply_patch.rb +92 -0
  27. data/lib/ruby_llm/toolbox/tools/bash_tool.rb +101 -0
  28. data/lib/ruby_llm/toolbox/tools/bundle.rb +71 -0
  29. data/lib/ruby_llm/toolbox/tools/calculator.rb +42 -0
  30. data/lib/ruby_llm/toolbox/tools/create_directory.rb +35 -0
  31. data/lib/ruby_llm/toolbox/tools/csv_read.rb +69 -0
  32. data/lib/ruby_llm/toolbox/tools/csv_write.rb +51 -0
  33. data/lib/ruby_llm/toolbox/tools/date_time.rb +42 -0
  34. data/lib/ruby_llm/toolbox/tools/delete_file.rb +64 -0
  35. data/lib/ruby_llm/toolbox/tools/diff.rb +35 -0
  36. data/lib/ruby_llm/toolbox/tools/download_file.rb +55 -0
  37. data/lib/ruby_llm/toolbox/tools/edit_file.rb +82 -0
  38. data/lib/ruby_llm/toolbox/tools/gem_tool.rb +140 -0
  39. data/lib/ruby_llm/toolbox/tools/git_add.rb +46 -0
  40. data/lib/ruby_llm/toolbox/tools/git_blame.rb +58 -0
  41. data/lib/ruby_llm/toolbox/tools/git_branch.rb +35 -0
  42. data/lib/ruby_llm/toolbox/tools/git_checkout.rb +43 -0
  43. data/lib/ruby_llm/toolbox/tools/git_commit.rb +47 -0
  44. data/lib/ruby_llm/toolbox/tools/git_diff.rb +50 -0
  45. data/lib/ruby_llm/toolbox/tools/git_grep.rb +66 -0
  46. data/lib/ruby_llm/toolbox/tools/git_helpers.rb +68 -0
  47. data/lib/ruby_llm/toolbox/tools/git_log.rb +47 -0
  48. data/lib/ruby_llm/toolbox/tools/git_show.rb +48 -0
  49. data/lib/ruby_llm/toolbox/tools/git_status.rb +27 -0
  50. data/lib/ruby_llm/toolbox/tools/glob.rb +62 -0
  51. data/lib/ruby_llm/toolbox/tools/grep_files.rb +221 -0
  52. data/lib/ruby_llm/toolbox/tools/http_helpers.rb +130 -0
  53. data/lib/ruby_llm/toolbox/tools/http_request.rb +75 -0
  54. data/lib/ruby_llm/toolbox/tools/json_query.rb +69 -0
  55. data/lib/ruby_llm/toolbox/tools/lint.rb +67 -0
  56. data/lib/ruby_llm/toolbox/tools/list_directory.rb +87 -0
  57. data/lib/ruby_llm/toolbox/tools/move_file.rb +54 -0
  58. data/lib/ruby_llm/toolbox/tools/multi_edit.rb +107 -0
  59. data/lib/ruby_llm/toolbox/tools/parse_ruby.rb +111 -0
  60. data/lib/ruby_llm/toolbox/tools/process_kill.rb +41 -0
  61. data/lib/ruby_llm/toolbox/tools/process_list.rb +29 -0
  62. data/lib/ruby_llm/toolbox/tools/process_output.rb +55 -0
  63. data/lib/ruby_llm/toolbox/tools/process_start.rb +109 -0
  64. data/lib/ruby_llm/toolbox/tools/python_tests.rb +77 -0
  65. data/lib/ruby_llm/toolbox/tools/read_file.rb +75 -0
  66. data/lib/ruby_llm/toolbox/tools/replace_in_files.rb +139 -0
  67. data/lib/ruby_llm/toolbox/tools/run_python.rb +38 -0
  68. data/lib/ruby_llm/toolbox/tools/run_ruby.rb +37 -0
  69. data/lib/ruby_llm/toolbox/tools/run_rust.rb +42 -0
  70. data/lib/ruby_llm/toolbox/tools/run_tests.rb +81 -0
  71. data/lib/ruby_llm/toolbox/tools/sandbox_run.rb +40 -0
  72. data/lib/ruby_llm/toolbox/tools/todo_write.rb +57 -0
  73. data/lib/ruby_llm/toolbox/tools/toml_query.rb +70 -0
  74. data/lib/ruby_llm/toolbox/tools/toolchain_helpers.rb +62 -0
  75. data/lib/ruby_llm/toolbox/tools/tree.rb +87 -0
  76. data/lib/ruby_llm/toolbox/tools/web_fetch.rb +77 -0
  77. data/lib/ruby_llm/toolbox/tools/web_search.rb +81 -0
  78. data/lib/ruby_llm/toolbox/tools/write_file.rb +52 -0
  79. data/lib/ruby_llm/toolbox/tools/yaml_query.rb +73 -0
  80. data/lib/ruby_llm/toolbox/truncator.rb +68 -0
  81. data/lib/ruby_llm/toolbox/version.rb +7 -0
  82. data/lib/ruby_llm/toolbox.rb +161 -0
  83. metadata +194 -0
data/README.md ADDED
@@ -0,0 +1,412 @@
1
+ # ruby_llm-toolbox
2
+
3
+ [![CI](https://github.com/washu/ruby_llm-toolbox/actions/workflows/ci.yml/badge.svg)](https://github.com/washu/ruby_llm-toolbox/actions/workflows/ci.yml)
4
+ [![Gem Version](https://badge.fury.io/rb/ruby_llm-toolbox.svg)](https://rubygems.org/gems/ruby_llm-toolbox)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
+ [![Ruby](https://img.shields.io/badge/ruby-%3E%3D%203.3-CC342D.svg)](https://www.ruby-lang.org/)
7
+
8
+ A safe-by-default bundle of [`RubyLLM::Tool`](https://github.com/crmne/ruby_llm) classes
9
+ covering the skills common to most LLM harnesses — filesystem, shell, web, git, and
10
+ structured-data tools — packaged as one gem with one require.
11
+
12
+ - **One gem, one require.** `require "ruby_llm/toolbox"` loads everything. No sub-gems, no second require.
13
+ - **Safe by default.** Read-only tools work out of the box. Mutating/exec tools are loaded but **inert** until you explicitly enable them.
14
+ - **Token-budgeted output.** Every tool result is truncated (head + tail, middle elided) to fit a token budget, counted with [`ruby_llm-tokenizer`](https://github.com/washu/ruby_llm-tokenizer) — so a single `grep` can't blow up the context window.
15
+ - **Uniform failure contract.** Tools never raise into the harness; failures come back as `{ error:, code: }`, matching ruby_llm's own convention.
16
+
17
+ > Status: **v0.1** — ships the framework plus fifty tools across filesystem, search, code intelligence, git, web, the Ruby/Python/Rust toolchain, structured data (JSON/YAML/TOML/CSV), background process management, and small utilities. Safe tools are on by default; exec tools (writes, mutations, code execution) are gated behind `enable_exec_tools`. `parse_ruby` uses Prism (bundled with the supported Ruby 3.3+), with a Ripper fallback for non-MRI runtimes. An optional, operator-controlled [unsafe override](#security-override) lets specific calls bypass individual guards when explicitly permitted. See [Tools](#tools) and the [Roadmap](#roadmap). For an end-to-end walkthrough — wiring, the safety model, sandbox/search selection, the full tool catalog, and "reach for X, not Y" rules you can hand to the agent — read the **[Usage Guide](GUIDE.md)**.
18
+
19
+ ## Installation
20
+
21
+ Requires **Ruby >= 3.3** (where Prism is bundled, so `parse_ruby` uses it with no extra
22
+ dependency).
23
+
24
+ The tokenizer dependency (`ruby_llm-tokenizer`) pulls in the `sentencepiece` native gem,
25
+ which requires the SentencePiece C library to be present at build time:
26
+
27
+ ```bash
28
+ # Ubuntu / Debian
29
+ sudo apt-get install -y libsentencepiece-dev
30
+
31
+ # macOS (Homebrew — arm64 installs to /opt/homebrew, so point the build at it)
32
+ brew install sentencepiece
33
+ bundle config set build.sentencepiece \
34
+ "--with-sentencepiece-dir=$(brew --prefix sentencepiece)"
35
+ ```
36
+
37
+ Then add the gem to your Gemfile:
38
+
39
+ ```ruby
40
+ # Gemfile
41
+ gem "ruby_llm-toolbox"
42
+ ```
43
+
44
+ ## Quick start
45
+
46
+ ```ruby
47
+ require "ruby_llm/toolbox"
48
+
49
+ RubyLLM::Toolbox.configure do |c|
50
+ c.fs_root = "/srv/project" # filesystem tools are jailed to this
51
+ c.max_output_tokens = 2_000 # per-result budget
52
+ c.tokenizer_model = "gpt-4o" # which tokenizer to count with
53
+ end
54
+
55
+ chat = RubyLLM.chat
56
+ chat.with_tools(*RubyLLM::Toolbox.safe_tools) # read-only set, always on
57
+ chat.ask("What does config/database.yml configure?")
58
+ ```
59
+
60
+ ### Enabling exec tools
61
+
62
+ Dangerous tools (`bash`, `write_file`, `edit_file`, `run_ruby`/`run_python`/`run_rust`,
63
+ `git_commit`, mutating `http_request`, and the other exec tools) are loaded but refuse to run until you opt in:
64
+
65
+ ```ruby
66
+ RubyLLM::Toolbox.configure do |c|
67
+ c.enable_exec_tools = true
68
+ c.allowed_commands = %w[ls cat grep rg] # bash runs ONLY these executables
69
+ c.command_timeout = 30
70
+ end
71
+
72
+ chat.with_tools(*RubyLLM::Toolbox.all_tools) # exec tools still honor the gate
73
+ ```
74
+
75
+ You can also scope a single instance without touching global config:
76
+
77
+ ```ruby
78
+ chat.with_tool(RubyLLM::Toolbox::Tools::ReadFile.new(fs_root: "/srv/other"))
79
+ ```
80
+
81
+ ## Tools
82
+
83
+ ### `read_file` (safe)
84
+ Reads a UTF-8 text file from within `fs_root`, with an optional 1-based line range or a `tail`
85
+ of the last N lines (like `tail -n N`, which takes precedence over the range).
86
+ Output is token-budgeted. Path traversal and symlink escapes are rejected.
87
+
88
+ ### `list_directory` (safe)
89
+ Lists directory entries within `fs_root` with type (dir/file/symlink) and size.
90
+ Optional `recursive` and `include_hidden`. Symlinked directories are listed but not
91
+ traversed, so a link can't walk out of the jail.
92
+
93
+ ### `tree` (safe)
94
+ Renders a depth-limited directory tree under `fs_root` (default 3 levels) — a fast way to
95
+ grasp project structure without walking it one level at a time. Directories are marked with a
96
+ trailing slash; ignored directories and hidden entries are skipped (toggle with `show_hidden`),
97
+ symlinks aren't followed, and the listing is capped.
98
+
99
+ ### `glob` (safe)
100
+ Finds files matching a glob (`**/*.rb`, `app/models/*.rb`) within `fs_root`, relative
101
+ to an optional `base`. Patterns containing `..` are rejected and each hit is re-checked
102
+ through the jail to drop symlink escapes.
103
+
104
+ ### `grep_files` (safe)
105
+ Searches file contents for a regex within `fs_root`, returning `path:line: text`. Optional
106
+ file `glob` filter and `ignore_case`, plus `before`/`after`/`context` lines (like grep
107
+ `-B`/`-A`/`-C`) — context lines render as `path-line- text` and separate blocks are divided
108
+ with `--`. The pattern is compiled with a per-match timeout (ReDoS backstop), binary files and
109
+ noisy/VCS directories are skipped, and results are capped.
110
+
111
+ ### `gem` (safe)
112
+ Read-only RubyGems.org metadata lookup. Actions: `info` (summary), `version` (latest),
113
+ `dependencies` (runtime deps), `search` (find gems by query). The host is fixed and all
114
+ input is URL-encoded, so there's no arbitrary-URL surface.
115
+
116
+ ### `parse_ruby` (safe)
117
+ In-process structural outline of a Ruby file (classes, modules, methods, constants with
118
+ line numbers and nesting), or definition lookup by `query`/`kind`. It parses — never executes
119
+ — the code, through one of two interchangeable backends behind `RubyOutline`: **Prism** when
120
+ it can be loaded (it's bundled with Ruby 3.3+, the supported floor, so no gem install is
121
+ needed), and **Ripper** (stdlib) as a fallback for runtimes that don't bundle Prism (e.g.
122
+ non-MRI). The two are held to identical output by
123
+ `spec/ruby_outline_parity_spec.rb` and `bin/verify_prism_parity`, which compares both
124
+ backends over a corpus and can be run under any Ruby — including a sandboxed one
125
+ (`docker run --rm -v "$PWD":/app -w /app ruby:3.4-slim ruby bin/verify_prism_parity`).
126
+
127
+ ### `json_query` / `yaml_query` / `toml_query` / `csv_read` (safe), `csv_write` (exec)
128
+ `json_query`, `yaml_query`, and `toml_query` parse JSON / YAML / TOML (from a file in
129
+ `fs_root` or an inline string) and extract values with a shared dot/bracket path
130
+ (`users[0].name`, `dependencies.serde.version`, `products[].name`) or pretty-print. YAML is
131
+ loaded with `safe_load` (no arbitrary Ruby objects); TOML uses a dependency-free parser
132
+ covering the common surface of TOML 1.0 (tables, arrays-of-tables, inline tables, dotted
133
+ keys, all scalar forms). `csv_read` reads a CSV into readable rows (optional header, `limit`);
134
+ `csv_write` writes an array of rows (optional `headers`) to a CSV.
135
+
136
+ ### `web_fetch` / `web_search` / `http_request` (safe)
137
+ `web_fetch` retrieves a URL over http/https and returns readable text (HTML stripped),
138
+ following redirects. `web_search` queries the web through a swappable adapter — **Tavily** by default
139
+ (set `tavily_api_key`), or set `search_adapter` to `:brave` (commercial Brave Search API,
140
+ set `brave_api_key`), `:searxng` (a keyless, self-hosted SearXNG instance, set `searxng_url`),
141
+ or any object responding to `#search(query, max_results:)`. `http_request` is a general
142
+ client returning status/headers/body.
143
+ All three route through `Safety::UrlGuard` (see below). `http_request` allows GET/HEAD by
144
+ default; POST/PUT/PATCH/DELETE require `enable_exec_tools`.
145
+
146
+ ### `download_file` (exec, gated)
147
+ Downloads a URL to a file within `fs_root` (whereas `web_fetch` returns text). Routes through
148
+ `Safety::UrlGuard`, follows redirects safely, is capped at `config.max_fetch_bytes`, and jails
149
+ the destination path.
150
+
151
+ ### `bash` (exec, gated)
152
+ Runs **one allowlisted executable** with arguments. Deliberately **not a shell** — no
153
+ pipes, redirects, globs, quoting, or variable expansion. The program goes in `command`;
154
+ each argument is a separate element of `args`, passed verbatim as argv. This is the
155
+ primitive that the OS-command-injection bug class can't reach, because nothing ever
156
+ parses the input as a shell line.
157
+
158
+ ```jsonc
159
+ // model emits:
160
+ { "command": "rg", "args": ["TODO", "app/models"] }
161
+ ```
162
+
163
+ ### `run_ruby` (exec, gated)
164
+ Executes a Ruby snippet inside the active [sandbox runtime](#sandbox-runtimes) with code piped
165
+ on stdin. Under Docker it runs in an ephemeral, no-network, read-only, cap-dropped container;
166
+ under bubblewrap or sandbox-exec it runs the host's `ruby` in an isolated, no-network,
167
+ write-restricted environment. Requires `enable_exec_tools` and an available sandbox; returns a
168
+ clean `:sandbox_unavailable` error otherwise.
169
+
170
+ ### `run_python` (exec, gated)
171
+ Same sandbox as `run_ruby`, running Python (the `config.python_image` under Docker, or the
172
+ host's `python3` under the host-process backends). Code is piped to `python3` on stdin.
173
+
174
+ ### `python_tests` (exec, gated)
175
+ Runs the project's Python tests from `fs_root` — pytest by default, or unittest
176
+ (`python -m unittest discover`) — with a parsed pass/fail headline, mirroring `run_tests`.
177
+
178
+ ### `run_rust` (exec, gated)
179
+ Compiles and runs a self-contained Rust program in the same sandbox (`config.rust_image` under
180
+ Docker, or the host's `rustc` under the host-process backends). The source is piped on stdin; a
181
+ shell step inside the sandbox writes it to scratch, compiles with `rustc`, and runs the binary,
182
+ returning compiler output plus the program's stdout/stderr and exit.
183
+
184
+ ### `calculator` / `date_time` / `diff` / `todo_write` (safe)
185
+ Small in-process utilities. `calculator` evaluates an arithmetic expression with a real
186
+ recursive-descent parser — never `eval` — supporting `+ - * / % **`, parentheses, common
187
+ functions (`sqrt`, `sin`, `ln`, …), and constants (`pi`, `e`). `date_time` returns the
188
+ current time (or converts a unix timestamp), with an optional strftime format. `diff`
189
+ produces a readable line-by-line comparison of two text blocks. `todo_write` maintains a
190
+ task list across calls for multi-step work (pass the full list each time; statuses are
191
+ pending/in_progress/completed).
192
+
193
+ ### Background processes: `process_start` / `process_output` / `process_list` / `process_kill`
194
+ Long-running commands — dev servers, file watchers, log tails — that an agent
195
+ starts, polls, and stops without blocking on them.
196
+
197
+ `process_start` (**exec, gated**) launches one allowlisted executable as a
198
+ background process and returns its id (e.g. `proc_1`) immediately. It carries the
199
+ same safety model as `bash`: argv only (no shell), the minimal `env_passthrough`
200
+ environment, run in `fs_root`, in its own process group with an address-space cap
201
+ derived from `sandbox_memory` (but **no** CPU cap — these are meant to run
202
+ indefinitely). The number of concurrent live processes is bounded by
203
+ `max_processes`.
204
+
205
+ The other three are **safe** — they only act on processes already started, and
206
+ `process_kill` is always available as a stop valve even if exec tools are later
207
+ disabled. `process_output` returns the stdout/stderr produced since the last read
208
+ (incremental, so polling in a loop streams output without repeats) plus the
209
+ current status and exit code. `process_list` shows every process with its id,
210
+ status, pid, age, and command. `process_kill` stops a process — SIGTERM to its
211
+ group, escalating to SIGKILL, plus a `/proc` descendant sweep so children are
212
+ reaped even where group-signal delivery is incomplete — then returns any final
213
+ output and removes it from the registry. Output buffers are bounded (256 KB of
214
+ unread data per stream; older bytes are dropped with a marker), so a chatty
215
+ process can't exhaust memory. Everything still running is killed at interpreter
216
+ exit so nothing is orphaned.
217
+
218
+ ### `write_file` (exec, gated)
219
+ Creates or overwrites a text file within `fs_root`, creating missing parent directories.
220
+
221
+ ### `edit_file` (exec, gated)
222
+ The core editing primitive: replace an exact substring. `old_string` must match **exactly
223
+ once** (include surrounding context) unless `replace_all` is set; a missing or ambiguous
224
+ match fails clearly instead of guessing. Backslash sequences in `new_string` are written
225
+ literally — no accidental backreference interpretation.
226
+
227
+ ### `multi_edit` (exec, gated)
228
+ Applies several `edit_file`-style replacements to one file **atomically**. Edits run in order
229
+ (a later edit sees earlier results), each following the exact-match-once rule unless
230
+ `replace_all` is set. If any edit can't be applied, nothing is written and the failing edit is
231
+ named — so the file is never left half-edited. Saves a round-trip per change when batching.
232
+
233
+ ### `replace_in_files` (exec, gated)
234
+ Project-wide find/replace across files matching a glob (default `**/*`). Literal by default,
235
+ or `regex: true` with `\1` backreferences in the replacement; `ignore_case` and `dry_run`
236
+ are supported. Binary files and `ignored_dirs` are skipped, the pattern runs under a ReDoS
237
+ timeout, and every path is jailed to `fs_root`.
238
+
239
+ ### `create_directory` / `move_file` / `delete_file` (exec, gated)
240
+ `create_directory` does `mkdir -p` within the jail. `move_file` renames/moves with **both**
241
+ endpoints confined to `fs_root` and refuses to clobber unless `overwrite`. `delete_file`
242
+ removes a file or empty directory; a non-empty directory needs `recursive`, and `fs_root`
243
+ itself can't be deleted.
244
+
245
+ ### `git_status` / `git_diff` / `git_log` / `git_show` / `git_blame` / `git_grep` / `git_branch` (safe)
246
+ Read-only views of the repo at `fs_root`. `git_diff` takes optional `staged`, `path`, and
247
+ `ref`; `git_log` takes `count` and `path`; `git_show` shows a commit or a file at a ref;
248
+ `git_blame` shows line-by-line authorship (optional range); `git_grep` searches tracked
249
+ content (optional `path`, `ignore_case`, `fixed`), passing the pattern via `-e` so a
250
+ dash-leading pattern can't inject a git option; `git_branch` lists branches with the current
251
+ one marked (optional `all` for remotes). Because git can be made to run repo-configured
252
+ commands during read operations (`core.fsmonitor` on status, `diff.external`/textconv on
253
+ diff/show), these are neutralized so a hostile checkout can't turn a diff into code execution.
254
+ Refs are validated to block option injection, path arguments are jailed, and the pager and
255
+ credential prompts are disabled so nothing hangs. Requires git on the host.
256
+
257
+ ### `git_add` / `git_commit` / `git_checkout` / `apply_patch` (exec, gated)
258
+ `git_add`/`git_commit`/`git_checkout` stage, commit, and switch branches. `apply_patch`
259
+ applies a unified diff via `git apply` — validated with `--check` first (nothing is written
260
+ if it wouldn't apply cleanly), with `check: true` for a dry run. Path-escaping patches are
261
+ rejected. Does not push.
262
+
263
+ ### `run_tests` / `lint` / `bundle` (exec, gated)
264
+ The verify trio, run from `fs_root`. `run_tests` auto-detects RSpec (`spec/`/`.rspec`) or
265
+ Minitest (`test/` via rake) and returns output with a pass/fail headline (a failing suite is
266
+ a result, not a tool error). `lint` runs RuboCop (or Standard when `.standard.yml` is
267
+ present), with optional `autocorrect`. `bundle` runs Bundler actions (`install`, `update`,
268
+ `outdated`, `check`, `lock`, `add`). These inherit the full host environment (so bundler,
269
+ rbenv/rvm, and the dev binaries resolve), use `bundle exec` when a Gemfile exists, and report
270
+ `:unavailable` if the tool isn't installed.
271
+
272
+ ## Safety model
273
+
274
+ The dangerous surface is engineered, not just documented:
275
+
276
+ | Concern | Mitigation |
277
+ | --- | --- |
278
+ | Path traversal / symlink escape | `Safety::PathJail` resolves realpath and confines to `fs_root` |
279
+ | OS command injection | `bash` uses array-form spawn (no shell) + executable allowlist |
280
+ | Env leakage | spawned processes get a scrubbed env (`env_passthrough` only) |
281
+ | Runaway processes | hard wall-clock `command_timeout`, then `SIGKILL` |
282
+ | Untrusted code execution | runs in a pluggable [sandbox](#sandbox-runtimes) — Docker (no-network, read-only, cap-dropped) or host-process bubblewrap/sandbox-exec with no network, restricted writes, and rlimit caps |
283
+ | Malicious repo config (RCE) | git tools disable `core.fsmonitor`, external diff drivers, and textconv |
284
+ | Context blowup | every result passes through the token budgeter |
285
+ | ReDoS (user regex) | `grep_files` compiles patterns with a per-match `regex_timeout` |
286
+ | SSRF (web tools) | `Safety::UrlGuard` allows only http/https, blocks private/loopback/link-local/metadata IPs, **pins the socket to the vetted IP** (closing DNS rebinding), and re-checks every redirect hop |
287
+ | Privilege escalation by the agent | the unsafe override is opt-in per call **and** requires an operator-set `allow_unsafe`; an agent passing `unsafe: true` on its own gets `:unsafe_denied` |
288
+
289
+ ### Security override
290
+
291
+ Sometimes an operator genuinely wants a tool to step outside its guard — read a file outside
292
+ `fs_root`, run a non-allowlisted binary, fetch an internal URL. The override is built so the
293
+ **agent can ask but never grant**:
294
+
295
+ - A few tools (`read_file`, `write_file`, `bash`, `web_fetch`, `http_request`) take an
296
+ `unsafe: true` parameter.
297
+ - That alone does nothing. Unless a human has set `RubyLLM::Toolbox.config.allow_unsafe = true`,
298
+ any call requesting it is refused with `:unsafe_denied`. The model cannot flip that switch.
299
+ - When both line up, the call bypasses only its own guard (path jail, command allowlist, or
300
+ SSRF check) — never the deeper invariants (e.g. `bash` is still argv-only with no shell, and
301
+ still rejects NUL bytes). Set `config.unsafe_logger = ->(tool, detail) { … }` to audit every
302
+ override that fires.
303
+
304
+ This keeps the default safe, makes escalation a deliberate operator decision, and leaves an
305
+ audit trail — rather than a single boolean an agent could talk its way into.
306
+
307
+ ### Sandbox runtimes
308
+
309
+ The code-execution tools (`run_ruby`/`run_python`/`run_rust`) run through a pluggable sandbox,
310
+ chosen by `config.sandbox_runtime` (default `:auto`):
311
+
312
+ | Runtime | Platform | How it isolates |
313
+ | --- | --- | --- |
314
+ | `:docker` | any with Docker | Ephemeral container: `--network none`, read-only root + tmpfs `/tmp`, `--cap-drop ALL`, no-new-privileges, non-root user, memory/CPU/pids limits. Only the image is visible — not the host. |
315
+ | `:bubblewrap` | Linux (`bwrap`) | Fresh namespaces via `--unshare-all` (no network), host filesystem bound read-only, writable tmpfs `/tmp`, `--die-with-parent`. Runs host interpreters. |
316
+ | `:sandbox_exec` | macOS | Seatbelt profile: deny-by-default, all network denied, reads allowed, writes only to temp. Runs host interpreters. |
317
+ | `:none` | — | Disables code execution (`:sandbox_unavailable`). |
318
+
319
+ `:auto` prefers the native lightweight sandbox per platform (bubblewrap on Linux, sandbox-exec
320
+ on macOS), falling back to Docker, then to `:none`. The host-process backends apply
321
+ memory/CPU caps as inherited rlimits (since they don't use cgroups), and can be tuned with
322
+ `config.sandbox_bwrap_extra` and `config.sandbox_seatbelt_profile`.
323
+
324
+ One tradeoff worth knowing: unlike Docker (which only exposes its image), the host-process
325
+ backends leave the host filesystem **readable** (read-only) inside the sandbox. On a host with
326
+ secrets the model shouldn't read, prefer Docker, or add masks via `sandbox_bwrap_extra`
327
+ (e.g. `["--tmpfs", "/home"]`).
328
+
329
+ ## Return contract
330
+
331
+ - **Success** → a `String` (or a `Hash` for structured tools).
332
+ - **Failure** → `{ error: "human-readable message", code: :symbol }`. Never an exception.
333
+
334
+ Failure codes include `:exec_disabled`, `:path_denied`, `:not_a_file`, `:too_large`,
335
+ `:command_denied`, `:unsafe_denied`, `:sandbox_unavailable`, `:unavailable`, `:tool_exception`.
336
+
337
+ ## Configuration reference
338
+
339
+ | Option | Default | Purpose |
340
+ | --- | --- | --- |
341
+ | `fs_root` | `Dir.pwd` | Jail root for filesystem tools |
342
+ | `enable_exec_tools` | `false` | Master switch for the dangerous set |
343
+ | `allowed_commands` | `[]` | Executables `bash` and `process_start` may run |
344
+ | `command_timeout` | `30` | Wall-clock limit (seconds) for spawned processes |
345
+ | `max_processes` | `8` | Maximum concurrent background processes (`process_start`) |
346
+ | `env_passthrough` | `%w[PATH LANG LC_ALL HOME]` | Env vars forwarded to subprocesses |
347
+ | `max_output_tokens` | `2000` | Per-result token budget |
348
+ | `tokenizer_model` | `"gpt-4o"` | Model id used to pick a tokenizer |
349
+ | `regex_timeout` | `2` | Per-match timeout (seconds) for `grep_files` patterns |
350
+ | `max_grep_matches` | `200` | Cap on grep matches per call |
351
+ | `search_adapter` | `nil` | Web search backend: `nil`/`:tavily`, `:brave`, `:searxng`, or a custom adapter object |
352
+ | `tavily_api_key` | `ENV["TAVILY_API_KEY"]` | API key for the default (Tavily) `web_search` adapter |
353
+ | `brave_api_key` | `ENV["BRAVE_API_KEY"]` | Subscription token for the `:brave` adapter |
354
+ | `searxng_url` | `ENV["SEARXNG_URL"]` | Base URL of a self-hosted SearXNG instance for the `:searxng` adapter |
355
+ | `web_allowlist` / `web_denylist` | `[]` | Domain allow/deny lists enforced by `UrlGuard` |
356
+ | `max_fetch_bytes` / `max_redirects` | `2_000_000` / `5` | `web_fetch`/`http_request`/`download_file` body cap and redirect limit |
357
+ | `docker_image` / `python_image` / `rust_image` | `"ruby:3.3-slim"` / `"python:3.12-slim"` / `"rust:1-slim"` | Images for `run_ruby` / `run_python` / `run_rust` (Docker runtime) |
358
+ | `sandbox_runtime` | `:auto` | `:auto`, `:docker`, `:bubblewrap`, `:sandbox_exec`, or `:none` |
359
+ | `sandbox_bwrap_extra` | `[]` | Extra bubblewrap args (e.g. `["--tmpfs", "/home"]`) |
360
+ | `sandbox_seatbelt_profile` | `nil` | Custom macOS Seatbelt SBPL profile (overrides the default) |
361
+ | `allow_unsafe` | `false` | Operator master switch enabling the per-call unsafe override |
362
+ | `unsafe_logger` | `nil` | Callable `->(tool_name, detail)` invoked whenever an override fires |
363
+ | `sandbox_network` / `sandbox_memory` / `sandbox_cpus` / `sandbox_pids` | `none` / `256m` / `1.0` / `128` | Container limits for `run_ruby`/`run_python`/`run_rust` |
364
+ | `http_timeout` | `10` | Open/read timeout (seconds) for the `gem`, `web_fetch`, `web_search`, and `http_request` tools |
365
+
366
+ > Counting Claude models: call `RubyLLM::Tokenizer.enable_claude_approximation!` once at
367
+ > boot, then set `tokenizer_model` to your Claude model id.
368
+
369
+ ## Roadmap
370
+
371
+ Locked decisions: single gem, tokenizer-based budgeting, **Tavily** as the default search
372
+ provider (behind a swappable adapter — Brave / SearXNG drop in), **Docker** as the
373
+ container sandbox backend for code execution (`run_ruby`/`run_python`/`run_rust`).
374
+
375
+ 1. **Skeleton + pattern** — base class, config, truncator, return contract, RSpec harness, `read_file`, `bash`. ✅
376
+ 2. **Filesystem read set** — `list_directory`, `glob`, `grep_files`. ✅
377
+ 3. **Ruby tools** — `gem` (RubyGems.org metadata, safe) and `run_ruby` (Docker sandbox, exec). ✅
378
+ 4. **Filesystem write set** — `write_file`, `edit_file`, `create_directory`, `move_file`, `delete_file` (exec). ✅
379
+ 5. **Git** — `git_status`/`git_diff`/`git_log` (safe), `git_add`/`git_commit`/`git_checkout` (exec). ✅
380
+ 6. **Verify loop** — `run_tests`, `lint`, `bundle` (exec). ✅
381
+ 7. **Python** — `run_python` (Docker sandbox) and `python_tests` (pytest/unittest), exec. ✅
382
+ 8. **Code intelligence** — `parse_ruby` (Ripper outline/navigation, safe). ✅
383
+ 9. **Web** — `web_fetch`, `web_search` (Tavily), `http_request` + `Safety::UrlGuard` SSRF protection. ✅
384
+ 10. **Patch, git history & data** — `apply_patch`, `git_show`, `git_blame`, `json_query`, `csv_read`/`csv_write`. ✅
385
+ 11. **Utilities, Rust & hardening** — `calculator`, `date_time`, `diff`, `todo_write`; `run_rust`; UrlGuard IP-pinning; operator-controlled unsafe override. ✅
386
+ 12. **Search, YAML & the Prism backend** — `git_grep`; `yaml_query` (safe_load) sharing one path engine with `json_query`; `parse_ruby` now auto-selects Prism (Ruby 3.3+) with a Ripper fallback and a parity harness. ✅
387
+ 13. **CI & sandbox runtimes** — GitHub Actions (rspec on Ruby 3.3/3.4 × Linux/macOS, parity harness, gem build); pluggable sandbox with bubblewrap (Linux) and sandbox-exec (macOS) backends alongside Docker, selected by `sandbox_runtime`. ✅
388
+ 14. **More tools** — `toml_query` (dependency-free TOML parser, completing JSON/YAML/TOML/CSV); `replace_in_files` (project-wide find/replace); `download_file` (SSRF-guarded fetch to disk); `git_branch`. ✅
389
+ 15. **Editing & navigation ergonomics** — `multi_edit` (atomic batched edits), `tree` (depth-limited overview); `read_file` already supports line ranges. ✅
390
+ 16. **Background processes** — `process_start` (gated), `process_output`, `process_list`, `process_kill`: stateful long-running commands (dev servers, watchers, log tails) with incremental output, bounded buffers, a concurrency cap, and group + `/proc`-descendant cleanup. ✅
391
+ 17. **Search isn't single-vendor** — two more `web_search` adapters behind the same seam: `:brave` (commercial Brave Search API, header-key auth) and `:searxng` (keyless, self-hosted), selected by `search_adapter`. ✅
392
+ 18. **Next** — an ecosystem-docs PR against `crmne/ruby_llm`. The toolbox-level usage guide (safe→exec model, unsafe override, sandbox + search selection) now ships as [GUIDE.md](GUIDE.md).
393
+
394
+ ## Development
395
+
396
+ ```bash
397
+ bundle install # installs ruby_llm, ruby_llm-tokenizer, rspec
398
+ bundle exec rspec # run the test suite
399
+ bundle exec rake build # build the gem into pkg/
400
+ bundle exec rake install # build + install locally
401
+
402
+ # verify the parse_ruby backends agree (Prism vs Ripper)
403
+ ruby bin/verify_prism_parity
404
+ ```
405
+
406
+ Requires Ruby >= 3.3. The sandboxed code-execution tools (`run_ruby`/`run_python`/`run_rust`)
407
+ need an available sandbox runtime (bubblewrap, sandbox-exec, or a Docker daemon) to actually
408
+ execute; without one they return a clean `:sandbox_unavailable` error, and their specs stub the sandbox.
409
+
410
+ ## License
411
+
412
+ MIT.
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Verifies that the Prism and Ripper backends of RubyOutline produce identical
5
+ # outlines. Runnable on any Ruby; it only actually compares when Prism is
6
+ # available (Ruby 3.3+, where Prism is bundled — no gem install needed).
7
+ #
8
+ # Usage:
9
+ # ruby bin/verify_prism_parity # parse this gem's own lib/ + samples
10
+ # ruby bin/verify_prism_parity path/*.rb # parse the given files instead
11
+ #
12
+ # In a sandbox / CI this is the "run it under ruby:3.4" check:
13
+ # docker run --rm -v "$PWD":/app -w /app ruby:3.4-slim ruby bin/verify_prism_parity
14
+ #
15
+ # Exits 0 if the backends agree (or Prism is unavailable, so there's nothing to
16
+ # compare), 1 on any divergence.
17
+
18
# Make this checkout's lib/ loadable without installing the gem.
lib_dir = File.expand_path("../lib", __dir__)
$LOAD_PATH.unshift(lib_dir)
require "ruby_llm/toolbox/ruby_outline"

# Short alias used throughout this script.
RO = RubyLLM::Toolbox::RubyOutline

# Without Prism there is only one backend, so there is nothing to compare.
# That is treated as success (exit 0), as the header comment promises.
if !RO.prism_available?
  warn(
    "Prism is not available on this Ruby (#{RUBY_VERSION}); nothing to compare.",
    "Run this under Ruby 3.3+ (e.g. ruby:3.4-slim) to verify Prism/Ripper parity."
  )
  exit 0
end
28
+
29
# Built-in Ruby snippets that are always compared, in addition to any files.
# Keys are short labels used in the report ("sample:<label>"); values are the
# source text handed to both backends. Frozen so the fixture set cannot be
# modified by accident while the script runs.
SAMPLES = {
  # Nested modules/classes, constants, instance + singleton methods, and a
  # `class << self` body.
  "nested" => <<~RUBY,
    module App
      CONFIG = 1
      class User < Base
        VERSION = "1"
        def initialize; end
        def self.find(id); end
        class << self
          def helper; end
        end
      end
      module Helpers
        def util; end
      end
    end
  RUBY
  # Definitions inside both branches of a conditional in a class body.
  "conditionals" => <<~RUBY,
    class C
      if RUBY_VERSION > "3"
        def modern; end
      else
        def legacy; end
      end
      FLAG = true
    end
  RUBY
  # Top-level constant, bare method, and a class with a scoped name.
  "toplevel" => <<~RUBY
    TOP = 1
    def bare; end
    class A; end
    class B::C; end
  RUBY
}.freeze
63
+
64
# Expands every command-line argument as a glob pattern and returns the
# matches that are regular files, in argument order. Returns an empty Array
# when no arguments were given or nothing matched.
def files_from_args
  ARGV.each_with_object([]) do |pattern, files|
    Dir.glob(pattern) { |hit| files << hit if File.file?(hit) }
  end
end
69
+
70
# Returns the absolute paths of every .rb file under the lib/ directory that
# sits next to this script's directory (i.e. the gem's own sources).
def gem_lib_files
  lib_root = File.expand_path("../lib", __dir__)
  Dir[File.join(lib_root, "**", "*.rb")]
end
73
+
74
# Runs both backends over +source+ and compares their outlines.
#
# Returns nil when the outlines are equal; otherwise a Hash
# { label:, prism:, ripper: } carrying both outlines for reporting.
# Errors raised by RO.extract are not rescued here.
def diff(label, source)
  outlines = {
    prism: RO.extract(source, backend: RO::PrismBackend),
    ripper: RO.extract(source, backend: RO::RipperBackend)
  }
  return nil if outlines[:prism] == outlines[:ripper]

  { label: label, **outlines }
end
81
+
82
# --- Main flow -------------------------------------------------------------
# Pick the files to compare: explicit arguments win; otherwise the gem's lib/.
targets = files_from_args
if targets.empty?
  # Arguments were given but matched no regular file. Say so, rather than
  # silently verifying a different set of files than the caller asked for.
  unless ARGV.empty?
    warn "No files matched #{ARGV.join(' ')}; falling back to this gem's lib/."
  end
  targets = gem_lib_files
end

mismatches = []
skipped = 0

# The built-in samples are always compared.
SAMPLES.each do |label, src|
  m = diff("sample:#{label}", src)
  mismatches << m if m
end

targets.each do |path|
  m = diff(path, File.read(path))
  mismatches << m if m
rescue RubyLLM::Toolbox::RubyOutline::ParseError => e
  # A file that cannot be parsed was never compared; track it so the summary
  # below does not count it as a verified source.
  skipped += 1
  warn "skip #{path}: #{e.message}"
end

# Only sources that were actually compared count as checked.
checked = SAMPLES.size + targets.size - skipped
skip_note = skipped.zero? ? "" : ", #{skipped} skipped"

if mismatches.empty?
  puts "OK: Prism and Ripper agree on #{checked} source(s) " \
       "(Ruby #{RUBY_VERSION}, Prism #{Prism::VERSION}#{skip_note})."
  exit 0
end

puts "MISMATCH in #{mismatches.size} of #{checked} source(s):"
mismatches.each do |m|
  puts "\n--- #{m[:label]} ---"
  only_prism = m[:prism] - m[:ripper]
  only_ripper = m[:ripper] - m[:prism]
  only_prism.each { |e| puts "  prism-only : #{e.kind} #{e.name} (L#{e.line}, d#{e.depth})" }
  only_ripper.each { |e| puts "  ripper-only: #{e.kind} #{e.name} (L#{e.line}, d#{e.depth})" }
  # The outlines compared unequal, yet neither side has an entry the other
  # lacks: the difference is in ordering or in how often an entry repeats.
  if only_prism.empty? && only_ripper.empty?
    puts "  (same entries; order or duplicate count differs)"
  end
end
exit 1
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ruby_llm"
4
+
5
module RubyLLM
  module Toolbox
    # Every toolbox tool subclasses this instead of RubyLLM::Tool directly.
    # It adds four things on top of the base ruby_llm DSL:
    #
    # 1. A per-instance config snapshot (overridable at construction).
    # 2. An exec gate: tools marked `exec_tool!` (and their subclasses) refuse
    #    to run unless config.enable_exec_tools is true.
    # 3. A uniform failure contract: tools return { error:, code: } and never
    #    raise a StandardError into the harness. (This matches ruby_llm's own
    #    convention of returning { error: ... } for bad arguments.)
    # 4. Token-budgeted output via #truncate.
    #
    # Success returns are whatever the tool produces (usually a String, or a
    # Hash for structured results). Failures are always { error:, code: }.
    class Base < RubyLLM::Tool
      # Raised when a call requests unsafe escalation that the operator has not
      # permitted. Mapped to { error:, code: :unsafe_denied }.
      class UnsafeDenied < StandardError; end

      class << self
        # Mark a subclass as part of the dangerous set.
        def exec_tool!
          @exec_tool = true
        end

        # True when this class, or any class it inherits from, was marked with
        # `exec_tool!`.
        #
        # The mark lives in a class-level instance variable, which Ruby does
        # not share with subclasses. Checking only the receiver would let a
        # subclass of an exec tool slip past the gate in #call, so the
        # superclass chain is consulted too.
        #
        # @return [Boolean]
        def exec_tool?
          return true if @exec_tool == true

          superclass.respond_to?(:exec_tool?) && superclass.exec_tool?
        end
      end

      # @param overrides [Hash] keyword overrides applied on top of the global
      #   RubyLLM::Toolbox.config via `dup_with`; the result is this instance's
      #   config.
      def initialize(**overrides)
        super()
        @config = RubyLLM::Toolbox.config.dup_with(**overrides)
      end

      attr_reader :config

      # ruby_llm derives the tool name from the full class name, which would
      # turn RubyLLM::Toolbox::Tools::ReadFile into an ugly namespaced string.
      # Demodulize first so tools get clean names ("read_file", "bash", ...).
      #
      # @return [String] snake_cased last segment of the class name, with a
      #   trailing "_tool" removed; memoized per instance.
      def name
        @name ||= begin
          base = self.class.name.to_s.split("::").last.to_s
          base.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
              .gsub(/([a-z\d])([A-Z])/, '\1_\2')
              .downcase
              .delete_suffix("_tool")
        end
      end

      # Wraps the base #call to enforce the exec gate and guarantee that no
      # StandardError ever escapes into the model loop.
      #
      # @param args the arguments forwarded unchanged to the superclass #call.
      # @return the tool's own result on success, or { error:, code: } with
      #   code :exec_disabled, :unsafe_denied or :tool_exception.
      def call(args)
        if self.class.exec_tool? && !config.enable_exec_tools
          return error(
            "Exec tools are disabled. Set RubyLLM::Toolbox.config.enable_exec_tools = true " \
            "(and an allowlist where relevant) to use #{self.class.name}.",
            code: :exec_disabled
          )
        end

        super
      rescue UnsafeDenied => e
        error(e.message, code: :unsafe_denied)
      rescue StandardError => e
        error("#{self.class.name} failed: #{e.message}", code: :tool_exception)
      end

      private

      # Builds the uniform failure Hash returned by every tool.
      def error(message, code:)
        { error: message, code: code }
      end

      # Security override. Returns true if this call may bypass its guard, false
      # if no escalation was requested, and raises UnsafeDenied if escalation was
      # requested but the operator hasn't permitted it (config.allow_unsafe). The
      # agent can request, but only the operator can grant — and grants are
      # logged via config.unsafe_logger.
      #
      # @param requested [Boolean] whether the call asked for an override.
      # @param detail [Object, nil] passed to the logger alongside the class name.
      # @raise [UnsafeDenied] when requested but config.allow_unsafe is falsy.
      def permit_unsafe!(requested, detail = nil)
        return false unless requested

        unless config.allow_unsafe
          raise UnsafeDenied,
                "this call requested an unsafe override, but it is not permitted. An operator must set " \
                "RubyLLM::Toolbox.config.allow_unsafe = true to allow #{self.class.name} to bypass its guard."
        end

        # The logger is optional: it is only invoked when it is callable.
        logger = config.unsafe_logger
        logger.call(self.class.name, detail) if logger.respond_to?(:call)
        true
      end

      # A path jail that enforces fs_root unless this call was granted an unsafe
      # override, in which case it resolves paths anywhere on the host.
      # Raises UnsafeDenied (via #permit_unsafe!) when the override is refused.
      def path_jail(unsafe: false, detail: nil)
        Safety::PathJail.new(config.fs_root, enforce: !permit_unsafe!(unsafe, detail))
      end

      # Passes +text+ (coerced with #to_s) through a Truncator configured with
      # this instance's tokenizer model and output-token budget.
      def truncate(text)
        Truncator.new(
          model: config.tokenizer_model,
          max_tokens: config.max_output_tokens
        ).call(text.to_s)
      end
    end
  end
end