ruby_llm-toolbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +49 -0
  3. data/GUIDE.md +598 -0
  4. data/LICENSE +21 -0
  5. data/README.md +412 -0
  6. data/bin/verify_prism_parity +112 -0
  7. data/lib/ruby_llm/toolbox/base.rb +112 -0
  8. data/lib/ruby_llm/toolbox/configuration.rb +148 -0
  9. data/lib/ruby_llm/toolbox/data_path.rb +54 -0
  10. data/lib/ruby_llm/toolbox/process_registry.rb +226 -0
  11. data/lib/ruby_llm/toolbox/process_runner.rb +72 -0
  12. data/lib/ruby_llm/toolbox/ruby_outline.rb +213 -0
  13. data/lib/ruby_llm/toolbox/safe_math.rb +182 -0
  14. data/lib/ruby_llm/toolbox/safety/command_guard.rb +42 -0
  15. data/lib/ruby_llm/toolbox/safety/path_jail.rb +55 -0
  16. data/lib/ruby_llm/toolbox/safety/url_guard.rb +111 -0
  17. data/lib/ruby_llm/toolbox/sandbox/base.rb +151 -0
  18. data/lib/ruby_llm/toolbox/sandbox/bubblewrap.rb +70 -0
  19. data/lib/ruby_llm/toolbox/sandbox/docker.rb +69 -0
  20. data/lib/ruby_llm/toolbox/sandbox/sandbox_exec.rb +75 -0
  21. data/lib/ruby_llm/toolbox/search/brave.rb +64 -0
  22. data/lib/ruby_llm/toolbox/search/searxng.rb +64 -0
  23. data/lib/ruby_llm/toolbox/search/tavily.rb +70 -0
  24. data/lib/ruby_llm/toolbox/text_diff.rb +81 -0
  25. data/lib/ruby_llm/toolbox/toml.rb +409 -0
  26. data/lib/ruby_llm/toolbox/tools/apply_patch.rb +92 -0
  27. data/lib/ruby_llm/toolbox/tools/bash_tool.rb +101 -0
  28. data/lib/ruby_llm/toolbox/tools/bundle.rb +71 -0
  29. data/lib/ruby_llm/toolbox/tools/calculator.rb +42 -0
  30. data/lib/ruby_llm/toolbox/tools/create_directory.rb +35 -0
  31. data/lib/ruby_llm/toolbox/tools/csv_read.rb +69 -0
  32. data/lib/ruby_llm/toolbox/tools/csv_write.rb +51 -0
  33. data/lib/ruby_llm/toolbox/tools/date_time.rb +42 -0
  34. data/lib/ruby_llm/toolbox/tools/delete_file.rb +64 -0
  35. data/lib/ruby_llm/toolbox/tools/diff.rb +35 -0
  36. data/lib/ruby_llm/toolbox/tools/download_file.rb +55 -0
  37. data/lib/ruby_llm/toolbox/tools/edit_file.rb +82 -0
  38. data/lib/ruby_llm/toolbox/tools/gem_tool.rb +140 -0
  39. data/lib/ruby_llm/toolbox/tools/git_add.rb +46 -0
  40. data/lib/ruby_llm/toolbox/tools/git_blame.rb +58 -0
  41. data/lib/ruby_llm/toolbox/tools/git_branch.rb +35 -0
  42. data/lib/ruby_llm/toolbox/tools/git_checkout.rb +43 -0
  43. data/lib/ruby_llm/toolbox/tools/git_commit.rb +47 -0
  44. data/lib/ruby_llm/toolbox/tools/git_diff.rb +50 -0
  45. data/lib/ruby_llm/toolbox/tools/git_grep.rb +66 -0
  46. data/lib/ruby_llm/toolbox/tools/git_helpers.rb +68 -0
  47. data/lib/ruby_llm/toolbox/tools/git_log.rb +47 -0
  48. data/lib/ruby_llm/toolbox/tools/git_show.rb +48 -0
  49. data/lib/ruby_llm/toolbox/tools/git_status.rb +27 -0
  50. data/lib/ruby_llm/toolbox/tools/glob.rb +62 -0
  51. data/lib/ruby_llm/toolbox/tools/grep_files.rb +221 -0
  52. data/lib/ruby_llm/toolbox/tools/http_helpers.rb +130 -0
  53. data/lib/ruby_llm/toolbox/tools/http_request.rb +75 -0
  54. data/lib/ruby_llm/toolbox/tools/json_query.rb +69 -0
  55. data/lib/ruby_llm/toolbox/tools/lint.rb +67 -0
  56. data/lib/ruby_llm/toolbox/tools/list_directory.rb +87 -0
  57. data/lib/ruby_llm/toolbox/tools/move_file.rb +54 -0
  58. data/lib/ruby_llm/toolbox/tools/multi_edit.rb +107 -0
  59. data/lib/ruby_llm/toolbox/tools/parse_ruby.rb +111 -0
  60. data/lib/ruby_llm/toolbox/tools/process_kill.rb +41 -0
  61. data/lib/ruby_llm/toolbox/tools/process_list.rb +29 -0
  62. data/lib/ruby_llm/toolbox/tools/process_output.rb +55 -0
  63. data/lib/ruby_llm/toolbox/tools/process_start.rb +109 -0
  64. data/lib/ruby_llm/toolbox/tools/python_tests.rb +77 -0
  65. data/lib/ruby_llm/toolbox/tools/read_file.rb +75 -0
  66. data/lib/ruby_llm/toolbox/tools/replace_in_files.rb +139 -0
  67. data/lib/ruby_llm/toolbox/tools/run_python.rb +38 -0
  68. data/lib/ruby_llm/toolbox/tools/run_ruby.rb +37 -0
  69. data/lib/ruby_llm/toolbox/tools/run_rust.rb +42 -0
  70. data/lib/ruby_llm/toolbox/tools/run_tests.rb +81 -0
  71. data/lib/ruby_llm/toolbox/tools/sandbox_run.rb +40 -0
  72. data/lib/ruby_llm/toolbox/tools/todo_write.rb +57 -0
  73. data/lib/ruby_llm/toolbox/tools/toml_query.rb +70 -0
  74. data/lib/ruby_llm/toolbox/tools/toolchain_helpers.rb +62 -0
  75. data/lib/ruby_llm/toolbox/tools/tree.rb +87 -0
  76. data/lib/ruby_llm/toolbox/tools/web_fetch.rb +77 -0
  77. data/lib/ruby_llm/toolbox/tools/web_search.rb +81 -0
  78. data/lib/ruby_llm/toolbox/tools/write_file.rb +52 -0
  79. data/lib/ruby_llm/toolbox/tools/yaml_query.rb +73 -0
  80. data/lib/ruby_llm/toolbox/truncator.rb +68 -0
  81. data/lib/ruby_llm/toolbox/version.rb +7 -0
  82. data/lib/ruby_llm/toolbox.rb +161 -0
  83. metadata +194 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7901ba482abf5f0ee5176b69fe4a1fd3015dd60794e4b049227d7553d0b79fe2
4
+ data.tar.gz: ab519ed44d20c0fa121ee85bc1d71891697823d5929bfd4c67f8ec428da167d2
5
+ SHA512:
6
+ metadata.gz: a8f716e573c86df412c75521097e16d6939adbe1080603937366ad1ba2ef190506f83d87977de31eca1ac96ce92846d7134e1a31a1d44a28a2dcbf9468447d5b
7
+ data.tar.gz: 645bc71e1f16769099d73aab4326573380f9e2b6e93139332b349677f6aa15db77ca1362d8a5288bd32e30d7a20a76f071fd3cb16b691c461e10c2671fb7eabf
data/CHANGELOG.md ADDED
@@ -0,0 +1,49 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/), and this project adheres to
5
+ Semantic Versioning.
6
+
7
+ ## [0.1.0] - Unreleased
8
+
9
+ Initial release: fifty `RubyLLM::Tool` subclasses behind a safe-by-default loader.
10
+
11
+ ### Added
12
+ - Framework: single `require "ruby_llm/toolbox"`, per-instance configuration,
13
+ uniform `{ error:, code: }` return contract, and token-budgeted output via
14
+ `ruby_llm-tokenizer`.
15
+ - Safe (read-only) tools: `read_file`, `list_directory`, `tree`, `glob`,
16
+ `grep_files`, `gem`, `parse_ruby`, `web_fetch`, `web_search`, `http_request`,
17
+ `git_status`, `git_diff`, `git_log`, `git_show`, `git_blame`, `git_grep`,
18
+ `git_branch`, `json_query`, `yaml_query`, `toml_query`, `csv_read`,
19
+ `calculator`, `date_time`, `diff`, `todo_write`, `process_output`,
20
+ `process_list`, `process_kill`.
21
+ - Exec (gated) tools: `write_file`, `edit_file`, `multi_edit`,
22
+ `replace_in_files`, `create_directory`, `move_file`, `delete_file`,
23
+ `download_file`, `git_add`, `git_commit`, `git_checkout`, `apply_patch`,
24
+ `csv_write`, `run_tests`, `python_tests`, `lint`, `bundle`, `bash`,
25
+ `run_ruby`, `run_python`, `run_rust`, `process_start`.
26
+ - Background process management: `process_start` (gated) launches an
27
+ allowlisted long-running command in its own process group with an
28
+ address-space cap and a `max_processes` concurrency limit; `process_output`
29
+ reads new stdout/stderr incrementally with bounded buffers; `process_list`
30
+ enumerates running processes; `process_kill` stops a process (group signal +
31
+ `/proc` descendant sweep, TERM→KILL escalation) and returns its final output.
32
+ Everything still running is cleaned up at interpreter exit.
33
+ - A dependency-free TOML parser (`RubyLLM::Toolbox::Toml`) backing `toml_query`.
34
+ - Swappable `web_search` backends behind one adapter seam: Tavily (default),
35
+ `:brave` (commercial Brave Search API, header-key auth), and `:searxng`
36
+ (keyless, self-hosted), selected via `config.search_adapter` with
37
+ `brave_api_key` / `searxng_url` knobs.
38
+ - Safety: path-jailing, no-shell argv execution with an allowlist,
39
+ repo-config-RCE hardening for git tools, SSRF protection with private-IP
40
+ blocking and DNS-rebinding-safe IP pinning, ReDoS guards, and a pluggable
41
+ code-execution sandbox: Docker, plus host-process bubblewrap (Linux) and
42
+ sandbox-exec (macOS) backends selected by `sandbox_runtime`, with rlimit
43
+ memory/CPU caps for the host-process backends.
44
+ - `parse_ruby` dual backend: Prism (Ruby 3.3+) with a Ripper fallback, kept in
45
+ parity by `spec/ruby_outline_parity_spec.rb` and `bin/verify_prism_parity`.
46
+ - Operator-controlled unsafe override (`config.allow_unsafe`, per-call
47
+ `unsafe:`) that an agent can request but cannot grant itself.
48
+
49
+ [0.1.0]: https://github.com/washu/ruby_llm-toolbox/releases/tag/v0.1.0
data/GUIDE.md ADDED
@@ -0,0 +1,598 @@
1
+ # ruby_llm-toolbox — Usage Guide
2
+
3
+ A practical, end-to-end guide to wiring `ruby_llm-toolbox` into an agent harness
4
+ and using its fifty tools well. It covers the mental model, configuration, the
5
+ safety architecture, the sandbox and search backends, the background-process
6
+ lifecycle, a full tool catalog, and a set of "reach for X, not Y" decision rules
7
+ you can hand to the agent itself.
8
+
9
+ This guide is for two audiences. The early sections (mental model through
10
+ configuration) are for the **developer** wiring the gem into a harness. The
11
+ catalog and the [Decision rules](#decision-rules-reach-for-x-not-y) are written
12
+ so they can be dropped into an **agent's** system prompt verbatim. See
13
+ [Using this guide as agent context](#using-this-guide-as-agent-context).
14
+
15
+ ---
16
+
17
+ ## Table of contents
18
+
19
+ 1. [The mental model](#the-mental-model)
20
+ 2. [Quick start](#quick-start)
21
+ 3. [Configuration reference](#configuration-reference)
22
+ 4. [The safe → exec security model](#the-safe--exec-security-model)
23
+ 5. [The unsafe override](#the-unsafe-override)
24
+ 6. [Sandboxing code execution](#sandboxing-code-execution)
25
+ 7. [Web search backends](#web-search-backends)
26
+ 8. [Background processes](#background-processes)
27
+ 9. [Tool catalog](#tool-catalog)
28
+ 10. [Decision rules: reach for X, not Y](#decision-rules-reach-for-x-not-y)
29
+ 11. [The return contract and error codes](#the-return-contract-and-error-codes)
30
+ 12. [Token budgeting](#token-budgeting)
31
+ 13. [Recipes](#recipes)
32
+ 14. [Operational notes and honest limitations](#operational-notes-and-honest-limitations)
33
+ 15. [Using this guide as agent context](#using-this-guide-as-agent-context)
34
+
35
+ ---
36
+
37
+ ## The mental model
38
+
39
+ Three ideas explain almost everything about how the toolbox behaves.
40
+
41
+ **1. Many narrow, typed tools — not one big shell.** Where a generic agent shells
42
+ out to `cat`, `grep`, `jq`, `git`, and `curl`, this toolbox gives each of those a
43
+ dedicated tool with typed parameters, structured output, and its own guard rails.
44
+ A typed tool is easier for a model to call correctly, easier to secure, and
45
+ easier to reason about than a free-form shell string. `bash` still exists, but as
46
+ a deliberate, allowlisted escape hatch — not the default path.
47
+
48
+ **2. Safe by default, exec on request.** Every tool is either **safe**
49
+ (read-only: it observes the world but never changes it) or **exec** (it writes
50
+ files, mutates a repo, runs code, or starts processes). Safe tools always work.
51
+ Exec tools are loaded but **inert** until an operator flips
52
+ `config.enable_exec_tools = true`. This means you can hand an agent the full tool
53
+ set and trust that, until you opt in, it cannot alter anything.
54
+
55
+ **3. A uniform return contract.** Every tool returns either a `String` (success)
56
+ or a `Hash` of the shape `{ error: "...", code: :some_symbol }` (failure). The
57
+ agent — and your harness — can branch on one predictable shape regardless of
58
+ which tool ran. See [The return contract](#the-return-contract-and-error-codes).
59
+
60
+ The toolbox namespace is `RubyLLM::Toolbox`; tools live under
61
+ `RubyLLM::Toolbox::Tools`. A single `require "ruby_llm/toolbox"` loads everything.
62
+
63
+ ---
64
+
65
+ ## Quick start
66
+
67
+ ```ruby
68
+ require "ruby_llm/toolbox"
69
+
70
+ # Configure once, at boot.
71
+ RubyLLM::Toolbox.configure do |c|
72
+ c.fs_root = Dir.pwd # the jail: no tool reads/writes outside this
73
+ c.max_output_tokens = 2_000 # per-call output budget (the default)
74
+ # exec tools stay OFF here — read-only agent
75
+ end
76
+
77
+ chat = RubyLLM.chat(model: "gpt-4o") # your ruby_llm chat object
78
+ chat.with_tools(*RubyLLM::Toolbox.safe_tools) # hand it the read-only set
79
+ chat.ask("What does lib/foo.rb do, and where is bar() defined?")
80
+ ```
81
+
82
+ Three sets are available to wire in:
83
+
84
+ | Method | Returns |
85
+ | --- | --- |
86
+ | `RubyLLM::Toolbox.safe_tools` | the read-only tools (always usable) |
87
+ | `RubyLLM::Toolbox.exec_tools` | the mutating/exec tools (honor the gate) |
88
+ | `RubyLLM::Toolbox.all_tools` | both sets together |
89
+
90
+ `with_tools` accepts either tool **instances** (what these methods return) or
91
+ tool **classes**; both work with `ruby_llm`. Each tool snapshots the global
92
+ config at construction, so configure before you build the tool list — or rebuild
93
+ the list after a config change.
94
+
95
+ To enable the mutating set:
96
+
97
+ ```ruby
98
+ RubyLLM::Toolbox.configure do |c|
99
+ c.fs_root = "/srv/project"
100
+ c.enable_exec_tools = true
101
+ c.allowed_commands = %w[ls cat git rspec] # bash / process_start allowlist
102
+ end
103
+ chat.with_tools(*RubyLLM::Toolbox.all_tools)
104
+ ```
105
+
106
+ `RubyLLM::Toolbox.reset!` restores a pristine global configuration — handy in
107
+ tests and between sessions.
108
+
109
+ ---
110
+
111
+ ## Configuration reference
112
+
113
+ All configuration goes through `RubyLLM::Toolbox.configure { |c| ... }`. Knobs are
114
+ grouped below by concern. Per-instance overrides are possible too: most tools accept
115
+ keyword overrides at construction (e.g. `ReadFile.new(fs_root: "/other")`), which
116
+ produce a one-off config snapshot without touching global state.
117
+
118
+ ### Core / filesystem
119
+
120
+ | Knob | Default | Purpose |
121
+ | --- | --- | --- |
122
+ | `fs_root` | `Dir.pwd` | The path jail. Every filesystem tool resolves real paths and refuses anything outside this root. |
123
+ | `enable_exec_tools` | `false` | Master switch for the entire exec set. |
124
+ | `ignored_dirs` | `.git .hg .svn node_modules .bundle tmp` | Directories skipped by `tree`, `grep_files`, `replace_in_files`. |
125
+ | `max_output_tokens` | `2_000` | Output budget per call; longer output is truncated with a marker. |
126
+ | `tokenizer_model` | `gpt-4o` | Model name for `ruby_llm-tokenizer` so truncation counts the right tokens. |
127
+
128
+ ### Command execution
129
+
130
+ | Knob | Default | Purpose |
131
+ | --- | --- | --- |
132
+ | `allowed_commands` | `[]` | Executables that `bash` **and** `process_start` may run. Empty = nothing runs. |
133
+ | `command_timeout` | `30` | Wall-clock seconds before a spawned process is killed. |
134
+ | `max_processes` | `8` | Max concurrent background processes (`process_start`). |
135
+ | `env_passthrough` | `PATH LANG LC_ALL HOME` | Which host env vars are forwarded to spawned processes. Everything else is stripped. |
136
+
137
+ ### Search and ReDoS
138
+
139
+ | Knob | Default | Purpose |
140
+ | --- | --- | --- |
141
+ | `regex_timeout` | `2` | `Regexp.timeout` ceiling (seconds) for `grep_files` / `replace_in_files`, defusing catastrophic backtracking. |
142
+ | `max_grep_matches` | `200` | Cap on matches returned by a single grep. |
143
+
144
+ ### Web search
145
+
146
+ | Knob | Default | Purpose |
147
+ | --- | --- | --- |
148
+ | `search_adapter` | `nil` | `nil`/`:tavily`, `:brave`, `:searxng`, or a custom object responding to `#search(query, max_results:)`. |
149
+ | `tavily_api_key` | `ENV["TAVILY_API_KEY"]` | Key for the default Tavily adapter. |
150
+ | `brave_api_key` | `ENV["BRAVE_API_KEY"]` | Subscription token for the `:brave` adapter. |
151
+ | `searxng_url` | `ENV["SEARXNG_URL"]` | Base URL of a self-hosted SearXNG instance for `:searxng`. |
152
+
153
+ ### HTTP / fetch
154
+
155
+ | Knob | Default | Purpose |
156
+ | --- | --- | --- |
157
+ | `web_allowlist` / `web_denylist` | `[]` / `[]` | Host filters layered on top of the SSRF guard for `web_fetch` / `http_request`. |
158
+ | `http_timeout` | `10` | Open/read timeout for `gem`, `web_fetch`, `web_search`, `http_request`. |
159
+ | `user_agent` | `ruby_llm-toolbox/<version>` | User-Agent header for outbound requests. |
160
+ | `max_fetch_bytes` | `2_000_000` | Size cap on a fetched/downloaded body. |
161
+ | `max_redirects` | `5` | Redirect hops followed (each re-checked by the SSRF guard). |
162
+
163
+ ### Sandbox (code-execution tools)
164
+
165
+ | Knob | Default | Purpose |
166
+ | --- | --- | --- |
167
+ | `sandbox_runtime` | `:auto` | `:auto`, `:docker`, `:bubblewrap`, `:sandbox_exec`. |
168
+ | `docker_image` / `python_image` / `rust_image` | `ruby:3.3-slim` / `python:3.12-slim` / `rust:1-slim` | Images for `run_ruby` / `run_python` / `run_rust` under Docker. |
169
+ | `sandbox_network` | `none` | Network mode for sandboxed code (default: no network). |
170
+ | `sandbox_memory` / `sandbox_cpus` / `sandbox_pids` | `256m` / `1.0` / `128` | Resource caps. |
171
+ | `sandbox_user` | `1000:1000` | UID:GID the sandboxed process runs as. |
172
+ | `sandbox_bwrap_extra` | `[]` | Extra `bwrap` args (e.g. masks to hide host paths). |
173
+ | `sandbox_seatbelt_profile` | `nil` | Override the default macOS Seatbelt SBPL profile. |
174
+
175
+ ### Unsafe override
176
+
177
+ | Knob | Default | Purpose |
178
+ | --- | --- | --- |
179
+ | `allow_unsafe` | `false` | Operator master switch enabling per-call `unsafe:` requests. |
180
+ | `unsafe_logger` | `nil` | A callable invoked on every honored unsafe call, for audit. |
181
+
182
+ ---
183
+
184
+ ## The safe → exec security model
185
+
186
+ The split between safe and exec tools is the backbone of the design.
187
+
188
+ **Safe tools** are read-only by construction. `read_file`, `grep_files`,
189
+ `git_log`, `web_fetch`, `json_query`, and the rest observe the filesystem, a repo,
190
+ or the network but never mutate state. They are always available, even with
191
+ `enable_exec_tools = false`. (The three process-management tools `process_output`,
192
+ `process_list`, and `process_kill` are classed safe because they only act on
193
+ processes that already exist — and `process_kill` is deliberately always
194
+ available as a stop valve.)
195
+
196
+ **Exec tools** can change the world: write files, commit to a repo, run arbitrary
197
+ code, start processes. They are loaded but refuse to run until
198
+ `enable_exec_tools = true`. An exec tool called while the gate is closed returns
199
+ `{ error: ..., code: :exec_disabled }` rather than doing anything.
200
+
201
+ On top of the gate sit several independent guards, each defending a specific
202
+ class of attack:
203
+
204
+ | Guard | Defends against | Where |
205
+ | --- | --- | --- |
206
+ | **Path jail** | Reading or writing outside `fs_root` (`../../etc/passwd`, symlink escapes). Paths are resolved to their real location and checked. | all filesystem tools |
207
+ | **No-shell argv execution** | OS command injection. `bash`/`process_start` take a program plus an argument array — never a shell string — so there is no place for `;`, `\|`, `$()`, or globbing to be interpreted. An allowlist further restricts which executables may run. | `bash`, `process_start` |
208
+ | **SSRF guard with IP pinning** | Server-side request forgery and DNS rebinding. Only http/https; private, loopback, link-local, CGNAT, and cloud-metadata IPs are blocked; the socket is pinned to the vetted IP; every redirect hop is re-checked. | `web_fetch`, `http_request`, `download_file` |
209
+ | **ReDoS guard** | Catastrophic regex backtracking locking the process. User patterns run under `Regexp.timeout`. | `grep_files`, `replace_in_files` |
210
+ | **Repo-config RCE hardening** | A malicious checked-out repo executing code via git config / hooks. Git tools run with hardened flags. | all `git_*` tools |
211
+ | **Sandbox** | Untrusted code touching the host. Code-execution tools run in an isolated, no-network, resource-capped sandbox. | `run_ruby`, `run_python`, `run_rust` |
212
+ | **Token budget** | Blowing the context window with a huge file or command output. | every tool, via `max_output_tokens` |
213
+
214
+ The guards are layered: enabling exec tools does **not** disable the path jail,
215
+ the SSRF guard, or the sandbox. Each must be crossed on its own terms.
216
+
217
+ ---
218
+
219
+ ## The unsafe override
220
+
221
+ Sometimes a guard is in the way of legitimate work — reading a file just outside
222
+ `fs_root`, fetching a `localhost` dev server, running a non-allowlisted binary.
223
+ The toolbox provides an escape hatch that is **two-key by design**: the agent can
224
+ *request* a bypass, but only an operator can *grant* the capability.
225
+
226
+ - The operator sets `config.allow_unsafe = true` (off by default).
227
+ - The agent passes `unsafe: true` on a supporting tool call.
228
+ - Only when **both** are true does the specific guard step aside — and only for
229
+ that one call. The agent cannot self-escalate; setting `unsafe: true` while
230
+ `allow_unsafe` is false is simply refused (`code: :refused`).
231
+ - Every honored unsafe call is passed to `config.unsafe_logger` (if set) for an
232
+ audit trail.
233
+
234
+ Tools that expose an `unsafe:` parameter: **`read_file`**, **`write_file`**,
235
+ **`bash`**, **`process_start`**, **`web_fetch`**, **`http_request`**. Each bypass
236
+ is scoped to that tool's guard only (e.g. `read_file unsafe: true` relaxes the
237
+ path jail for that read; it does not turn off anything else).
238
+
239
+ Treat `allow_unsafe` as a trusted-operator, trusted-environment setting. In a
240
+ hands-off or adversarial deployment, leave it off.
241
+
242
+ ---
243
+
244
+ ## Sandboxing code execution
245
+
246
+ `run_ruby`, `run_python`, and `run_rust` execute model-authored code, so they run
247
+ inside a sandbox rather than directly on the host. The backend is pluggable and
248
+ selected by `config.sandbox_runtime`:
249
+
250
+ | Runtime | Platform | Isolation | Notes |
251
+ | --- | --- | --- | --- |
252
+ | `:docker` | any with Docker | **Strongest.** Code runs in a container off an image, so the host filesystem isn't visible at all. | `--network none`, `--read-only`, `--cap-drop ALL`, `--security-opt no-new-privileges`, memory/cpu/pids caps, non-root user, tmpfs `/tmp`. |
253
+ | `:bubblewrap` | Linux | Process-level. No-network, restricted writes, rlimit caps. | Host FS is **bind-mounted read-only** — see caveat below. |
254
+ | `:sandbox_exec` | macOS | Process-level via Seatbelt (SBPL). Deny-by-default, no network, writes only to temp dirs. | Host FS is **readable** — see caveat below. |
255
+ | `:auto` (default) | — | Picks the best available: macOS → Seatbelt then Docker; Linux → bubblewrap then Docker; otherwise Docker; falls back to a `Null` backend that refuses if none exist. | |
256
+
257
+ When no sandbox is available, the code-execution tools return
258
+ `code: :sandbox_unavailable` rather than running unsandboxed.
259
+
260
+ **Honest caveat on the host-process backends.** Bubblewrap and Seatbelt isolate
261
+ *writes* and *network*, but they leave the host filesystem **readable** (Docker
262
+ does not, because the container only sees its image). If read-confidentiality
263
+ matters — secrets on disk the code shouldn't see — prefer `:docker`, or add
264
+ `sandbox_bwrap_extra` masks / a custom `sandbox_seatbelt_profile` to hide
265
+ sensitive paths. The default backends are about containing damage and side
266
+ effects, not about hiding the source tree the agent is already working in.
267
+
268
+ ---
269
+
270
+ ## Web search backends
271
+
272
+ `web_search` runs through a swappable adapter so you are not locked to one vendor.
273
+ Select with `config.search_adapter`:
274
+
275
+ | Value | Backend | Auth | Best for |
276
+ | --- | --- | --- | --- |
277
+ | `nil` / `:tavily` | Tavily | `tavily_api_key` | Default. Agent-oriented: returns cleaned content plus a synthesized answer. |
278
+ | `:brave` | Brave Search API | `brave_api_key` (header token) | A commercial drop-in alternative; ranked web results. |
279
+ | `:searxng` | self-hosted SearXNG | none — `searxng_url` | Keyless and private; you run the instance. Surfaces SearXNG "instant answers". |
280
+ | any object | custom | your code | Anything responding to `#search(query, max_results:)` returning `{ answer:, results: [{title:, url:, content:}] }`. |
281
+
282
+ Every adapter returns the same shape, so the tool's output and your harness logic
283
+ are identical regardless of provider. A missing credential surfaces as
284
+ `code: :no_api_key`; an unknown symbol or backend failure as `code: :search_failed`.
285
+
286
+ The SearXNG base URL is treated as operator-configured infrastructure (often on a
287
+ private network) and is **not** run through the SSRF guard — reaching an internal
288
+ instance is the intended behavior.
289
+
290
+ ---
291
+
292
+ ## Background processes
293
+
294
+ Four tools manage long-running commands — dev servers, file watchers, log tails —
295
+ that you don't want to block on:
296
+
297
+ - **`process_start`** (exec, gated) launches one allowlisted command in the
298
+ background and returns an id like `proc_1` immediately. Same safety model as
299
+ `bash`: argv only, the minimal `env_passthrough` environment, run in `fs_root`,
300
+ in its own process group with a memory cap (no CPU cap — these run
301
+ indefinitely). Bounded by `max_processes`.
302
+ - **`process_output`** (safe) returns the stdout/stderr produced *since the last
303
+ read*, plus status and exit code. Poll it in a loop to stream output without
304
+ repeats.
305
+ - **`process_list`** (safe) shows every process with id, status, pid, age, and
306
+ command.
307
+ - **`process_kill`** (safe) stops a process — SIGTERM to its group, escalating to
308
+ SIGKILL, plus a `/proc` descendant sweep so children are reaped — then returns
309
+ any final output and removes it from the registry.
310
+
311
+ Lifecycle: **start → poll output (repeat) → kill**. Output buffers are bounded
312
+ (256 KB of unread data per stream; older bytes drop with a marker) so a chatty
313
+ process can't exhaust memory, and everything still running is killed at
314
+ interpreter exit so nothing is orphaned.
315
+
316
+ ```
317
+ process_start command:"ruby" args:["server.rb"] name:"web" # → "Started proc_1 (pid 4242)"
318
+ process_output id:"proc_1" # → new output + "running"
319
+ # ... do other work, poll again ...
320
+ process_kill id:"proc_1" # → final output, removed
321
+ ```
322
+
323
+ ---
324
+
325
+ ## Tool catalog
326
+
327
+ Fifty tools. Safe tools are always available; **(exec)** tools require
328
+ `enable_exec_tools = true`.
329
+
330
+ ### Filesystem — read
331
+
332
+ | Tool | What it does |
333
+ | --- | --- |
334
+ | `read_file` | Read a text file in `fs_root`. Optional `start_line`/`end_line` window, or `tail` for the last N lines. `unsafe:` relaxes the jail. |
335
+ | `list_directory` | List one directory's entries. |
336
+ | `tree` | Depth-limited recursive overview (default depth 3), dirs marked `/`, skips ignored/hidden, no symlink follow, capped at 500 entries. |
337
+ | `glob` | Match files by glob pattern within `fs_root`. |
338
+ | `grep_files` | Content search with a regex (ReDoS-guarded). Supports `before`/`after`/`context` lines like `grep -B/-A/-C`. |
339
+
340
+ ### Filesystem — write (exec)
341
+
342
+ | Tool | What it does |
343
+ | --- | --- |
344
+ | `write_file` | Create or overwrite a whole file (makes parent dirs). `unsafe:` relaxes the jail. |
345
+ | `edit_file` | Replace an exact substring — must match **once** unless `replace_all`. The precise single-edit primitive. |
346
+ | `multi_edit` | Several exact edits to one file, applied sequentially and atomically; names the first failing edit. |
347
+ | `replace_in_files` | Project-wide find/replace across a glob (literal or regex with backrefs), `ignore_case`, `dry_run`; skips binary and ignored dirs. |
348
+ | `create_directory` / `move_file` / `delete_file` | Directory and file management within the jail. |
349
+ | `download_file` | SSRF-guarded fetch straight to a file on disk (size-capped). |
350
+ | `apply_patch` | Apply a unified diff. |
351
+
352
+ ### Code intelligence
353
+
354
+ | Tool | What it does |
355
+ | --- | --- |
356
+ | `parse_ruby` | Structural outline of a Ruby file (classes, modules, methods) via Prism, with a Ripper fallback. |
357
+
358
+ ### Structured data
359
+
360
+ | Tool | What it does |
361
+ | --- | --- |
362
+ | `json_query` | Extract from JSON with a dot/bracket path (`a.b[0].c`, `[]` maps). |
363
+ | `yaml_query` | Same path engine over YAML (`safe_load`). |
364
+ | `toml_query` | Same path engine over TOML (dependency-free parser; file in `fs_root` or inline). |
365
+ | `csv_read` | Read CSV with headers. |
366
+ | `csv_write` *(exec)* | Write rows to a CSV. |
367
+
368
+ ### Git — read
369
+
370
+ `git_status`, `git_diff`, `git_log`, `git_show`, `git_blame`, `git_grep`,
371
+ `git_branch` (`-vv`, remotes). All run with repo-config-RCE hardening.
372
+
373
+ ### Git — write (exec)
374
+
375
+ `git_add`, `git_commit`, `git_checkout`.
376
+
377
+ ### Web
378
+
379
+ | Tool | What it does |
380
+ | --- | --- |
381
+ | `web_fetch` | Fetch a page (SSRF-guarded, size-capped, follows redirects). `unsafe:` relaxes the guard. |
382
+ | `web_search` | Search via the configured adapter (Tavily/Brave/SearXNG/custom). |
383
+ | `http_request` *(mutating verbs gated)* | General HTTP client returning status/headers/body. Safe for GET/HEAD; POST/PUT/PATCH/DELETE need the exec gate. `unsafe:` relaxes the guard. |
384
+
385
+ ### Toolchain (exec)
386
+
387
+ | Tool | What it does |
388
+ | --- | --- |
389
+ | `run_ruby` / `run_python` / `run_rust` | Execute code in the sandbox. |
390
+ | `run_tests` | Run the Ruby test suite (RSpec / Minitest). |
391
+ | `python_tests` | Run pytest / unittest. |
392
+ | `lint` | Run a linter (e.g. RuboCop). |
393
+ | `bundle` | Run a Bundler subcommand. |
394
+ | `bash` | Run one allowlisted executable, argv only, no shell. The escape hatch. `unsafe:` relaxes the allowlist. |
395
+
396
+ ### Background processes
397
+
398
+ `process_start` *(exec)*, `process_output`, `process_list`, `process_kill` — see
399
+ [Background processes](#background-processes).
400
+
401
+ ### Utilities
402
+
403
+ | Tool | What it does |
404
+ | --- | --- |
405
+ | `calculator` | Evaluate arithmetic with a real parser — never `eval` — with functions and constants. |
406
+ | `date_time` | Current time or convert a unix timestamp; optional strftime format. |
407
+ | `diff` | Line-by-line comparison of two text blocks. |
408
+ | `todo_write` | Maintain a task list across calls for multi-step work (pass the full list each time). |
409
+
410
+ ---
411
+
412
+ ## Decision rules: reach for X, not Y
413
+
414
+ This section is written for the agent. Prefer the dedicated typed tool over a
415
+ shell command every time one exists — it is safer, its output is structured, and
416
+ it won't trip the allowlist.
417
+
418
+ **Looking at the filesystem**
419
+ - Read a file → `read_file` (use `start_line`/`end_line` or `tail` for big files). Not `bash cat`/`head`/`tail`.
420
+ - See a project's shape → `tree`. List one dir → `list_directory`. Find by name → `glob`. Not `bash ls`/`find`.
421
+ - Search file contents → `grep_files` (add `context` for surrounding lines). In a git repo, `git_grep` is faster and respects tracking. Not `bash grep`/`rg`.
422
+
423
+ **Reading structured data**
424
+ - JSON → `json_query`; YAML → `yaml_query`; TOML → `toml_query`; CSV → `csv_read`. These return typed extractions via dot/bracket paths. Do **not** shell out to `jq`/`yq` or hand-parse with regex.
425
+
426
+ **Changing files**
427
+ - One precise change → `edit_file` (exact-once match; include surrounding context to disambiguate).
428
+ - Several changes to the same file → `multi_edit` (atomic; one call).
429
+ - The same change across many files → `replace_in_files` (try `dry_run: true` first).
430
+ - Create or fully rewrite a file → `write_file`.
431
+ - You have a unified diff → `apply_patch`.
432
+ - Avoid `bash sed`/`awk` for edits; the typed tools are jailed and reversible-by-diff.
433
+
434
+ **Working with git**
435
+ - Status, history, diffs, blame, branches → the `git_*` tools. They are hardened against malicious repo configs in a way `bash git` is not.
436
+
437
+ **Touching the network**
438
+ - Read one page → `web_fetch`. Discover sources → `web_search`. Call an API → `http_request`. Save a file → `download_file`. All are SSRF-guarded. Do **not** `bash curl`/`wget` — those bypass the guard and need allowlisting.
439
+
440
+ **Running code**
441
+ - Ruby/Python/Rust snippets → `run_ruby`/`run_python`/`run_rust` (sandboxed). Tests → `run_tests`/`python_tests`. Lint → `lint`. Dependencies → `bundle`. Prefer these over `bash ruby`/`python`, which run unsandboxed and need allowlisting.
442
+ - Arithmetic → `calculator`, never code execution.
443
+
444
+ **Long-running commands**
445
+ - A server, watcher, or anything that doesn't return promptly → `process_start`, then `process_output` to follow it, then `process_kill`. Never start these with `bash` — `bash` is one-shot and will block until it times out.
446
+
447
+ **Planning**
448
+ - Multi-step work → `todo_write` to track it; pass the full list on each call and update statuses (pending / in_progress / completed).
449
+
450
+ **When to use `bash` at all**
451
+ - Only for something with no dedicated tool, and only once the operator has allowlisted the executable. It takes a program plus an argument array — there is no shell, so pipes, redirects, globs, and `$()` won't work; compose multiple tool calls instead.
452
+
453
+ ---
454
+
455
+ ## The return contract and error codes
456
+
457
+ Every tool returns **either**:
458
+
459
+ - a `String` on success, or
460
+ - a `Hash` `{ error: "human-readable message", code: :symbol }` on failure.
461
+
462
+ Branch on the presence of `:error` (or check `result.is_a?(Hash)`). The `code` is
463
+ a stable symbol you can match programmatically. Common codes you'll encounter:
464
+
465
+ | Code | Meaning |
466
+ | --- | --- |
467
+ | `:exec_disabled` | An exec tool was called while the exec gate is closed. Turn on `enable_exec_tools` to open it. |
468
+ | `:path_denied` / `:bad_path` | A path fell outside `fs_root` (or was malformed). |
469
+ | `:command_denied` | The executable isn't on `allowed_commands`. |
470
+ | `:too_many_processes` | `max_processes` reached; kill some first. |
471
+ | `:not_found` | Unknown process id (or missing target). |
472
+ | `:url_blocked` | The SSRF guard rejected a host/IP. |
473
+ | `:http_error` / `:fetch_failed` / `:request_failed` | Network/HTTP failure. |
474
+ | `:regex_timeout` | A pattern hit the ReDoS ceiling. |
475
+ | `:no_api_key` | A search adapter is missing its credential. |
476
+ | `:search_failed` | Search backend error or unknown adapter. |
477
+ | `:sandbox_unavailable` | No sandbox runtime available for code execution. |
478
+ | `:ambiguous` / `:edit_failed` / `:no_change` | An edit didn't match uniquely, failed, or was a no-op. |
479
+ | `:bad_json` / `:bad_yaml` / `:bad_toml` / `:bad_csv` | A structured-data input didn't parse. |
480
+ | `:refused` | An `unsafe:` request was made while `allow_unsafe` is off. |
481
+
482
+ (The full vocabulary is larger and tool-specific; the above are the ones worth
483
+ handling explicitly. Treat any unrecognized `code` as a soft failure and surface
484
+ the `error` message.)
485
+
486
+ ---
487
+
488
+ ## Token budgeting
489
+
490
+ Tool output can be large — a file, a diff, a command's stdout. Rather than let one
491
+ call flood the context window, every tool truncates its output to
492
+ `max_output_tokens`, counted with `ruby_llm-tokenizer` using `tokenizer_model` so
493
+ the count matches your actual model. Truncated output ends with a clear marker.
494
+
495
+ Practical implications for the agent:
496
+ - For large files, pass `start_line`/`end_line` or `tail` to `read_file` instead
497
+ of reading the whole thing.
498
+ - Narrow `grep_files` with a tighter pattern or a path glob rather than scanning
499
+ everything; results are also capped at `max_grep_matches`.
500
+ - Prefer `tree` (depth-limited) over a deep recursive listing.
501
+
502
+ Set `max_output_tokens` to a fraction of your model's window so several tool
503
+ calls can coexist in one turn.
504
+
505
+ ---
506
+
507
+ ## Recipes
508
+
509
+ **Read-only code investigation (no exec needed)**
510
+ ```
511
+ tree depth:2 # get the lay of the land
512
+ grep_files pattern:"def process" # find a definition
513
+ read_file path:"lib/x.rb" start_line:40 end_line:90
514
+ parse_ruby path:"lib/x.rb" # structural outline
515
+ git_log path:"lib/x.rb" # how it got here
516
+ git_blame path:"lib/x.rb" # who changed the suspicious line
517
+ ```
518
+
519
+ **Make a change and verify it (exec enabled)**
520
+ ```
521
+ read_file path:"lib/x.rb" # confirm exact text
522
+ edit_file path:"lib/x.rb" old_string:"..." new_string:"..."
523
+ run_tests # or python_tests
524
+ lint # style check
525
+ git_diff # review
526
+ git_add paths:["lib/x.rb"]
527
+ git_commit message:"Fix ..."
528
+ ```
529
+
530
+ **Run a dev server while you work**
531
+ ```
532
+ process_start command:"ruby" args:["bin/server"] name:"web"
533
+ # ... edit files, run tests ...
534
+ process_output id:"proc_1" # check it's healthy / read logs
535
+ process_kill id:"proc_1" # when done
536
+ ```
537
+
538
+ **Fetch and use external data**
539
+ ```
540
+ web_search query:"library X changelog 4.0"
541
+ web_fetch url:"https://.../CHANGELOG.md"
542
+ http_request url:"https://api.example.com/v1/status" # GET is safe
543
+ download_file url:"https://.../data.csv" path:"tmp/data.csv" # exec-gated tool
544
+ csv_read path:"tmp/data.csv"
545
+ ```
546
+
547
+ **Project-wide rename (cautiously)**
548
+ ```
549
+ replace_in_files glob:"**/*.rb" pattern:"OldName" replacement:"NewName" dry_run:true
550
+ # review the report, then:
551
+ replace_in_files glob:"**/*.rb" pattern:"OldName" replacement:"NewName"
552
+ run_tests
553
+ ```
554
+
555
+ ---
556
+
557
+ ## Operational notes and honest limitations
558
+
559
+ - **`fs_root` is the boundary that matters most.** Set it deliberately to the
560
+ project you want the agent working in. Everything filesystem-related is
561
+ measured against it.
562
+ - **An empty `allowed_commands` means `bash` and `process_start` can run
563
+ nothing.** That's intentional — opt in to exactly the executables you trust.
564
+ - **Host-process sandboxes leave the host FS readable.** Bubblewrap and Seatbelt
565
+ contain writes and network but not reads; use Docker (or masks) when
566
+ read-confidentiality matters. See [Sandboxing](#sandboxing-code-execution).
567
+ - **`process_kill`'s full descendant reaping depends on the OS.** The
568
+ implementation is standard (process-group signal + `/proc` descendant sweep +
569
+ TERM→KILL escalation) and reaps a whole tree on a real Linux host and in CI.
570
+ Some restricted container runtimes don't deliver process-group signals to
571
+ non-leader members; there, deeply nested grandchildren may only be caught by
572
+ the per-pid sweep, which works only where `/proc` is present.
573
+ - **The default search provider needs a key.** Out of the box `web_search` is
574
+ Tavily; with no `tavily_api_key` it returns `:no_api_key`. Switch to `:searxng`
575
+ for a keyless, self-hosted option.
576
+ - **Requires Ruby ≥ 3.3.** `parse_ruby` uses Prism (bundled with Ruby 3.3+)
577
+ with a Ripper fallback for non-MRI runtimes.
578
+
579
+ ---
580
+
581
+ ## Using this guide as agent context
582
+
583
+ The [Decision rules](#decision-rules-reach-for-x-not-y) and
584
+ [Tool catalog](#tool-catalog) sections are written to be dropped into an agent's
585
+ system prompt directly — they tell the model which tool to reach for and why,
586
+ which measurably improves tool selection over exposing the raw tool schemas
587
+ alone. A compact prompt-ready summary:
588
+
589
+ > You have a toolbox of typed tools. Always prefer the specific tool over `bash`:
590
+ > `read_file`/`tree`/`glob`/`grep_files` for the filesystem;
591
+ > `json_query`/`yaml_query`/`toml_query`/`csv_read` for structured data;
592
+ > `edit_file`/`multi_edit`/`replace_in_files`/`write_file`/`apply_patch` for changes;
593
+ > the `git_*` tools for version control; `web_fetch`/`web_search`/`http_request`/`download_file` for the network;
594
+ > `run_ruby`/`run_python`/`run_rust`/`run_tests`/`python_tests`/`lint`/`bundle` for code and toolchain;
595
+ > `process_start`/`process_output`/`process_kill` for anything long-running;
596
+ > `calculator` for arithmetic; `todo_write` to plan multi-step work.
597
+ > Reserve `bash` for tasks with no dedicated tool. Tools return a string on
598
+ > success or `{ error:, code: }` on failure — read the `code` and adjust.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 washu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.