ruby_llm-toolbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +49 -0
- data/GUIDE.md +598 -0
- data/LICENSE +21 -0
- data/README.md +412 -0
- data/bin/verify_prism_parity +112 -0
- data/lib/ruby_llm/toolbox/base.rb +112 -0
- data/lib/ruby_llm/toolbox/configuration.rb +148 -0
- data/lib/ruby_llm/toolbox/data_path.rb +54 -0
- data/lib/ruby_llm/toolbox/process_registry.rb +226 -0
- data/lib/ruby_llm/toolbox/process_runner.rb +72 -0
- data/lib/ruby_llm/toolbox/ruby_outline.rb +213 -0
- data/lib/ruby_llm/toolbox/safe_math.rb +182 -0
- data/lib/ruby_llm/toolbox/safety/command_guard.rb +42 -0
- data/lib/ruby_llm/toolbox/safety/path_jail.rb +55 -0
- data/lib/ruby_llm/toolbox/safety/url_guard.rb +111 -0
- data/lib/ruby_llm/toolbox/sandbox/base.rb +151 -0
- data/lib/ruby_llm/toolbox/sandbox/bubblewrap.rb +70 -0
- data/lib/ruby_llm/toolbox/sandbox/docker.rb +69 -0
- data/lib/ruby_llm/toolbox/sandbox/sandbox_exec.rb +75 -0
- data/lib/ruby_llm/toolbox/search/brave.rb +64 -0
- data/lib/ruby_llm/toolbox/search/searxng.rb +64 -0
- data/lib/ruby_llm/toolbox/search/tavily.rb +70 -0
- data/lib/ruby_llm/toolbox/text_diff.rb +81 -0
- data/lib/ruby_llm/toolbox/toml.rb +409 -0
- data/lib/ruby_llm/toolbox/tools/apply_patch.rb +92 -0
- data/lib/ruby_llm/toolbox/tools/bash_tool.rb +101 -0
- data/lib/ruby_llm/toolbox/tools/bundle.rb +71 -0
- data/lib/ruby_llm/toolbox/tools/calculator.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/create_directory.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/csv_read.rb +69 -0
- data/lib/ruby_llm/toolbox/tools/csv_write.rb +51 -0
- data/lib/ruby_llm/toolbox/tools/date_time.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/delete_file.rb +64 -0
- data/lib/ruby_llm/toolbox/tools/diff.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/download_file.rb +55 -0
- data/lib/ruby_llm/toolbox/tools/edit_file.rb +82 -0
- data/lib/ruby_llm/toolbox/tools/gem_tool.rb +140 -0
- data/lib/ruby_llm/toolbox/tools/git_add.rb +46 -0
- data/lib/ruby_llm/toolbox/tools/git_blame.rb +58 -0
- data/lib/ruby_llm/toolbox/tools/git_branch.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/git_checkout.rb +43 -0
- data/lib/ruby_llm/toolbox/tools/git_commit.rb +47 -0
- data/lib/ruby_llm/toolbox/tools/git_diff.rb +50 -0
- data/lib/ruby_llm/toolbox/tools/git_grep.rb +66 -0
- data/lib/ruby_llm/toolbox/tools/git_helpers.rb +68 -0
- data/lib/ruby_llm/toolbox/tools/git_log.rb +47 -0
- data/lib/ruby_llm/toolbox/tools/git_show.rb +48 -0
- data/lib/ruby_llm/toolbox/tools/git_status.rb +27 -0
- data/lib/ruby_llm/toolbox/tools/glob.rb +62 -0
- data/lib/ruby_llm/toolbox/tools/grep_files.rb +221 -0
- data/lib/ruby_llm/toolbox/tools/http_helpers.rb +130 -0
- data/lib/ruby_llm/toolbox/tools/http_request.rb +75 -0
- data/lib/ruby_llm/toolbox/tools/json_query.rb +69 -0
- data/lib/ruby_llm/toolbox/tools/lint.rb +67 -0
- data/lib/ruby_llm/toolbox/tools/list_directory.rb +87 -0
- data/lib/ruby_llm/toolbox/tools/move_file.rb +54 -0
- data/lib/ruby_llm/toolbox/tools/multi_edit.rb +107 -0
- data/lib/ruby_llm/toolbox/tools/parse_ruby.rb +111 -0
- data/lib/ruby_llm/toolbox/tools/process_kill.rb +41 -0
- data/lib/ruby_llm/toolbox/tools/process_list.rb +29 -0
- data/lib/ruby_llm/toolbox/tools/process_output.rb +55 -0
- data/lib/ruby_llm/toolbox/tools/process_start.rb +109 -0
- data/lib/ruby_llm/toolbox/tools/python_tests.rb +77 -0
- data/lib/ruby_llm/toolbox/tools/read_file.rb +75 -0
- data/lib/ruby_llm/toolbox/tools/replace_in_files.rb +139 -0
- data/lib/ruby_llm/toolbox/tools/run_python.rb +38 -0
- data/lib/ruby_llm/toolbox/tools/run_ruby.rb +37 -0
- data/lib/ruby_llm/toolbox/tools/run_rust.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/run_tests.rb +81 -0
- data/lib/ruby_llm/toolbox/tools/sandbox_run.rb +40 -0
- data/lib/ruby_llm/toolbox/tools/todo_write.rb +57 -0
- data/lib/ruby_llm/toolbox/tools/toml_query.rb +70 -0
- data/lib/ruby_llm/toolbox/tools/toolchain_helpers.rb +62 -0
- data/lib/ruby_llm/toolbox/tools/tree.rb +87 -0
- data/lib/ruby_llm/toolbox/tools/web_fetch.rb +77 -0
- data/lib/ruby_llm/toolbox/tools/web_search.rb +81 -0
- data/lib/ruby_llm/toolbox/tools/write_file.rb +52 -0
- data/lib/ruby_llm/toolbox/tools/yaml_query.rb +73 -0
- data/lib/ruby_llm/toolbox/truncator.rb +68 -0
- data/lib/ruby_llm/toolbox/version.rb +7 -0
- data/lib/ruby_llm/toolbox.rb +161 -0
- metadata +194 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rbconfig"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Toolbox
|
|
7
|
+
# Pluggable sandboxes for the code-execution tools (run_ruby/run_python/
|
|
8
|
+
# run_rust). Each backend exposes the same contract:
|
|
9
|
+
#
|
|
10
|
+
# available? -> true if it can run here
|
|
11
|
+
# run(argv, stdin:, image: nil) -> [stdout, stderr, status]
|
|
12
|
+
# command(argv, image: nil) -> the fully wrapped argv (for tests)
|
|
13
|
+
#
|
|
14
|
+
# `image` is only meaningful for Docker; the host-process sandboxes
|
|
15
|
+
# (Bubblewrap, sandbox-exec) ignore it and run the host's interpreters.
|
|
16
|
+
#
|
|
17
|
+
# Sandbox.build(config) returns the active backend based on
|
|
18
|
+
# config.sandbox_runtime (:auto by default): on Linux it prefers bubblewrap,
|
|
19
|
+
# on macOS sandbox-exec, falling back to Docker, then to a Null backend that
|
|
20
|
+
# reports unavailability cleanly.
|
|
21
|
+
module Sandbox
|
|
22
|
+
class Unavailable < StandardError; end
|
|
23
|
+
|
|
24
|
+
module_function
|
|
25
|
+
|
|
26
|
+
# Host operating-system identifier as reported by RbConfig, always a String
# (empty when RbConfig carries no "host_os" entry).
def host_os
  String(RbConfig::CONFIG["host_os"])
end
|
|
29
|
+
|
|
30
|
+
# True when the host OS identifier contains "linux".
def linux?
  host_os.match?(/linux/)
end
|
|
33
|
+
|
|
34
|
+
# True when the host OS identifier mentions "darwin" or "mac os"; always
# returns a strict boolean.
def macos?
  host_os.match?(/darwin|mac os/)
end
|
|
37
|
+
|
|
38
|
+
# Returns the backend selected by config.sandbox_runtime. The four explicit
# symbols map straight to their backend class; any other value (including
# :auto) is resolved by #detect.
def build(config)
  backend_class =
    case config.sandbox_runtime
    when :docker then Docker
    when :bubblewrap then Bubblewrap
    when :sandbox_exec then SandboxExec
    when :none then Null
    end

  backend_class ? backend_class.new(config) : detect(config)
end
|
|
47
|
+
|
|
48
|
+
# :auto — return the first backend that reports itself available, trying the
# platform's native lightweight sandbox before Docker. When none is usable a
# Null backend is returned. Backends are instantiated one at a time, only as
# far as needed to find an available one.
def detect(config)
  native =
    if macos?
      SandboxExec
    elsif linux?
      Bubblewrap
    end

  usable = [native, Docker].compact.lazy
                           .map { |klass| klass.new(config) }
                           .find(&:available?)
  usable || Null.new(config)
end
|
|
66
|
+
|
|
67
|
+
# Shared behavior for every backend: stores the configuration, provides
# placeholder implementations of the backend contract, and offers the
# environment / rlimit helpers used by the host-process sandboxes.
class Base
  attr_reader :config

  def initialize(config)
    @config = config
  end

  # Lower-cased last segment of the class name (e.g. "base").
  def name
    self.class.name.rpartition("::").last.downcase
  end

  # Backends override this; the base class is never available.
  def available?
    false
  end

  # Backends override this to execute the command.
  def run(_argv, stdin: nil, image: nil)
    raise NotImplementedError
  end

  # Backends override this to return the wrapped argv.
  def command(_argv, image: nil)
    raise NotImplementedError
  end

  private

  # A minimal, non-secret environment for host-process sandboxes: PATH and
  # the locale variables are copied from the current process when set, while
  # HOME and TMPDIR always point at /tmp. Nothing else is passed through.
  def sandbox_env
    env = { "HOME" => "/tmp", "TMPDIR" => "/tmp" }
    %w[PATH LANG LC_ALL].each do |key|
      value = ENV[key]
      env[key] = value if value
    end
    env
  end

  # Resource caps in the form of Process.spawn rlimit options. The address
  # space limit comes from config.sandbox_memory (omitted when it cannot be
  # parsed); the CPU-seconds limit is config.command_timeout plus two seconds
  # of grace (omitted when no timeout is configured).
  def spawn_rlimits
    memory_cap = parse_memory_bytes(config.sandbox_memory)
    timeout = config.command_timeout

    caps = {}
    caps[:rlimit_as] = memory_cap if memory_cap
    caps[:rlimit_cpu] = timeout.to_i + 2 if timeout
    caps
  end

  # Converts a size such as "512m", "1g", "64 KB" or "1024" into bytes.
  # Accepts an integer count with an optional k/m/g suffix (case-insensitive,
  # optional trailing "b"); returns nil for anything else.
  def parse_memory_bytes(value)
    match = /\A(\d+)\s*([kmg])?b?\z/.match(value.to_s.strip.downcase)
    return nil unless match

    exponent = { "k" => 1, "m" => 2, "g" => 3 }.fetch(match[2], 0)
    match[1].to_i * (1024**exponent)
  end
end
|
|
133
|
+
|
|
134
|
+
# Fallback backend used when no sandbox runtime is available, so callers get
# the same Unavailable error path as with a real backend that is missing.
class Null < Base
  # Never available.
  def available?
    false
  end

  # Always raises Unavailable; nothing is executed.
  def run(_argv, stdin: nil, image: nil)
    reason = "no sandbox runtime available (need docker, or bubblewrap on Linux / sandbox-exec on macOS)"
    raise Unavailable, reason
  end

  # Returns the argv coerced to an Array, without any wrapping.
  def command(argv, image: nil)
    Array(argv)
  end
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ruby_llm/toolbox/process_runner"
|
|
4
|
+
require "ruby_llm/toolbox/sandbox/base"
|
|
5
|
+
|
|
6
|
+
module RubyLLM
|
|
7
|
+
module Toolbox
|
|
8
|
+
module Sandbox
|
|
9
|
+
# Linux host-process sandbox via bubblewrap (bwrap). No daemon, no image:
|
|
10
|
+
# it runs the host's interpreters inside fresh namespaces. Isolation:
|
|
11
|
+
#
|
|
12
|
+
# --unshare-all new PID/IPC/UTS/cgroup/user/NET namespaces -> no network
|
|
13
|
+
# --die-with-parent dies if the toolbox process is killed (enforces timeout)
|
|
14
|
+
# --ro-bind / / the host filesystem, read-only (so any interpreter
|
|
15
|
+
# path resolves) — nothing on the host can be written
|
|
16
|
+
# --proc/--dev fresh /proc and a minimal /dev
|
|
17
|
+
# --tmpfs /tmp ... the only writable space, in memory
|
|
18
|
+
#
|
|
19
|
+
# Memory/CPU caps are applied as rlimits (inherited by the child), since
|
|
20
|
+
# bwrap doesn't do cgroup limits itself.
|
|
21
|
+
#
|
|
22
|
+
# Note: unlike Docker, the host filesystem is *readable* (read-only) inside
|
|
23
|
+
# the sandbox. For read-confidentiality on a host with secrets, prefer
|
|
24
|
+
# Docker, or add masks (e.g. "--tmpfs", "/home") via config.sandbox_bwrap_extra.
|
|
25
|
+
class Bubblewrap < Base
  Unavailable = Sandbox::Unavailable

  # Truthy only on Linux hosts where `bwrap --version` runs successfully.
  # Any StandardError raised while probing is treated as "not available".
  def available?
    Sandbox.linux? && system("bwrap", "--version", out: File::NULL, err: File::NULL)
  rescue StandardError
    false
  end

  # Runs command_argv inside bwrap through ProcessRunner.capture, with the
  # minimal sandbox environment, the configured timeout and rlimit caps.
  # `image` is accepted for interface parity and forwarded to #command, which
  # does not use it. Raises Unavailable when bwrap cannot be used.
  def run(command_argv, stdin: nil, image: nil)
    unless available?
      raise Unavailable, "bubblewrap (bwrap) is not available on this Linux host"
    end

    wrapped = command(command_argv, image: image)
    ProcessRunner.capture(
      wrapped,
      env: sandbox_env,
      stdin: stdin,
      timeout: config.command_timeout,
      unsetenv_others: true,
      rlimits: spawn_rlimits
    )
  end

  # Builds the full bwrap argv: isolation flags, filesystem layout, any
  # operator-supplied extras from config.sandbox_bwrap_extra, then "--" and
  # the command itself.
  def command(command_argv, image: nil)
    isolation = ["--unshare-all", "--die-with-parent", "--new-session"]
    filesystem = [
      "--ro-bind", "/", "/",
      "--proc", "/proc",
      "--dev", "/dev",
      "--tmpfs", "/tmp",
      "--tmpfs", "/run",
      "--tmpfs", "/dev/shm"
    ]
    extras = Array(config.sandbox_bwrap_extra)

    ["bwrap", *isolation, *filesystem, "--chdir", "/tmp", *extras, "--", *command_argv]
  end
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ruby_llm/toolbox/process_runner"
|
|
4
|
+
require "ruby_llm/toolbox/sandbox/base"
|
|
5
|
+
|
|
6
|
+
module RubyLLM
|
|
7
|
+
module Toolbox
|
|
8
|
+
module Sandbox
|
|
9
|
+
# Runs a command inside a hardened, ephemeral Docker container. The host
|
|
10
|
+
# docker client runs with the normal environment (so DOCKER_HOST etc. are
|
|
11
|
+
# honored); all isolation is applied to the container:
|
|
12
|
+
#
|
|
13
|
+
# --rm ephemeral, removed on exit
|
|
14
|
+
# --network none no network by default
|
|
15
|
+
# --read-only + tmpfs /tmp immutable root, scratch space only in tmpfs
|
|
16
|
+
# --cap-drop ALL no Linux capabilities
|
|
17
|
+
# --security-opt no-new-privileges
|
|
18
|
+
# --user <uid:gid> non-root
|
|
19
|
+
# --memory/--cpus/--pids-limit resource caps
|
|
20
|
+
#
|
|
21
|
+
# The program is array-form (no shell). Source is fed on stdin, so nothing
|
|
22
|
+
# from the host filesystem is mounted into the container. Unlike the
|
|
23
|
+
# host-process backends, the container only sees its image — not the host.
|
|
24
|
+
class Docker < Base
  # Kept for backwards compatibility; the canonical error is
  # Sandbox::Unavailable.
  Unavailable = Sandbox::Unavailable

  # Truthy when `docker version` runs successfully on the host. Any
  # StandardError raised while probing is treated as "not available".
  def available?
    system("docker", "version", out: File::NULL, err: File::NULL)
  rescue StandardError
    false
  end

  # Runs command_argv in an ephemeral container through ProcessRunner.capture
  # with the configured timeout. `image` overrides config.docker_image when
  # given. Raises Unavailable when docker cannot be used.
  def run(command_argv, stdin: nil, image: nil)
    raise Unavailable, "docker was not found or is not running on the host" unless available?

    ProcessRunner.capture(
      command(command_argv, image: image),
      stdin: stdin,
      timeout: config.command_timeout,
      unsetenv_others: false # the docker *client* needs the host env
    )
  end

  # Exposed so the exact isolation flags can be asserted in tests.
  #
  # Every configured flag value is stringified, so settings supplied as a
  # Symbol or Integer (e.g. sandbox_network = :none, sandbox_memory = 536870912)
  # yield a valid all-String argv, matching how --cpus and --pids-limit are
  # already handled.
  def command(command_argv, image: nil)
    [
      "docker", "run", "--rm", "-i",
      "--network", config.sandbox_network.to_s,
      "--memory", config.sandbox_memory.to_s,
      "--cpus", config.sandbox_cpus.to_s,
      "--pids-limit", config.sandbox_pids.to_s,
      "--read-only",
      "--tmpfs", "/tmp:rw,size=64m",
      "--cap-drop", "ALL",
      "--security-opt", "no-new-privileges",
      "--user", config.sandbox_user.to_s,
      image || config.docker_image,
      *command_argv
    ]
  end

  # Backwards-compatible alias.
  alias docker_argv command
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ruby_llm/toolbox/process_runner"
|
|
4
|
+
require "ruby_llm/toolbox/sandbox/base"
|
|
5
|
+
|
|
6
|
+
module RubyLLM
|
|
7
|
+
module Toolbox
|
|
8
|
+
module Sandbox
|
|
9
|
+
# macOS host-process sandbox via sandbox-exec (Seatbelt). No daemon, no
|
|
10
|
+
# image: it runs the host's interpreters under a Seatbelt profile that
|
|
11
|
+
# denies by default, blocks all network, allows reading the system, and
|
|
12
|
+
# permits writes only to temp directories. Memory/CPU caps are applied as
|
|
13
|
+
# rlimits.
|
|
14
|
+
#
|
|
15
|
+
# sandbox-exec is deprecated by Apple but remains present and functional on
|
|
16
|
+
# current macOS. As with bubblewrap, the filesystem is readable (read-only
|
|
17
|
+
# for writes) inside the sandbox; use Docker for full read-confidentiality.
|
|
18
|
+
class SandboxExec < Base
  Unavailable = Sandbox::Unavailable

  # Absolute path of the Seatbelt launcher. Used both for the availability
  # check and for the command that is actually run, so the two cannot diverge.
  BINARY = "/usr/bin/sandbox-exec"

  # Default Seatbelt profile: deny everything, then allow process
  # fork/exec, self-signalling, sysctl reads, mach lookups, reading any file,
  # and writing only under the temp directories and a few /dev nodes.
  # Network access is denied explicitly.
  DEFAULT_PROFILE = <<~SBPL
    (version 1)
    (deny default)
    (allow process-fork)
    (allow process-exec)
    (allow signal (target self))
    (allow sysctl-read)
    (allow mach-lookup)
    (allow file-read*)
    (allow file-write*
    (subpath "/tmp")
    (subpath "/private/tmp")
    (subpath "/private/var/tmp")
    (subpath "/private/var/folders")
    (literal "/dev/null")
    (literal "/dev/zero")
    (literal "/dev/random")
    (literal "/dev/urandom")
    (literal "/dev/dtracehelper"))
    (deny network*)
  SBPL

  # True only on macOS hosts where BINARY exists and is executable.
  def available?
    return false unless Sandbox.macos?

    File.executable?(BINARY)
  end

  # Runs command_argv under sandbox-exec through ProcessRunner.capture, with
  # the minimal sandbox environment, the configured timeout and rlimit caps.
  # `image` is accepted for interface parity and forwarded to #command, which
  # does not use it. Raises Unavailable when sandbox-exec cannot be used.
  def run(command_argv, stdin: nil, image: nil)
    raise Unavailable, "sandbox-exec is not available on this host" unless available?

    ProcessRunner.capture(
      command(command_argv, image: image),
      env: sandbox_env,
      stdin: stdin,
      timeout: config.command_timeout,
      unsetenv_others: true,
      rlimits: spawn_rlimits
    )
  end

  # Builds the wrapped argv. The launcher is referenced by its absolute path
  # (the same one #available? verified) rather than resolved through PATH, so
  # a different `sandbox-exec` earlier on PATH can never be picked up.
  def command(command_argv, image: nil)
    [BINARY, "-p", profile, *command_argv]
  end

  # The Seatbelt profile text: config.sandbox_seatbelt_profile when it is set
  # to something non-blank, otherwise DEFAULT_PROFILE.
  def profile
    prof = config.sandbox_seatbelt_profile
    prof && !prof.to_s.strip.empty? ? prof.to_s : DEFAULT_PROFILE
  end
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
require "ruby_llm/toolbox/search/tavily"
|
|
7
|
+
|
|
8
|
+
module RubyLLM
|
|
9
|
+
module Toolbox
|
|
10
|
+
module Search
|
|
11
|
+
# Brave Search API adapter — a commercial drop-in alternative to Tavily.
# Auth is a subscription token sent in the X-Subscription-Token header. The
# basic plan has no synthesized answer, so #search returns answer: nil and
# a list of web results.
#
# Select it with config.search_adapter = :brave and config.brave_api_key.
class Brave
  ENDPOINT = "https://api.search.brave.com/res/v1/web/search"

  # api_key    - Brave subscription token (required for #search).
  # user_agent - optional User-Agent header value.
  # timeout    - open/read timeout in seconds.
  def initialize(api_key:, user_agent: nil, timeout: 10)
    @api_key = api_key
    @user_agent = user_agent
    @timeout = timeout
  end

  # Runs a web search and returns { answer: nil, results: [...] } where each
  # result is { title:, url:, content: }. Raises Error when the API key is
  # missing or the request fails.
  def search(query, max_results: 5)
    raise Error, "missing Brave API key" if @api_key.nil? || @api_key.to_s.empty?

    # Brave caps count at 20, so clamp the request to what the API accepts.
    count = max_results.clamp(1, 20)
    data = get_json(ENDPOINT, q: query, count: count)

    results = Array(data.dig("web", "results")).map do |r|
      { title: r["title"], url: r["url"], content: r["description"] }
    end
    { answer: nil, results: results }
  end

  # Seam for tests. Performs the GET and returns the parsed JSON body.
  # Non-2xx responses, invalid JSON and transport failures are all re-raised
  # as Error so callers only have to rescue one type.
  def get_json(url, params)
    uri = URI.parse(url)
    uri.query = URI.encode_www_form(params)
    request = Net::HTTP::Get.new(uri)
    request["Accept"] = "application/json"
    request["X-Subscription-Token"] = @api_key
    request["User-Agent"] = @user_agent if @user_agent

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: true,
                               open_timeout: @timeout, read_timeout: @timeout) do |http|
      http.request(request)
    end

    raise Error, "Brave returned HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    JSON.parse(response.body)
  rescue JSON::ParserError => e
    raise Error, "invalid JSON from Brave (#{e.message})"
  rescue SocketError, SystemCallError, IOError, Timeout::Error => e
    # SystemCallError covers Errno::ECONNREFUSED/ECONNRESET/...; IOError covers
    # EOFError; Timeout::Error covers Net::OpenTimeout and Net::ReadTimeout.
    raise Error, e.message
  end
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
require "ruby_llm/toolbox/search/tavily"
|
|
7
|
+
|
|
8
|
+
module RubyLLM
|
|
9
|
+
module Toolbox
|
|
10
|
+
module Search
|
|
11
|
+
# SearXNG adapter — a self-hosted, keyless metasearch alternative. Point it
|
|
12
|
+
# at your own instance with config.searxng_url; no API key or third-party
|
|
13
|
+
# account is involved, which is the whole appeal. SearXNG's JSON response
|
|
14
|
+
# often carries instant "answers", which are surfaced as the answer field.
|
|
15
|
+
#
|
|
16
|
+
# Select it with config.search_adapter = :searxng and config.searxng_url.
|
|
17
|
+
#
|
|
18
|
+
# The base URL is operator-configured infrastructure (frequently on a
|
|
19
|
+
# private network), so it is deliberately NOT run through the SSRF guard:
|
|
20
|
+
# reaching an internal SearXNG host is the intended behavior, not an attack.
|
|
21
|
+
class SearXNG
  # base_url   - root URL of the SearXNG instance; trailing slashes are dropped.
  # user_agent - optional User-Agent header value.
  # timeout    - open/read timeout in seconds.
  def initialize(base_url:, user_agent: nil, timeout: 10)
    @base_url = base_url.to_s.sub(%r{/+\z}, "")
    @user_agent = user_agent
    @timeout = timeout
  end

  # Queries "<base_url>/search" with format=json and returns
  # { answer:, results: } where results holds at most max_results entries of
  # { title:, url:, content: } and answer joins the instance's non-empty
  # "answers" (nil when there are none). Raises Error when the URL is missing
  # or the request fails.
  def search(query, max_results: 5)
    raise Error, "missing SearXNG URL (set config.searxng_url)" if @base_url.empty?

    data = get_json("#{@base_url}/search", q: query, format: "json")

    results = Array(data["results"]).first(max_results).map do |r|
      { title: r["title"], url: r["url"], content: r["content"] }
    end
    answer = Array(data["answers"]).map(&:to_s).reject(&:empty?).join(" ")
    { answer: (answer.empty? ? nil : answer), results: results }
  end

  # Seam for tests. Performs the GET and returns the parsed JSON body.
  # A malformed or non-HTTP URL, non-2xx responses, invalid JSON and transport
  # failures are all re-raised as Error so callers only have to rescue one
  # type.
  def get_json(url, params)
    uri = URI.parse(url)
    # A scheme-less value such as "localhost:8080" parses to a non-HTTP URI;
    # reject it here instead of letting Net::HTTP raise ArgumentError.
    unless uri.is_a?(URI::HTTP) && uri.host
      raise Error, "invalid SearXNG URL (expected http:// or https://): #{url}"
    end

    uri.query = URI.encode_www_form(params)
    request = Net::HTTP::Get.new(uri)
    request["Accept"] = "application/json"
    request["User-Agent"] = @user_agent if @user_agent

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
                               open_timeout: @timeout, read_timeout: @timeout) do |http|
      http.request(request)
    end

    raise Error, "SearXNG returned HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    JSON.parse(response.body)
  rescue JSON::ParserError => e
    raise Error, "invalid JSON from SearXNG (#{e.message}) — is format=json enabled on the instance?"
  rescue URI::InvalidURIError => e
    raise Error, "invalid SearXNG URL (#{e.message})"
  rescue SocketError, SystemCallError, IOError, Timeout::Error => e
    # SystemCallError covers Errno::ECONNREFUSED (instance down) and friends;
    # IOError covers EOFError; Timeout::Error covers the Net::*Timeout classes.
    raise Error, e.message
  end
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
|
|
7
|
+
module RubyLLM
|
|
8
|
+
module Toolbox
|
|
9
|
+
module Search
|
|
10
|
+
# Base error for any search adapter, so the web_search tool can rescue one
|
|
11
|
+
# type regardless of backend.
|
|
12
|
+
# Adapters raise this for missing configuration, non-success HTTP statuses,
# unparseable JSON and wrapped transport failures.
class Error < StandardError; end
|
|
13
|
+
|
|
14
|
+
# Default web_search backend. Tavily is built for agent use: one call
|
|
15
|
+
# returns cleaned result content and an optional synthesized answer. Swap
|
|
16
|
+
# in another adapter via config.search_adapter (anything responding to
|
|
17
|
+
# #search(query, max_results:) and returning { answer:, results: }).
|
|
18
|
+
class Tavily
  ENDPOINT = "https://api.tavily.com/search"

  # api_key    - Tavily API key (required for #search).
  # user_agent - optional User-Agent header value.
  # timeout    - open/read timeout in seconds.
  def initialize(api_key:, user_agent: nil, timeout: 10)
    @api_key = api_key
    @user_agent = user_agent
    @timeout = timeout
  end

  # Runs a search with include_answer enabled and returns
  # { answer:, results: } where each result is { title:, url:, content: }.
  # Raises Error when the API key is missing or the request fails.
  def search(query, max_results: 5)
    raise Error, "missing Tavily API key" if @api_key.nil? || @api_key.to_s.empty?

    data = post_json(ENDPOINT, {
                       api_key: @api_key,
                       query: query,
                       max_results: max_results,
                       include_answer: true
                     })

    {
      answer: data["answer"],
      results: Array(data["results"]).map do |r|
        { title: r["title"], url: r["url"], content: r["content"] }
      end
    }
  end

  # Seam for tests. POSTs the payload as JSON and returns the parsed JSON
  # body. Non-2xx responses, invalid JSON and transport failures are all
  # re-raised as Error so callers only have to rescue one type.
  def post_json(url, payload)
    uri = URI.parse(url)
    request = Net::HTTP::Post.new(uri)
    request["Content-Type"] = "application/json"
    request["Accept"] = "application/json"
    request["User-Agent"] = @user_agent if @user_agent
    request.body = JSON.generate(payload)

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: true,
                               open_timeout: @timeout, read_timeout: @timeout) do |http|
      http.request(request)
    end

    raise Error, "Tavily returned HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    JSON.parse(response.body)
  rescue JSON::ParserError => e
    raise Error, "invalid JSON from Tavily (#{e.message})"
  rescue SocketError, SystemCallError, IOError, Timeout::Error => e
    # SystemCallError covers Errno::ECONNREFUSED/ECONNRESET/...; IOError covers
    # EOFError; Timeout::Error covers Net::OpenTimeout and Net::ReadTimeout.
    raise Error, e.message
  end
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Toolbox
|
|
5
|
+
# Line-based diff via longest-common-subsequence. Produces a readable diff in
# which every body line carries a two-character prefix — "- " for removed,
# "+ " for added and two spaces for unchanged — so the text columns line up.
# Long unchanged runs are elided. Pure stdlib.
module TextDiff
  module_function

  # Diffs two texts line by line.
  #
  # old_text / new_text   - anything responding to #to_s (nil is treated as "").
  # old_label / new_label - names printed in the "---" / "+++" header lines.
  # context               - unchanged lines kept at each end of an unchanged run.
  #
  # Returns "(no differences)" when the texts have identical lines, otherwise
  # the rendered diff as a single String without a trailing newline.
  def unified(old_text, new_text, old_label: "old", new_label: "new", context: 3)
    ops = diff_ops(old_text.to_s.lines, new_text.to_s.lines)
    return "(no differences)" if ops.all? { |type, _| type == :eq }

    render(ops, old_label, new_label, context)
  end

  # Computes the edit script turning line array `a` into `b` as a list of
  # [type, line] pairs with type :eq, :del or :add. Uses a full LCS table, so
  # time and memory grow with a.size * b.size.
  def diff_ops(a, b)
    n = a.size
    m = b.size

    # lcs[i][j] = length of the longest common subsequence of a[i..] and b[j..].
    lcs = Array.new(n + 1) { Array.new(m + 1, 0) }
    (n - 1).downto(0) do |i|
      (m - 1).downto(0) do |j|
        lcs[i][j] = a[i] == b[j] ? lcs[i + 1][j + 1] + 1 : [lcs[i + 1][j], lcs[i][j + 1]].max
      end
    end

    ops = []
    i = 0
    j = 0
    until i == n || j == m
      if a[i] == b[j]
        ops << [:eq, a[i]]
        i += 1
        j += 1
      elsif lcs[i + 1][j] >= lcs[i][j + 1]
        # Ties prefer a deletion, so removals are listed before additions.
        ops << [:del, a[i]]
        i += 1
      else
        ops << [:add, b[j]]
        j += 1
      end
    end

    # Whatever is left on either side is a pure deletion / addition tail.
    ops.concat(a[i..].map { |line| [:del, line] })
    ops.concat(b[j..].map { |line| [:add, line] })
    ops
  end

  # Renders an edit script: a two-line header, then one output line per
  # changed line, with each run of unchanged lines passed to emit_context.
  def render(ops, old_label, new_label, context)
    out = ["--- #{old_label}", "+++ #{new_label}"]

    # Consecutive :eq ops form one chunk; every changed op is its own chunk.
    ops.chunk_while { |left, right| left[0] == :eq && right[0] == :eq }.each do |chunk|
      type, line = chunk.first
      if type == :eq
        emit_context(out, chunk.map(&:last), context)
      else
        out << "#{type == :del ? '-' : '+'} #{line.chomp}"
      end
    end

    out.join("\n")
  end

  # Appends a run of unchanged lines to `out`. Runs longer than
  # 2 * context + 1 keep only `context` lines at each end, with a marker in
  # between stating how many lines were dropped. A negative context is
  # treated as zero.
  def emit_context(out, run, context)
    keep = [context, 0].max
    if run.size > (keep * 2) + 1
      run.first(keep).each { |line| out << "  #{line.chomp}" }
      out << "  ⋮ (#{run.size - (keep * 2)} unchanged lines)"
      run.last(keep).each { |line| out << "  #{line.chomp}" }
    else
      run.each { |line| out << "  #{line.chomp}" }
    end
  end
end
|
|
80
|
+
end
|
|
81
|
+
end
|