ruby_llm-toolbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +49 -0
- data/GUIDE.md +598 -0
- data/LICENSE +21 -0
- data/README.md +412 -0
- data/bin/verify_prism_parity +112 -0
- data/lib/ruby_llm/toolbox/base.rb +112 -0
- data/lib/ruby_llm/toolbox/configuration.rb +148 -0
- data/lib/ruby_llm/toolbox/data_path.rb +54 -0
- data/lib/ruby_llm/toolbox/process_registry.rb +226 -0
- data/lib/ruby_llm/toolbox/process_runner.rb +72 -0
- data/lib/ruby_llm/toolbox/ruby_outline.rb +213 -0
- data/lib/ruby_llm/toolbox/safe_math.rb +182 -0
- data/lib/ruby_llm/toolbox/safety/command_guard.rb +42 -0
- data/lib/ruby_llm/toolbox/safety/path_jail.rb +55 -0
- data/lib/ruby_llm/toolbox/safety/url_guard.rb +111 -0
- data/lib/ruby_llm/toolbox/sandbox/base.rb +151 -0
- data/lib/ruby_llm/toolbox/sandbox/bubblewrap.rb +70 -0
- data/lib/ruby_llm/toolbox/sandbox/docker.rb +69 -0
- data/lib/ruby_llm/toolbox/sandbox/sandbox_exec.rb +75 -0
- data/lib/ruby_llm/toolbox/search/brave.rb +64 -0
- data/lib/ruby_llm/toolbox/search/searxng.rb +64 -0
- data/lib/ruby_llm/toolbox/search/tavily.rb +70 -0
- data/lib/ruby_llm/toolbox/text_diff.rb +81 -0
- data/lib/ruby_llm/toolbox/toml.rb +409 -0
- data/lib/ruby_llm/toolbox/tools/apply_patch.rb +92 -0
- data/lib/ruby_llm/toolbox/tools/bash_tool.rb +101 -0
- data/lib/ruby_llm/toolbox/tools/bundle.rb +71 -0
- data/lib/ruby_llm/toolbox/tools/calculator.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/create_directory.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/csv_read.rb +69 -0
- data/lib/ruby_llm/toolbox/tools/csv_write.rb +51 -0
- data/lib/ruby_llm/toolbox/tools/date_time.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/delete_file.rb +64 -0
- data/lib/ruby_llm/toolbox/tools/diff.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/download_file.rb +55 -0
- data/lib/ruby_llm/toolbox/tools/edit_file.rb +82 -0
- data/lib/ruby_llm/toolbox/tools/gem_tool.rb +140 -0
- data/lib/ruby_llm/toolbox/tools/git_add.rb +46 -0
- data/lib/ruby_llm/toolbox/tools/git_blame.rb +58 -0
- data/lib/ruby_llm/toolbox/tools/git_branch.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/git_checkout.rb +43 -0
- data/lib/ruby_llm/toolbox/tools/git_commit.rb +47 -0
- data/lib/ruby_llm/toolbox/tools/git_diff.rb +50 -0
- data/lib/ruby_llm/toolbox/tools/git_grep.rb +66 -0
- data/lib/ruby_llm/toolbox/tools/git_helpers.rb +68 -0
- data/lib/ruby_llm/toolbox/tools/git_log.rb +47 -0
- data/lib/ruby_llm/toolbox/tools/git_show.rb +48 -0
- data/lib/ruby_llm/toolbox/tools/git_status.rb +27 -0
- data/lib/ruby_llm/toolbox/tools/glob.rb +62 -0
- data/lib/ruby_llm/toolbox/tools/grep_files.rb +221 -0
- data/lib/ruby_llm/toolbox/tools/http_helpers.rb +130 -0
- data/lib/ruby_llm/toolbox/tools/http_request.rb +75 -0
- data/lib/ruby_llm/toolbox/tools/json_query.rb +69 -0
- data/lib/ruby_llm/toolbox/tools/lint.rb +67 -0
- data/lib/ruby_llm/toolbox/tools/list_directory.rb +87 -0
- data/lib/ruby_llm/toolbox/tools/move_file.rb +54 -0
- data/lib/ruby_llm/toolbox/tools/multi_edit.rb +107 -0
- data/lib/ruby_llm/toolbox/tools/parse_ruby.rb +111 -0
- data/lib/ruby_llm/toolbox/tools/process_kill.rb +41 -0
- data/lib/ruby_llm/toolbox/tools/process_list.rb +29 -0
- data/lib/ruby_llm/toolbox/tools/process_output.rb +55 -0
- data/lib/ruby_llm/toolbox/tools/process_start.rb +109 -0
- data/lib/ruby_llm/toolbox/tools/python_tests.rb +77 -0
- data/lib/ruby_llm/toolbox/tools/read_file.rb +75 -0
- data/lib/ruby_llm/toolbox/tools/replace_in_files.rb +139 -0
- data/lib/ruby_llm/toolbox/tools/run_python.rb +38 -0
- data/lib/ruby_llm/toolbox/tools/run_ruby.rb +37 -0
- data/lib/ruby_llm/toolbox/tools/run_rust.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/run_tests.rb +81 -0
- data/lib/ruby_llm/toolbox/tools/sandbox_run.rb +40 -0
- data/lib/ruby_llm/toolbox/tools/todo_write.rb +57 -0
- data/lib/ruby_llm/toolbox/tools/toml_query.rb +70 -0
- data/lib/ruby_llm/toolbox/tools/toolchain_helpers.rb +62 -0
- data/lib/ruby_llm/toolbox/tools/tree.rb +87 -0
- data/lib/ruby_llm/toolbox/tools/web_fetch.rb +77 -0
- data/lib/ruby_llm/toolbox/tools/web_search.rb +81 -0
- data/lib/ruby_llm/toolbox/tools/write_file.rb +52 -0
- data/lib/ruby_llm/toolbox/tools/yaml_query.rb +73 -0
- data/lib/ruby_llm/toolbox/truncator.rb +68 -0
- data/lib/ruby_llm/toolbox/version.rb +7 -0
- data/lib/ruby_llm/toolbox.rb +161 -0
- metadata +194 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "version"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Toolbox
|
|
7
|
+
# Holds global defaults. Every tool takes a snapshot of this at
|
|
8
|
+
# construction time (see Base#initialize) so a single tool instance can be
|
|
9
|
+
# scoped without mutating the global config:
|
|
10
|
+
#
|
|
11
|
+
# chat.with_tool(ReadFile.new(fs_root: "/srv/project"))
|
|
12
|
+
#
|
|
13
|
+
# Treat configuration values as read-only inside tools. Do not mutate the
|
|
14
|
+
# arrays in place; assign a new value instead.
|
|
15
|
+
class Configuration
  # --- Filesystem -------------------------------------------------------
  # Every filesystem tool is confined to this root (symlinks resolved).
  # Defaults to the working directory at construction time.
  attr_accessor :fs_root

  # --- Exec / mutation gate --------------------------------------------
  # Master switch for the dangerous tool set (bash, write_file, edit_file,
  # run_code, git_commit, mutating http). Off by default: the gem is
  # safe-by-default even though every class is loaded.
  attr_accessor :enable_exec_tools

  # Executables BashTool is permitted to run. Empty means "nothing".
  attr_accessor :allowed_commands

  # Hard wall-clock limit for any spawned process, in seconds.
  attr_accessor :command_timeout
  attr_accessor :max_processes # max concurrent background processes (process_start)

  # Only these environment variables are passed through to spawned
  # processes; everything else is unset.
  attr_accessor :env_passthrough

  # --- Output budgeting -------------------------------------------------
  # Tool results are truncated (head + tail, middle elided) to fit this
  # many tokens, counted with ruby_llm-tokenizer.
  attr_accessor :max_output_tokens

  # Model identifier used to pick a tokenizer. For Claude models, call
  # RubyLLM::Tokenizer.enable_claude_approximation! once at boot.
  attr_accessor :tokenizer_model

  # --- Search / traversal ----------------------------------------------
  # Per-pattern wall-clock limit for user-supplied regexes (ReDoS guard).
  attr_accessor :regex_timeout

  # Cap on grep matches returned in a single call.
  attr_accessor :max_grep_matches

  # Directory basenames pruned during recursive walks.
  attr_accessor :ignored_dirs

  # --- Web (phase 3) ----------------------------------------------------
  # Pluggable search backend. Tavily is the chosen default provider, but the
  # adapter is swappable: set search_adapter to an object responding to
  # #search(query, max_results:), or to a symbol (:tavily, :brave, :searxng)
  # to select a built-in adapter. nil falls back to Tavily.
  attr_accessor :search_adapter
  attr_accessor :tavily_api_key # defaults to ENV["TAVILY_API_KEY"]
  attr_accessor :brave_api_key # for the :brave adapter (Brave Search API)
  attr_accessor :searxng_url # base URL of a self-hosted SearXNG instance
  attr_accessor :web_allowlist
  attr_accessor :web_denylist

  # --- Sandbox (run_ruby) ----------------------------------------------
  # Docker is the locked code-execution backend. These map to `docker run`
  # isolation flags.
  attr_accessor :docker_image
  attr_accessor :python_image # image for run_python
  attr_accessor :rust_image # image for run_rust
  attr_accessor :sandbox_network # --network
  attr_accessor :sandbox_memory # --memory
  attr_accessor :sandbox_cpus # --cpus
  attr_accessor :sandbox_pids # --pids-limit
  attr_accessor :sandbox_user # --user (uid:gid)
  attr_accessor :sandbox_runtime # :auto | :docker | :bubblewrap | :sandbox_exec | :none
  attr_accessor :sandbox_bwrap_extra # extra bwrap args (e.g. ["--tmpfs", "/home"])
  attr_accessor :sandbox_seatbelt_profile # custom Seatbelt SBPL profile string (overrides default)

  # --- HTTP (gem tool, web tools) --------------------------------------
  attr_accessor :http_timeout
  attr_accessor :user_agent
  attr_accessor :max_fetch_bytes # cap on a fetched response body
  attr_accessor :max_redirects # redirect hops web_fetch will follow

  # --- Security override (operator-controlled) -------------------------
  # Master switch for per-call unsafe escalation. When false (the default),
  # any tool call that passes unsafe: true is REFUSED — an agent cannot
  # escalate its own privileges. Only a human operator can set this to true,
  # at which point a tool may bypass its guard (path jail, URL guard, command
  # allowlist) for that specific call. Set unsafe_logger to audit every
  # escalation that is actually granted.
  attr_accessor :allow_unsafe
  attr_accessor :unsafe_logger # callable: ->(tool_name, detail) { ... }

  # Populates every option with its default. API keys and the SearXNG URL are
  # read from the environment once, here; later ENV changes are not picked up.
  def initialize
    @fs_root = Dir.pwd
    @enable_exec_tools = false
    @allowed_commands = []
    @command_timeout = 30
    @max_processes = 8
    @env_passthrough = %w[PATH LANG LC_ALL HOME]
    @max_output_tokens = 2_000
    @tokenizer_model = "gpt-4o"
    @regex_timeout = 2
    @max_grep_matches = 200
    @ignored_dirs = %w[.git .hg .svn node_modules .bundle tmp]
    @search_adapter = nil
    @tavily_api_key = ENV["TAVILY_API_KEY"]
    @brave_api_key = ENV["BRAVE_API_KEY"] || ENV["BRAVE_SEARCH_API_KEY"]
    @searxng_url = ENV["SEARXNG_URL"]
    @web_allowlist = []
    @web_denylist = []
    @docker_image = "ruby:3.3-slim"
    @python_image = "python:3.12-slim"
    @rust_image = "rust:1-slim"
    @sandbox_network = "none"
    @sandbox_memory = "256m"
    @sandbox_cpus = "1.0"
    @sandbox_pids = 128
    @sandbox_user = "1000:1000"
    @sandbox_runtime = :auto
    @sandbox_bwrap_extra = []
    @sandbox_seatbelt_profile = nil
    @http_timeout = 10
    @user_agent = "ruby_llm-toolbox/#{RubyLLM::Toolbox::VERSION}"
    @max_fetch_bytes = 2_000_000
    @max_redirects = 5
    @allow_unsafe = false
    @unsafe_logger = nil
  end

  # Returns a copy with the given attributes overridden. Used to scope a
  # single tool instance without touching global state.
  #
  # Array-valued settings are shallow-copied into the new object, so an
  # accidental in-place mutation (`copy.allowed_commands << "ls"`) can never
  # leak back into the configuration it was derived from. Frozen arrays are
  # shared as-is because they cannot be mutated.
  #
  # overrides - attribute name => value pairs; each is applied through the
  #             public writer, so an unknown name raises NoMethodError.
  def dup_with(**overrides)
    copy = self.class.new
    instance_variables.each do |ivar|
      copy.instance_variable_set(ivar, detached(instance_variable_get(ivar)))
    end
    overrides.each { |key, value| copy.public_send("#{key}=", value) }
    copy
  end

  private

  # Gives the copy its own Array instance; every other value is shared.
  def detached(value)
    value.is_a?(Array) && !value.frozen? ? value.dup : value
  end
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Toolbox
|
|
5
|
+
# Shared path-navigation for the structured-data tools (json_query,
|
|
6
|
+
# yaml_query). Path syntax: dot-separated keys, [n] for array indices, and
|
|
7
|
+
# [] to map a field across an array. Examples:
|
|
8
|
+
# users[0].name users[].email config.server.port
|
|
9
|
+
module DataPath
  # Raised for an unparseable path or when a path segment does not fit the
  # shape of the data it is applied to.
  class Error < StandardError; end

  module_function

  # Parses +path+ and walks +data+ along it.
  #
  # data - nested Hash/Array structure (string keys are looked up as-is).
  # path - e.g. "users[0].name", "users[].email", "items[-1]".
  #
  # Returns the value found, or nil when a key/index is missing.
  # Raises Error on an empty path or a segment/type mismatch.
  def query(data, path)
    apply(data, parse(path))
  end

  # Splits a path string into tokens: String (hash key), Integer (array
  # index, negative counts from the end) or :map (the [] marker).
  #
  # A leading "$", "." or "$." is ignored. Characters that match none of the
  # segment forms are skipped rather than rejected.
  #
  # Raises Error when no token at all could be extracted.
  def parse(path)
    cleaned = path.to_s.strip.sub(/\A\$?\.?/, "")
    tokens = cleaned.scan(/[^.\[\]]+|\[-?\d+\]|\[\]/).map do |segment|
      case segment
      when "[]" then :map
      # "-?" lets [-1] address the last element instead of being misread as
      # the string key "-1".
      when /\A\[(-?\d+)\]\z/ then Regexp.last_match(1).to_i
      else segment
      end
    end
    raise Error, "could not parse path: #{path.inspect}" if tokens.empty?

    tokens
  end

  # Recursively applies +tokens+ to +data+.
  #
  # Stops early and returns +data+ when the tokens are exhausted or the
  # current value is nil (so a missing intermediate key yields nil, not an
  # error). Raises Error when a token is applied to the wrong container type.
  def apply(data, tokens)
    return data if tokens.empty? || data.nil?

    token, *rest = tokens
    case token
    when :map
      raise Error, "[] expects an array, got #{data.class}" unless data.is_a?(Array)

      data.map { |element| apply(element, rest) }
    when Integer
      raise Error, "index [#{token}] expects an array, got #{data.class}" unless data.is_a?(Array)

      apply(data[token], rest)
    else
      raise Error, "key '#{token}' expects an object, got #{data.class}" unless data.is_a?(Hash)

      apply(data[token], rest)
    end
  end
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "open3"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Toolbox
|
|
7
|
+
# A single managed background process. Its stdout and stderr are drained
|
|
8
|
+
# continuously by reader threads into bounded buffers, so a chatty child
|
|
9
|
+
# can't deadlock on a full pipe or grow memory without limit. Output is read
|
|
10
|
+
# incrementally (each read returns only what's new since the last one). The
|
|
11
|
+
# child runs in its own process group so it — and any children it spawns —
|
|
12
|
+
# can be killed together.
|
|
13
|
+
class ManagedProcess
  MAX_BUFFER = 256 * 1024 # retain at most this many unread bytes per stream

  attr_reader :id, :name, :argv, :pid, :started_at

  # Spawns the child via Open3.popen3 and starts one reader thread per
  # output stream.
  #
  # id      - registry identifier for this process.
  # argv    - command and arguments, splatted into Open3.popen3.
  # env     - environment hash for the child; everything else is unset
  #           (unsetenv_others: true).
  # chdir   - working directory; ignored when nil or empty.
  # name    - caller-supplied label.
  # rlimits - extra spawn options merged into the popen3 options.
  #
  # Raises whatever Open3.popen3 raises when the command cannot be spawned.
  def initialize(id:, argv:, env:, chdir:, name:, rlimits: {})
    @id = id
    @argv = argv
    @name = name
    @started_at = Time.now
    # Durations are measured on the monotonic clock so wall-clock adjustments
    # cannot skew #age or the #kill grace period.
    @started_clock = monotonic_now
    @mutex = Mutex.new
    @out = +""
    @err = +""
    @out_dropped = false
    @err_dropped = false

    # pgroup: true puts the child in its own process group (see #kill).
    opts = { unsetenv_others: true, pgroup: true }
    opts[:chdir] = chdir if chdir && !chdir.to_s.empty?
    opts.merge!(rlimits) if rlimits && !rlimits.empty?

    stdin, stdout, stderr, @wait_thr = Open3.popen3(env, *argv, **opts)
    @pid = @wait_thr.pid
    # The child never receives input: close stdin right away (best effort).
    begin
      stdin.close
    rescue StandardError
      nil
    end

    @readers = [drain(stdout, :out), drain(stderr, :err)]
  end

  # True while the wait thread for the child is still alive.
  def running?
    @wait_thr.alive?
  end

  # :running or :exited.
  def status
    running? ? :running : :exited
  end

  # Exit status of the child, or nil while it is running, when the status
  # carries no exit code, or when it cannot be obtained.
  def exit_code
    return nil if running?

    @wait_thr.value.exitstatus
  rescue StandardError
    nil
  end

  # Seconds elapsed since the process was started (Float).
  def age
    monotonic_now - @started_clock
  end

  # Returns and clears the output accumulated since the previous call.
  # The *_dropped flags report whether older bytes were discarded because a
  # buffer exceeded MAX_BUFFER.
  def read_new
    @mutex.synchronize do
      data = { out: @out.dup, err: @err.dup, out_dropped: @out_dropped, err_dropped: @err_dropped }
      @out = +""
      @err = +""
      @out_dropped = false
      @err_dropped = false
      data
    end
  end

  # SIGTERM the whole tree, escalate to SIGKILL after a grace period.
  # Descendants are collected up front (before the parent dies and they get
  # reparented), then signalled both via the process group and individually
  # — so cleanup is reliable even in sandboxes that don't deliver
  # process-group signals to non-leader members.
  #
  # grace - seconds to wait between TERM and KILL.
  #
  # No-op when the child has already exited.
  def kill(grace: 2.0)
    return unless @wait_thr.alive?

    targets = [@pid] + descendants(@pid)
    signal_group("TERM")
    targets.each { |pid| signal_pid(pid, "TERM") }

    deadline = monotonic_now + grace
    sleep(0.05) while @wait_thr.alive? && monotonic_now < deadline

    # KILL is sent unconditionally: descendants may outlive the leader.
    signal_group("KILL")
    targets.each { |pid| signal_pid(pid, "KILL") }
    begin
      @wait_thr.value
    rescue StandardError
      nil
    end
  end

  private

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Best-effort signal to the child's process group; errors are ignored.
  def signal_group(sig)
    Process.kill("-#{sig}", @pid) # negative signal => the process group
  rescue StandardError
    nil
  end

  # Best-effort signal to a single pid; errors are ignored.
  def signal_pid(pid, sig)
    Process.kill(sig, pid)
  rescue StandardError
    nil
  end

  # All transitive children of root, via /proc (Linux). Collected before the
  # parent is killed, so the parent->child links are still intact. Returns []
  # when /proc is not available.
  def descendants(root)
    return [] unless File.directory?("/proc")

    children = Hash.new { |h, k| h[k] = [] }
    Dir.glob("/proc/[0-9]*/stat").each do |file|
      data = File.read(file)
      # The command name sits in parentheses and may itself contain spaces or
      # parentheses, so locate the fields around it by the outermost pair.
      open_paren = data.index("(")
      close_paren = data.rindex(")")
      next unless open_paren && close_paren

      pid = data[0...open_paren].to_i
      # After ") " come the state and then the parent pid.
      ppid = data[(close_paren + 2)..].to_s.split[1].to_i
      children[ppid] << pid
    rescue StandardError
      # Entries can vanish between the glob and the read.
      next
    end

    # Breadth-first walk from root's direct children.
    result = []
    queue = children[root].dup
    until queue.empty?
      pid = queue.shift
      next if result.include?(pid)

      result << pid
      queue.concat(children[pid])
    end
    result
  end

  # Starts a thread that copies everything readable from io into the buffer
  # named by which (:out or :err) until EOF, then closes io.
  def drain(io, which)
    Thread.new do
      loop do
        append(which, io.readpartial(4096))
      end
    rescue EOFError, IOError
      nil
    ensure
      begin
        io.close
      rescue StandardError
        nil
      end
    end
  end

  # Appends chunk to the selected buffer, keeping only the newest MAX_BUFFER
  # bytes and flagging the stream as dropped when older bytes are discarded.
  def append(which, chunk)
    @mutex.synchronize do
      buf = which == :out ? @out : @err
      buf << chunk
      next unless buf.bytesize > MAX_BUFFER

      overflow = buf.bytesize - MAX_BUFFER
      buf.replace(buf.byteslice(overflow, MAX_BUFFER) || +"")
      if which == :out
        @out_dropped = true
      else
        @err_dropped = true
      end
    end
  end
end
|
|
177
|
+
|
|
178
|
+
# Thread-safe registry of background processes, shared across tool calls.
|
|
179
|
+
# Holds an upper bound on concurrent live processes and cleans everything up
|
|
180
|
+
# at interpreter exit so nothing is orphaned.
|
|
181
|
+
module ProcessRegistry
  # Raised by .start when the number of live processes has reached the limit.
  class LimitError < StandardError; end

  @mutex = Mutex.new
  @procs = {}   # id => ManagedProcess
  @counter = 0  # source of "proc_N" ids; only ever incremented

  class << self
    # Spawns a new ManagedProcess and registers it.
    #
    # argv, env, chdir, name, rlimits - forwarded to ManagedProcess.new.
    # max - maximum number of processes allowed to be running at once.
    #
    # Returns the new process id ("proc_N").
    # Raises LimitError when +max+ registered processes are still running.
    def start(argv:, env:, chdir:, name:, rlimits: {}, max: 8)
      # Counting and spawning happen under one lock so concurrent callers
      # cannot exceed the limit together.
      @mutex.synchronize do
        live = @procs.values.count(&:running?)
        raise LimitError, "too many background processes (limit #{max}); kill some first" if live >= max

        @counter += 1
        id = "proc_#{@counter}"
        @procs[id] = ManagedProcess.new(id: id, argv: argv, env: env, chdir: chdir, name: name, rlimits: rlimits)
        id
      end
    end

    # Returns the process registered under +id+, or nil.
    def get(id)
      @mutex.synchronize { @procs[id] }
    end

    # Returns a snapshot array of every registered process.
    def all
      @mutex.synchronize { @procs.values.dup }
    end

    # Unregisters +id+ without killing it; returns the removed process or nil.
    def delete(id)
      @mutex.synchronize { @procs.delete(id) }
    end

    # Kills every registered process (short grace period); entries stay
    # registered.
    def kill_all
      all.each { |process| process.kill(grace: 0.2) }
    end

    # Kills every registered process and empties the registry. The id counter
    # is not rewound.
    #
    # The table is swapped out atomically first, so a process registered
    # concurrently is either part of the batch being killed or survives in the
    # fresh table — it can never be dropped from the registry while still
    # running.
    def reset!
      removed = @mutex.synchronize do
        previous = @procs.values
        @procs = {}
        previous
      end
      removed.each { |process| process.kill(grace: 0.2) }
    end
  end
end
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
# Registered when this file is loaded: on interpreter exit, kill every process
# still held by the registry so no background child is left running.
at_exit { RubyLLM::Toolbox::ProcessRegistry.kill_all }
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "open3"
|
|
4
|
+
require "timeout"
|
|
5
|
+
|
|
6
|
+
module RubyLLM
|
|
7
|
+
module Toolbox
|
|
8
|
+
# Shared subprocess runner used by BashTool and the Docker sandbox. Always
|
|
9
|
+
# array-form (no shell), streams both pipes so a chatty child can't deadlock
|
|
10
|
+
# on a full pipe buffer, and hard-kills the process if it blows the
|
|
11
|
+
# wall-clock budget.
|
|
12
|
+
#
|
|
13
|
+
# Returns [stdout, stderr, status] where status is a Process::Status or the
|
|
14
|
+
# symbol :timeout.
|
|
15
|
+
module ProcessRunner
  module_function

  # Runs argv to completion and captures its output.
  #
  # argv            - command and arguments, splatted into Open3.popen3.
  # env             - environment hash for the child.
  # stdin           - optional String fed to the child's stdin, which is then
  #                   closed.
  # timeout         - wall-clock budget in seconds, passed to Timeout.timeout.
  # unsetenv_others - when true, only +env+ is visible to the child.
  # chdir           - working directory for the child (optional).
  # rlimits         - extra spawn options merged into the popen3 options.
  #
  # Returns [stdout, stderr, status]; status is a Process::Status or :timeout.
  #
  # stdin is written from a helper thread while the caller drains stdout and
  # stderr. Writing it synchronously up front would deadlock as soon as the
  # child produces output while it is still consuming input (both pipes fill
  # and each side waits for the other), and that wait would not even be
  # covered by the timeout.
  def capture(argv, env: {}, stdin: nil, timeout: 30, unsetenv_others: true, chdir: nil, rlimits: {})
    opts = { unsetenv_others: unsetenv_others }
    opts[:chdir] = chdir if chdir
    opts.merge!(rlimits) if rlimits && !rlimits.empty?
    Open3.popen3(env, *argv, **opts) do |i, o, e, thr|
      writer = Thread.new { write_stdin(i, stdin) }
      begin
        pump(o, e, thr, timeout)
      ensure
        # Normally the writer has finished by now; if it is still blocked
        # (timeout, or a child that never read its input) stop it so it
        # cannot outlive this call.
        writer.kill
        writer.join
      end
    end
  end

  # Writes +stdin+ (when non-empty) to +io+ and always closes +io+ afterwards
  # so the child sees EOF. All errors are swallowed on purpose.
  def write_stdin(io, stdin)
    io.write(stdin) if stdin && !stdin.empty?
  rescue StandardError
    # Child may have exited/closed the pipe before we finished writing.
    nil
  ensure
    begin
      io.close
    rescue StandardError
      nil
    end
  end

  # Reads stdout and stderr concurrently until both reach EOF, then waits for
  # the child, all within +timeout+ seconds.
  #
  # Returns [out, err, Process::Status], or [out, err, :timeout] with whatever
  # was captured so far after killing the child. Only the child itself is
  # signalled, not its descendants.
  def pump(stdout, stderr, wait_thr, timeout)
    out = +""
    err = +""
    readers = [stdout, stderr]

    Timeout.timeout(timeout) do
      until readers.empty?
        ready, = IO.select(readers)
        ready.each do |io|
          chunk = io.read_nonblock(4096)
          (io.equal?(stdout) ? out : err) << chunk
        rescue IO::WaitReadable
          next
        rescue EOFError
          # This stream is finished; stop selecting on it.
          readers.delete(io)
        end
      end
      [out, err, wait_thr.value]
    end
  rescue Timeout::Error
    kill(wait_thr.pid)
    [out, err, :timeout]
  end

  # Best-effort SIGKILL of +pid+; errors (e.g. already exited) are ignored.
  def kill(pid)
    Process.kill("KILL", pid)
  rescue StandardError
    nil
  end
end
|
|
71
|
+
end
|
|
72
|
+
end
|