ruby_llm-toolbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +49 -0
- data/GUIDE.md +598 -0
- data/LICENSE +21 -0
- data/README.md +412 -0
- data/bin/verify_prism_parity +112 -0
- data/lib/ruby_llm/toolbox/base.rb +112 -0
- data/lib/ruby_llm/toolbox/configuration.rb +148 -0
- data/lib/ruby_llm/toolbox/data_path.rb +54 -0
- data/lib/ruby_llm/toolbox/process_registry.rb +226 -0
- data/lib/ruby_llm/toolbox/process_runner.rb +72 -0
- data/lib/ruby_llm/toolbox/ruby_outline.rb +213 -0
- data/lib/ruby_llm/toolbox/safe_math.rb +182 -0
- data/lib/ruby_llm/toolbox/safety/command_guard.rb +42 -0
- data/lib/ruby_llm/toolbox/safety/path_jail.rb +55 -0
- data/lib/ruby_llm/toolbox/safety/url_guard.rb +111 -0
- data/lib/ruby_llm/toolbox/sandbox/base.rb +151 -0
- data/lib/ruby_llm/toolbox/sandbox/bubblewrap.rb +70 -0
- data/lib/ruby_llm/toolbox/sandbox/docker.rb +69 -0
- data/lib/ruby_llm/toolbox/sandbox/sandbox_exec.rb +75 -0
- data/lib/ruby_llm/toolbox/search/brave.rb +64 -0
- data/lib/ruby_llm/toolbox/search/searxng.rb +64 -0
- data/lib/ruby_llm/toolbox/search/tavily.rb +70 -0
- data/lib/ruby_llm/toolbox/text_diff.rb +81 -0
- data/lib/ruby_llm/toolbox/toml.rb +409 -0
- data/lib/ruby_llm/toolbox/tools/apply_patch.rb +92 -0
- data/lib/ruby_llm/toolbox/tools/bash_tool.rb +101 -0
- data/lib/ruby_llm/toolbox/tools/bundle.rb +71 -0
- data/lib/ruby_llm/toolbox/tools/calculator.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/create_directory.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/csv_read.rb +69 -0
- data/lib/ruby_llm/toolbox/tools/csv_write.rb +51 -0
- data/lib/ruby_llm/toolbox/tools/date_time.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/delete_file.rb +64 -0
- data/lib/ruby_llm/toolbox/tools/diff.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/download_file.rb +55 -0
- data/lib/ruby_llm/toolbox/tools/edit_file.rb +82 -0
- data/lib/ruby_llm/toolbox/tools/gem_tool.rb +140 -0
- data/lib/ruby_llm/toolbox/tools/git_add.rb +46 -0
- data/lib/ruby_llm/toolbox/tools/git_blame.rb +58 -0
- data/lib/ruby_llm/toolbox/tools/git_branch.rb +35 -0
- data/lib/ruby_llm/toolbox/tools/git_checkout.rb +43 -0
- data/lib/ruby_llm/toolbox/tools/git_commit.rb +47 -0
- data/lib/ruby_llm/toolbox/tools/git_diff.rb +50 -0
- data/lib/ruby_llm/toolbox/tools/git_grep.rb +66 -0
- data/lib/ruby_llm/toolbox/tools/git_helpers.rb +68 -0
- data/lib/ruby_llm/toolbox/tools/git_log.rb +47 -0
- data/lib/ruby_llm/toolbox/tools/git_show.rb +48 -0
- data/lib/ruby_llm/toolbox/tools/git_status.rb +27 -0
- data/lib/ruby_llm/toolbox/tools/glob.rb +62 -0
- data/lib/ruby_llm/toolbox/tools/grep_files.rb +221 -0
- data/lib/ruby_llm/toolbox/tools/http_helpers.rb +130 -0
- data/lib/ruby_llm/toolbox/tools/http_request.rb +75 -0
- data/lib/ruby_llm/toolbox/tools/json_query.rb +69 -0
- data/lib/ruby_llm/toolbox/tools/lint.rb +67 -0
- data/lib/ruby_llm/toolbox/tools/list_directory.rb +87 -0
- data/lib/ruby_llm/toolbox/tools/move_file.rb +54 -0
- data/lib/ruby_llm/toolbox/tools/multi_edit.rb +107 -0
- data/lib/ruby_llm/toolbox/tools/parse_ruby.rb +111 -0
- data/lib/ruby_llm/toolbox/tools/process_kill.rb +41 -0
- data/lib/ruby_llm/toolbox/tools/process_list.rb +29 -0
- data/lib/ruby_llm/toolbox/tools/process_output.rb +55 -0
- data/lib/ruby_llm/toolbox/tools/process_start.rb +109 -0
- data/lib/ruby_llm/toolbox/tools/python_tests.rb +77 -0
- data/lib/ruby_llm/toolbox/tools/read_file.rb +75 -0
- data/lib/ruby_llm/toolbox/tools/replace_in_files.rb +139 -0
- data/lib/ruby_llm/toolbox/tools/run_python.rb +38 -0
- data/lib/ruby_llm/toolbox/tools/run_ruby.rb +37 -0
- data/lib/ruby_llm/toolbox/tools/run_rust.rb +42 -0
- data/lib/ruby_llm/toolbox/tools/run_tests.rb +81 -0
- data/lib/ruby_llm/toolbox/tools/sandbox_run.rb +40 -0
- data/lib/ruby_llm/toolbox/tools/todo_write.rb +57 -0
- data/lib/ruby_llm/toolbox/tools/toml_query.rb +70 -0
- data/lib/ruby_llm/toolbox/tools/toolchain_helpers.rb +62 -0
- data/lib/ruby_llm/toolbox/tools/tree.rb +87 -0
- data/lib/ruby_llm/toolbox/tools/web_fetch.rb +77 -0
- data/lib/ruby_llm/toolbox/tools/web_search.rb +81 -0
- data/lib/ruby_llm/toolbox/tools/write_file.rb +52 -0
- data/lib/ruby_llm/toolbox/tools/yaml_query.rb +73 -0
- data/lib/ruby_llm/toolbox/truncator.rb +68 -0
- data/lib/ruby_llm/toolbox/version.rb +7 -0
- data/lib/ruby_llm/toolbox.rb +161 -0
- metadata +194 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ripper"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Toolbox
|
|
7
|
+
# Extracts a structural outline (classes, modules, methods, constants) from
|
|
8
|
+
# Ruby source, with line numbers and nesting depth. Parsing only — the code
|
|
9
|
+
# is never executed.
|
|
10
|
+
#
|
|
11
|
+
# Two backends sit behind one dispatcher and produce identical Entry lists:
|
|
12
|
+
#
|
|
13
|
+
# * PrismBackend — used automatically when `require "prism"` succeeds.
|
|
14
|
+
# Prism is bundled with Ruby 3.3+, so on a modern Ruby this needs no gem
|
|
15
|
+
# install. It is the same parser the VM itself uses, so its line numbers
|
|
16
|
+
# and structure are authoritative.
|
|
17
|
+
# * RipperBackend — the stdlib fallback for runtimes that don't bundle
|
|
18
|
+
# Prism (e.g. non-MRI). Dependency-free, always present.
|
|
19
|
+
#
|
|
20
|
+
# Parity between the two is enforced by spec/ruby_outline_parity_spec.rb and
|
|
21
|
+
# by bin/verify_prism_parity, which can be run under any Ruby (e.g. a
|
|
22
|
+
# sandboxed ruby:3.4) to confirm the backends agree.
|
|
23
|
+
module RubyOutline
  # Raised by either backend when the source cannot be parsed as Ruby.
  class ParseError < StandardError; end

  # One outline row.
  #   kind  - :class, :module, :singleton_class, :method or :constant
  #   name  - the name as a String (receiver-qualified for `def recv.name`)
  #   line  - 1-based source line the backend reports for the entry
  #   depth - how many class/module/singleton-class bodies enclose the entry
  Entry = Struct.new(:kind, :name, :line, :depth, keyword_init: true)

  module_function

  # True when the Prism backend can be loaded on this Ruby. Memoized, so the
  # `require` is attempted only once per process.
  def prism_available?
    return @prism_available if defined?(@prism_available)

    @prism_available = begin
      require "prism"
      true
    rescue LoadError
      false
    end
  end

  # The backend `extract` uses when none is passed explicitly.
  def active_backend
    prism_available? ? PrismBackend : RipperBackend
  end

  # Returns an Array<Entry> in source order. Raises ParseError on a syntax
  # error. Pass backend: to force a specific one (used by the parity tests).
  def extract(source, backend: active_backend)
    backend.extract(source.to_s)
  end

  # --- Prism backend ----------------------------------------------------
  module PrismBackend
    module_function

    # Parses +source+ with Prism and returns the outline entries.
    # Raises ParseError when Prism reports the parse as unsuccessful.
    def extract(source)
      require "prism" # deliberate lazy load: Prism may not exist on this Ruby
      result = Prism.parse(source)
      raise ParseError, "source is not valid Ruby (syntax error)" unless result.success?

      acc = []
      visit(result.value, 0, acc)
      acc
    end

    # Depth-first walk. Class/module/singleton-class nodes add an entry and
    # recurse one level deeper; methods and constant writes add an entry and
    # stop; every other node is transparent (its children are visited at the
    # same depth).
    def visit(node, depth, acc)
      return if node.nil?

      case node
      when Prism::ClassNode
        acc << Entry.new(kind: :class, name: node.constant_path.slice,
                         line: node.constant_path.location.start_line, depth: depth)
        visit(node.body, depth + 1, acc)
      when Prism::ModuleNode
        acc << Entry.new(kind: :module, name: node.constant_path.slice,
                         line: node.constant_path.location.start_line, depth: depth)
        visit(node.body, depth + 1, acc)
      when Prism::SingletonClassNode
        acc << Entry.new(kind: :singleton_class, name: "<< #{node.expression.slice}",
                         line: node.location.start_line, depth: depth)
        visit(node.body, depth + 1, acc)
      when Prism::DefNode
        name = node.receiver ? "#{node.receiver.slice}.#{node.name}" : node.name.to_s
        acc << Entry.new(kind: :method, name: name, line: node.name_loc.start_line, depth: depth)
        # method bodies are not descended into
      when Prism::ConstantWriteNode
        acc << Entry.new(kind: :constant, name: node.name.to_s,
                         line: node.name_loc.start_line, depth: depth)
      else
        node.compact_child_nodes.each { |child| visit(child, depth, acc) }
      end
    end
  end

  # --- Ripper backend (stdlib) ------------------------------------------
  module RipperBackend
    module_function

    # Parses +source+ with Ripper.sexp and returns the outline entries.
    # Raises ParseError when Ripper returns nil (its syntax-error signal).
    def extract(source)
      sexp = Ripper.sexp(source.to_s)
      raise ParseError, "source is not valid Ruby (syntax error)" if sexp.nil?

      acc = []
      walk(sexp, 0, acc)
      acc
    end

    # An s-expression is either a tagged node ([:tag, ...]) or a plain list
    # of nodes; non-arrays (strings, nil, integers) are leaves and ignored.
    def walk(node, depth, acc)
      return unless node.is_a?(Array)

      if node[0].is_a?(Symbol)
        dispatch(node, depth, acc)
      else
        node.each { |child| walk(child, depth, acc) }
      end
    end

    # Handles one tagged node. Mirrors PrismBackend.visit: definitions add
    # an Entry, everything unrecognised is traversed at the same depth.
    def dispatch(node, depth, acc)
      case node[0]
      when :program
        walk(node[1], depth, acc)
      when :class
        acc << Entry.new(kind: :class, name: const_name(node[1]), line: line_of(node[1]), depth: depth)
        walk(node[3], depth + 1, acc) # bodystmt; node[2] is the superclass
      when :module
        acc << Entry.new(kind: :module, name: const_name(node[1]), line: line_of(node[1]), depth: depth)
        walk(node[2], depth + 1, acc)
      when :sclass # class << self
        acc << Entry.new(kind: :singleton_class, name: "<< #{simple_name(node[1])}", line: line_of(node), depth: depth)
        walk(node[2], depth + 1, acc)
      when :def
        acc << Entry.new(kind: :method, name: ident_name(node[1]), line: line_of(node[1]), depth: depth)
      when :defs # def self.x / def Recv.x
        name = "#{simple_name(node[1])}.#{ident_name(node[3])}"
        acc << Entry.new(kind: :method, name: name, line: line_of(node[3]), depth: depth)
      when :bodystmt
        walk(node[1], depth, acc)
      when :assign
        handle_assign(node, depth, acc)
      else
        node[1..].each { |child| walk(child, depth, acc) }
      end
    end

    # `CONST = value` records a constant and, like the Prism backend, does
    # not look inside the value. Any other assignment is transparent: its
    # children are walked so definitions nested in the right-hand side
    # (e.g. `klass = Class.new do def x; end end`) are still reported, which
    # is what the Prism backend's generic traversal does.
    def handle_assign(node, depth, acc)
      field = constant_field(node[1])
      if field
        acc << Entry.new(kind: :constant, name: field[1], line: field[2]&.first, depth: depth)
      else
        node[1..].each { |child| walk(child, depth, acc) }
      end
    end

    # Returns the [:@const, name, [line, col]] token when +target+ is a bare
    # constant assignment target, nil otherwise.
    def constant_field(target)
      return nil unless target.is_a?(Array) && target[0] == :var_field

      field = target[1]
      field.is_a?(Array) && field[0] == :@const ? field : nil
    end

    # Name of a class/module path node. A top-level reference keeps its
    # leading "::" so the result matches the source text (as Prism's
    # constant_path.slice does).
    def const_name(node)
      case node && node[0]
      when :top_const_ref
        "::#{simple_name(node[1])}"
      when :const_ref, :var_ref
        simple_name(node[1])
      when :const_path_ref # Foo::Bar
        "#{const_name(node[1])}::#{simple_name(node[2])}"
      else
        simple_name(node)
      end
    end

    # Best-effort textual name for a receiver/target node. Falls back to the
    # first name-like token found inside the node, or "?" when there is none.
    def simple_name(node)
      return node.to_s unless node.is_a?(Array)

      case node[0]
      when :@const, :@ident, :@ivar, :@gvar, :@kw then node[1].to_s
      when :const_ref, :var_ref, :var_field then simple_name(node[1])
      when :top_const_ref then "::#{simple_name(node[1])}"
      when :const_path_ref then "#{simple_name(node[1])}::#{simple_name(node[2])}"
      else
        leaf = find_name_leaf(node)
        leaf ? leaf[1].to_s : "?"
      end
    end

    # Name of a method-name node. Ripper represents the name as a single
    # scanner token; besides @ident/@const/@kw that can be @op (`def ==`,
    # `def +`) or @backtick, so any scanner token is accepted verbatim.
    def ident_name(node)
      return node.to_s unless node.is_a?(Array)
      return node[1].to_s if scanner_token?(node)

      simple_name(node)
    end

    # Ripper scanner-event tokens are tagged with symbols starting with "@".
    def scanner_token?(node)
      node[0].is_a?(Symbol) && node[0].to_s.start_with?("@")
    end

    # Line of the first scanner token ([:@tag, text, [line, col]]) found in
    # +node+, searching depth-first; nil when the node contains none.
    def line_of(node)
      return nil unless node.is_a?(Array)

      if node.size == 3 && node[2].is_a?(Array) && node[2].size == 2 &&
         node[2][0].is_a?(Integer) && node[0].is_a?(Symbol)
        return node[2][0]
      end

      node.each do |child|
        line = line_of(child)
        return line if line
      end
      nil
    end

    # First @const/@ident/@kw token inside +node+ (depth-first), or nil.
    def find_name_leaf(node)
      return nil unless node.is_a?(Array)
      return node if %i[@const @ident @kw].include?(node[0])

      node.each do |child|
        found = find_name_leaf(child)
        return found if found
      end
      nil
    end
  end
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Toolbox
|
|
5
|
+
# A safe arithmetic evaluator. Parses and evaluates a math expression with a
|
|
6
|
+
# hand-written recursive-descent parser — it never calls eval, so a
|
|
7
|
+
# malicious expression can't execute Ruby. Supports + - * / % **, unary
|
|
8
|
+
# minus, parentheses, a small set of functions, and the constants pi and e.
|
|
9
|
+
module SafeMath
  # The only exception type evaluate raises for a bad expression or an
  # out-of-domain computation.
  class Error < StandardError; end

  # Single-argument functions, looked up case-insensitively by name.
  FUNCTIONS = {
    "sqrt" => ->(x) { Math.sqrt(x) },
    "abs" => ->(x) { x.abs },
    "sin" => ->(x) { Math.sin(x) },
    "cos" => ->(x) { Math.cos(x) },
    "tan" => ->(x) { Math.tan(x) },
    "ln" => ->(x) { Math.log(x) },
    "log10" => ->(x) { Math.log10(x) },
    "exp" => ->(x) { Math.exp(x) },
    "floor" => ->(x) { x.floor },
    "ceil" => ->(x) { x.ceil },
    "round" => ->(x) { x.round }
  }.freeze

  CONSTANTS = { "pi" => Math::PI, "e" => Math::E }.freeze

  module_function

  # Evaluates +expression+ (anything responding to to_s) and returns the
  # numeric result.
  #
  # Raises Error for a malformed expression, division/modulo by zero, an
  # unknown name, a result that is not a real number, or an argument outside
  # a function's domain (e.g. sqrt(-1), floor of an infinite value) — the
  # underlying Math::DomainError / FloatDomainError are translated so callers
  # only ever have to rescue SafeMath::Error.
  def evaluate(expression)
    parser = Parser.new(tokenize(expression.to_s))
    value = parser.expression
    raise Error, "unexpected token: #{parser.current.inspect}" unless parser.done?

    value
  rescue Math::DomainError, FloatDomainError => e
    raise Error, "math domain error: #{e.message}"
  end

  # Splits +str+ into [:num, Float], [:ident, String] and [:op, String]
  # tokens. Whitespace is skipped; any other character raises Error.
  def tokenize(str)
    tokens = []
    scanner = str.dup
    until scanner.empty?
      case scanner
      when /\A\s+/ then scanner = scanner[Regexp.last_match(0).length..]
      when /\A(\d+\.\d+|\.\d+|\d+)([eE][+-]?\d+)?/
        tokens << [:num, Regexp.last_match(0).to_f]
        scanner = scanner[Regexp.last_match(0).length..]
      when /\A[A-Za-z_]\w*/
        tokens << [:ident, Regexp.last_match(0)]
        scanner = scanner[Regexp.last_match(0).length..]
      when /\A\*\*/ # must be tried before the single-character operators
        tokens << [:op, "**"]
        scanner = scanner[2..]
      when %r{\A[-+*/%(),]}
        tokens << [:op, Regexp.last_match(0)]
        scanner = scanner[1..]
      else raise Error, "unexpected character: #{scanner[0].inspect}"
      end
    end
    tokens
  end

  # Recursive-descent parser with standard precedence, lowest to highest:
  #
  #   expression := term  (("+" | "-") term)*
  #   term       := unary (("*" | "/" | "%") unary)*
  #   unary      := ("+" | "-") unary | power
  #   power      := primary ("**" unary)?          # right-associative
  #   primary    := number | name | name "(" expression ")" | "(" expression ")"
  #
  # Exponentiation binds tighter than a sign on its left, so -2**2 is
  # -(2**2) = -4, while the exponent may itself be signed (2**-1).
  class Parser
    def initialize(tokens)
      @tokens = tokens
      @pos = 0
    end

    # The token at the cursor, or nil once the input is exhausted.
    def current
      @tokens[@pos]
    end

    # True when every token has been consumed.
    def done?
      @pos >= @tokens.length
    end

    # Parses and evaluates an additive expression starting at the cursor.
    def expression
      value = term
      while op?(%w[+ -])
        operator = take[1]
        rhs = term
        value = operator == "+" ? value + rhs : value - rhs
      end
      value
    end

    private

    def term
      value = unary
      while op?(%w[* / %])
        operator = take[1]
        value = apply(operator, value, unary)
      end
      value
    end

    def unary
      return power unless op?(%w[- +])

      operator = take[1]
      operand = unary
      operator == "-" ? -operand : operand
    end

    def power
      base = primary
      return base unless op?(["**"])

      take
      exponent = unary # recursing through unary makes ** right-associative
      # floor/ceil/round hand back Integers; computing in floats keeps the
      # result a Float instead of a Rational (2 ** -1) or a huge bignum.
      result = base.to_f**exponent.to_f
      raise Error, "result is not a real number" if result.is_a?(Complex)

      result
    end

    def primary
      token = current
      raise Error, "unexpected end of expression" if token.nil?

      case token[0]
      when :num
        take[1]
      when :ident
        identifier(take[1])
      when :op
        raise Error, "expected '(' " unless token[1] == "("

        take
        value = expression
        expect(")")
        value
      else
        raise Error, "unexpected token: #{token.inspect}"
      end
    end

    # Resolves a constant, or parses and applies a one-argument function call.
    def identifier(name)
      key = name.downcase
      return CONSTANTS[key] if CONSTANTS.key?(key)

      func = FUNCTIONS[key]
      raise Error, "unknown name: #{name}" unless func

      expect("(")
      arg = expression
      expect(")")
      func.call(arg)
    end

    def apply(operator, lhs, rhs)
      case operator
      when "*" then lhs * rhs
      when "/"
        raise Error, "division by zero" if rhs.zero?

        # fdiv: never integer division, even when both operands are Integers
        # produced by floor/ceil/round.
        lhs.fdiv(rhs)
      when "%"
        raise Error, "modulo by zero" if rhs.zero?

        lhs % rhs
      end
    end

    def op?(values)
      token = current
      token && token[0] == :op && values.include?(token[1])
    end

    # Consumes and returns the current token (nil at end of input).
    def take
      token = current
      @pos += 1
      token
    end

    def expect(symbol)
      token = take
      raise Error, "expected #{symbol.inspect}" unless token && token[1] == symbol
    end
  end
end
|
|
181
|
+
end
|
|
182
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Toolbox
|
|
5
|
+
module Safety
|
|
6
|
+
# Validates the executable name for BashTool. The actual execution path
|
|
7
|
+
# uses array-form spawning (no shell), so this guard only has to ensure
|
|
8
|
+
# the program itself is on the allowlist and isn't smuggling a path or
|
|
9
|
+
# shell metacharacters. Arguments are passed verbatim as argv and are
|
|
10
|
+
# therefore inert — there is no shell to interpret them.
|
|
11
|
+
class CommandGuard
  # Raised by check! for any command that is rejected.
  class Blocked < StandardError; end

  SHELL_META = /[;&|<>`$(){}\[\]*?!#~\n\r]/
  PATH_SEP = %r{[/\\]}

  # allowed - a single name or a list of names; each is stored as a String.
  def initialize(allowed)
    @allowed = Array(allowed).map { |entry| entry.to_s }
  end

  # Returns the executable name as a String when it passes every check.
  # Raises Blocked when the name is empty, contains a path separator,
  # contains a shell metacharacter, or is not on the allowlist (checked in
  # that order, so the message names the first problem found).
  def check!(command)
    name = command.to_s

    raise Blocked, "no command given" if name.empty?
    raise Blocked, "executable name may not contain a path: #{name.inspect}" if PATH_SEP.match?(name)
    raise Blocked, "executable name may not contain shell metacharacters: #{name.inspect}" if SHELL_META.match?(name)
    return name if @allowed.include?(name)

    permitted = @allowed.empty? ? "(none)" : @allowed.join(", ")
    raise Blocked, "command not allowed: #{name.inspect} (allowed: #{permitted})"
  end
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Toolbox
|
|
5
|
+
module Safety
|
|
6
|
+
# Confines a user-supplied path to a root directory. Resolves symlinks so
|
|
7
|
+
# a link inside the root can't point outside it, and tolerates
|
|
8
|
+
# not-yet-existing files (for write/edit tools) by resolving the parent
|
|
9
|
+
# directory instead.
|
|
10
|
+
class PathJail
  # Raised when a path is missing, escapes the root, or the root itself does
  # not exist.
  class Jailbreak < StandardError; end

  # Upper bound on dangling symlinks followed by hand while resolving one
  # path; stops link cycles from recursing forever.
  MAX_SYMLINK_HOPS = 32

  # root    - directory that paths are confined to; must already exist.
  # enforce - when false, paths are resolved but never rejected for lying
  #           outside root.
  #
  # Raises Jailbreak when root does not exist.
  def initialize(root, enforce: true)
    @root = File.realpath(File.expand_path(root.to_s))
    @enforce = enforce
  rescue Errno::ENOENT
    raise Jailbreak, "fs_root does not exist: #{root}"
  end

  attr_reader :root

  # Returns the absolute, symlink-resolved path if it lies within root;
  # raises Jailbreak otherwise. Relative paths are interpreted relative to
  # root. When enforce is false (an operator-granted unsafe override), the
  # path is resolved anywhere on the host.
  def resolve(path)
    raise Jailbreak, "path must be provided" if path.nil? || path.to_s.empty?

    candidate = File.expand_path(path.to_s, @root)
    real = existing_realpath(candidate)
    return real unless @enforce

    unless real == @root || real.start_with?(@root + File::SEPARATOR)
      raise Jailbreak, "path escapes fs_root: #{path}"
    end

    real
  end

  private

  # realpath only works on paths that exist. For a target that doesn't
  # exist yet, resolve the nearest existing ancestor — however many levels
  # up that is — and re-append the missing components, so a symlinked
  # directory anywhere along the path is caught even when several trailing
  # components are still to be created.
  #
  # A dangling symlink counts as "not existing" for File.exist? but a write
  # through it would still land on its target, so it is followed by hand and
  # the target is what gets returned (and therefore checked against root).
  def existing_realpath(candidate, hops = 0)
    return File.realpath(candidate) if File.exist?(candidate)

    if File.symlink?(candidate)
      raise Jailbreak, "too many levels of symbolic links: #{candidate}" if hops >= MAX_SYMLINK_HOPS

      # Relative link targets are relative to the link's own (real) directory.
      link_dir = existing_realpath(File.dirname(candidate), hops)
      target = File.expand_path(File.readlink(candidate), link_dir)
      return existing_realpath(target, hops + 1)
    end

    parent = File.dirname(candidate)
    return candidate if parent == candidate # reached the filesystem root

    File.join(existing_realpath(parent, hops), File.basename(candidate))
  end
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
require "ipaddr"
|
|
5
|
+
require "resolv"
|
|
6
|
+
|
|
7
|
+
module RubyLLM
|
|
8
|
+
module Toolbox
|
|
9
|
+
module Safety
|
|
10
|
+
# SSRF defense for tools that fetch arbitrary URLs. It:
|
|
11
|
+
# - allows only http/https,
|
|
12
|
+
# - rejects embedded credentials,
|
|
13
|
+
# - enforces optional domain allow/deny lists,
|
|
14
|
+
# - resolves the host and blocks the request if ANY resolved address is
|
|
15
|
+
# private, loopback, link-local (incl. the cloud metadata IP), CGNAT,
|
|
16
|
+
# unique-local IPv6, or otherwise reserved.
|
|
17
|
+
#
|
|
18
|
+
# resolve! also returns a vetted IP so the caller can pin the socket to it
|
|
19
|
+
# (Net::HTTP#ipaddr=), closing the DNS-rebinding window: the address that
|
|
20
|
+
# was vetted is exactly the one connected to, while TLS/SNI/cert checks
|
|
21
|
+
# still use the hostname. Re-run resolve! on every redirect hop (the fetch
|
|
22
|
+
# helpers do).
|
|
23
|
+
class UrlGuard
  # Raised for every URL the guard refuses.
  class Blocked < StandardError; end

  # A vetted result: the parsed URI plus a concrete IP that every
  # connection should be pinned to, so a second DNS lookup at connect time
  # can't swap in a private address (DNS rebinding).
  Resolution = Struct.new(:uri, :address, keyword_init: true)

  ALLOWED_SCHEMES = %w[http https].freeze

  # Address ranges that must never be fetched. The last three IPv6 entries
  # (deprecated site-local, multicast, documentation) parallel the IPv4
  # multicast/documentation ranges already listed.
  BLOCKED_RANGES = %w[
    0.0.0.0/8 10.0.0.0/8 100.64.0.0/10 127.0.0.0/8 169.254.0.0/16
    172.16.0.0/12 192.0.0.0/24 192.0.2.0/24 192.168.0.0/16 198.18.0.0/15
    198.51.100.0/24 203.0.113.0/24 224.0.0.0/4 240.0.0.0/4 255.255.255.255/32
    ::1/128 ::/128 fc00::/7 fe80::/10 fec0::/10 ff00::/8 2001:db8::/32
  ].map { |cidr| IPAddr.new(cidr) }.freeze

  # NAT64 well-known prefix: the low 32 bits carry an IPv4 address, which is
  # what actually gets contacted, so that embedded address is vetted too.
  NAT64_PREFIX = IPAddr.new("64:ff9b::/96")

  # allowlist - when non-empty, only these domains (and their subdomains)
  #             may be fetched.
  # denylist  - domains (and their subdomains) that are always refused.
  # Entries are compared case-insensitively.
  def initialize(allowlist: [], denylist: [])
    @allowlist = Array(allowlist).map { |d| d.to_s.downcase }
    @denylist = Array(denylist).map { |d| d.to_s.downcase }
  end

  # Returns a parsed URI if the URL is safe to fetch; raises Blocked
  # otherwise.
  def check!(url)
    resolve!(url).uri
  end

  # Like check!, but also returns a vetted IP address to pin the connection
  # to (see Resolution). Raises Blocked otherwise.
  def resolve!(url)
    uri = parse(url)
    raise Blocked, "only http/https URLs are allowed" unless ALLOWED_SCHEMES.include?(uri.scheme)
    raise Blocked, "URL must include a host" if uri.host.nil? || uri.host.empty?
    raise Blocked, "URLs with embedded credentials are not allowed" if uri.userinfo

    # hostname (unlike host) drops the brackets around an IPv6 literal, so
    # "[2001:db8::1]" becomes an address the resolver and IPAddr understand.
    host = uri.hostname.downcase
    # "evil.test." names the same host as "evil.test"; compare the lists
    # against the dot-less form so a trailing dot can't dodge the denylist.
    enforce_domain_lists(host.chomp("."))
    Resolution.new(uri: uri, address: vetted_address(host))
  end

  private

  def parse(url)
    URI.parse(url.to_s)
  rescue URI::InvalidURIError => e
    raise Blocked, "invalid URL: #{e.message}"
  end

  # Denylist wins over allowlist; an empty allowlist means "allow all".
  def enforce_domain_lists(host)
    raise Blocked, "host is denylisted: #{host}" if listed?(@denylist, host)
    return if @allowlist.empty?

    raise Blocked, "host is not on the allowlist: #{host}" unless listed?(@allowlist, host)
  end

  # True when +host+ equals a listed domain or is a subdomain of one.
  def listed?(domains, host)
    domains.any? { |d| host == d || host.end_with?(".#{d}") }
  end

  # Resolve the host, block the request if ANY resolved address is in a
  # blocked range, and return the first address for the caller to pin to.
  def vetted_address(host)
    addresses = resolve(host)
    raise Blocked, "could not resolve host: #{host}" if addresses.empty?

    addresses.each do |addr|
      raise Blocked, "host resolves to a blocked address (#{addr})" if blocked_ip?(addr)
    end
    addresses.first
  end

  # Any resolver failure is treated as "no addresses", which the caller
  # turns into Blocked (fail closed).
  def resolve(host)
    Resolv.getaddresses(host)
  rescue StandardError
    []
  end

  def blocked_ip?(addr)
    # native unwraps an IPv4 address carried inside IPv6 (::ffff:a.b.c.d)
    # and returns every other address unchanged.
    ip = IPAddr.new(addr.to_s).native
    ip = IPAddr.new(ip.to_i & 0xffff_ffff, Socket::AF_INET) if ip.ipv6? && NAT64_PREFIX.include?(ip)
    BLOCKED_RANGES.any? { |range| range.include?(ip) }
  rescue IPAddr::Error
    true # if we can't parse it, don't fetch it
  end
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|