ruby_llm-toolbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +49 -0
  3. data/GUIDE.md +598 -0
  4. data/LICENSE +21 -0
  5. data/README.md +412 -0
  6. data/bin/verify_prism_parity +112 -0
  7. data/lib/ruby_llm/toolbox/base.rb +112 -0
  8. data/lib/ruby_llm/toolbox/configuration.rb +148 -0
  9. data/lib/ruby_llm/toolbox/data_path.rb +54 -0
  10. data/lib/ruby_llm/toolbox/process_registry.rb +226 -0
  11. data/lib/ruby_llm/toolbox/process_runner.rb +72 -0
  12. data/lib/ruby_llm/toolbox/ruby_outline.rb +213 -0
  13. data/lib/ruby_llm/toolbox/safe_math.rb +182 -0
  14. data/lib/ruby_llm/toolbox/safety/command_guard.rb +42 -0
  15. data/lib/ruby_llm/toolbox/safety/path_jail.rb +55 -0
  16. data/lib/ruby_llm/toolbox/safety/url_guard.rb +111 -0
  17. data/lib/ruby_llm/toolbox/sandbox/base.rb +151 -0
  18. data/lib/ruby_llm/toolbox/sandbox/bubblewrap.rb +70 -0
  19. data/lib/ruby_llm/toolbox/sandbox/docker.rb +69 -0
  20. data/lib/ruby_llm/toolbox/sandbox/sandbox_exec.rb +75 -0
  21. data/lib/ruby_llm/toolbox/search/brave.rb +64 -0
  22. data/lib/ruby_llm/toolbox/search/searxng.rb +64 -0
  23. data/lib/ruby_llm/toolbox/search/tavily.rb +70 -0
  24. data/lib/ruby_llm/toolbox/text_diff.rb +81 -0
  25. data/lib/ruby_llm/toolbox/toml.rb +409 -0
  26. data/lib/ruby_llm/toolbox/tools/apply_patch.rb +92 -0
  27. data/lib/ruby_llm/toolbox/tools/bash_tool.rb +101 -0
  28. data/lib/ruby_llm/toolbox/tools/bundle.rb +71 -0
  29. data/lib/ruby_llm/toolbox/tools/calculator.rb +42 -0
  30. data/lib/ruby_llm/toolbox/tools/create_directory.rb +35 -0
  31. data/lib/ruby_llm/toolbox/tools/csv_read.rb +69 -0
  32. data/lib/ruby_llm/toolbox/tools/csv_write.rb +51 -0
  33. data/lib/ruby_llm/toolbox/tools/date_time.rb +42 -0
  34. data/lib/ruby_llm/toolbox/tools/delete_file.rb +64 -0
  35. data/lib/ruby_llm/toolbox/tools/diff.rb +35 -0
  36. data/lib/ruby_llm/toolbox/tools/download_file.rb +55 -0
  37. data/lib/ruby_llm/toolbox/tools/edit_file.rb +82 -0
  38. data/lib/ruby_llm/toolbox/tools/gem_tool.rb +140 -0
  39. data/lib/ruby_llm/toolbox/tools/git_add.rb +46 -0
  40. data/lib/ruby_llm/toolbox/tools/git_blame.rb +58 -0
  41. data/lib/ruby_llm/toolbox/tools/git_branch.rb +35 -0
  42. data/lib/ruby_llm/toolbox/tools/git_checkout.rb +43 -0
  43. data/lib/ruby_llm/toolbox/tools/git_commit.rb +47 -0
  44. data/lib/ruby_llm/toolbox/tools/git_diff.rb +50 -0
  45. data/lib/ruby_llm/toolbox/tools/git_grep.rb +66 -0
  46. data/lib/ruby_llm/toolbox/tools/git_helpers.rb +68 -0
  47. data/lib/ruby_llm/toolbox/tools/git_log.rb +47 -0
  48. data/lib/ruby_llm/toolbox/tools/git_show.rb +48 -0
  49. data/lib/ruby_llm/toolbox/tools/git_status.rb +27 -0
  50. data/lib/ruby_llm/toolbox/tools/glob.rb +62 -0
  51. data/lib/ruby_llm/toolbox/tools/grep_files.rb +221 -0
  52. data/lib/ruby_llm/toolbox/tools/http_helpers.rb +130 -0
  53. data/lib/ruby_llm/toolbox/tools/http_request.rb +75 -0
  54. data/lib/ruby_llm/toolbox/tools/json_query.rb +69 -0
  55. data/lib/ruby_llm/toolbox/tools/lint.rb +67 -0
  56. data/lib/ruby_llm/toolbox/tools/list_directory.rb +87 -0
  57. data/lib/ruby_llm/toolbox/tools/move_file.rb +54 -0
  58. data/lib/ruby_llm/toolbox/tools/multi_edit.rb +107 -0
  59. data/lib/ruby_llm/toolbox/tools/parse_ruby.rb +111 -0
  60. data/lib/ruby_llm/toolbox/tools/process_kill.rb +41 -0
  61. data/lib/ruby_llm/toolbox/tools/process_list.rb +29 -0
  62. data/lib/ruby_llm/toolbox/tools/process_output.rb +55 -0
  63. data/lib/ruby_llm/toolbox/tools/process_start.rb +109 -0
  64. data/lib/ruby_llm/toolbox/tools/python_tests.rb +77 -0
  65. data/lib/ruby_llm/toolbox/tools/read_file.rb +75 -0
  66. data/lib/ruby_llm/toolbox/tools/replace_in_files.rb +139 -0
  67. data/lib/ruby_llm/toolbox/tools/run_python.rb +38 -0
  68. data/lib/ruby_llm/toolbox/tools/run_ruby.rb +37 -0
  69. data/lib/ruby_llm/toolbox/tools/run_rust.rb +42 -0
  70. data/lib/ruby_llm/toolbox/tools/run_tests.rb +81 -0
  71. data/lib/ruby_llm/toolbox/tools/sandbox_run.rb +40 -0
  72. data/lib/ruby_llm/toolbox/tools/todo_write.rb +57 -0
  73. data/lib/ruby_llm/toolbox/tools/toml_query.rb +70 -0
  74. data/lib/ruby_llm/toolbox/tools/toolchain_helpers.rb +62 -0
  75. data/lib/ruby_llm/toolbox/tools/tree.rb +87 -0
  76. data/lib/ruby_llm/toolbox/tools/web_fetch.rb +77 -0
  77. data/lib/ruby_llm/toolbox/tools/web_search.rb +81 -0
  78. data/lib/ruby_llm/toolbox/tools/write_file.rb +52 -0
  79. data/lib/ruby_llm/toolbox/tools/yaml_query.rb +73 -0
  80. data/lib/ruby_llm/toolbox/truncator.rb +68 -0
  81. data/lib/ruby_llm/toolbox/version.rb +7 -0
  82. data/lib/ruby_llm/toolbox.rb +161 -0
  83. metadata +194 -0
@@ -0,0 +1,213 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ripper"
4
+
5
module RubyLLM
  module Toolbox
    # Extracts a structural outline (classes, modules, methods, constants) from
    # Ruby source, with line numbers and nesting depth. Parsing only — the code
    # is never executed.
    #
    # Two backends sit behind one dispatcher and produce identical Entry lists:
    #
    # * PrismBackend — used automatically when `require "prism"` succeeds.
    #   Prism is bundled with Ruby 3.3+, so on a modern Ruby this needs no gem
    #   install. It is the same parser the VM itself uses, so its line numbers
    #   and structure are authoritative.
    # * RipperBackend — the stdlib fallback for runtimes that don't bundle
    #   Prism (e.g. non-MRI). Dependency-free, always present.
    #
    # Parity between the two is enforced by spec/ruby_outline_parity_spec.rb and
    # by bin/verify_prism_parity, which can be run under any Ruby (e.g. a
    # sandboxed ruby:3.4) to confirm the backends agree.
    module RubyOutline
      # Raised by either backend when the source does not parse.
      class ParseError < StandardError; end

      # One outline item. kind is :class, :module, :singleton_class, :method or
      # :constant; depth is 0 at the top level and grows by one inside each
      # class, module or singleton-class body.
      Entry = Struct.new(:kind, :name, :line, :depth, keyword_init: true)

      module_function

      # True when the Prism backend can be loaded on this Ruby. Memoized.
      def prism_available?
        return @prism_available if defined?(@prism_available)

        @prism_available = begin
          require "prism"
          true
        rescue LoadError
          false
        end
      end

      # The backend module extract uses by default: Prism when loadable,
      # Ripper otherwise.
      def active_backend
        prism_available? ? PrismBackend : RipperBackend
      end

      # Returns an Array<Entry> in source order. Raises ParseError on a syntax
      # error. Pass backend: to force a specific one (used by the parity tests).
      def extract(source, backend: active_backend)
        backend.extract(source.to_s)
      end

      # --- Prism backend ----------------------------------------------------
      module PrismBackend
        module_function

        # Parses source with Prism and returns the collected entries.
        # Raises ParseError when Prism reports the parse as unsuccessful.
        def extract(source)
          require "prism" # lazy: this backend is only called when Prism exists
          result = Prism.parse(source)
          raise ParseError, "source is not valid Ruby (syntax error)" unless result.success?

          acc = []
          visit(result.value, 0, acc)
          acc
        end

        # Depth-first walk. Definition nodes append an Entry; class-like nodes
        # recurse into their body one level deeper; every other node type is
        # transparent and its children are visited at the same depth.
        def visit(node, depth, acc)
          return if node.nil?

          case node
          when Prism::ClassNode
            acc << Entry.new(kind: :class, name: node.constant_path.slice,
                             line: node.constant_path.location.start_line, depth: depth)
            visit(node.body, depth + 1, acc)
          when Prism::ModuleNode
            acc << Entry.new(kind: :module, name: node.constant_path.slice,
                             line: node.constant_path.location.start_line, depth: depth)
            visit(node.body, depth + 1, acc)
          when Prism::SingletonClassNode
            acc << Entry.new(kind: :singleton_class, name: "<< #{node.expression.slice}",
                             line: node.location.start_line, depth: depth)
            visit(node.body, depth + 1, acc)
          when Prism::DefNode
            name = node.receiver ? "#{node.receiver.slice}.#{node.name}" : node.name.to_s
            acc << Entry.new(kind: :method, name: name, line: node.name_loc.start_line, depth: depth)
            # method bodies are not descended into
          when Prism::ConstantWriteNode
            # the assigned value is not descended into
            acc << Entry.new(kind: :constant, name: node.name.to_s,
                             line: node.name_loc.start_line, depth: depth)
          else
            node.compact_child_nodes.each { |child| visit(child, depth, acc) }
          end
        end
      end

      # --- Ripper backend (stdlib) ------------------------------------------
      module RipperBackend
        module_function

        # Parses source with Ripper.sexp and returns the collected entries.
        # Raises ParseError when Ripper returns nil (its syntax-error signal).
        def extract(source)
          sexp = Ripper.sexp(source.to_s)
          raise ParseError, "source is not valid Ruby (syntax error)" if sexp.nil?

          acc = []
          walk(sexp, 0, acc)
          acc
        end

        # Walks one s-expression. An array headed by a Symbol is a typed node
        # and goes to dispatch; any other array is a plain list of nodes.
        def walk(node, depth, acc)
          return unless node.is_a?(Array)

          if node[0].is_a?(Symbol)
            dispatch(node, depth, acc)
          else
            node.each { |child| walk(child, depth, acc) }
          end
        end

        # Handles one typed node: records definitions, recurses into
        # class-like bodies one level deeper, and passes through anything else.
        def dispatch(node, depth, acc)
          case node[0]
          when :program
            walk(node[1], depth, acc)
          when :class
            acc << Entry.new(kind: :class, name: const_name(node[1]), line: line_of(node[1]), depth: depth)
            walk(node[3], depth + 1, acc) # bodystmt
          when :module
            acc << Entry.new(kind: :module, name: const_name(node[1]), line: line_of(node[1]), depth: depth)
            walk(node[2], depth + 1, acc)
          when :sclass # class << self
            acc << Entry.new(kind: :singleton_class, name: "<< #{simple_name(node[1])}", line: line_of(node), depth: depth)
            walk(node[2], depth + 1, acc)
          when :def
            # method bodies are not descended into
            acc << Entry.new(kind: :method, name: ident_name(node[1]), line: line_of(node[1]), depth: depth)
          when :defs # def self.x / def Recv.x
            name = "#{simple_name(node[1])}.#{ident_name(node[3])}"
            acc << Entry.new(kind: :method, name: name, line: line_of(node[3]), depth: depth)
          when :bodystmt
            walk(node[1], depth, acc)
          when :assign
            handle_assign(node, depth, acc)
          else
            node[1..].each { |child| walk(child, depth, acc) }
          end
        end

        # Records `CONST = value` as a :constant entry without looking inside
        # the value. Any other assignment (local, ivar, Foo::BAR, a[i], ...)
        # is transparent: its target and value are walked at the same depth,
        # so definitions nested in the value — e.g.
        # `x = Class.new do def foo; end end` — are still reported, exactly as
        # the Prism backend's catch-all branch reports them.
        def handle_assign(node, depth, acc)
          target = node[1]
          field = target[1] if target.is_a?(Array) && target[0] == :var_field

          if field.is_a?(Array) && field[0] == :@const
            acc << Entry.new(kind: :constant, name: field[1], line: field[2]&.first, depth: depth)
          else
            node[1..].each { |child| walk(child, depth, acc) }
          end
        end

        # Renders the name node of a class/module header, joining namespaced
        # paths with "::".
        def const_name(node)
          case node && node[0]
          when :const_ref, :top_const_ref, :var_ref
            simple_name(node[1])
          when :const_path_ref # Foo::Bar
            "#{const_name(node[1])}::#{simple_name(node[2])}"
          else
            simple_name(node)
          end
        end

        # Best-effort textual name for a node: token text for leaf tokens,
        # unwrapping for reference wrappers, otherwise the first name-like
        # leaf found inside, or "?" when there is none.
        def simple_name(node)
          return node.to_s unless node.is_a?(Array)

          case node[0]
          when :@const, :@ident, :@ivar, :@gvar, :@kw then node[1].to_s
          when :const_ref, :var_ref, :var_field then simple_name(node[1])
          when :const_path_ref then "#{simple_name(node[1])}::#{simple_name(node[2])}"
          else
            leaf = find_name_leaf(node)
            leaf ? leaf[1].to_s : "?"
          end
        end

        # Name of a method-name token; falls back to simple_name for other
        # node shapes.
        def ident_name(node)
          return node.to_s unless node.is_a?(Array)

          node[0] == :@ident || node[0] == :@const || node[0] == :@kw ? node[1].to_s : simple_name(node)
        end

        # Line number of the first scanner token found inside node
        # (depth-first), or nil when there is none. A scanner token is
        # recognised by its shape: [Symbol, text, [line, column]].
        def line_of(node)
          return nil unless node.is_a?(Array)

          if node.size == 3 && node[2].is_a?(Array) && node[2].size == 2 &&
             node[2][0].is_a?(Integer) && node[0].is_a?(Symbol)
            return node[2][0]
          end

          node.each do |child|
            line = line_of(child)
            return line if line
          end
          nil
        end

        # First @const / @ident / @kw token inside node (depth-first), or nil.
        def find_name_leaf(node)
          return nil unless node.is_a?(Array)
          return node if %i[@const @ident @kw].include?(node[0])

          node.each do |child|
            found = find_name_leaf(child)
            return found if found
          end
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Toolbox
    # A safe arithmetic evaluator. Parses and evaluates a math expression with a
    # hand-written recursive-descent parser — it never calls eval, so a
    # malicious expression can't execute Ruby. Supports + - * / % **, unary
    # minus, parentheses, a small set of functions, and the constants pi and e.
    #
    # Numeric literals are converted with to_f, so arithmetic is done on Floats.
    module SafeMath
      # Raised for any tokenizing, parsing or evaluation problem detected here.
      class Error < StandardError; end

      # One-argument functions, looked up by lower-cased name.
      FUNCTIONS = {
        "sqrt" => ->(x) { Math.sqrt(x) },
        "abs" => ->(x) { x.abs },
        "sin" => ->(x) { Math.sin(x) },
        "cos" => ->(x) { Math.cos(x) },
        "tan" => ->(x) { Math.tan(x) },
        "ln" => ->(x) { Math.log(x) },
        "log10" => ->(x) { Math.log10(x) },
        "exp" => ->(x) { Math.exp(x) },
        "floor" => ->(x) { x.floor },
        "ceil" => ->(x) { x.ceil },
        "round" => ->(x) { x.round }
      }.freeze

      # Named constants, looked up by lower-cased name.
      CONSTANTS = { "pi" => Math::PI, "e" => Math::E }.freeze

      module_function

      # Evaluates expression (anything responding to to_s) and returns the
      # numeric result. Raises Error on an unknown character or name, a
      # malformed expression, trailing tokens, or division/modulo by zero.
      def evaluate(expression)
        tokens = tokenize(expression.to_s)
        parser = Parser.new(tokens)
        value = parser.expression
        raise Error, "unexpected token: #{parser.current.inspect}" unless parser.done?

        value
      end

      # Splits str into [type, value] pairs: [:num, Float], [:ident, String]
      # or [:op, String]. Whitespace is skipped; any other character raises
      # Error. "**" is matched before the single-character operators so it is
      # not read as two "*" tokens.
      def tokenize(str)
        tokens = []
        scanner = str.dup
        until scanner.empty?
          case scanner
          when /\A\s+/
            scanner = scanner[Regexp.last_match(0).length..]
          when /\A(\d+\.\d+|\.\d+|\d+)([eE][+-]?\d+)?/
            tokens << [:num, Regexp.last_match(0).to_f]
            scanner = scanner[Regexp.last_match(0).length..]
          when /\A[A-Za-z_]\w*/
            tokens << [:ident, Regexp.last_match(0)]
            scanner = scanner[Regexp.last_match(0).length..]
          when /\A\*\*/
            tokens << [:op, "**"]
            scanner = scanner[2..]
          when %r{\A[-+*/%(),]}
            tokens << [:op, Regexp.last_match(0)]
            scanner = scanner[1..]
          else
            raise Error, "unexpected character: #{scanner[0].inspect}"
          end
        end
        tokens
      end

      # Recursive-descent parser with standard precedence, lowest to highest:
      #
      #   expression := term (("+" | "-") term)*
      #   term       := unary (("*" | "/" | "%") unary)*
      #   unary      := ("-" | "+") unary | power
      #   power      := primary ("**" unary)?
      #   primary    := number | name | name "(" expression ")" | "(" expression ")"
      #
      # "**" binds tighter than a leading sign, so "-2 ** 2" is -(2 ** 2) = -4,
      # as in Ruby itself; the exponent may carry its own sign ("2 ** -1").
      class Parser
        def initialize(tokens)
          @tokens = tokens
          @pos = 0
        end

        # The token at the cursor, or nil once the input is exhausted.
        def current
          @tokens[@pos]
        end

        # True when every token has been consumed.
        def done?
          @pos >= @tokens.length
        end

        # Parses and evaluates a sum/difference; the grammar's entry point.
        def expression
          value = term
          while op?(%w[+ -])
            operator = take[1]
            rhs = term
            value = operator == "+" ? value + rhs : value - rhs
          end
          value
        end

        private

        # Product level: left-associative * / %.
        def term
          value = unary
          while op?(%w[* / %])
            operator = take[1]
            rhs = unary
            value = apply(operator, value, rhs)
          end
          value
        end

        # Sign level: any number of leading + / -, applied to a power.
        def unary
          return power unless op?(%w[- +])

          operator = take[1]
          value = unary
          operator == "-" ? -value : value
        end

        # Exponent level. Right-associative: the exponent is parsed as a full
        # unary, which can itself contain another "**".
        def power
          base = primary
          return base unless op?(["**"])

          take
          base**unary
        end

        # A number, a constant or function call, or a parenthesised expression.
        def primary
          token = current
          raise Error, "unexpected end of expression" if token.nil?

          case token[0]
          when :num
            take[1]
          when :ident
            identifier(take[1])
          when :op
            raise Error, "expected '(' " unless token[1] == "("

            take
            value = expression
            expect(")")
            value
          else
            raise Error, "unexpected token: #{token.inspect}"
          end
        end

        # Resolves a name case-insensitively: constants win; otherwise it must
        # be a known function followed by a single parenthesised argument.
        def identifier(name)
          key = name.downcase
          return CONSTANTS[key] if CONSTANTS.key?(key)

          func = FUNCTIONS[key]
          raise Error, "unknown name: #{name}" unless func

          expect("(")
          arg = expression
          expect(")")
          func.call(arg)
        end

        # Applies a product-level operator, raising Error on a zero divisor.
        def apply(operator, lhs, rhs)
          case operator
          when "*" then lhs * rhs
          when "/"
            raise Error, "division by zero" if rhs.zero?

            lhs / rhs
          when "%"
            raise Error, "modulo by zero" if rhs.zero?

            lhs % rhs
          end
        end

        # True when the current token is an operator listed in values.
        def op?(values)
          token = current
          token && token[0] == :op && values.include?(token[1])
        end

        # Returns the current token and advances the cursor.
        def take
          token = current
          @pos += 1
          token
        end

        # Consumes the next token, raising Error unless its value is symbol.
        def expect(symbol)
          token = take
          raise Error, "expected #{symbol.inspect}" unless token && token[1] == symbol
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Toolbox
    module Safety
      # Gatekeeper for the executable name handed to BashTool. Execution uses
      # array-form spawning (no shell), so the only thing to verify is the
      # program name itself: it must be on the allowlist and must not carry a
      # path or shell metacharacters. Arguments travel verbatim as argv and are
      # never interpreted by a shell.
      class CommandGuard
        class Blocked < StandardError; end

        SHELL_META = /[;&|<>`$(){}\[\]*?!#~\n\r]/
        PATH_SEP = %r{[/\\]}

        # allowed: one name or a list of names; each is stored as a String.
        def initialize(allowed)
          @allowed = Array(allowed).map { |entry| entry.to_s }
        end

        # Hands back the executable name as a String when it passes every
        # check; raises Blocked with an explanatory message otherwise.
        def check!(command)
          name = command.to_s
          problem = rejection_for(name)
          raise Blocked, problem if problem

          name
        end

        private

        # The reason name must be refused, or nil when it is acceptable.
        # Checks run in a fixed order: empty, path, metacharacters, allowlist.
        def rejection_for(name)
          if name.empty?
            "no command given"
          elsif PATH_SEP.match?(name)
            "executable name may not contain a path: #{name.inspect}"
          elsif SHELL_META.match?(name)
            "executable name may not contain shell metacharacters: #{name.inspect}"
          elsif !@allowed.include?(name)
            permitted = @allowed.empty? ? "(none)" : @allowed.join(", ")
            "command not allowed: #{name.inspect} (allowed: #{permitted})"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Toolbox
    module Safety
      # Confines a user-supplied path to a root directory. Resolves symlinks so
      # a link inside the root can't point outside it, and tolerates
      # not-yet-existing files (for write/edit tools) by resolving the nearest
      # existing ancestor directory instead.
      class PathJail
        class Jailbreak < StandardError; end

        # root:    directory to confine paths to; it must exist and is stored
        #          symlink-resolved.
        # enforce: when false, resolve still normalises paths but never
        #          rejects them.
        # Raises Jailbreak when root does not exist.
        def initialize(root, enforce: true)
          @root = File.realpath(File.expand_path(root.to_s))
          @enforce = enforce
        rescue Errno::ENOENT
          raise Jailbreak, "fs_root does not exist: #{root}"
        end

        attr_reader :root

        # Returns the absolute, symlink-resolved path if it lies within root;
        # raises Jailbreak otherwise. When enforce is false (an operator-granted
        # unsafe override), the path is resolved anywhere on the host.
        # Relative paths are interpreted relative to root.
        def resolve(path)
          raise Jailbreak, "path must be provided" if path.nil? || path.to_s.empty?

          candidate = File.expand_path(path.to_s, @root)
          real = existing_realpath(candidate)
          return real unless @enforce

          # Compare against "root/" rather than "root" so a sibling such as
          # "/srv/rootkit" is not mistaken for something inside "/srv/root".
          unless real == @root || real.start_with?(@root + File::SEPARATOR)
            raise Jailbreak, "path escapes fs_root: #{path}"
          end

          real
        end

        private

        # realpath only works on paths that exist. For a target that doesn't
        # exist yet, climb to the nearest existing ancestor, resolve that (so a
        # symlinked directory anywhere up the chain is still caught, even when
        # several trailing components are missing) and re-append the missing
        # components.
        #
        # NOTE(review): a dangling symlink reports File.exist? == false, so it
        # is treated like a missing component here rather than followed —
        # confirm whether write tools need an explicit check for that case.
        def existing_realpath(candidate)
          return File.realpath(candidate) if File.exist?(candidate)

          missing = []
          ancestor = candidate
          until File.exist?(ancestor)
            parent = File.dirname(ancestor)
            return candidate if parent == ancestor # hit the top; nothing to resolve

            missing.unshift(File.basename(ancestor))
            ancestor = parent
          end
          File.join(File.realpath(ancestor), *missing)
        end
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+ require "ipaddr"
5
+ require "resolv"
6
+
7
module RubyLLM
  module Toolbox
    module Safety
      # SSRF defense for tools that fetch arbitrary URLs. It:
      #   - allows only http/https,
      #   - rejects embedded credentials,
      #   - enforces optional domain allow/deny lists,
      #   - resolves the host and blocks the request if ANY resolved address is
      #     private, loopback, link-local (incl. the cloud metadata IP), CGNAT,
      #     unique-local IPv6, or otherwise reserved.
      #
      # resolve! also returns a vetted IP so the caller can pin the socket to it
      # (Net::HTTP#ipaddr=), closing the DNS-rebinding window: the address that
      # was vetted is exactly the one connected to, while TLS/SNI/cert checks
      # still use the hostname. Re-run resolve! on every redirect hop (the fetch
      # helpers do).
      class UrlGuard
        class Blocked < StandardError; end

        # A vetted result: the parsed URI plus a concrete IP that every
        # connection should be pinned to, so a second DNS lookup at connect time
        # can't swap in a private address (DNS rebinding).
        Resolution = Struct.new(:uri, :address, keyword_init: true)

        ALLOWED_SCHEMES = %w[http https].freeze

        # Networks a fetch must never reach, as IPAddr ranges.
        BLOCKED_RANGES = %w[
          0.0.0.0/8 10.0.0.0/8 100.64.0.0/10 127.0.0.0/8 169.254.0.0/16
          172.16.0.0/12 192.0.0.0/24 192.0.2.0/24 192.168.0.0/16 198.18.0.0/15
          198.51.100.0/24 203.0.113.0/24 224.0.0.0/4 240.0.0.0/4 255.255.255.255/32
          ::1/128 ::/128 fc00::/7 fe80::/10
        ].map { |cidr| IPAddr.new(cidr) }.freeze

        # allowlist / denylist: domain names, compared case-insensitively.
        # An entry matches the domain itself and any subdomain of it. An empty
        # allowlist means "no allowlist restriction".
        def initialize(allowlist: [], denylist: [])
          @allowlist = Array(allowlist).map { |d| d.to_s.downcase }
          @denylist = Array(denylist).map { |d| d.to_s.downcase }
        end

        # Returns a parsed URI if the URL is safe to fetch; raises Blocked
        # otherwise.
        def check!(url)
          resolve!(url).uri
        end

        # Like check!, but also returns a vetted IP address to pin the connection
        # to (see Resolution). Raises Blocked otherwise.
        def resolve!(url)
          uri = parse(url)
          raise Blocked, "only http/https URLs are allowed" unless ALLOWED_SCHEMES.include?(uri.scheme)
          raise Blocked, "URL must include a host" if uri.host.nil? || uri.host.empty?
          raise Blocked, "URLs with embedded credentials are not allowed" if uri.userinfo

          # "evil.com." is the same host as "evil.com"; drop the root dot so a
          # fully-qualified spelling cannot slip past the denylist.
          enforce_domain_lists(uri.host.downcase.chomp("."))

          # URI#host keeps the brackets of an IPv6 literal ("[::1]"), which no
          # resolver understands; URI#hostname strips them.
          address = vetted_address(uri.hostname.downcase)
          Resolution.new(uri: uri, address: address)
        end

        private

        # Parses url, converting a malformed URL into Blocked.
        def parse(url)
          URI.parse(url.to_s)
        rescue URI::InvalidURIError => e
          raise Blocked, "invalid URL: #{e.message}"
        end

        # Raises Blocked when host is denylisted, or when an allowlist is
        # configured and host is not covered by it. The denylist is checked
        # first, so it wins over the allowlist.
        def enforce_domain_lists(host)
          if @denylist.any? { |d| host == d || host.end_with?(".#{d}") }
            raise Blocked, "host is denylisted: #{host}"
          end
          return if @allowlist.empty?

          allowed = @allowlist.any? { |d| host == d || host.end_with?(".#{d}") }
          raise Blocked, "host is not on the allowlist: #{host}" unless allowed
        end

        # Resolve the host, block the request if ANY resolved address is in a
        # blocked range, and return the first address for the caller to pin to.
        def vetted_address(host)
          addresses = resolve(host)
          raise Blocked, "could not resolve host: #{host}" if addresses.empty?

          addresses.each do |addr|
            raise Blocked, "host resolves to a blocked address (#{addr})" if blocked_ip?(addr)
          end
          addresses.first
        end

        # All addresses for host, or [] when resolution fails for any reason
        # (the caller turns an empty list into Blocked).
        def resolve(host)
          Resolv.getaddresses(host)
        rescue StandardError
          []
        end

        # True when addr falls in BLOCKED_RANGES. IPv4-mapped IPv6 addresses
        # (::ffff:a.b.c.d) are unwrapped first so they are judged by their
        # IPv4 value.
        def blocked_ip?(addr)
          ip = IPAddr.new(addr.to_s)
          ip = ip.native if ip.respond_to?(:ipv4_mapped?) && ip.ipv4_mapped?
          BLOCKED_RANGES.any? { |range| range.include?(ip) }
        rescue IPAddr::Error
          true # if we can't parse it, don't fetch it
        end
      end
    end
  end
end