pikuri 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +62 -0
  3. data/GETTING_STARTED.md +223 -0
  4. data/LICENSE +21 -0
  5. data/README.md +193 -0
  6. data/lib/pikuri/agent/chat_transport.rb +41 -0
  7. data/lib/pikuri/agent/context_window_detector.rb +101 -0
  8. data/lib/pikuri/agent/listener/in_memory_message_list.rb +33 -0
  9. data/lib/pikuri/agent/listener/message_listener.rb +93 -0
  10. data/lib/pikuri/agent/listener/step_limit.rb +97 -0
  11. data/lib/pikuri/agent/listener/terminal.rb +137 -0
  12. data/lib/pikuri/agent/listener/token_log.rb +166 -0
  13. data/lib/pikuri/agent/listener_list.rb +113 -0
  14. data/lib/pikuri/agent/message.rb +61 -0
  15. data/lib/pikuri/agent/synthesizer.rb +120 -0
  16. data/lib/pikuri/agent/tokens.rb +56 -0
  17. data/lib/pikuri/agent.rb +286 -0
  18. data/lib/pikuri/subprocess.rb +166 -0
  19. data/lib/pikuri/tool/bash.rb +272 -0
  20. data/lib/pikuri/tool/calculator.rb +82 -0
  21. data/lib/pikuri/tool/confirmer.rb +96 -0
  22. data/lib/pikuri/tool/edit.rb +196 -0
  23. data/lib/pikuri/tool/fetch.rb +167 -0
  24. data/lib/pikuri/tool/glob.rb +310 -0
  25. data/lib/pikuri/tool/grep.rb +338 -0
  26. data/lib/pikuri/tool/parameters.rb +314 -0
  27. data/lib/pikuri/tool/read.rb +254 -0
  28. data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
  29. data/lib/pikuri/tool/scraper/html.rb +285 -0
  30. data/lib/pikuri/tool/scraper/pdf.rb +54 -0
  31. data/lib/pikuri/tool/scraper/simple.rb +177 -0
  32. data/lib/pikuri/tool/search/brave.rb +184 -0
  33. data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
  34. data/lib/pikuri/tool/search/engines.rb +154 -0
  35. data/lib/pikuri/tool/search/exa.rb +217 -0
  36. data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
  37. data/lib/pikuri/tool/search/result.rb +29 -0
  38. data/lib/pikuri/tool/skill.rb +80 -0
  39. data/lib/pikuri/tool/skill_catalog.rb +376 -0
  40. data/lib/pikuri/tool/sub_agent.rb +102 -0
  41. data/lib/pikuri/tool/web_scrape.rb +117 -0
  42. data/lib/pikuri/tool/web_search.rb +38 -0
  43. data/lib/pikuri/tool/workspace.rb +150 -0
  44. data/lib/pikuri/tool/write.rb +170 -0
  45. data/lib/pikuri/tool.rb +118 -0
  46. data/lib/pikuri/url_cache.rb +106 -0
  47. data/lib/pikuri/version.rb +10 -0
  48. data/lib/pikuri.rb +165 -0
  49. data/prompts/coding-system-prompt.txt +28 -0
  50. data/prompts/pikuri-chat.txt +15 -0
  51. metadata +259 -0
@@ -0,0 +1,338 @@
1
+ # frozen_string_literal: true
2
+
3
module Pikuri
  class Tool
    # The +grep+ tool — content search across the workspace via
    # +ripgrep+. Instantiating +Tool::Grep.new(workspace: ws)+ produces a
    # tool whose {Tool#to_ruby_llm_tool} wiring is identical to any
    # bundled tool's. Same shape as {Tool::Read} (workspace captured by
    # the +execute+ closure, no confirmer — search is read-only).
    #
    # == ripgrep dependency
    #
    # Hard dependency: {.check_binaries!} runs in +initialize+ and raises
    # if +rg+ isn't on +PATH+. Mirrors {Tool::Bash}'s posture for
    # +bash+/+timeout+. We don't ship a Ruby fallback — replicating
    # rg's Rust-regex dialect, glob handling, and +.gitignore+ parsing
    # is a research-loop dead end. Failure message includes the install
    # hint.
    #
    # == Argv
    #
    #   rg --line-number --color=never --no-heading --with-filename \
    #      --hidden --max-columns=2000 --max-columns-preview \
    #      --sort=path \
    #      [-i] [--glob <g>] [--files-with-matches|--count-matches] \
    #      -- <pattern> <relative-path-or-dot>
    #
    # * +--no-heading+ + +--with-filename+ → flat +path:line:content+ rows
    #   regardless of whether the search target is a directory or a single
    #   file (rg defaults to suppressing the filename for single-file
    #   searches — we force it on for output consistency).
    # * +--hidden+ → search dotfiles (still respects +.gitignore+).
    # * +--max-columns=2000 --max-columns-preview+ → rg itself truncates
    #   lines longer than {MAX_LINE_LENGTH} and appends a preview marker,
    #   sparing us per-line truncation.
    # * +--sort=path+ → deterministic output (single-threaded; fine for
    #   typical repos under ~10k files). Makes specs assertable and gives
    #   the model a stable order to scan.
    # * Subprocess runs with +chdir: workspace.cwd+ and is *always* given
    #   an explicit path argument. {Pikuri::Subprocess.spawn} uses
    #   +popen2e+ which gives the child a piped (non-tty) stdin; rg's
    #   default heuristic on no-path-arg-with-piped-stdin is to search
    #   stdin (which we then close — yielding zero matches). Passing the
    #   path argument explicitly bypasses the heuristic. Output paths
    #   come back as +./...+ when the path is +.+; the leading +./+ is
    #   stripped post-rg so the model sees clean workspace-relative paths.
    #
    # == Output modes
    #
    # * +content+ (default) — +path:line:content+ rows.
    # * +files_with_matches+ — just file paths, one per line.
    # * +count+ — +path:count+ per file.
    #
    # Use +files_with_matches+ to scope a broad search cheaply before
    # paying tokens for +content+.
    #
    # == Truncation
    #
    # Total output is head-truncated to {MAX_BYTES} (head-only — grep
    # tails usually carry less signal than the first matches; opposite
    # bias from {Tool::Bash}). Cut at the last line boundary, with a
    # marker reporting omitted bytes and the original total so the model
    # knows how much it missed. The footer counts describe the rows that
    # are actually shown.
    #
    # == Exit codes
    #
    # * +0+ → matches; format with footer.
    # * +1+ → no matches; return +"No matches for pattern '...'"+.
    # * anything else (including no exit status at all) → rg error (bad
    #   regex, missing path); return +"Error: ripgrep: ..."+.
    #
    # == Refusals
    #
    # All returned as +"Error: ..."+ observations:
    #
    # * Empty +pattern+ → fast reject.
    # * Unknown +output_mode+ → enum error listing valid values.
    # * Path outside the workspace → caught from {Tool::Workspace::Error}.
    # * Nonexistent path → +"Error: path not found: <path>"+.
    #
    # Blank (empty or whitespace-only) +path+ / +glob+ values are treated
    # as "not supplied" rather than forwarded to rg.
    class Grep < Tool
      # @return [Integer] hard byte cap on combined rg output. Same value
      #   as {Tool::Read::MAX_BYTES} so the two file-touching tools share
      #   a budget shape.
      MAX_BYTES = 50 * 1024

      # @return [String] human-readable form of {MAX_BYTES} for the
      #   truncation marker.
      MAX_BYTES_LABEL = "#{MAX_BYTES / 1024} KB"

      # @return [Integer] per-line cap passed to rg's +--max-columns+.
      #   Long lines are truncated by rg with a preview marker.
      MAX_LINE_LENGTH = 2000

      # @return [Array<String>] valid +output_mode+ values.
      OUTPUT_MODES = %w[content files_with_matches count].freeze

      # @return [String] default +output_mode+.
      DEFAULT_OUTPUT_MODE = 'content'

      # Description shown to the LLM. opencode-shape (summary + +Usage:+
      # bullets). Per-parameter constraints live in parameter
      # descriptions.
      #
      # @return [String]
      DESCRIPTION = <<~DESC
        Search file contents for a regex pattern across the workspace.

        Usage:
        - Wraps `ripgrep` — regex syntax is rg's Rust-regex dialect (mostly PCRE-compatible; no lookbehind).
        - Default search root is the workspace root; pass `path` to narrow to a file or subdirectory.
        - Respects `.gitignore` — for unfiltered search use bash `rg --no-ignore <pattern>`.
        - Use `glob` to filter by filename, e.g. `"*.rb"` or `"src/**/*.{ts,tsx}"`.
        - `output_mode` controls verbosity: `content` (default, file:line:text), `files_with_matches` (paths only), `count` (matches per file).
        - Use `files_with_matches` first to scope a broad search, then `content` (or `read`) to investigate — saves tokens.
        - Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
        - Long lines are truncated to #{MAX_LINE_LENGTH} chars with a preview marker; use `read` to see full lines.
      DESC

      # @param workspace [Tool::Workspace] captured for path resolution
      #   and as +chdir+ for rg. All path arguments route through
      #   +workspace.resolve_for_read+.
      # @raise [RuntimeError] if +rg+ isn't on +PATH+; fail-loud at
      #   construction rather than the first tool call.
      # @return [Grep]
      def initialize(workspace:)
        # check_binaries! is a private class method, hence the +send+.
        Grep.send(:check_binaries!)
        super(
          name: 'grep',
          description: DESCRIPTION,
          parameters: Parameters.build { |p|
            p.required_string :pattern,
                              'Regex pattern to search for (rg Rust-regex ' \
                              'dialect), e.g. "def\s+\w+" or "TODO".'
            p.optional_string :path,
                              'File or directory to search. Relative paths ' \
                              'resolve against the workspace root. Defaults ' \
                              'to the workspace root, e.g. "lib/" or "lib/foo.rb".'
            p.optional_string :glob,
                              'Filename glob to filter files, e.g. "*.rb" ' \
                              'or "src/**/*.{ts,tsx}".'
            p.optional_boolean :case_insensitive,
                               'Match case-insensitively. Defaults to false, e.g. true.'
            p.optional_string :output_mode,
                              "One of #{OUTPUT_MODES.join(', ')}. Defaults to " \
                              "#{DEFAULT_OUTPUT_MODE}, e.g. \"files_with_matches\"."
          },
          execute: lambda { |pattern:, path: nil, glob: nil,
                             case_insensitive: false, output_mode: DEFAULT_OUTPUT_MODE|
            Grep.search(workspace: workspace, pattern: pattern, path: path,
                        glob: glob, case_insensitive: case_insensitive,
                        output_mode: output_mode)
          }
        )
      end

      # Validate inputs, resolve the path against the workspace, spawn
      # rg, and render the observation. Returns either the formatted
      # results, a "no matches" string, or +"Error: ..."+.
      #
      # @param workspace [Tool::Workspace]
      # @param pattern [String]
      # @param path [String, nil] blank values are treated as +nil+
      # @param glob [String, nil] blank values are treated as +nil+
      # @param case_insensitive [Boolean]
      # @param output_mode [String]
      # @return [String]
      def self.search(workspace:, pattern:, path:, glob:, case_insensitive:, output_mode:)
        return 'Error: empty pattern.' if pattern.empty?
        unless OUTPUT_MODES.include?(output_mode)
          return "Error: output_mode must be one of #{OUTPUT_MODES.join(', ')}, " \
                 "got #{output_mode.inspect}."
        end

        # Models frequently send "" for optional arguments they mean to
        # omit; forwarding an empty glob/path to rg yields confusing
        # results, so fold blanks into "not supplied".
        path = blank_to_nil(path)
        glob = blank_to_nil(glob)

        search_target = '.'
        if path
          resolved = workspace.resolve_for_read(path)
          return "Error: path not found: #{path}" unless resolved.exist?

          search_target = resolved.relative_path_from(workspace.cwd).to_s
        end

        argv = build_argv(pattern: pattern, glob: glob,
                          case_insensitive: case_insensitive,
                          output_mode: output_mode, path: search_target)

        result = Pikuri::Subprocess.spawn(*argv, chdir: workspace.cwd.to_s).wait
        exit_code = result.status.exitstatus

        case exit_code
        when 0
          format_output(result.output, output_mode: output_mode,
                                       pattern: pattern, path: path)
        when 1
          no_match_message(pattern: pattern, path: path)
        else
          detail = result.output.strip
          if detail.empty?
            # exitstatus is nil when the process did not exit normally;
            # avoid rendering a dangling "exited " in that case.
            detail = exit_code ? "exited #{exit_code}" : 'terminated abnormally (no exit status)'
          end
          "Error: ripgrep: #{detail}"
        end
      rescue Tool::Workspace::Error => e
        "Error: #{e.message}"
      end

      # Return +value+ unless it is +nil+ or contains only whitespace.
      #
      # @param value [String, nil]
      # @return [String, nil]
      def self.blank_to_nil(value)
        return nil if value.nil? || value.strip.empty?

        value
      end
      private_class_method :blank_to_nil

      # Build the +rg+ argv. The search path is always passed (callers
      # default it to +.+) so rg never falls back to reading stdin. The
      # +--+ separator keeps patterns that start with +-+ from being
      # parsed as flags.
      #
      # @return [Array<String>]
      def self.build_argv(pattern:, glob:, case_insensitive:, output_mode:, path:)
        argv = [
          'rg',
          '--line-number',
          '--color=never',
          '--no-heading',
          '--with-filename',
          '--hidden',
          "--max-columns=#{MAX_LINE_LENGTH}",
          '--max-columns-preview',
          '--sort=path'
        ]
        argv << '-i' if case_insensitive
        argv.push('--glob', glob) if glob
        case output_mode
        when 'files_with_matches' then argv << '--files-with-matches'
        when 'count' then argv << '--count-matches'
        end
        argv.push('--', pattern, path)
        argv
      end
      private_class_method :build_argv

      # Render rg output: strip the +./+ prefix rg adds when path is
      # +.+, head-truncate at {MAX_BYTES}, append a footer summarizing
      # the result count.
      #
      # @param raw [String] combined rg output
      # @return [String]
      def self.format_output(raw, output_mode:, pattern:, path:)
        # Matched lines may carry bytes that are not valid in the
        # string's encoding; regex-based processing below would raise on
        # them, so replace them up front.
        raw = raw.scrub unless raw.valid_encoding?

        cleaned = strip_dot_slash(raw)
        content, truncation_marker = head_truncate(cleaned)
        stripped = content.chomp

        return no_match_message(pattern: pattern, path: path) if stripped.empty?

        footer = build_footer(stripped, output_mode)
        [stripped, '', footer + truncation_marker].join("\n")
      end
      private_class_method :format_output

      # Strip leading +./+ from each line of rg output. rg emits this
      # prefix when invoked with +.+ as the search path; we want clean
      # workspace-relative paths regardless of whether the user passed a
      # path or we defaulted to +.+.
      #
      # @return [String]
      def self.strip_dot_slash(raw)
        raw.gsub(%r{^\./}, '')
      end
      private_class_method :strip_dot_slash

      # Head-truncate +raw+ to {MAX_BYTES}, cutting at the last newline
      # boundary so the final row is never partial. Returns the truncated
      # content and a marker String (empty if no truncation).
      #
      # @param raw [String]
      # @return [Array(String, String)]
      def self.head_truncate(raw)
        total = raw.bytesize
        return [raw, ''] if total <= MAX_BYTES

        window = raw.byteslice(0, MAX_BYTES)
        # String#rindex reports a *character* index; searching a binary
        # copy yields the *byte* offset that byteslice needs. Without
        # this, multibyte output is cut far earlier than intended.
        cut = window.b.rindex("\n")
        head = if cut
                 raw.byteslice(0, cut)
               else
                 # No line boundary in the window: the byte cap may have
                 # split a multibyte character, so drop invalid bytes.
                 window.scrub('')
               end
        omitted = total - head.bytesize
        marker = "\n\n... [#{omitted} bytes omitted; total was #{total} bytes; " \
                 'refine pattern or path] ...'
        [head, marker]
      end
      private_class_method :head_truncate

      # Compose a one-line footer summarizing the result. Format depends
      # on +output_mode+; counts derive from rg's text output.
      #
      # @param content [String] the (possibly truncated) rows being shown
      # @param output_mode [String]
      # @return [String]
      def self.build_footer(content, output_mode)
        lines = content.split("\n").reject(&:empty?)
        case output_mode
        when 'content'
          files = lines.map { |l| l.split(':', 2).first }.uniq
          "Found #{pluralize(lines.size, 'match', 'matches')} in " \
            "#{pluralize(files.size, 'file', 'files')}."
        when 'files_with_matches'
          "Found #{pluralize(lines.size, 'file', 'files')}."
        when 'count'
          # Only rows ending in +:<integer>+ are tallied. The subprocess
          # output can interleave non-count lines (e.g. rg warnings); a
          # raising Integer() would turn one stray line into a crash.
          counts = lines.map { |l| Integer(l.rpartition(':').last, 10, exception: false) }.compact
          "Found #{pluralize(counts.sum, 'match', 'matches')} in " \
            "#{pluralize(counts.size, 'file', 'files')}."
        end
      end
      private_class_method :build_footer

      # @return [String] +"1 match"+ / +"2 matches"+
      def self.pluralize(n, sing, plural)
        "#{n} #{n == 1 ? sing : plural}"
      end
      private_class_method :pluralize

      # Message returned when rg finds nothing (or output was empty).
      #
      # @return [String]
      def self.no_match_message(pattern:, path:)
        base = "No matches for pattern '#{pattern}'"
        base += " in #{path}" if path
        "#{base}."
      end
      private_class_method :no_match_message

      # Verify +rg+ is reachable on +PATH+. Routed through
      # {Pikuri::Subprocess.spawn} to honor the subprocess seam.
      # rg missing surfaces as +Errno::ENOENT+; an installed rg returns
      # exit 0 from +--version+.
      #
      # @return [void]
      # @raise [RuntimeError] if rg is missing
      def self.check_binaries!
        result = Pikuri::Subprocess.spawn('rg', '--version', chdir: '/').wait
        return if result.status.success?

        raise install_hint
      rescue Errno::ENOENT
        raise install_hint
      end
      private_class_method :check_binaries!

      # @return [String]
      def self.install_hint
        "Tool::Grep requires 'rg' (ripgrep) on PATH; install via your " \
          "distro's package manager (e.g. 'apt install ripgrep')."
      end
      private_class_method :install_hint
    end
  end
end
@@ -0,0 +1,314 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'did_you_mean'
4
+
5
module Pikuri
  # Loaded by +lib/tools.rb+ after {Tool} itself is defined; the +class Tool+
  # reopening below assumes that order.
  class Tool
    # Schema for a {Tool}'s arguments. Built up via the fluent
    # +<required|optional>_<type>+ methods, then frozen by {.build}; serializes
    # to the OpenAI JSON-Schema shape via {#to_h} and validates LLM-supplied
    # argument hashes via {#validate}.
    #
    # @example
    #   params = Tool::Parameters.build { |p| p.required_string :query, 'The query.' }
    #   params.to_h
    #   # => {type: 'object',
    #   #     properties: {query: {type: 'string', description: 'The query.'}},
    #   #     required: ['query']}
    #   params.validate('query' => 'cats') # => {query: 'cats'}
    class Parameters
      # Raised by {Parameters#validate} when arguments do not match the declared
      # schema. The message lists every problem and reprints the schema, so it
      # can be fed back to the LLM verbatim as the next tool-call observation.
      class ValidationError < StandardError; end

      # Yield a fresh builder, freeze it, and return it.
      #
      # @yieldparam builder [Parameters]
      # @return [Parameters] frozen builder, safe to share between calls
      def self.build
        builder = new
        yield builder
        builder.freeze
      end

      # @return [Parameters]
      def initialize
        @properties = {}
        @required = []
      end

      # Freeze the builder along with its internal collections, so post-build
      # mutation attempts raise +FrozenError+ instead of silently succeeding.
      #
      # @return [self]
      def freeze
        @properties.freeze
        @required.freeze
        super
      end

      # Add a required +string+ property.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def required_string(name, description)
        add(name, 'string', description, required: true)
      end

      # Add an optional +string+ property.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def optional_string(name, description)
        add(name, 'string', description, required: false)
      end

      # Add a required +integer+ property. Accepts Integers, Floats with a
      # zero fractional part (e.g. +1.0+), and base-10 numeric Strings (after
      # trimming) that resolve to whole numbers; rejects everything else.
      # Plain digit Strings are parsed exactly, with no Float round-trip.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def required_integer(name, description)
        add(name, 'integer', description, required: true)
      end

      # Add an optional +integer+ property. See {#required_integer} for
      # accepted shapes.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def optional_integer(name, description)
        add(name, 'integer', description, required: false)
      end

      # Add a required +number+ property (JSON-Schema +number+: Integer or
      # finite Float). Numeric Strings (after trimming) are parsed; NaN and
      # Infinity are rejected.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def required_number(name, description)
        add(name, 'number', description, required: true)
      end

      # Add an optional +number+ property. See {#required_number} for
      # accepted shapes.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def optional_number(name, description)
        add(name, 'number', description, required: false)
      end

      # Add a required +boolean+ property. Accepts Ruby +true+/+false+
      # as-is, and the literal Strings +"true"+/+"false"+ (some models
      # surface JSON booleans as Strings) after trimming surrounding
      # whitespace. Other Strings, numbers, and +nil+ are rejected —
      # there is no truthy-coercion of +"yes"+ / +0+ / etc.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def required_boolean(name, description)
        add(name, 'boolean', description, required: true)
      end

      # Add an optional +boolean+ property. See {#required_boolean} for
      # accepted shapes.
      #
      # @param name [Symbol] property name
      # @param description [String] human-readable description shown to the LLM
      # @return [self]
      def optional_boolean(name, description)
        add(name, 'boolean', description, required: false)
      end

      # Schema in OpenAI JSON-Schema shape.
      #
      # @return [Hash] +{type: 'object', properties: {...}, required: [...]}+
      def to_h
        { type: 'object', properties: @properties, required: @required }
      end

      # Validate a tool-call argument hash against the declared schema. Returns
      # a symbol-keyed hash safe to splat as kwargs into a tool's +execute+
      # Proc; raises {ValidationError} with an LLM-actionable message listing
      # every missing/unknown/mistyped field and reprinting the schema.
      #
      # Strict: unknown keys are rejected (with DidYouMean suggestions), wrong
      # types are rejected. All issues are collected and reported together so
      # the LLM can fix them in one round trip.
      #
      # @param args [Hash] arguments as decoded from the tool-call JSON; keys
      #   may be Strings or Symbols
      # @return [Hash{Symbol=>Object}] validated, symbol-keyed arguments
      # @raise [ValidationError] if +args+ is not a Hash, contains unknown
      #   keys, omits a required key, or has a value of the wrong type
      def validate(args)
        raise ValidationError, "Arguments must be an object, got #{args.class}." unless args.is_a?(Hash)

        # Go through +to_s+ so a stray non-String/Symbol key is reported as
        # an unknown parameter instead of raising NoMethodError.
        symbolized = args.transform_keys { |key| key.to_s.to_sym }
        errors = []
        result = {}

        (symbolized.keys - @properties.keys).each do |unknown|
          errors << unknown_key_error(unknown)
        end

        @properties.each do |name, schema|
          if symbolized.key?(name)
            begin
              result[name] = coerce(symbolized[name], schema[:type])
            rescue CoercionError => e
              errors << "Parameter `#{name}` #{e.message}."
            end
          elsif @required.include?(name.to_s)
            errors << "Missing required parameter `#{name}` (#{schema[:type]}): #{schema[:description]}"
          end
        end

        return result if errors.empty?

        raise ValidationError, build_error_message(errors)
      end

      private

      # Internal coercion failure. Caught by {#validate} and turned into a
      # {ValidationError} message — never escapes the class.
      class CoercionError < StandardError; end
      private_constant :CoercionError

      # Matches the decimal-numeric subset that JSON allows: optional sign,
      # mantissa (with optional fractional part), optional decimal exponent.
      # Rejects hex (+0x10+), underscores (+1_000+), +NaN+, +Infinity+.
      DECIMAL_NUMERIC = /\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/
      private_constant :DECIMAL_NUMERIC

      # Optionally signed run of decimal digits — the shape that can be
      # parsed straight to an Integer with no precision loss.
      INTEGER_LITERAL = /\A[-+]?\d+\z/
      private_constant :INTEGER_LITERAL

      # Register a property. The name is stored as a Symbol because
      # {#validate} looks arguments up by symbolized key.
      def add(name, type, description, required:)
        key = name.to_sym
        @properties[key] = { type: type, description: description }
        @required << key.to_s if required
        self
      end

      # Coerce +value+ to a Ruby value matching the JSON-Schema +type+,
      # returning the coerced value. Raises {CoercionError} on failure.
      def coerce(value, type)
        case type
        when 'string'
          return value if value.is_a?(String)

          raise CoercionError, type_message('string', value)
        when 'integer'
          coerce_integer(value)
        when 'number'
          coerce_number(value)
        when 'boolean'
          coerce_boolean(value)
        else
          # Defensive: the builder methods only register the four types
          # above, but never let an unknown type silently coerce to nil.
          raise CoercionError, "has unsupported schema type #{type.inspect}"
        end
      end

      # Accept +true+/+false+ and the trimmed Strings +"true"+/+"false"+.
      def coerce_boolean(value)
        return value if value == true || value == false

        if value.is_a?(String)
          case value.strip
          when 'true' then return true
          when 'false' then return false
          end
        end

        raise CoercionError, type_message('boolean', value)
      end

      # Accept Integers, whole-valued finite Floats, and Strings that spell
      # a whole number.
      def coerce_integer(value)
        case value
        when Integer
          value
        when Float
          raise CoercionError, type_message('integer', value) unless value.finite? && value.modulo(1).zero?

          value.to_i
        when String
          trimmed = value.strip
          # Parse digit-only strings directly: a Float round-trip silently
          # corrupts values beyond 2**53 (e.g. "9007199254740993").
          return Integer(trimmed, 10) if trimmed.match?(INTEGER_LITERAL)

          parsed = parse_numeric_string(trimmed)
          raise CoercionError, type_message('integer', value) unless parsed && parsed.modulo(1).zero?

          parsed.to_i
        else
          raise CoercionError, type_message('integer', value)
        end
      end

      # Accept Integers, finite Floats, and Strings that spell a decimal
      # number (returned as a Float).
      def coerce_number(value)
        case value
        when Integer
          value
        when Float
          raise CoercionError, type_message('number', value) unless value.finite?

          value
        when String
          parsed = parse_numeric_string(value)
          raise CoercionError, type_message('number', value) unless parsed

          parsed
        else
          raise CoercionError, type_message('number', value)
        end
      end

      # Strict base-10 numeric-string parse. Returns a finite Float, or +nil+
      # for empty/whitespace/garbage/hex/NaN/Infinity input.
      def parse_numeric_string(str)
        trimmed = str.strip
        return nil unless trimmed.match?(DECIMAL_NUMERIC)

        parsed = Float(trimmed, exception: false)
        return nil unless parsed&.finite?

        parsed
      end

      # Predicate half of a coercion error sentence; {#validate} prefixes it
      # with the parameter name.
      def type_message(type, value)
        article = type == 'integer' ? 'an' : 'a'
        "must be #{article} #{type} (got #{value.class}: #{value.inspect})"
      end

      # Error line for an undeclared key: suggest the closest declared name
      # when DidYouMean finds one, otherwise list every valid name.
      def unknown_key_error(unknown)
        suggestion = DidYouMean::SpellChecker
                     .new(dictionary: @properties.keys.map(&:to_s))
                     .correct(unknown.to_s).first
        msg = "Unknown parameter `#{unknown}`."
        msg += suggestion ? " Did you mean `#{suggestion}`?" : " Valid parameters: #{valid_keys_list}."
        msg
      end

      # Comma-separated, backtick-quoted list of declared parameter names.
      def valid_keys_list
        @properties.keys.map { |k| "`#{k}`" }.join(', ')
      end

      # Assemble the full {ValidationError} message: the collected problems
      # followed by a reprint of the expected schema.
      def build_error_message(errors)
        [
          'Invalid arguments:',
          *errors.map { |e| "- #{e}" },
          '',
          'Expected schema:',
          *@properties.map { |name, prop|
            req = @required.include?(name.to_s) ? 'required' : 'optional'
            "  - `#{name}` (#{prop[:type]}, #{req}): #{prop[:description]}"
          }
        ].join("\n")
      end
    end
  end
end