pikuri 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +62 -0
- data/GETTING_STARTED.md +223 -0
- data/LICENSE +21 -0
- data/README.md +193 -0
- data/lib/pikuri/agent/chat_transport.rb +41 -0
- data/lib/pikuri/agent/context_window_detector.rb +101 -0
- data/lib/pikuri/agent/listener/in_memory_message_list.rb +33 -0
- data/lib/pikuri/agent/listener/message_listener.rb +93 -0
- data/lib/pikuri/agent/listener/step_limit.rb +97 -0
- data/lib/pikuri/agent/listener/terminal.rb +137 -0
- data/lib/pikuri/agent/listener/token_log.rb +166 -0
- data/lib/pikuri/agent/listener_list.rb +113 -0
- data/lib/pikuri/agent/message.rb +61 -0
- data/lib/pikuri/agent/synthesizer.rb +120 -0
- data/lib/pikuri/agent/tokens.rb +56 -0
- data/lib/pikuri/agent.rb +286 -0
- data/lib/pikuri/subprocess.rb +166 -0
- data/lib/pikuri/tool/bash.rb +272 -0
- data/lib/pikuri/tool/calculator.rb +82 -0
- data/lib/pikuri/tool/confirmer.rb +96 -0
- data/lib/pikuri/tool/edit.rb +196 -0
- data/lib/pikuri/tool/fetch.rb +167 -0
- data/lib/pikuri/tool/glob.rb +310 -0
- data/lib/pikuri/tool/grep.rb +338 -0
- data/lib/pikuri/tool/parameters.rb +314 -0
- data/lib/pikuri/tool/read.rb +254 -0
- data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
- data/lib/pikuri/tool/scraper/html.rb +285 -0
- data/lib/pikuri/tool/scraper/pdf.rb +54 -0
- data/lib/pikuri/tool/scraper/simple.rb +177 -0
- data/lib/pikuri/tool/search/brave.rb +184 -0
- data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
- data/lib/pikuri/tool/search/engines.rb +154 -0
- data/lib/pikuri/tool/search/exa.rb +217 -0
- data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
- data/lib/pikuri/tool/search/result.rb +29 -0
- data/lib/pikuri/tool/skill.rb +80 -0
- data/lib/pikuri/tool/skill_catalog.rb +376 -0
- data/lib/pikuri/tool/sub_agent.rb +102 -0
- data/lib/pikuri/tool/web_scrape.rb +117 -0
- data/lib/pikuri/tool/web_search.rb +38 -0
- data/lib/pikuri/tool/workspace.rb +150 -0
- data/lib/pikuri/tool/write.rb +170 -0
- data/lib/pikuri/tool.rb +118 -0
- data/lib/pikuri/url_cache.rb +106 -0
- data/lib/pikuri/version.rb +10 -0
- data/lib/pikuri.rb +165 -0
- data/prompts/coding-system-prompt.txt +28 -0
- data/prompts/pikuri-chat.txt +15 -0
- metadata +259 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pikuri
|
|
4
|
+
class Tool
|
|
5
|
+
# The +grep+ tool — content search across the workspace via
|
|
6
|
+
# +ripgrep+. Instantiating +Tool::Grep.new(workspace: ws)+ produces a
|
|
7
|
+
# tool whose {Tool#to_ruby_llm_tool} wiring is identical to any
|
|
8
|
+
# bundled tool's. Same shape as {Tool::Read} (workspace captured by
|
|
9
|
+
# the +execute+ closure, no confirmer — search is read-only).
|
|
10
|
+
#
|
|
11
|
+
# == ripgrep dependency
|
|
12
|
+
#
|
|
13
|
+
# Hard dependency: {.check_binaries!} runs in +initialize+ and raises
|
|
14
|
+
# if +rg+ isn't on +PATH+. Mirrors {Tool::Bash}'s posture for
|
|
15
|
+
# +bash+/+timeout+. We don't ship a Ruby fallback — replicating
|
|
16
|
+
# rg's Rust-regex dialect, glob handling, and +.gitignore+ parsing
|
|
17
|
+
# is a research-loop dead end. Failure message includes the install
|
|
18
|
+
# hint.
|
|
19
|
+
#
|
|
20
|
+
# == Argv
|
|
21
|
+
#
|
|
22
|
+
# rg --line-number --color=never --no-heading --with-filename \
|
|
23
|
+
# --hidden --max-columns=2000 --max-columns-preview \
|
|
24
|
+
# --sort=path \
|
|
25
|
+
# [-i] [--glob <g>] [--files-with-matches|--count-matches] \
|
|
26
|
+
# -- <pattern> <relative-path-or-dot>
|
|
27
|
+
#
|
|
28
|
+
# * +--no-heading+ + +--with-filename+ → flat +path:line:content+ rows
|
|
29
|
+
# regardless of whether the search target is a directory or a single
|
|
30
|
+
# file (rg defaults to suppressing the filename for single-file
|
|
31
|
+
# searches — we force it on for output consistency).
|
|
32
|
+
# * +--hidden+ → search dotfiles (still respects +.gitignore+).
|
|
33
|
+
# * +--max-columns=2000 --max-columns-preview+ → rg truncates lines
|
|
34
|
+
# longer than {MAX_LINE_LENGTH} bytes server-side and appends a
|
|
35
|
+
# preview marker, sparing us per-line truncation.
|
|
36
|
+
# * +--sort=path+ → deterministic output (single-threaded; fine for
|
|
37
|
+
# typical repos under ~10k files). Makes specs assertable and gives
|
|
38
|
+
# the model a stable order to scan.
|
|
39
|
+
# * Subprocess runs with +chdir: workspace.cwd+ and is *always* given
|
|
40
|
+
# an explicit path argument. {Pikuri::Subprocess.spawn} uses
|
|
41
|
+
# +popen2e+ which gives the child a piped (non-tty) stdin; rg's
|
|
42
|
+
# default heuristic on no-path-arg-with-piped-stdin is to search
|
|
43
|
+
# stdin (which we then close — yielding zero matches). Passing the
|
|
44
|
+
# path argument explicitly bypasses the heuristic. Output paths
|
|
45
|
+
# come back as +./...+ when the path is +.+; the leading +./+ is
|
|
46
|
+
# stripped post-rg so the model sees clean workspace-relative paths.
|
|
47
|
+
#
|
|
48
|
+
# == Output modes
|
|
49
|
+
#
|
|
50
|
+
# * +content+ (default) — +path:line:content+ rows.
|
|
51
|
+
# * +files_with_matches+ — just file paths, one per line.
|
|
52
|
+
# * +count+ — +path:count+ per file.
|
|
53
|
+
#
|
|
54
|
+
# Use +files_with_matches+ to scope a broad search cheaply before
|
|
55
|
+
# paying tokens for +content+.
|
|
56
|
+
#
|
|
57
|
+
# == Truncation
|
|
58
|
+
#
|
|
59
|
+
# Total output is head-truncated to {MAX_BYTES} (head-only — grep
|
|
60
|
+
# tails usually carry less signal than the first matches; opposite
|
|
61
|
+
# bias from {Tool::Bash}). Cut at the last line boundary, with a
|
|
62
|
+
# marker reporting omitted bytes and the original total so the model
|
|
63
|
+
# knows how much it missed.
|
|
64
|
+
#
|
|
65
|
+
# == Exit codes
|
|
66
|
+
#
|
|
67
|
+
# * +0+ → matches; format with footer.
|
|
68
|
+
# * +1+ → no matches; return +"No matches for pattern '...'"+.
|
|
69
|
+
# * +2+ → rg error (bad regex, missing path); return +"Error: ripgrep: ..."+.
|
|
70
|
+
#
|
|
71
|
+
# == Refusals
|
|
72
|
+
#
|
|
73
|
+
# All returned as +"Error: ..."+ observations:
|
|
74
|
+
#
|
|
75
|
+
# * Empty +pattern+ → fast reject.
|
|
76
|
+
# * Unknown +output_mode+ → enum error listing valid values.
|
|
77
|
+
# * Path outside the workspace → caught from {Tool::Workspace::Error}.
|
|
78
|
+
# * Nonexistent path → +"Error: path not found: <path>"+.
|
|
79
|
+
class Grep < Tool
|
|
80
|
+
# @return [Integer] hard byte cap on combined rg output. Same value
#   as {Tool::Read::MAX_BYTES} so the two file-touching tools share
#   a budget shape.
MAX_BYTES = 50 * 1024

# @return [String] human-readable form of {MAX_BYTES}, interpolated
#   into {DESCRIPTION} so the model is told the cap up front.
MAX_BYTES_LABEL = "#{MAX_BYTES / 1024} KB"

# @return [Integer] per-line cap passed to rg's +--max-columns+
#   (rg measures this limit in bytes). Longer lines are cut by rg
#   itself and shown with a preview marker.
MAX_LINE_LENGTH = 2000

# @return [Array<String>] valid +output_mode+ values.
OUTPUT_MODES = %w[content files_with_matches count].freeze

# @return [String] default +output_mode+.
DEFAULT_OUTPUT_MODE = 'content'

# Description shown to the LLM. opencode-shape (summary + +Usage:+
# bullets). Per-parameter constraints live in parameter
# descriptions.
#
# The regex bullet names the real limits of rg's default engine
# (no look-around of either direction, no backreferences) so the
# model does not waste a call on a look-ahead pattern.
#
# @return [String]
DESCRIPTION = <<~DESC
  Search file contents for a regex pattern across the workspace.

  Usage:
  - Wraps `ripgrep` — regex syntax is rg's Rust-regex dialect (mostly PCRE-compatible; no look-around or backreferences).
  - Default search root is the workspace root; pass `path` to narrow to a file or subdirectory.
  - Respects `.gitignore` — for unfiltered search use bash `rg --no-ignore <pattern>`.
  - Use `glob` to filter by filename, e.g. `"*.rb"` or `"src/**/*.{ts,tsx}"`.
  - `output_mode` controls verbosity: `content` (default, file:line:text), `files_with_matches` (paths only), `count` (matches per file).
  - Use `files_with_matches` first to scope a broad search, then `content` (or `read`) to investigate — saves tokens.
  - Output is truncated to #{MAX_BYTES_LABEL}; refine the pattern or narrow `path` if the response ends in a truncation marker.
  - Long lines are truncated to #{MAX_LINE_LENGTH} bytes with a preview marker; use `read` to see full lines.
DESC
|
|
117
|
+
|
|
118
|
+
# Build the +grep+ tool around a workspace: check for ripgrep, declare
# the argument schema, and hand an +execute+ lambda that closes over
# +workspace+ to the {Tool} constructor.
#
# @param workspace [Tool::Workspace] captured for path resolution
#   and as +chdir+ for rg. All path arguments route through
#   +workspace.resolve_for_read+.
# @raise [RuntimeError] if +rg+ isn't on +PATH+; fail-loud at
#   construction rather than the first tool call.
# @return [Grep]
def initialize(workspace:)
  # check_binaries! is a private class method, hence the +send+.
  Grep.send(:check_binaries!)

  schema = Parameters.build do |spec|
    spec.required_string :pattern,
                         'Regex pattern to search for (rg Rust-regex ' \
                         'dialect), e.g. "def\s+\w+" or "TODO".'
    spec.optional_string :path,
                         'File or directory to search. Relative paths ' \
                         'resolve against the workspace root. Defaults ' \
                         'to the workspace root, e.g. "lib/" or "lib/foo.rb".'
    spec.optional_string :glob,
                         'Filename glob to filter files, e.g. "*.rb" ' \
                         'or "src/**/*.{ts,tsx}".'
    spec.optional_boolean :case_insensitive,
                          'Match case-insensitively. Defaults to false, e.g. true.'
    spec.optional_string :output_mode,
                         "One of #{OUTPUT_MODES.join(', ')}. Defaults to " \
                         "#{DEFAULT_OUTPUT_MODE}, e.g. \"files_with_matches\"."
  end

  # Keyword defaults here mirror the optional parameters declared above.
  runner = lambda do |pattern:, path: nil, glob: nil,
                      case_insensitive: false, output_mode: DEFAULT_OUTPUT_MODE|
    Grep.search(workspace: workspace, pattern: pattern, path: path,
                glob: glob, case_insensitive: case_insensitive,
                output_mode: output_mode)
  end

  super(name: 'grep', description: DESCRIPTION, parameters: schema, execute: runner)
end
|
|
154
|
+
|
|
155
|
+
# Run one search: check the arguments, resolve +path+ inside the
# workspace, invoke rg, and turn its exit status into an observation.
# The result is always a String: formatted matches, a "no matches"
# sentence, or an +"Error: ..."+ line.
#
# @param workspace [Tool::Workspace]
# @param pattern [String]
# @param path [String, nil]
# @param glob [String, nil]
# @param case_insensitive [Boolean]
# @param output_mode [String]
# @return [String]
def self.search(workspace:, pattern:, path:, glob:, case_insensitive:, output_mode:)
  return 'Error: empty pattern.' if pattern.empty?

  unless OUTPUT_MODES.include?(output_mode)
    return "Error: output_mode must be one of #{OUTPUT_MODES.join(', ')}, " \
           "got #{output_mode.inspect}."
  end

  # rg always receives an explicit, workspace-relative target.
  target = '.'
  if path
    absolute = workspace.resolve_for_read(path)
    return "Error: path not found: #{path}" unless absolute.exist?

    target = absolute.relative_path_from(workspace.cwd).to_s
  end

  command = build_argv(pattern: pattern, glob: glob,
                       case_insensitive: case_insensitive,
                       output_mode: output_mode, path: target)
  outcome = Pikuri::Subprocess.spawn(*command, chdir: workspace.cwd.to_s).wait
  code = outcome.status.exitstatus

  if code == 0
    format_output(outcome.output, output_mode: output_mode,
                                  pattern: pattern, path: path)
  elsif code == 1
    no_match_message(pattern: pattern, path: path)
  else
    # Any other status is reported with whatever rg printed.
    detail = outcome.output.strip
    detail = "exited #{code}" if detail.empty?
    "Error: ripgrep: #{detail}"
  end
rescue Tool::Workspace::Error => e
  "Error: #{e.message}"
end
|
|
203
|
+
|
|
204
|
+
# Assemble the +rg+ command line. Fixed flags come first, then the
# optional ones, then +--+ followed by the pattern and the search
# path. The path is always present (callers pass +.+ by default).
#
# @return [Array<String>]
def self.build_argv(pattern:, glob:, case_insensitive:, output_mode:, path:)
  fixed = %W[
    rg --line-number --color=never --no-heading --with-filename --hidden
    --max-columns=#{MAX_LINE_LENGTH} --max-columns-preview --sort=path
  ]

  # +content+ mode needs no extra flag, so it has no entry here.
  mode_flag = {
    'files_with_matches' => '--files-with-matches',
    'count' => '--count-matches'
  }[output_mode]

  optional = []
  optional << '-i' if case_insensitive
  optional.push('--glob', glob) if glob
  optional << mode_flag if mode_flag

  fixed + optional + ['--', pattern, path]
end
|
|
229
|
+
private_class_method :build_argv
|
|
230
|
+
|
|
231
|
+
# Turn raw rg output into the observation text: drop the +./+ path
# prefix, head-truncate at {MAX_BYTES}, then append a blank line and a
# footer (followed by the truncation marker, when there is one).
# Output that is empty after cleanup is reported as "no matches".
#
# @return [String]
def self.format_output(raw, output_mode:, pattern:, path:)
  body, marker = head_truncate(strip_dot_slash(raw))
  body = body.chomp
  return no_match_message(pattern: pattern, path: path) if body.empty?

  summary = build_footer(body, output_mode)
  "#{body}\n\n#{summary + marker}"
end
|
|
246
|
+
private_class_method :format_output
|
|
247
|
+
|
|
248
|
+
# Strip leading +./+ from each line of rg output. rg emits this
# prefix when invoked with +.+ as the search path; we want clean
# workspace-relative paths regardless of whether the user passed a
# path or we defaulted to +.+.
#
# The input is scrubbed first: matched lines can come from files that
# are not valid UTF-8, and a regex +gsub+ on a string with invalid
# bytes raises +ArgumentError+. Scrubbing swaps each invalid byte for
# the replacement character so such a match still yields an
# observation. Valid input is unaffected.
#
# @param raw [String] rg output
# @return [String] output with the per-line +./+ prefix removed
def self.strip_dot_slash(raw)
  # +^+ is deliberate: the prefix is removed at the start of every line.
  raw.scrub.gsub(%r{^\./}, '')
end
|
|
257
|
+
private_class_method :strip_dot_slash
|
|
258
|
+
|
|
259
|
+
# Head-truncate +raw+ to {MAX_BYTES}, cutting at the last newline
# boundary so the final row is never partial. Returns the truncated
# content and a marker String (empty if no truncation).
#
# All offsets are byte offsets. The newline search runs on a binary
# view of the head because +String#rindex+ on a UTF-8 string returns a
# *character* index; feeding that to +byteslice+ would cut too early,
# mid-row, whenever the output contains multibyte characters.
#
# @param raw [String] cleaned rg output
# @return [Array(String, String)] +[content, marker]+
def self.head_truncate(raw)
  total = raw.bytesize
  return [raw, ''] if total <= MAX_BYTES

  head = raw.byteslice(0, MAX_BYTES)
  # Byte offset of the last newline inside the cap (nil if none).
  last_nl = head.b.rindex("\n")
  # Cutting just before an ASCII newline also drops any multibyte
  # character that the MAX_BYTES cut split in half.
  head = head.byteslice(0, last_nl) if last_nl
  omitted = total - head.bytesize
  marker = "\n\n... [#{omitted} bytes omitted; total was #{total} bytes; " \
           'refine pattern or path] ...'
  [head, marker]
end
|
|
276
|
+
private_class_method :head_truncate
|
|
277
|
+
|
|
278
|
+
# One-line summary appended under the results. Wording depends on
# +output_mode+, and every number is derived from the text rows rg
# printed (blank rows are ignored). Returns +nil+ for a mode it does
# not know.
#
# @param content [String] the rows being reported
# @param output_mode [String]
# @return [String, nil]
def self.build_footer(content, output_mode)
  rows = content.split("\n").reject(&:empty?)

  if output_mode == 'content'
    # Each row is +path:line:text+; the path is everything before the first colon.
    paths = rows.map { |row| row.split(':', 2).first }.uniq
    matches_part = pluralize(rows.size, 'match', 'matches')
    files_part = pluralize(paths.size, 'file', 'files')
    "Found #{matches_part} in #{files_part}."
  elsif output_mode == 'files_with_matches'
    "Found #{pluralize(rows.size, 'file', 'files')}."
  elsif output_mode == 'count'
    # Each row is +path:count+; the count is the last colon-separated field.
    match_total = rows.sum { |row| Integer(row.split(':').last) }
    matches_part = pluralize(match_total, 'match', 'matches')
    files_part = pluralize(rows.size, 'file', 'files')
    "Found #{matches_part} in #{files_part}."
  end
end
|
|
297
|
+
private_class_method :build_footer
|
|
298
|
+
|
|
299
|
+
# Prefix a noun with its count, picking the singular form only when
# the count equals 1.
#
# @return [String] e.g. +"1 match"+ / +"2 matches"+
def self.pluralize(n, sing, plural)
  noun = n == 1 ? sing : plural
  [n, noun].join(' ')
end
|
|
303
|
+
private_class_method :pluralize
|
|
304
|
+
|
|
305
|
+
# Observation text for a search that found nothing. Mentions the
# search path only when the caller supplied one.
#
# @return [String]
def self.no_match_message(pattern:, path:)
  scope = path ? " in #{path}" : ''
  "No matches for pattern '#{pattern}'#{scope}."
end
|
|
311
|
+
private_class_method :no_match_message
|
|
312
|
+
|
|
313
|
+
# Confirm that +rg+ can be run by spawning +rg --version+ through
# {Pikuri::Subprocess.spawn}. A missing binary (+Errno::ENOENT+) and
# an unsuccessful exit are treated the same way: both raise with the
# install hint.
#
# @return [void]
# @raise [RuntimeError] if rg is missing
def self.check_binaries!
  available =
    begin
      Pikuri::Subprocess.spawn('rg', '--version', chdir: '/').wait.status.success?
    rescue Errno::ENOENT
      false
    end
  return if available

  raise install_hint
end
|
|
328
|
+
private_class_method :check_binaries!
|
|
329
|
+
|
|
330
|
+
# Message raised when ripgrep cannot be run; names the missing binary
# and gives an example install command.
#
# @return [String]
def self.install_hint
  [
    "Tool::Grep requires 'rg' (ripgrep) on PATH; install via your",
    "distro's package manager (e.g. 'apt install ripgrep')."
  ].join(' ')
end
|
|
335
|
+
private_class_method :install_hint
|
|
336
|
+
end
|
|
337
|
+
end
|
|
338
|
+
end
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'did_you_mean'
|
|
4
|
+
|
|
5
|
+
module Pikuri
|
|
6
|
+
# Loaded by +lib/tools.rb+ after {Tool} itself is defined; the +class Tool+
|
|
7
|
+
# reopening below assumes that order.
|
|
8
|
+
class Tool
|
|
9
|
+
# Schema for a {Tool}'s arguments. Built up via the fluent
|
|
10
|
+
# +<required|optional>_<type>+ methods, then frozen by {.build}; serializes
|
|
11
|
+
# to the OpenAI JSON-Schema shape via {#to_h} and validates LLM-supplied
|
|
12
|
+
# argument hashes via {#validate}.
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# params = Tool::Parameters.build { |p| p.required_string :query, 'The query.' }
|
|
16
|
+
# params.to_h
|
|
17
|
+
# # => {type: 'object',
|
|
18
|
+
# # properties: {query: {type: 'string', description: 'The query.'}},
|
|
19
|
+
# # required: ['query']}
|
|
20
|
+
# params.validate('query' => 'cats') # => {query: 'cats'}
|
|
21
|
+
class Parameters
|
|
22
|
+
# Raised by {Parameters#validate} when arguments do not match the declared
|
|
23
|
+
# schema. The message lists every problem and reprints the schema, so it
|
|
24
|
+
# can be fed back to the LLM verbatim as the next tool-call observation.
|
|
25
|
+
class ValidationError < StandardError; end
|
|
26
|
+
|
|
27
|
+
# Yield a fresh builder, freeze it, and return it.
|
|
28
|
+
#
|
|
29
|
+
# @yieldparam builder [Parameters]
|
|
30
|
+
# @return [Parameters] frozen builder, safe to share between calls
|
|
31
|
+
def self.build
|
|
32
|
+
builder = new
|
|
33
|
+
yield builder
|
|
34
|
+
builder.freeze
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# @return [Parameters]
|
|
38
|
+
def initialize
|
|
39
|
+
@properties = {}
|
|
40
|
+
@required = []
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Freeze the builder along with its internal collections, so post-build
|
|
44
|
+
# mutation attempts raise +FrozenError+ instead of silently succeeding.
|
|
45
|
+
#
|
|
46
|
+
# @return [self]
|
|
47
|
+
def freeze
|
|
48
|
+
@properties.freeze
|
|
49
|
+
@required.freeze
|
|
50
|
+
super
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Add a required +string+ property.
|
|
54
|
+
#
|
|
55
|
+
# @param name [Symbol] property name
|
|
56
|
+
# @param description [String] human-readable description shown to the LLM
|
|
57
|
+
# @return [self]
|
|
58
|
+
def required_string(name, description)
|
|
59
|
+
add(name, 'string', description, required: true)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Add an optional +string+ property.
|
|
63
|
+
#
|
|
64
|
+
# @param name [Symbol] property name
|
|
65
|
+
# @param description [String] human-readable description shown to the LLM
|
|
66
|
+
# @return [self]
|
|
67
|
+
def optional_string(name, description)
|
|
68
|
+
add(name, 'string', description, required: false)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Add a required +integer+ property. Accepts Integers, Floats with a
|
|
72
|
+
# zero fractional part (e.g. +1.0+), and base-10 numeric Strings (after
|
|
73
|
+
# trimming) that resolve to whole numbers; rejects everything else.
|
|
74
|
+
#
|
|
75
|
+
# @param name [Symbol] property name
|
|
76
|
+
# @param description [String] human-readable description shown to the LLM
|
|
77
|
+
# @return [self]
|
|
78
|
+
def required_integer(name, description)
|
|
79
|
+
add(name, 'integer', description, required: true)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Add an optional +integer+ property. See {#required_integer} for
|
|
83
|
+
# accepted shapes.
|
|
84
|
+
#
|
|
85
|
+
# @param name [Symbol] property name
|
|
86
|
+
# @param description [String] human-readable description shown to the LLM
|
|
87
|
+
# @return [self]
|
|
88
|
+
def optional_integer(name, description)
|
|
89
|
+
add(name, 'integer', description, required: false)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Add a required +number+ property (JSON-Schema +number+: Integer or
|
|
93
|
+
# finite Float). Numeric Strings (after trimming) are parsed; NaN and
|
|
94
|
+
# Infinity are rejected.
|
|
95
|
+
#
|
|
96
|
+
# @param name [Symbol] property name
|
|
97
|
+
# @param description [String] human-readable description shown to the LLM
|
|
98
|
+
# @return [self]
|
|
99
|
+
def required_number(name, description)
|
|
100
|
+
add(name, 'number', description, required: true)
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Add an optional +number+ property. See {#required_number} for
|
|
104
|
+
# accepted shapes.
|
|
105
|
+
#
|
|
106
|
+
# @param name [Symbol] property name
|
|
107
|
+
# @param description [String] human-readable description shown to the LLM
|
|
108
|
+
# @return [self]
|
|
109
|
+
def optional_number(name, description)
|
|
110
|
+
add(name, 'number', description, required: false)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Add a required +boolean+ property. Accepts Ruby +true+/+false+
|
|
114
|
+
# as-is, and the literal Strings +"true"+/+"false"+ (some models
|
|
115
|
+
# surface JSON booleans as Strings) after trimming surrounding
|
|
116
|
+
# whitespace. Other Strings, numbers, and +nil+ are rejected —
|
|
117
|
+
# there is no truthy-coercion of +"yes"+ / +0+ / etc.
|
|
118
|
+
#
|
|
119
|
+
# @param name [Symbol] property name
|
|
120
|
+
# @param description [String] human-readable description shown to the LLM
|
|
121
|
+
# @return [self]
|
|
122
|
+
def required_boolean(name, description)
|
|
123
|
+
add(name, 'boolean', description, required: true)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Add an optional +boolean+ property. See {#required_boolean} for
|
|
127
|
+
# accepted shapes.
|
|
128
|
+
#
|
|
129
|
+
# @param name [Symbol] property name
|
|
130
|
+
# @param description [String] human-readable description shown to the LLM
|
|
131
|
+
# @return [self]
|
|
132
|
+
def optional_boolean(name, description)
|
|
133
|
+
add(name, 'boolean', description, required: false)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Schema in OpenAI JSON-Schema shape.
|
|
137
|
+
#
|
|
138
|
+
# @return [Hash] +{type: 'object', properties: {...}, required: [...]}+
|
|
139
|
+
def to_h
|
|
140
|
+
{ type: 'object', properties: @properties, required: @required }
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Check a tool-call argument hash against the declared schema and
# return the accepted values keyed by Symbol, ready to be splatted as
# kwargs into a tool's +execute+ Proc.
#
# Validation is strict and exhaustive: unknown keys, missing required
# keys and values that fail coercion are all collected, then reported
# together in a single {ValidationError} whose message also reprints
# the schema, so the LLM can fix everything in one round trip.
#
# @param args [Hash] arguments as decoded from the tool-call JSON; keys
#   may be Strings or Symbols
# @return [Hash{Symbol=>Object}] validated, symbol-keyed arguments
# @raise [ValidationError] if +args+ is not a Hash, contains unknown
#   keys, omits a required key, or has a value of the wrong type
def validate(args)
  unless args.is_a?(Hash)
    raise ValidationError, "Arguments must be an object, got #{args.class}."
  end

  given = args.transform_keys(&:to_sym)
  # Unknown keys are reported first, in the order they were supplied.
  problems = (given.keys - @properties.keys).map { |key| unknown_key_error(key) }
  accepted = {}

  @properties.each do |name, schema|
    unless given.key?(name)
      if @required.include?(name.to_s)
        problems << "Missing required parameter `#{name}` (#{schema[:type]}): #{schema[:description]}"
      end
      next
    end

    begin
      accepted[name] = coerce(given[name], schema[:type])
    rescue CoercionError => e
      problems << "Parameter `#{name}` #{e.message}."
    end
  end

  raise ValidationError, build_error_message(problems) unless problems.empty?

  accepted
end
|
|
184
|
+
|
|
185
|
+
private
|
|
186
|
+
|
|
187
|
+
# Internal coercion failure. Caught by {#validate} and turned into a
|
|
188
|
+
# {ValidationError} message — never escapes the class.
|
|
189
|
+
class CoercionError < StandardError; end
|
|
190
|
+
private_constant :CoercionError
|
|
191
|
+
|
|
192
|
+
# Register (or re-register) a property in the schema.
#
# Declaring the same name again replaces the earlier declaration. The
# +required+ list is kept in step with that: the name is removed
# before being conditionally re-added, so a redeclaration can neither
# duplicate the entry nor leave a stale "required" flag on a property
# that is now optional.
#
# @param name [Symbol] property name
# @param type [String] JSON-Schema type name
# @param description [String] human-readable description shown to the LLM
# @param required [Boolean] whether the property must be supplied
# @return [self]
def add(name, type, description, required:)
  @properties[name] = { type: type, description: description }

  key = name.to_s
  @required.delete(key)
  @required << key if required
  self
end
|
|
197
|
+
|
|
198
|
+
# Convert +value+ into the Ruby representation of the JSON-Schema
# +type+. Strings must already be Strings; the other types delegate
# to their dedicated coercion helper. A +type+ outside the four known
# names yields +nil+.
#
# @raise [CoercionError] when the value does not fit the type
def coerce(value, type)
  case type
  when 'integer' then coerce_integer(value)
  when 'number'  then coerce_number(value)
  when 'boolean' then coerce_boolean(value)
  when 'string'
    raise CoercionError, type_message('string', value) unless value.is_a?(String)

    value
  end
end
|
|
214
|
+
|
|
215
|
+
# Accept real booleans unchanged, and the exact Strings +"true"+ /
# +"false"+ once surrounding whitespace is trimmed. Everything else
# is rejected.
#
# @raise [CoercionError] for any other value
def coerce_boolean(value)
  return value if value == true || value == false

  literal = value.is_a?(String) ? value.strip : nil
  return true if literal == 'true'
  return false if literal == 'false'

  raise CoercionError, type_message('boolean', value)
end
|
|
227
|
+
|
|
228
|
+
# Coerce +value+ to an Integer. Accepts Integers, finite Floats with a
# zero fractional part, and base-10 numeric Strings (after trimming)
# that denote a whole number.
#
# Plain digit Strings are parsed exactly with +Integer+. Sending them
# through +Float+ would silently change any value above 2**53 (for
# example +"12345678901234567890"+). Only Strings written with a
# fraction or exponent (+"1.0"+, +"1e3"+) take the Float path.
#
# @param value [Object] raw argument value
# @return [Integer]
# @raise [CoercionError] if the value is not a whole number
def coerce_integer(value)
  case value
  when Integer
    value
  when Float
    raise CoercionError, type_message('integer', value) unless value.finite? && value.modulo(1).zero?

    value.to_i
  when String
    trimmed = value.strip
    # The strict pattern runs first because Integer() on its own would
    # also accept underscores ("1_000"), which this schema rejects.
    return Integer(trimmed, 10) if trimmed.match?(/\A[-+]?\d+\z/)

    parsed = parse_numeric_string(value)
    raise CoercionError, type_message('integer', value) unless parsed && parsed.modulo(1).zero?

    parsed.to_i
  else
    raise CoercionError, type_message('integer', value)
  end
end
|
|
245
|
+
|
|
246
|
+
# Coerce +value+ to a JSON-Schema +number+. Integers pass through
# unchanged, Floats must be finite, and Strings are parsed by
# {#parse_numeric_string}. Any other class is rejected.
#
# @raise [CoercionError] when no finite number can be obtained
def coerce_number(value)
  return value if value.is_a?(Integer)

  numeric =
    if value.is_a?(Float)
      value if value.finite?
    elsif value.is_a?(String)
      parse_numeric_string(value)
    end
  raise CoercionError, type_message('number', value) unless numeric

  numeric
end
|
|
263
|
+
|
|
264
|
+
# Matches the decimal-numeric subset that JSON allows: optional sign,
|
|
265
|
+
# mantissa (with optional fractional part), optional decimal exponent.
|
|
266
|
+
# Rejects hex (+0x10+), underscores (+1_000+), +NaN+, +Infinity+.
|
|
267
|
+
DECIMAL_NUMERIC = /\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/
|
|
268
|
+
private_constant :DECIMAL_NUMERIC
|
|
269
|
+
|
|
270
|
+
# Parse a base-10 numeric String strictly. The trimmed text must
# match DECIMAL_NUMERIC before it is passed to +Float+. The result is
# a finite Float, or +nil+ when the text does not match, +Float+
# cannot convert it, or the value is not finite.
#
# @param str [String]
# @return [Float, nil]
def parse_numeric_string(str)
  candidate = str.strip
  number = Float(candidate, exception: false) if candidate.match?(DECIMAL_NUMERIC)
  number if number&.finite?
end
|
|
281
|
+
|
|
282
|
+
# Build the "must be a/an <type>" fragment used in coercion errors.
# It includes the class and +inspect+ form of the offending value.
#
# @return [String]
def type_message(type, value)
  # "integer" is the only type name that takes "an".
  article = { 'integer' => 'an' }.fetch(type, 'a')
  format('must be %s %s (got %s: %s)', article, type, value.class, value.inspect)
end
|
|
286
|
+
|
|
287
|
+
# Error line for an argument key that is not in the schema. If the
# DidYouMean spell checker finds a close declared name, it is
# suggested; otherwise every valid parameter name is listed.
#
# @param unknown [Symbol] the unrecognized key
# @return [String]
def unknown_key_error(unknown)
  checker = DidYouMean::SpellChecker.new(dictionary: @properties.keys.map(&:to_s))
  guess = checker.correct(unknown.to_s).first
  hint = guess ? " Did you mean `#{guess}`?" : " Valid parameters: #{valid_keys_list}."
  "Unknown parameter `#{unknown}`.#{hint}"
end
|
|
295
|
+
|
|
296
|
+
# Every declared property name, wrapped in backticks and joined with
# commas, in declaration order.
#
# @return [String]
def valid_keys_list
  quoted = @properties.each_key.map { |key| "`#{key}`" }
  quoted.join(', ')
end
|
|
299
|
+
|
|
300
|
+
# Assemble the full {ValidationError} text: a bulleted list of the
# problems, a blank line, then the schema with each property's type,
# required/optional status and description.
#
# @param errors [Array<String>] one entry per problem
# @return [String]
def build_error_message(errors)
  problem_lines = errors.map { |problem| "- #{problem}" }
  schema_lines = @properties.map do |name, prop|
    status = @required.include?(name.to_s) ? 'required' : 'optional'
    " - `#{name}` (#{prop[:type]}, #{status}): #{prop[:description]}"
  end

  ['Invalid arguments:', *problem_lines, '', 'Expected schema:', *schema_lines].join("\n")
end
|
|
312
|
+
end
|
|
313
|
+
end
|
|
314
|
+
end
|