pikuri 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +62 -0
  3. data/GETTING_STARTED.md +223 -0
  4. data/LICENSE +21 -0
  5. data/README.md +193 -0
  6. data/lib/pikuri/agent/chat_transport.rb +41 -0
  7. data/lib/pikuri/agent/context_window_detector.rb +101 -0
  8. data/lib/pikuri/agent/listener/in_memory_message_list.rb +33 -0
  9. data/lib/pikuri/agent/listener/message_listener.rb +93 -0
  10. data/lib/pikuri/agent/listener/step_limit.rb +97 -0
  11. data/lib/pikuri/agent/listener/terminal.rb +137 -0
  12. data/lib/pikuri/agent/listener/token_log.rb +166 -0
  13. data/lib/pikuri/agent/listener_list.rb +113 -0
  14. data/lib/pikuri/agent/message.rb +61 -0
  15. data/lib/pikuri/agent/synthesizer.rb +120 -0
  16. data/lib/pikuri/agent/tokens.rb +56 -0
  17. data/lib/pikuri/agent.rb +286 -0
  18. data/lib/pikuri/subprocess.rb +166 -0
  19. data/lib/pikuri/tool/bash.rb +272 -0
  20. data/lib/pikuri/tool/calculator.rb +82 -0
  21. data/lib/pikuri/tool/confirmer.rb +96 -0
  22. data/lib/pikuri/tool/edit.rb +196 -0
  23. data/lib/pikuri/tool/fetch.rb +167 -0
  24. data/lib/pikuri/tool/glob.rb +310 -0
  25. data/lib/pikuri/tool/grep.rb +338 -0
  26. data/lib/pikuri/tool/parameters.rb +314 -0
  27. data/lib/pikuri/tool/read.rb +254 -0
  28. data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
  29. data/lib/pikuri/tool/scraper/html.rb +285 -0
  30. data/lib/pikuri/tool/scraper/pdf.rb +54 -0
  31. data/lib/pikuri/tool/scraper/simple.rb +177 -0
  32. data/lib/pikuri/tool/search/brave.rb +184 -0
  33. data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
  34. data/lib/pikuri/tool/search/engines.rb +154 -0
  35. data/lib/pikuri/tool/search/exa.rb +217 -0
  36. data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
  37. data/lib/pikuri/tool/search/result.rb +29 -0
  38. data/lib/pikuri/tool/skill.rb +80 -0
  39. data/lib/pikuri/tool/skill_catalog.rb +376 -0
  40. data/lib/pikuri/tool/sub_agent.rb +102 -0
  41. data/lib/pikuri/tool/web_scrape.rb +117 -0
  42. data/lib/pikuri/tool/web_search.rb +38 -0
  43. data/lib/pikuri/tool/workspace.rb +150 -0
  44. data/lib/pikuri/tool/write.rb +170 -0
  45. data/lib/pikuri/tool.rb +118 -0
  46. data/lib/pikuri/url_cache.rb +106 -0
  47. data/lib/pikuri/version.rb +10 -0
  48. data/lib/pikuri.rb +165 -0
  49. data/prompts/coding-system-prompt.txt +28 -0
  50. data/prompts/pikuri-chat.txt +15 -0
  51. metadata +259 -0
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ class Tool
5
+ module Search
6
+ # Search-orchestration entry point: the cascade across configured
7
+ # providers, the result cache, and the {Unavailable} protocol marker
8
+ # the cascade uses to fall back. The LLM-facing tool itself
9
+ # ({Tool::WEB_SEARCH}) lives in +lib/tool/web_search.rb+ and calls
10
+ # into {.search} below. Each {Tool::Search} provider module
11
+ # ({DuckDuckGo}, {Brave}, {Exa}) raises {Unavailable} when it wants
12
+ # the cascade to try the next one.
13
+ module Engines
14
+ # Subsystem logger; set its level with +PIKURI_LOG_ENGINES+
15
+ # (e.g. +PIKURI_LOG_ENGINES=debug+) or the global +PIKURI_LOG+.
16
+ #
17
+ # @return [Logger]
18
+ LOGGER = Pikuri.logger_for('Engines')
19
+
20
+ # Raised by a provider when it is temporarily unavailable (rate-limited,
21
+ # bot-blocked, quota-exhausted, or otherwise saying "try again later"
22
+ # rather than "your request is wrong"). The cascade in {Engines.search}
23
+ # catches this and tries the next provider; any other exception bubbles
24
+ # up unchanged so genuine bugs and config errors stay visible.
25
+ class Unavailable < StandardError; end
26
+
27
+ # All providers that are currently configured. {DuckDuckGo} is always
28
+ # available (no API key needed); {Brave} and {Exa} each join the
29
+ # list when their API token is present in the environment. Recomputed
30
+ # on every call so a process picks up a newly-set token without a
31
+ # restart.
32
+ #
33
+ # @return [Array<Module>] +Tool::Search::*+ provider modules, each
34
+ # exposing +.search(query, max_results:)+ → +Array<Result>+
35
+ def self.providers
36
+ list = [DuckDuckGo]
37
+ list << Brave unless ENV[Brave::ENV_KEY].to_s.strip.empty?
38
+ list << Exa unless ENV[Exa::ENV_KEY].to_s.strip.empty?
39
+ list
40
+ end
41
+
42
        # On-disk cache used by {.search} to memoize rendered answers,
        # keyed by the normalized query. Built eagerly at load time with
        # the default TTL, under a +web_search/+ subdirectory of the cache
        # root.
        #
        # @return [UrlCache]
        CACHE = UrlCache.new(ttl: UrlCache::DEFAULT_TTL, dir: "#{UrlCache::ROOT_DIR}/web_search")
        # Indirection point for the cache: specs stub this method to swap
        # in an isolated cache or {UrlCache::NULL} without touching the
        # shared {CACHE} instance itself.
        #
        # @return [UrlCache, #fetch]
        def self.cache
          CACHE
        end
51
+
52
+ # Run +query+ through the configured providers in random order, falling
53
+ # back to the next one each time a provider raises {Unavailable}. The
54
+ # shuffle spreads load so a single provider isn't always hit first
55
+ # (and exhausted first); revisit if it stops being the right default.
56
+ #
57
+ # The query is whitespace-trimmed and runs of whitespace collapsed
58
+ # to a single space before the cascade runs. The winning provider's
59
+ # +Array<Result>+ is rendered into smolagents-style Markdown here
60
+ # (+"## Search Results"+ header, then +[title](url)\nbody+ entries
61
+ # joined by blank lines; an empty array becomes +"No results found."+),
62
+ # and the rendered Markdown is cached on disk via {.cache}, keyed by
63
+ # the cleaned query. A cache hit short-circuits the cascade entirely
64
+ # (and benefits whichever provider would have answered next time too
65
+ # — once a query is cached, the cooldown state of the original
66
+ # answering provider no longer matters). +max_results+ is not part
67
+ # of the cache key, so callers passing a non-default value may get
68
+ # a result rendered with the previously-cached size.
69
+ #
70
+ # If every provider reports temporary unavailability, returns an
71
+ # +"Error: ..."+ string instead of raising — same convention as
72
+ # {Tool::Calculator.calculate}, so the agent loop can feed the failure
73
+ # back to the model as the next observation. Any non-{Unavailable}
74
+ # exception (network error, parser failure, malformed response, bad
75
+ # API key) bubbles up to the caller.
76
+ #
77
+ # @param query [String] search query
78
+ # @param max_results [Integer] maximum number of result entries
79
+ # @return [String] Markdown-formatted result list, or +"Error: ..."+
80
+ # when all providers are exhausted
81
+ # @raise [ArgumentError] if the query is empty after normalization
82
+ def self.search(query, max_results:)
83
+ cleaned = query.to_s.strip.gsub(/\s+/, ' ')
84
+ raise ArgumentError, 'query is empty' if cleaned.empty?
85
+
86
+ current_providers = providers
87
+ log_providers(current_providers)
88
+
89
+ hit = true
90
+ result = cache.fetch(cleaned) do
91
+ hit = false
92
+ failures = []
93
+ results = nil
94
+ chosen = nil
95
+ current_providers.shuffle.each do |provider|
96
+ results = provider.search(cleaned, max_results: max_results)
97
+ chosen = provider
98
+ break
99
+ rescue Unavailable => e
100
+ failures << "#{provider.name.split('::').last} (#{e.message})"
101
+ end
102
+ # Raise so {UrlCache#fetch} does NOT persist the all-unavailable
103
+ # message — otherwise that string would block every future search
104
+ # for this query until the TTL expires. The outer +rescue+ turns
105
+ # the raise back into the calculator-style "Error: …" string.
106
+ chosen or raise Unavailable, "all search providers temporarily unavailable: #{failures.join('; ')}"
107
+
108
+ LOGGER.info do
109
+ "engine=#{chosen.name.split('::').last} query=#{cleaned.inspect} results=#{results.size}"
110
+ end
111
+ render(results)
112
+ end
113
+ LOGGER.info { "cache=hit query=#{cleaned.inspect} bytes=#{result.bytesize}" } if hit
114
+ result
115
+ rescue Unavailable => e
116
+ "Error: #{e.message}"
117
+ end
118
+
119
+ # Render an +Array<Result>+ into the smolagents-style Markdown the
120
+ # LLM consumes: +"## Search Results"+ header, then +[title](url)\nbody+
121
+ # entries joined by blank lines. An empty array becomes the
122
+ # +"No results found."+ stub so the agent still gets a real
123
+ # observation to act on.
124
+ #
125
+ # @param results [Array<Result>] hits from the winning provider
126
+ # @return [String] Markdown-formatted result list
127
+ def self.render(results)
128
+ return "## Search Results\n\nNo results found." if results.empty?
129
+
130
+ "## Search Results\n\n" + results.map { |r| "[#{r.title}](#{r.url})\n#{r.body}" }.join("\n\n")
131
+ end
132
+ private_class_method :render
133
+
134
+ # Emit an INFO log line listing the currently-available providers,
135
+ # but only when the set differs from the last one we logged.
136
+ # {.providers} is recomputed on every {.search} call so a process
137
+ # picks up newly-set API keys without a restart; the memo here
138
+ # keeps the log to one line per distinct configuration rather
139
+ # than one per search.
140
+ #
141
+ # @param current [Array<Module>] providers returned by {.providers}
142
+ # @return [void]
143
+ def self.log_providers(current)
144
+ return if @last_logged_providers == current
145
+
146
+ @last_logged_providers = current
147
+ names = current.map { |p| p.name.split('::').last }.join(', ')
148
+ LOGGER.info("engines available: #{names}")
149
+ end
150
+ private_class_method :log_providers
151
+ end
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+ require 'json'
5
+
6
+ module Pikuri
7
+ class Tool
8
+ module Search
9
+ # Performs an Exa search via the official +/search+ endpoint and
10
+ # returns the hits as a list of {Result} rows. Split into a thin HTTP
11
+ # fetch (#search) and a pure parser (#parse) so tests can exercise
12
+ # the parser against fixture JSON without hitting the network. The
13
+ # cascade in {Engines.search} owns the final Markdown rendering.
14
+ #
15
+ # Requires an Exa API key. Get one at https://exa.ai — the service is
16
+ # paid, so the cascade in {Engines.providers} only includes Exa when
17
+ # {ENV_KEY} is set in the environment; users who haven't registered
18
+ # never spend money on it.
19
+ #
20
+ # Calls request +type: "auto"+ (Exa picks neural vs keyword per
21
+ # query) and +contents: { highlights: true }+ so each result carries
22
+ # a short neural-ranked snippet — the closest analog to Brave's
23
+ # +description+ field, populating {Result#body} consistently across
24
+ # providers.
25
+ #
26
+ # == Privacy posture
27
+ #
28
+ # Exa's Privacy Policy states +Query Data is used to improve our
29
+ # products and technology, including by training and fine-tuning
30
+ # models that power our Services+, and the Terms of Service §1.2(c)
31
+ # grant Exa a +perpetual and irrevocable+, +sub-licensable+,
32
+ # worldwide license over User Input that can be disclosed to third
33
+ # parties +as needed+. Business customers under a Master Subscription
34
+ # Agreement / DPA get carve-outs; the default pay-as-you-go API key
35
+ # (which is what pikuri uses) does not.
36
+ #
37
+ # Bottom line: Exa does not sell queries to data brokers, but it
38
+ # does mine them to train competing models, and the license it
39
+ # claims is effectively "do what we want with this, forever". If a
40
+ # query would be embarrassing or sensitive in a training set, drop
41
+ # Exa out of the cascade by unsetting {ENV_KEY} — {Engines.providers}
42
+ # is recomputed every call.
43
+ module Exa
44
+ # @return [String] Search endpoint (POST, JSON body)
45
+ ENDPOINT = 'https://api.exa.ai/search'
46
+ # @return [Integer] default number of results returned, matching
47
+ # {DuckDuckGo::DEFAULT_MAX_RESULTS}
48
+ DEFAULT_MAX_RESULTS = 10
49
+ # @return [String] env var holding the API key; sent as +x-api-key+
50
+ ENV_KEY = 'EXA_API_KEY'
51
+ # @return [RateLimiter] Exa is paid and doesn't aggressively
52
+ # throttle, so no minimum interval is enforced. The 5-minute
53
+ # cooldown still applies on {Engines::Unavailable} so the user's
54
+ # budget isn't burned on doomed retries while a 429 / 5xx
55
+ # condition persists.
56
+ LIMITER = RateLimiter.new(min_interval: 0.0, cooldown: 300.0)
57
+
58
+ # Fetch results for +query+ and return them as an +Array<Result>+.
59
+ # Calls are circuit-broken for 5 minutes on rate-limit / unavailable
60
+ # responses; see {LIMITER}. The caller (typically {Engines.search})
61
+ # is expected to have already normalized the query and to wrap this
62
+ # in a result cache.
63
+ #
64
+ # @param query [String] search query (already normalized)
65
+ # @param max_results [Integer] maximum number of result entries;
66
+ # passed through as Exa's +numResults+
67
+ # @param api_key [String] Exa API key; defaults to the {ENV_KEY}
68
+ # environment variable
69
+ # @return [Array<Result>] hits, possibly empty when Exa ran the
70
+ # query and matched nothing
71
+ # @raise [ArgumentError] if no API key is available
72
+ # @raise [Engines::Unavailable] when Exa returns HTTP 429
73
+ # (rate limit / quota exhausted) or 5xx — "try again later"
74
+ # responses the cascade in {Engines.search} can fall back from.
75
+ # Also raised immediately if {LIMITER} is in cooldown. Other
76
+ # non-2xx (e.g. 401/403 from a bad API key) bubble up as
77
+ # +RuntimeError+ so config problems stay visible.
78
+ # @raise [RuntimeError] for non-rate-limit HTTP failures or when the
79
+ # response shape contains no results and isn't a recognized
80
+ # empty-results payload.
81
+ def self.search(query, max_results: DEFAULT_MAX_RESULTS, api_key: ENV.fetch(ENV_KEY, nil))
82
+ raise ArgumentError, "Exa Search API key not set (#{ENV_KEY})" if api_key.to_s.strip.empty?
83
+
84
+ LIMITER.call do
85
+ response = Faraday.post(ENDPOINT) do |req|
86
+ req.headers['x-api-key'] = api_key
87
+ req.headers['Content-Type'] = 'application/json'
88
+ req.headers['Accept'] = 'application/json'
89
+ req.body = JSON.dump(
90
+ query: query,
91
+ type: 'auto',
92
+ numResults: max_results,
93
+ contents: { highlights: true }
94
+ )
95
+ end
96
+ unless response.success?
97
+ if response.status == 429 || response.status >= 500
98
+ raise Engines::Unavailable, "HTTP #{response.status}"
99
+ end
100
+
101
+ raise "Exa Search request failed: #{response.status} #{response.body}"
102
+ end
103
+
104
+ parse(response.body, max_results: max_results)
105
+ end
106
+ end
107
+
108
+ # Parse an Exa Search JSON response into a list of {Result} rows,
109
+ # where +body+ is the first non-empty +highlights+ snippet (empty
110
+ # when Exa returned no highlight for that result — e.g. for
111
+ # navigational results).
112
+ #
113
+ # When the response yields zero result entries, two cases are
114
+ # distinguished: a genuine "no results" payload (response carries
115
+ # a +requestId+ and an empty +results+ array — Exa ran the query
116
+ # but matched nothing) returns an empty array instead of raising,
117
+ # so {Engines.search} can render its standard no-results stub.
118
+ # Anything else (unknown shape, structured error) raises with a
119
+ # diagnostic so the failure surfaces.
120
+ #
121
+ # @param json [String] response body from {ENDPOINT}
122
+ # @param max_results [Integer] maximum number of result entries
123
+ # @return [Array<Result>] hits, possibly empty on a recognized
124
+ # empty-results payload
125
+ # @raise [RuntimeError] when the response yields no result entries and
126
+ # is not recognized as a genuine empty-results payload
127
+ def self.parse(json, max_results: DEFAULT_MAX_RESULTS)
128
+ data = JSON.parse(json)
129
+ results = Array(data['results']).take(max_results).filter_map do |r|
130
+ href = r['url'].to_s
131
+ next nil if href.empty?
132
+
133
+ Result.new(
134
+ url: href,
135
+ title: clean(r['title']) || href,
136
+ body: first_highlight(r['highlights'])
137
+ )
138
+ end
139
+
140
+ if results.empty?
141
+ return [] if genuine_no_results?(data)
142
+
143
+ raise diagnose_empty(data, json)
144
+ end
145
+
146
+ results
147
+ end
148
+
149
+ # Collapse whitespace and strip; returns +nil+ for nil/empty input
150
+ # so the caller can fall back (typically to the URL when a result
151
+ # has no usable title).
152
+ #
153
+ # @param text [String, nil] raw text from an Exa result field
154
+ # @return [String, nil] cleaned text, or +nil+ if input was blank
155
+ def self.clean(text)
156
+ return nil if text.nil?
157
+
158
+ cleaned = text.to_s.gsub(/\s+/, ' ').strip
159
+ cleaned.empty? ? nil : cleaned
160
+ end
161
+ private_class_method :clean
162
+
163
+ # First non-empty entry from a +highlights+ array, cleaned. Exa
164
+ # returns highlights as an array sorted by relevance; we surface
165
+ # only the top one to keep the observation compact and match the
166
+ # one-line +body+ convention used by Brave / DuckDuckGo.
167
+ #
168
+ # @param highlights [Array<String>, nil] +highlights+ field
169
+ # @return [String] cleaned snippet, or empty string if none usable
170
+ def self.first_highlight(highlights)
171
+ return '' unless highlights.is_a?(Array)
172
+
173
+ highlights.each do |h|
174
+ cleaned = clean(h)
175
+ return cleaned if cleaned
176
+ end
177
+ ''
178
+ end
179
+ private_class_method :first_highlight
180
+
181
+ # True when a parsed response with zero +results+ entries looks
182
+ # like Exa's own "search ran, nothing matched" payload rather than
183
+ # a malformed or error response. The marker is the +requestId+
184
+ # field, which Exa always sets on a successful request.
185
+ #
186
+ # @param data [Hash, Object] parsed response
187
+ # @return [Boolean]
188
+ def self.genuine_no_results?(data)
189
+ return false unless data.is_a?(Hash)
190
+ return false unless data.key?('requestId')
191
+
192
+ Array(data['results']).empty?
193
+ end
194
+ private_class_method :genuine_no_results?
195
+
196
+ # Build an error message for a parsed response that yielded zero
197
+ # results. Quotes Exa's +error+ / +message+ / +detail+ field if
198
+ # present, otherwise truncates the raw body so the caller can see
199
+ # the actual payload.
200
+ #
201
+ # @param data [Hash, Object] parsed response
202
+ # @param raw [String] raw response body
203
+ # @return [String] human-readable diagnostic to feed to +raise+
204
+ def self.diagnose_empty(data, raw)
205
+ if data.is_a?(Hash) && (msg = data['error'] || data['message'] || data['detail'])
206
+ return "Exa Search returned an error: #{msg}"
207
+ end
208
+
209
+ snippet = raw.to_s[0, 800]
210
+ snippet += '…' if raw.to_s.length > 800
211
+ "Exa Search returned no results. Body: #{snippet}"
212
+ end
213
+ private_class_method :diagnose_empty
214
+ end
215
+ end
216
+ end
217
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ # Loaded after {Tool} itself is defined; the +class Tool+ reopening below
5
+ # assumes that order.
6
+ class Tool
7
+ module Search
8
+ # Thread-safe pacing + circuit-breaker wrapper for a search provider.
9
+ #
10
+ # +#call { ... }+ enforces a minimum interval between consecutive
11
+ # invocations of the block (sleeping if the previous one was too
12
+ # recent), and watches for {Engines::Unavailable} raised by the
13
+ # block: when that happens, a cooldown deadline is recorded and
14
+ # further calls within the window raise {Engines::Unavailable}
15
+ # immediately without invoking the block. This stops a provider
16
+ # that has been rate-limited or bot-blocked from being hammered
17
+ # with retries.
18
+ #
19
+ # The mutex is held across the block, so concurrent callers
20
+ # serialize — matching the behavior {DuckDuckGo} has always
21
+ # required to keep its IP-spacing throttle correct under
22
+ # concurrent agents.
23
+ #
24
+ # Uses wall-clock {Time.now} rather than the monotonic clock; the
25
+ # intervals here are 1s–5min, well above any realistic NTP step,
26
+ # and {Time.now} keeps tests trivially fakeable with Timecop.
27
+ class RateLimiter
28
+ # @param min_interval [Float] minimum seconds between consecutive
29
+ # block invocations. {#call} sleeps if a previous call was more
30
+ # recent.
31
+ # @param cooldown [Float] seconds to refuse calls after the block
32
+ # raises {Engines::Unavailable}. Calls within this window raise
33
+ # {Engines::Unavailable} immediately without invoking the block.
34
+ def initialize(min_interval:, cooldown:)
35
+ @min_interval = min_interval
36
+ @cooldown = cooldown
37
+ @mutex = Mutex.new
38
+ @last_call_at = nil
39
+ @cooldown_until = nil
40
+ end
41
+
42
+ # Run the given block subject to throttle and cooldown rules.
43
+ #
44
+ # The block is invoked with the mutex held, so concurrent calls
45
+ # serialize: only one block runs at a time per limiter instance.
46
+ # If the block raises {Engines::Unavailable}, the cooldown is
47
+ # armed and the exception is re-raised. Any other exception
48
+ # bubbles up without arming cooldown — only "try again later"
49
+ # signals from the provider are treated as backoff triggers.
50
+ #
51
+ # @yieldreturn [Object] block's return value is passed through
52
+ # @return [Object] whatever the block returned
53
+ # @raise [Engines::Unavailable] either re-raised from the block,
54
+ # or raised directly when the limiter is currently in cooldown
55
+ def call
56
+ @mutex.synchronize do
57
+ now = Time.now
58
+ if @cooldown_until && now < @cooldown_until
59
+ remaining = (@cooldown_until - now).ceil
60
+ raise Engines::Unavailable, "rate-limiter cooldown active for another #{remaining}s"
61
+ end
62
+
63
+ if @last_call_at
64
+ elapsed = now - @last_call_at
65
+ sleep_for(@min_interval - elapsed) if elapsed < @min_interval
66
+ end
67
+ @last_call_at = Time.now
68
+
69
+ begin
70
+ yield
71
+ rescue Engines::Unavailable
72
+ @cooldown_until = Time.now + @cooldown
73
+ raise
74
+ end
75
+ end
76
+ end
77
+
78
+ # Sleep for +seconds+. Isolated as a private method so tests can
79
+ # override it on a single instance (typically to advance a frozen
80
+ # Timecop clock by the same amount) without monkey-patching the
81
+ # global +sleep+.
82
+ #
83
+ # @param seconds [Float] non-negative duration to sleep
84
+ # @return [void]
85
+ def sleep_for(seconds)
86
+ sleep(seconds)
87
+ end
88
+ private :sleep_for
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ class Tool
5
+ module Search
6
+ # A single search hit produced by a {Tool::Search} provider
7
+ # ({DuckDuckGo}, {Brave}, {Exa}). Providers return +Array<Result>+
8
+ # from +.parse+ / +.search+; {Engines.search} concatenates the
9
+ # rows into the smolagents-style Markdown the LLM sees.
10
+ #
11
+ # Splitting structure from rendering keeps the three providers
12
+ # interchangeable — they only have to agree on these three fields
13
+ # (provider-specific extras like relevance scores or published
14
+ # dates are discarded today, which is fine because no caller uses
15
+ # them).
16
+ #
17
+ # @!attribute [r] url
18
+ # @return [String] absolute URL of the hit
19
+ # @!attribute [r] title
20
+ # @return [String] plain-text title, with provider-specific
21
+ # highlight markup ({Brave}'s +<strong>+, {DuckDuckGo}'s
22
+ # +<b>+) already stripped
23
+ # @!attribute [r] body
24
+ # @return [String] plain-text snippet, possibly empty (e.g. an
25
+ # {Exa} navigational result with no highlights)
26
+ Result = Data.define(:url, :title, :body)
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pikuri
4
+ class Tool
5
+ # The +skill+ tool: instantiating +Tool::Skill.new(catalog:)+ produces
6
+ # a tool whose +execute+ closure looks the requested name up in the
7
+ # bound {SkillCatalog} and returns the skill's body wrapped in a
8
+ # +<skill>+ block with the absolute base directory, so the LLM can
9
+ # resolve relative sidecar paths against it via the regular +read+
10
+ # tool.
11
+ #
12
+ # The catalog of available skills is *not* duplicated into this tool's
13
+ # description. It lives in the system prompt — see
14
+ # {SkillCatalog#format_for_prompt} — because skills describe what the
15
+ # agent is, not just what one of its tools returns. Putting the
16
+ # catalog in the system prompt keeps it next to the agent's persona
17
+ # and matches what PI and Claude Code do (opencode is the outlier
18
+ # here). The tool description below is therefore *static* — it
19
+ # explains the load mechanism, not the inventory.
20
+ #
21
+ # This tool is not registered manually. {Agent#initialize} auto-
22
+ # registers it whenever +skill_catalog+ is non-empty, and skips it
23
+ # otherwise; sub-agents inherit it through the parent's tool snapshot.
24
+ # The bound catalog rides along in the +execute+ closure, so any
25
+ # sub-agent invoking +skill+ resolves names against the same catalog
26
+ # the parent saw.
27
+ class Skill < Tool
28
+ # Description shown to the LLM. Follows the opencode-shape (summary
29
+ # + +Usage:+ bullets) prescribed by the project's tool-description
30
+ # convention. The list of which skills exist is not here — see
31
+ # the class header.
32
+ #
33
+ # @return [String]
34
+ DESCRIPTION = <<~DESC
35
+ Load a specialized skill that provides domain-specific instructions and resources for a particular task.
36
+
37
+ Usage:
38
+ - The catalog of available skills is listed in your system prompt under `<available_skills>`.
39
+ - Invoke this tool with a skill's `name` to inject its full instructions into the conversation.
40
+ - The loaded skill may reference helper scripts and files in its base directory — use the `read` tool to load those when the skill's instructions tell you to.
41
+ - On `Error: skill '...' not found`, do NOT retry with a guessed name. Pick a name that actually appears in `<available_skills>`, or report to the user that no matching skill is installed.
42
+ DESC
43
+
44
+ # @param catalog [SkillCatalog] the catalog to resolve names
45
+ # against. Captured by closure so the tool retains access to
46
+ # the same instance even when copied into a sub-agent's tool
47
+ # snapshot.
48
+ # @return [Skill]
49
+ def initialize(catalog:)
50
+ super(
51
+ name: 'skill',
52
+ description: DESCRIPTION,
53
+ parameters: Parameters.build { |p|
54
+ p.required_string :name,
55
+ 'Name of the skill to load, e.g. "pdf-extraction". ' \
56
+ 'Must match a name listed under `<available_skills>` ' \
57
+ 'in the system prompt.'
58
+ },
59
+ execute: lambda { |name:|
60
+ loaded = catalog.get(name)
61
+ if loaded.nil?
62
+ available = catalog.list.map(&:name).sort
63
+ list = available.empty? ? 'none' : available.join(', ')
64
+ next "Error: skill '#{name}' not found. Available skills: #{list}."
65
+ end
66
+
67
+ base_dir = File.dirname(loaded.location)
68
+ <<~OUT
69
+ <skill name="#{loaded.name}" location="#{loaded.location}">
70
+ Sidecar paths in this skill (e.g. scripts/, references/) are relative to #{base_dir}.
71
+
72
+ #{loaded.body}
73
+ </skill>
74
+ OUT
75
+ }
76
+ )
77
+ end
78
+ end
79
+ end
80
+ end