pikuri-core 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/lib/pikuri/agent/chat_transport.rb +135 -11
- data/lib/pikuri/agent/configurator.rb +4 -4
- data/lib/pikuri/agent/context_window_detector.rb +103 -52
- data/lib/pikuri/agent/control/step_limit.rb +39 -7
- data/lib/pikuri/agent/event.rb +43 -16
- data/lib/pikuri/agent/extension.rb +31 -17
- data/lib/pikuri/agent/extension_context.rb +147 -0
- data/lib/pikuri/agent/listener/terminal.rb +13 -2
- data/lib/pikuri/agent/listener/token_log.rb +60 -13
- data/lib/pikuri/agent/listener.rb +12 -5
- data/lib/pikuri/agent/listener_list.rb +7 -17
- data/lib/pikuri/agent/synthesizer.rb +93 -67
- data/lib/pikuri/agent.rb +358 -403
- data/lib/pikuri/sanitizer.rb +179 -0
- data/lib/pikuri/tool/parameters.rb +65 -2
- data/lib/pikuri/tool/search/brave.rb +32 -18
- data/lib/pikuri/tool/search/duckduckgo.rb +18 -7
- data/lib/pikuri/tool/search/engines.rb +72 -49
- data/lib/pikuri/tool/search/exa.rb +34 -22
- data/lib/pikuri/tool/web_search.rb +45 -26
- data/lib/pikuri/version.rb +1 -1
- data/lib/pikuri-core.rb +11 -9
- metadata +5 -6
|
@@ -8,14 +8,17 @@ module Pikuri
|
|
|
8
8
|
module Search
|
|
9
9
|
# Performs an Exa search via the official +/search+ endpoint and
|
|
10
10
|
# returns the hits as a list of {Result} rows. Split into a thin HTTP
|
|
11
|
-
# fetch (#search) and a pure parser (
|
|
11
|
+
# fetch (#search) and a pure parser (.parse) so tests can exercise
|
|
12
12
|
# the parser against fixture JSON without hitting the network. The
|
|
13
|
-
# cascade in {Engines
|
|
13
|
+
# cascade in {Engines#search} owns the final Markdown rendering.
|
|
14
14
|
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
15
|
+
# A class constructed with the API key it should use
|
|
16
|
+
# (+Exa.new(api_key:)+); {Engines} builds one only when an Exa key was
|
|
17
|
+
# configured, so users who haven't registered never spend money on it,
|
|
18
|
+
# and then drives it through the same +#search+ / +#label+ interface
|
|
19
|
+
# as every other provider. pikuri reads no key from the environment
|
|
20
|
+
# (see CLAUDE.md "Environment is not a secret store"). Get a key at
|
|
21
|
+
# https://exa.ai — the service is paid.
|
|
19
22
|
#
|
|
20
23
|
# Calls request +type: "auto"+ (Exa picks neural vs keyword per
|
|
21
24
|
# query) and +contents: { highlights: true }+ so each result carries
|
|
@@ -37,17 +40,15 @@ module Pikuri
|
|
|
37
40
|
# Bottom line: Exa does not sell queries to data brokers, but it
|
|
38
41
|
# does mine them to train competing models, and the license it
|
|
39
42
|
# claims is effectively "do what we want with this, forever". If a
|
|
40
|
-
# query would be embarrassing or sensitive in a training set,
|
|
41
|
-
#
|
|
42
|
-
#
|
|
43
|
-
|
|
43
|
+
# query would be embarrassing or sensitive in a training set, simply
|
|
44
|
+
# don't configure an Exa key — {Engines#providers} leaves Exa out of
|
|
45
|
+
# the cascade unless its key was supplied to the constructor.
|
|
46
|
+
class Exa
|
|
44
47
|
# @return [String] Search endpoint (POST, JSON body)
|
|
45
48
|
ENDPOINT = 'https://api.exa.ai/search'
|
|
46
49
|
# @return [Integer] default number of results returned, matching
|
|
47
50
|
# {DuckDuckGo::DEFAULT_MAX_RESULTS}
|
|
48
51
|
DEFAULT_MAX_RESULTS = 10
|
|
49
|
-
# @return [String] env var holding the API key; sent as +x-api-key+
|
|
50
|
-
ENV_KEY = 'EXA_API_KEY'
|
|
51
52
|
# @return [RateLimiter] Exa is paid and doesn't aggressively
|
|
52
53
|
# throttle, so no minimum interval is enforced. The 5-minute
|
|
53
54
|
# cooldown still applies on {Engines::Unavailable} so the user's
|
|
@@ -55,35 +56,46 @@ module Pikuri
|
|
|
55
56
|
# condition persists.
|
|
56
57
|
LIMITER = RateLimiter.new(min_interval: 0.0, cooldown: 300.0)
|
|
57
58
|
|
|
59
|
+
# @param api_key [String] Exa API key. Required and non-blank:
|
|
60
|
+
# pikuri reads no key from the environment — the host supplies it
|
|
61
|
+
# ({Engines} only constructs an Exa when a key was configured).
|
|
62
|
+
# @raise [ArgumentError] if +api_key+ is blank
|
|
63
|
+
def initialize(api_key:)
|
|
64
|
+
raise ArgumentError, 'Exa Search API key is blank' if api_key.to_s.strip.empty?
|
|
65
|
+
|
|
66
|
+
@api_key = api_key
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# @return [String] short provider label for {Engines} logging /
|
|
70
|
+
# fallback messages.
|
|
71
|
+
def label
|
|
72
|
+
'Exa'
|
|
73
|
+
end
|
|
74
|
+
|
|
58
75
|
# Fetch results for +query+ and return them as an +Array<Result>+.
|
|
59
76
|
# Calls are circuit-broken for 5 minutes on rate-limit / unavailable
|
|
60
|
-
# responses; see {LIMITER}. The caller (typically {Engines
|
|
77
|
+
# responses; see {LIMITER}. The caller (typically {Engines#search})
|
|
61
78
|
# is expected to have already normalized the query and to wrap this
|
|
62
79
|
# in a result cache.
|
|
63
80
|
#
|
|
64
81
|
# @param query [String] search query (already normalized)
|
|
65
82
|
# @param max_results [Integer] maximum number of result entries;
|
|
66
83
|
# passed through as Exa's +numResults+
|
|
67
|
-
# @param api_key [String] Exa API key; defaults to the {ENV_KEY}
|
|
68
|
-
# environment variable
|
|
69
84
|
# @return [Array<Result>] hits, possibly empty when Exa ran the
|
|
70
85
|
# query and matched nothing
|
|
71
|
-
# @raise [ArgumentError] if no API key is available
|
|
72
86
|
# @raise [Engines::Unavailable] when Exa returns HTTP 429
|
|
73
87
|
# (rate limit / quota exhausted) or 5xx — "try again later"
|
|
74
|
-
# responses the cascade in {Engines
|
|
88
|
+
# responses the cascade in {Engines#search} can fall back from.
|
|
75
89
|
# Also raised immediately if {LIMITER} is in cooldown. Other
|
|
76
90
|
# non-2xx (e.g. 401/403 from a bad API key) bubble up as
|
|
77
91
|
# +RuntimeError+ so config problems stay visible.
|
|
78
92
|
# @raise [RuntimeError] for non-rate-limit HTTP failures or when the
|
|
79
93
|
# response shape contains no results and isn't a recognized
|
|
80
94
|
# empty-results payload.
|
|
81
|
-
def
|
|
82
|
-
raise ArgumentError, "Exa Search API key not set (#{ENV_KEY})" if api_key.to_s.strip.empty?
|
|
83
|
-
|
|
95
|
+
def search(query, max_results: DEFAULT_MAX_RESULTS)
|
|
84
96
|
LIMITER.call do
|
|
85
97
|
response = Faraday.post(ENDPOINT) do |req|
|
|
86
|
-
req.headers['x-api-key'] = api_key
|
|
98
|
+
req.headers['x-api-key'] = @api_key
|
|
87
99
|
req.headers['Content-Type'] = 'application/json'
|
|
88
100
|
req.headers['Accept'] = 'application/json'
|
|
89
101
|
req.body = JSON.dump(
|
|
@@ -101,7 +113,7 @@ module Pikuri
|
|
|
101
113
|
raise "Exa Search request failed: #{response.status} #{response.body}"
|
|
102
114
|
end
|
|
103
115
|
|
|
104
|
-
parse(response.body, max_results: max_results)
|
|
116
|
+
self.class.parse(response.body, max_results: max_results)
|
|
105
117
|
end
|
|
106
118
|
end
|
|
107
119
|
|
|
@@ -2,37 +2,56 @@
|
|
|
2
2
|
|
|
3
3
|
module Pikuri
|
|
4
4
|
class Tool
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
module WebSearch; end
|
|
9
|
-
|
|
10
|
-
# Web-search tool exposed to the agent loop in OpenAI tool-call shape.
|
|
11
|
-
# Calls {Tool::Search::Engines.search}, which cascades through whichever
|
|
12
|
-
# providers are configured (DuckDuckGo always, Brave when its API key is
|
|
13
|
-
# present) in random order, falling back on temporary-unavailability
|
|
14
|
-
# errors. Providers return structured {Tool::Search::Result} rows;
|
|
15
|
-
# +Engines.search+ renders the winning provider's rows into the
|
|
16
|
-
# smolagents-style Markdown shape the LLM sees, so the format stays
|
|
17
|
-
# stable regardless of which provider ran.
|
|
5
|
+
# Builder for the LLM-facing web-search tool. The search orchestration
|
|
6
|
+
# lives in {Tool::Search::Engines}; this module owns only the {.build}
|
|
7
|
+
# factory that wires a configured {Search::Engines} into a {Tool}.
|
|
18
8
|
#
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
9
|
+
# Unlike the stateless bundled tools (+CALCULATOR+ / +WEB_SCRAPE+ /
|
|
10
|
+
# +FETCH+, shared value constants), web_search is *host-configured*:
|
|
11
|
+
# the paid providers (Brave / Exa) join the cascade only when the host
|
|
12
|
+
# passes their API key, so the tool is built per-wiring — like
|
|
13
|
+
# {Code::Bash} — rather than handed out as a single shared constant.
|
|
14
|
+
# pikuri reads no key from the environment; the host supplies them (the
|
|
15
|
+
# bundled +bin/+ examples load a JSON config file by convention). See
|
|
16
|
+
# CLAUDE.md "Environment is not a secret store".
|
|
17
|
+
module WebSearch
|
|
18
|
+
# Description shown to the LLM, opencode-shape (summary + +Usage:+).
|
|
19
|
+
#
|
|
20
|
+
# @return [String]
|
|
21
|
+
DESCRIPTION = <<~DESC
|
|
23
22
|
Searches the web for a query and returns the top results as a Markdown list of titles, URLs, and short snippets.
|
|
24
23
|
|
|
25
24
|
Usage:
|
|
26
25
|
- Use this to find candidate URLs, then call web_scrape on the most promising one(s) for full content. Snippets alone rarely answer a question.
|
|
27
26
|
DESC
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
27
|
+
|
|
28
|
+
# Build the +web_search+ tool. It calls {Search::Engines#search},
|
|
29
|
+
# which cascades through DuckDuckGo (always) plus whichever keyed
|
|
30
|
+
# providers were configured, in random order, falling back on
|
|
31
|
+
# temporary-unavailability errors and rendering the winning
|
|
32
|
+
# provider's rows into a stable Markdown shape.
|
|
33
|
+
#
|
|
34
|
+
# @param brave_key [String, nil] Brave Search API key; when present
|
|
35
|
+
# (non-blank) Brave joins the cascade. Get one at
|
|
36
|
+
# https://api-dashboard.search.brave.com.
|
|
37
|
+
# @param exa_key [String, nil] Exa API key; when present (non-blank)
|
|
38
|
+
# Exa joins the cascade. Get one at https://exa.ai.
|
|
39
|
+
# @return [Tool] the +web_search+ tool in OpenAI tool-call shape
|
|
40
|
+
def self.build(brave_key: nil, exa_key: nil)
|
|
41
|
+
engines = Search::Engines.new(brave_key: brave_key, exa_key: exa_key)
|
|
42
|
+
Tool.new(
|
|
43
|
+
name: 'web_search',
|
|
44
|
+
description: DESCRIPTION,
|
|
45
|
+
parameters: Parameters.build { |p|
|
|
46
|
+
p.required_string :query,
|
|
47
|
+
'The search query, e.g. "BigDecimal precision Ruby".'
|
|
48
|
+
p.optional_integer :max_results,
|
|
49
|
+
'Maximum number of result entries to return. ' \
|
|
50
|
+
'Defaults to 10; most providers cap this at 20.'
|
|
51
|
+
},
|
|
52
|
+
execute: ->(query:, max_results: 10) { engines.search(query, max_results: max_results) }
|
|
53
|
+
)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
37
56
|
end
|
|
38
57
|
end
|
data/lib/pikuri/version.rb
CHANGED
data/lib/pikuri-core.rb
CHANGED
|
@@ -28,15 +28,17 @@ require_relative 'pikuri/version'
|
|
|
28
28
|
#
|
|
29
29
|
# == Why eager-load
|
|
30
30
|
#
|
|
31
|
-
#
|
|
32
|
-
# +Pikuri::Tool::
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
#
|
|
31
|
+
# The stateless bundled tools (+Pikuri::Tool::CALCULATOR+,
|
|
32
|
+
# +Pikuri::Tool::WEB_SCRAPE+, +Pikuri::Tool::FETCH+) are +ALL_CAPS+ value
|
|
33
|
+
# constants rather than classes/modules, and Zeitwerk only auto-loads
|
|
34
|
+
# constants that match its filename-↔-CamelCase convention. Eager-loading
|
|
35
|
+
# at boot guarantees the files defining those values run, so the bin
|
|
36
|
+
# script can drop them straight into the +Agent.new+ block via
|
|
37
|
+
# +c.add_tool+ without per-file +require+ ceremony. (web_search is instead
|
|
38
|
+
# host-configured — built per-wiring via {Pikuri::Tool::WebSearch.build}
|
|
39
|
+
# with the host's provider keys — so it is not a value constant.) The cost
|
|
40
|
+
# is a few milliseconds of startup — negligible compared to a single LLM
|
|
41
|
+
# round-trip.
|
|
40
42
|
module Pikuri
|
|
41
43
|
# Search path for bundled system prompts. Mutable list: each pikuri
|
|
42
44
|
# gem appends its own +prompts/+ directory when it boots, so a
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pikuri-core
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.6
|
|
4
|
+
version: 0.0.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martin Vysny
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: faraday
|
|
@@ -136,6 +135,7 @@ files:
|
|
|
136
135
|
- lib/pikuri/agent/control/step_limit.rb
|
|
137
136
|
- lib/pikuri/agent/event.rb
|
|
138
137
|
- lib/pikuri/agent/extension.rb
|
|
138
|
+
- lib/pikuri/agent/extension_context.rb
|
|
139
139
|
- lib/pikuri/agent/listener.rb
|
|
140
140
|
- lib/pikuri/agent/listener/in_memory_event_list.rb
|
|
141
141
|
- lib/pikuri/agent/listener/rate_limited.rb
|
|
@@ -149,6 +149,7 @@ files:
|
|
|
149
149
|
- lib/pikuri/file_type.rb
|
|
150
150
|
- lib/pikuri/finalizers.rb
|
|
151
151
|
- lib/pikuri/paths.rb
|
|
152
|
+
- lib/pikuri/sanitizer.rb
|
|
152
153
|
- lib/pikuri/subprocess.rb
|
|
153
154
|
- lib/pikuri/tool.rb
|
|
154
155
|
- lib/pikuri/tool/calculator.rb
|
|
@@ -174,7 +175,6 @@ metadata:
|
|
|
174
175
|
changelog_uri: https://codeberg.org/mvysny/pikuri/src/branch/master/CHANGELOG.md
|
|
175
176
|
bug_tracker_uri: https://codeberg.org/mvysny/pikuri/issues
|
|
176
177
|
rubygems_mfa_required: 'true'
|
|
177
|
-
post_install_message:
|
|
178
178
|
rdoc_options: []
|
|
179
179
|
require_paths:
|
|
180
180
|
- lib
|
|
@@ -189,8 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
189
189
|
- !ruby/object:Gem::Version
|
|
190
190
|
version: '0'
|
|
191
191
|
requirements: []
|
|
192
|
-
rubygems_version: 3.
|
|
193
|
-
signing_key:
|
|
192
|
+
rubygems_version: 3.6.7
|
|
194
193
|
specification_version: 4
|
|
195
194
|
summary: The minimal core of the pikuri AI-assistant toolkit (Agent, Tool framework,
|
|
196
195
|
web tools, bin/pikuri-chat).
|