pikuri-core 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/lib/pikuri/agent/chat_transport.rb +135 -11
- data/lib/pikuri/agent/configurator.rb +4 -4
- data/lib/pikuri/agent/context_window_detector.rb +103 -52
- data/lib/pikuri/agent/control/step_limit.rb +39 -7
- data/lib/pikuri/agent/event.rb +43 -16
- data/lib/pikuri/agent/extension.rb +31 -17
- data/lib/pikuri/agent/extension_context.rb +147 -0
- data/lib/pikuri/agent/listener/terminal.rb +30 -37
- data/lib/pikuri/agent/listener/token_log.rb +60 -13
- data/lib/pikuri/agent/listener.rb +12 -5
- data/lib/pikuri/agent/listener_list.rb +7 -17
- data/lib/pikuri/agent/synthesizer.rb +93 -67
- data/lib/pikuri/agent.rb +358 -403
- data/lib/pikuri/extractor/html.rb +303 -0
- data/lib/pikuri/extractor/passthrough.rb +64 -0
- data/lib/pikuri/extractor.rb +314 -0
- data/lib/pikuri/file_type.rb +74 -266
- data/lib/pikuri/sanitizer.rb +179 -0
- data/lib/pikuri/subprocess.rb +73 -2
- data/lib/pikuri/tool/calculator.rb +213 -41
- data/lib/pikuri/tool/fetch.rb +10 -9
- data/lib/pikuri/tool/parameters.rb +65 -2
- data/lib/pikuri/tool/scraper.rb +186 -0
- data/lib/pikuri/tool/search/brave.rb +32 -18
- data/lib/pikuri/tool/search/duckduckgo.rb +18 -7
- data/lib/pikuri/tool/search/engines.rb +72 -49
- data/lib/pikuri/tool/search/exa.rb +34 -22
- data/lib/pikuri/tool/web_scrape.rb +5 -5
- data/lib/pikuri/tool/web_search.rb +45 -26
- data/lib/pikuri/version.rb +1 -1
- data/lib/pikuri-core.rb +11 -10
- metadata +9 -66
- data/lib/pikuri/tool/scraper/fetch_error.rb +0 -16
- data/lib/pikuri/tool/scraper/html.rb +0 -285
- data/lib/pikuri/tool/scraper/pdf.rb +0 -54
- data/lib/pikuri/tool/scraper/simple.rb +0 -183
|
@@ -8,14 +8,17 @@ module Pikuri
|
|
|
8
8
|
module Search
|
|
9
9
|
# Performs an Exa search via the official +/search+ endpoint and
|
|
10
10
|
# returns the hits as a list of {Result} rows. Split into a thin HTTP
|
|
11
|
-
# fetch (#search) and a pure parser (
|
|
11
|
+
# fetch (#search) and a pure parser (.parse) so tests can exercise
|
|
12
12
|
# the parser against fixture JSON without hitting the network. The
|
|
13
|
-
# cascade in {Engines
|
|
13
|
+
# cascade in {Engines#search} owns the final Markdown rendering.
|
|
14
14
|
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
15
|
+
# A class constructed with the API key it should use
|
|
16
|
+
# (+Exa.new(api_key:)+); {Engines} builds one only when an Exa key was
|
|
17
|
+
# configured, so users who haven't registered never spend money on it,
|
|
18
|
+
# and then drives it through the same +#search+ / +#label+ interface
|
|
19
|
+
# as every other provider. pikuri reads no key from the environment
|
|
20
|
+
# (see CLAUDE.md "Environment is not a secret store"). Get a key at
|
|
21
|
+
# https://exa.ai — the service is paid.
|
|
19
22
|
#
|
|
20
23
|
# Calls request +type: "auto"+ (Exa picks neural vs keyword per
|
|
21
24
|
# query) and +contents: { highlights: true }+ so each result carries
|
|
@@ -37,17 +40,15 @@ module Pikuri
|
|
|
37
40
|
# Bottom line: Exa does not sell queries to data brokers, but it
|
|
38
41
|
# does mine them to train competing models, and the license it
|
|
39
42
|
# claims is effectively "do what we want with this, forever". If a
|
|
40
|
-
# query would be embarrassing or sensitive in a training set,
|
|
41
|
-
#
|
|
42
|
-
#
|
|
43
|
-
|
|
43
|
+
# query would be embarrassing or sensitive in a training set, simply
|
|
44
|
+
# don't configure an Exa key — {Engines#providers} leaves Exa out of
|
|
45
|
+
# the cascade unless its key was supplied to the constructor.
|
|
46
|
+
class Exa
|
|
44
47
|
# @return [String] Search endpoint (POST, JSON body)
|
|
45
48
|
ENDPOINT = 'https://api.exa.ai/search'
|
|
46
49
|
# @return [Integer] default number of results returned, matching
|
|
47
50
|
# {DuckDuckGo::DEFAULT_MAX_RESULTS}
|
|
48
51
|
DEFAULT_MAX_RESULTS = 10
|
|
49
|
-
# @return [String] env var holding the API key; sent as +x-api-key+
|
|
50
|
-
ENV_KEY = 'EXA_API_KEY'
|
|
51
52
|
# @return [RateLimiter] Exa is paid and doesn't aggressively
|
|
52
53
|
# throttle, so no minimum interval is enforced. The 5-minute
|
|
53
54
|
# cooldown still applies on {Engines::Unavailable} so the user's
|
|
@@ -55,35 +56,46 @@ module Pikuri
|
|
|
55
56
|
# condition persists.
|
|
56
57
|
LIMITER = RateLimiter.new(min_interval: 0.0, cooldown: 300.0)
|
|
57
58
|
|
|
59
|
+
# @param api_key [String] Exa API key. Required and non-blank:
|
|
60
|
+
# pikuri reads no key from the environment — the host supplies it
|
|
61
|
+
# ({Engines} only constructs an Exa when a key was configured).
|
|
62
|
+
# @raise [ArgumentError] if +api_key+ is blank
|
|
63
|
+
def initialize(api_key:)
|
|
64
|
+
raise ArgumentError, 'Exa Search API key is blank' if api_key.to_s.strip.empty?
|
|
65
|
+
|
|
66
|
+
@api_key = api_key
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# @return [String] short provider label for {Engines} logging /
|
|
70
|
+
# fallback messages.
|
|
71
|
+
def label
|
|
72
|
+
'Exa'
|
|
73
|
+
end
|
|
74
|
+
|
|
58
75
|
# Fetch results for +query+ and return them as an +Array<Result>+.
|
|
59
76
|
# Calls are circuit-broken for 5 minutes on rate-limit / unavailable
|
|
60
|
-
# responses; see {LIMITER}. The caller (typically {Engines
|
|
77
|
+
# responses; see {LIMITER}. The caller (typically {Engines#search})
|
|
61
78
|
# is expected to have already normalized the query and to wrap this
|
|
62
79
|
# in a result cache.
|
|
63
80
|
#
|
|
64
81
|
# @param query [String] search query (already normalized)
|
|
65
82
|
# @param max_results [Integer] maximum number of result entries;
|
|
66
83
|
# passed through as Exa's +numResults+
|
|
67
|
-
# @param api_key [String] Exa API key; defaults to the {ENV_KEY}
|
|
68
|
-
# environment variable
|
|
69
84
|
# @return [Array<Result>] hits, possibly empty when Exa ran the
|
|
70
85
|
# query and matched nothing
|
|
71
|
-
# @raise [ArgumentError] if no API key is available
|
|
72
86
|
# @raise [Engines::Unavailable] when Exa returns HTTP 429
|
|
73
87
|
# (rate limit / quota exhausted) or 5xx — "try again later"
|
|
74
|
-
# responses the cascade in {Engines
|
|
88
|
+
# responses the cascade in {Engines#search} can fall back from.
|
|
75
89
|
# Also raised immediately if {LIMITER} is in cooldown. Other
|
|
76
90
|
# non-2xx (e.g. 401/403 from a bad API key) bubble up as
|
|
77
91
|
# +RuntimeError+ so config problems stay visible.
|
|
78
92
|
# @raise [RuntimeError] for non-rate-limit HTTP failures or when the
|
|
79
93
|
# response shape contains no results and isn't a recognized
|
|
80
94
|
# empty-results payload.
|
|
81
|
-
def
|
|
82
|
-
raise ArgumentError, "Exa Search API key not set (#{ENV_KEY})" if api_key.to_s.strip.empty?
|
|
83
|
-
|
|
95
|
+
def search(query, max_results: DEFAULT_MAX_RESULTS)
|
|
84
96
|
LIMITER.call do
|
|
85
97
|
response = Faraday.post(ENDPOINT) do |req|
|
|
86
|
-
req.headers['x-api-key'] = api_key
|
|
98
|
+
req.headers['x-api-key'] = @api_key
|
|
87
99
|
req.headers['Content-Type'] = 'application/json'
|
|
88
100
|
req.headers['Accept'] = 'application/json'
|
|
89
101
|
req.body = JSON.dump(
|
|
@@ -101,7 +113,7 @@ module Pikuri
|
|
|
101
113
|
raise "Exa Search request failed: #{response.status} #{response.body}"
|
|
102
114
|
end
|
|
103
115
|
|
|
104
|
-
parse(response.body, max_results: max_results)
|
|
116
|
+
self.class.parse(response.body, max_results: max_results)
|
|
105
117
|
end
|
|
106
118
|
end
|
|
107
119
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module Pikuri
|
|
4
4
|
class Tool
|
|
5
5
|
# Truncation policy and Tool spec for the +web_scrape+ tool. The actual
|
|
6
|
-
# scraping lives in {Tool::Scraper
|
|
6
|
+
# scraping lives in {Tool::Scraper}; this module is a thin
|
|
7
7
|
# wrapper that picks the scraper, applies a character cap so the LLM
|
|
8
8
|
# doesn't drown in long-form content, and exposes the result to the
|
|
9
9
|
# agent loop in OpenAI tool-call shape.
|
|
@@ -37,7 +37,7 @@ module Pikuri
|
|
|
37
37
|
CACHE
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
# Fetch +url+ via {Tool::Scraper
|
|
40
|
+
# Fetch +url+ via {Tool::Scraper} and truncate the rendered
|
|
41
41
|
# Markdown to +max_chars+ characters.
|
|
42
42
|
#
|
|
43
43
|
# The full extracted Markdown is cached on disk via {.cache}, keyed
|
|
@@ -65,7 +65,7 @@ module Pikuri
|
|
|
65
65
|
# truncated, or +"Error: ..."+ on a recoverable fetch failure
|
|
66
66
|
def self.visit(url, max_chars: DEFAULT_MAX_CHARS)
|
|
67
67
|
max_chars = max_chars.clamp(1, MAX_MAX_CHARS)
|
|
68
|
-
markdown = cache.fetch(url) { Scraper
|
|
68
|
+
markdown = cache.fetch(url) { Scraper.visit(url) }
|
|
69
69
|
truncate(markdown, max_chars)
|
|
70
70
|
rescue Scraper::FetchError => e
|
|
71
71
|
"Error: #{e.message}"
|
|
@@ -95,10 +95,10 @@ module Pikuri
|
|
|
95
95
|
WEB_SCRAPE = new(
|
|
96
96
|
name: 'web_scrape',
|
|
97
97
|
description: <<~DESC,
|
|
98
|
-
Scrapes the rendered webpage
|
|
98
|
+
Scrapes the rendered webpage or text file at the given URL and returns its main content as Markdown.
|
|
99
99
|
|
|
100
100
|
Usage:
|
|
101
|
-
- Use for HTML pages
|
|
101
|
+
- Use for HTML pages where you want readable content — readability extraction strips nav, sidebars, and boilerplate.
|
|
102
102
|
- For raw textual payloads (JSON, CSV, robots.txt, source files), use fetch instead — it returns bytes verbatim, while web_scrape would corrupt them with a Markdown pass.
|
|
103
103
|
- A Single Page App may return very little or no content. Do NOT retry with a larger max_chars; try a different URL instead.
|
|
104
104
|
DESC
|
|
@@ -2,37 +2,56 @@
|
|
|
2
2
|
|
|
3
3
|
module Pikuri
|
|
4
4
|
class Tool
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
module WebSearch; end
|
|
9
|
-
|
|
10
|
-
# Web-search tool exposed to the agent loop in OpenAI tool-call shape.
|
|
11
|
-
# Calls {Tool::Search::Engines.search}, which cascades through whichever
|
|
12
|
-
# providers are configured (DuckDuckGo always, Brave when its API key is
|
|
13
|
-
# present) in random order, falling back on temporary-unavailability
|
|
14
|
-
# errors. Providers return structured {Tool::Search::Result} rows;
|
|
15
|
-
# +Engines.search+ renders the winning provider's rows into the
|
|
16
|
-
# smolagents-style Markdown shape the LLM sees, so the format stays
|
|
17
|
-
# stable regardless of which provider ran.
|
|
5
|
+
# Builder for the LLM-facing web-search tool. The search orchestration
|
|
6
|
+
# lives in {Tool::Search::Engines}; this module owns only the {.build}
|
|
7
|
+
# factory that wires a configured {Search::Engines} into a {Tool}.
|
|
18
8
|
#
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
9
|
+
# Unlike the stateless bundled tools (+CALCULATOR+ / +WEB_SCRAPE+ /
|
|
10
|
+
# +FETCH+, shared value constants), web_search is *host-configured*:
|
|
11
|
+
# the paid providers (Brave / Exa) join the cascade only when the host
|
|
12
|
+
# passes their API key, so the tool is built per-wiring — like
|
|
13
|
+
# {Code::Bash} — rather than handed out as a single shared constant.
|
|
14
|
+
# pikuri reads no key from the environment; the host supplies them (the
|
|
15
|
+
# bundled +bin/+ examples load a JSON config file by convention). See
|
|
16
|
+
# CLAUDE.md "Environment is not a secret store".
|
|
17
|
+
module WebSearch
|
|
18
|
+
# Description shown to the LLM, opencode-shape (summary + +Usage:+).
|
|
19
|
+
#
|
|
20
|
+
# @return [String]
|
|
21
|
+
DESCRIPTION = <<~DESC
|
|
23
22
|
Searches the web for a query and returns the top results as a Markdown list of titles, URLs, and short snippets.
|
|
24
23
|
|
|
25
24
|
Usage:
|
|
26
25
|
- Use this to find candidate URLs, then call web_scrape on the most promising one(s) for full content. Snippets alone rarely answer a question.
|
|
27
26
|
DESC
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
27
|
+
|
|
28
|
+
# Build the +web_search+ tool. It calls {Search::Engines#search},
|
|
29
|
+
# which cascades through DuckDuckGo (always) plus whichever keyed
|
|
30
|
+
# providers were configured, in random order, falling back on
|
|
31
|
+
# temporary-unavailability errors and rendering the winning
|
|
32
|
+
# provider's rows into a stable Markdown shape.
|
|
33
|
+
#
|
|
34
|
+
# @param brave_key [String, nil] Brave Search API key; when present
|
|
35
|
+
# (non-blank) Brave joins the cascade. Get one at
|
|
36
|
+
# https://api-dashboard.search.brave.com.
|
|
37
|
+
# @param exa_key [String, nil] Exa API key; when present (non-blank)
|
|
38
|
+
# Exa joins the cascade. Get one at https://exa.ai.
|
|
39
|
+
# @return [Tool] the +web_search+ tool in OpenAI tool-call shape
|
|
40
|
+
def self.build(brave_key: nil, exa_key: nil)
|
|
41
|
+
engines = Search::Engines.new(brave_key: brave_key, exa_key: exa_key)
|
|
42
|
+
Tool.new(
|
|
43
|
+
name: 'web_search',
|
|
44
|
+
description: DESCRIPTION,
|
|
45
|
+
parameters: Parameters.build { |p|
|
|
46
|
+
p.required_string :query,
|
|
47
|
+
'The search query, e.g. "BigDecimal precision Ruby".'
|
|
48
|
+
p.optional_integer :max_results,
|
|
49
|
+
'Maximum number of result entries to return. ' \
|
|
50
|
+
'Defaults to 10; most providers cap this at 20.'
|
|
51
|
+
},
|
|
52
|
+
execute: ->(query:, max_results: 10) { engines.search(query, max_results: max_results) }
|
|
53
|
+
)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
37
56
|
end
|
|
38
57
|
end
|
data/lib/pikuri/version.rb
CHANGED
data/lib/pikuri-core.rb
CHANGED
|
@@ -28,15 +28,17 @@ require_relative 'pikuri/version'
|
|
|
28
28
|
#
|
|
29
29
|
# == Why eager-load
|
|
30
30
|
#
|
|
31
|
-
#
|
|
32
|
-
# +Pikuri::Tool::
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
#
|
|
31
|
+
# The stateless bundled tools (+Pikuri::Tool::CALCULATOR+,
|
|
32
|
+
# +Pikuri::Tool::WEB_SCRAPE+, +Pikuri::Tool::FETCH+) are +ALL_CAPS+ value
|
|
33
|
+
# constants rather than classes/modules, and Zeitwerk only auto-loads
|
|
34
|
+
# constants that match its filename-↔-CamelCase convention. Eager-loading
|
|
35
|
+
# at boot guarantees the files defining those values run, so the bin
|
|
36
|
+
# script can drop them straight into the +Agent.new+ block via
|
|
37
|
+
# +c.add_tool+ without per-file +require+ ceremony. (web_search is instead
|
|
38
|
+
# host-configured — built per-wiring via {Pikuri::Tool::WebSearch.build}
|
|
39
|
+
# with the host's provider keys — so it is not a value constant.) The cost
|
|
40
|
+
# is a few milliseconds of startup — negligible compared to a single LLM
|
|
41
|
+
# round-trip.
|
|
40
42
|
module Pikuri
|
|
41
43
|
# Search path for bundled system prompts. Mutable list: each pikuri
|
|
42
44
|
# gem appends its own +prompts/+ directory when it boots, so a
|
|
@@ -169,7 +171,6 @@ module Pikuri
|
|
|
169
171
|
Loader.ignore(File.expand_path('pikuri/version.rb', __dir__))
|
|
170
172
|
Loader.inflector.inflect(
|
|
171
173
|
'html' => 'HTML',
|
|
172
|
-
'pdf' => 'PDF',
|
|
173
174
|
'duckduckgo' => 'DuckDuckGo'
|
|
174
175
|
)
|
|
175
176
|
Loader.setup
|
metadata
CHANGED
|
@@ -1,29 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pikuri-core
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.5
|
|
4
|
+
version: 0.0.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martin Vysny
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
|
-
- !ruby/object:Gem::Dependency
|
|
14
|
-
name: dentaku
|
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - "~>"
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: '3.5'
|
|
20
|
-
type: :runtime
|
|
21
|
-
prerelease: false
|
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
-
requirements:
|
|
24
|
-
- - "~>"
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: '3.5'
|
|
27
12
|
- !ruby/object:Gem::Dependency
|
|
28
13
|
name: faraday
|
|
29
14
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -52,20 +37,6 @@ dependencies:
|
|
|
52
37
|
- - "~>"
|
|
53
38
|
- !ruby/object:Gem::Version
|
|
54
39
|
version: '1.19'
|
|
55
|
-
- !ruby/object:Gem::Dependency
|
|
56
|
-
name: pdf-reader
|
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
|
58
|
-
requirements:
|
|
59
|
-
- - "~>"
|
|
60
|
-
- !ruby/object:Gem::Version
|
|
61
|
-
version: '2.15'
|
|
62
|
-
type: :runtime
|
|
63
|
-
prerelease: false
|
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
-
requirements:
|
|
66
|
-
- - "~>"
|
|
67
|
-
- !ruby/object:Gem::Version
|
|
68
|
-
version: '2.15'
|
|
69
40
|
- !ruby/object:Gem::Dependency
|
|
70
41
|
name: rainbow
|
|
71
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -122,34 +93,6 @@ dependencies:
|
|
|
122
93
|
- - "~>"
|
|
123
94
|
- !ruby/object:Gem::Version
|
|
124
95
|
version: '1.15'
|
|
125
|
-
- !ruby/object:Gem::Dependency
|
|
126
|
-
name: tsort
|
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
|
128
|
-
requirements:
|
|
129
|
-
- - "~>"
|
|
130
|
-
- !ruby/object:Gem::Version
|
|
131
|
-
version: '0.2'
|
|
132
|
-
type: :runtime
|
|
133
|
-
prerelease: false
|
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
-
requirements:
|
|
136
|
-
- - "~>"
|
|
137
|
-
- !ruby/object:Gem::Version
|
|
138
|
-
version: '0.2'
|
|
139
|
-
- !ruby/object:Gem::Dependency
|
|
140
|
-
name: tty-markdown
|
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
|
142
|
-
requirements:
|
|
143
|
-
- - "~>"
|
|
144
|
-
- !ruby/object:Gem::Version
|
|
145
|
-
version: '0.7'
|
|
146
|
-
type: :runtime
|
|
147
|
-
prerelease: false
|
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
149
|
-
requirements:
|
|
150
|
-
- - "~>"
|
|
151
|
-
- !ruby/object:Gem::Version
|
|
152
|
-
version: '0.7'
|
|
153
96
|
- !ruby/object:Gem::Dependency
|
|
154
97
|
name: zeitwerk
|
|
155
98
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -192,6 +135,7 @@ files:
|
|
|
192
135
|
- lib/pikuri/agent/control/step_limit.rb
|
|
193
136
|
- lib/pikuri/agent/event.rb
|
|
194
137
|
- lib/pikuri/agent/extension.rb
|
|
138
|
+
- lib/pikuri/agent/extension_context.rb
|
|
195
139
|
- lib/pikuri/agent/listener.rb
|
|
196
140
|
- lib/pikuri/agent/listener/in_memory_event_list.rb
|
|
197
141
|
- lib/pikuri/agent/listener/rate_limited.rb
|
|
@@ -199,18 +143,19 @@ files:
|
|
|
199
143
|
- lib/pikuri/agent/listener/token_log.rb
|
|
200
144
|
- lib/pikuri/agent/listener_list.rb
|
|
201
145
|
- lib/pikuri/agent/synthesizer.rb
|
|
146
|
+
- lib/pikuri/extractor.rb
|
|
147
|
+
- lib/pikuri/extractor/html.rb
|
|
148
|
+
- lib/pikuri/extractor/passthrough.rb
|
|
202
149
|
- lib/pikuri/file_type.rb
|
|
203
150
|
- lib/pikuri/finalizers.rb
|
|
204
151
|
- lib/pikuri/paths.rb
|
|
152
|
+
- lib/pikuri/sanitizer.rb
|
|
205
153
|
- lib/pikuri/subprocess.rb
|
|
206
154
|
- lib/pikuri/tool.rb
|
|
207
155
|
- lib/pikuri/tool/calculator.rb
|
|
208
156
|
- lib/pikuri/tool/fetch.rb
|
|
209
157
|
- lib/pikuri/tool/parameters.rb
|
|
210
|
-
- lib/pikuri/tool/scraper
|
|
211
|
-
- lib/pikuri/tool/scraper/html.rb
|
|
212
|
-
- lib/pikuri/tool/scraper/pdf.rb
|
|
213
|
-
- lib/pikuri/tool/scraper/simple.rb
|
|
158
|
+
- lib/pikuri/tool/scraper.rb
|
|
214
159
|
- lib/pikuri/tool/search/brave.rb
|
|
215
160
|
- lib/pikuri/tool/search/duckduckgo.rb
|
|
216
161
|
- lib/pikuri/tool/search/engines.rb
|
|
@@ -230,7 +175,6 @@ metadata:
|
|
|
230
175
|
changelog_uri: https://codeberg.org/mvysny/pikuri/src/branch/master/CHANGELOG.md
|
|
231
176
|
bug_tracker_uri: https://codeberg.org/mvysny/pikuri/issues
|
|
232
177
|
rubygems_mfa_required: 'true'
|
|
233
|
-
post_install_message:
|
|
234
178
|
rdoc_options: []
|
|
235
179
|
require_paths:
|
|
236
180
|
- lib
|
|
@@ -245,8 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
245
189
|
- !ruby/object:Gem::Version
|
|
246
190
|
version: '0'
|
|
247
191
|
requirements: []
|
|
248
|
-
rubygems_version: 3.
|
|
249
|
-
signing_key:
|
|
192
|
+
rubygems_version: 3.6.7
|
|
250
193
|
specification_version: 4
|
|
251
194
|
summary: The minimal core of the pikuri AI-assistant toolkit (Agent, Tool framework,
|
|
252
195
|
web tools, bin/pikuri-chat).
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pikuri
|
|
4
|
-
class Tool
|
|
5
|
-
module Scraper
|
|
6
|
-
# Raised by anything in the scraper stack when a URL cannot be
|
|
7
|
-
# rendered into Markdown text — HTTP non-2xx, network failure,
|
|
8
|
-
# redirect-loop, missing +Location+, unsupported content-type, or a
|
|
9
|
-
# parse failure that reads as "try a different URL" to the LLM.
|
|
10
|
-
# Catching this in {Tool::WEB_SCRAPE} / {Tool::FETCH} turns the
|
|
11
|
-
# failure into an +"Error: ..."+ observation; anything else bubbles
|
|
12
|
-
# up so genuine bugs stay visible.
|
|
13
|
-
class FetchError < StandardError; end
|
|
14
|
-
end
|
|
15
|
-
end
|
|
16
|
-
end
|