kward 0.66.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +9 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +90 -0
- data/LICENSE +21 -0
- data/README.md +101 -0
- data/Rakefile +20 -0
- data/doc/authentication.md +105 -0
- data/doc/code-search.md +56 -0
- data/doc/configuration.md +310 -0
- data/doc/extensibility.md +186 -0
- data/doc/getting-started.md +127 -0
- data/doc/memory.md +192 -0
- data/doc/plugins.md +223 -0
- data/doc/releasing.md +36 -0
- data/doc/rpc.md +635 -0
- data/doc/usage.md +179 -0
- data/doc/web-search.md +28 -0
- data/exe/kward +5 -0
- data/kward.gemspec +33 -0
- data/lib/kward/agent.rb +234 -0
- data/lib/kward/ansi.rb +276 -0
- data/lib/kward/auth/file.rb +11 -0
- data/lib/kward/auth/github_oauth.rb +222 -0
- data/lib/kward/auth/openai_oauth.rb +323 -0
- data/lib/kward/auth/openrouter_api_key.rb +40 -0
- data/lib/kward/cancellation.rb +54 -0
- data/lib/kward/cli.rb +2122 -0
- data/lib/kward/clipboard.rb +84 -0
- data/lib/kward/compactor.rb +998 -0
- data/lib/kward/config_files.rb +564 -0
- data/lib/kward/conversation.rb +148 -0
- data/lib/kward/events.rb +13 -0
- data/lib/kward/export_path.rb +28 -0
- data/lib/kward/image_attachments.rb +331 -0
- data/lib/kward/markdown_transcript.rb +72 -0
- data/lib/kward/memory/manager.rb +652 -0
- data/lib/kward/message_access.rb +42 -0
- data/lib/kward/model/chat_invocation.rb +23 -0
- data/lib/kward/model/client.rb +875 -0
- data/lib/kward/model/context_overflow.rb +55 -0
- data/lib/kward/model/context_usage.rb +104 -0
- data/lib/kward/model/model_info.rb +188 -0
- data/lib/kward/model/retry_message.rb +11 -0
- data/lib/kward/model/stream_parser.rb +205 -0
- data/lib/kward/pan/index.html.erb +143 -0
- data/lib/kward/pan/server.rb +397 -0
- data/lib/kward/plugin_registry.rb +327 -0
- data/lib/kward/private_file.rb +18 -0
- data/lib/kward/prompt_interface.rb +2437 -0
- data/lib/kward/prompts/commands.rb +50 -0
- data/lib/kward/prompts/templates.rb +60 -0
- data/lib/kward/prompts.rb +58 -0
- data/lib/kward/resources/avatar_kward_logo.rb +48 -0
- data/lib/kward/resources/pixel_logo.rb +230 -0
- data/lib/kward/rpc/auth_manager.rb +265 -0
- data/lib/kward/rpc/config_manager.rb +58 -0
- data/lib/kward/rpc/prompt_bridge.rb +104 -0
- data/lib/kward/rpc/redactor.rb +47 -0
- data/lib/kward/rpc/server.rb +639 -0
- data/lib/kward/rpc/session_manager.rb +1122 -0
- data/lib/kward/rpc/tool_event_normalizer.rb +68 -0
- data/lib/kward/rpc/tool_metadata.rb +80 -0
- data/lib/kward/rpc/transcript_normalizer.rb +307 -0
- data/lib/kward/rpc/transport.rb +58 -0
- data/lib/kward/session_diff.rb +125 -0
- data/lib/kward/session_store.rb +493 -0
- data/lib/kward/skills/registry.rb +76 -0
- data/lib/kward/starter_pack_installer.rb +110 -0
- data/lib/kward/steering.rb +56 -0
- data/lib/kward/telemetry/logger.rb +195 -0
- data/lib/kward/telemetry/stats.rb +466 -0
- data/lib/kward/tools/ask_user_question.rb +107 -0
- data/lib/kward/tools/base.rb +45 -0
- data/lib/kward/tools/code_search.rb +65 -0
- data/lib/kward/tools/edit_file.rb +41 -0
- data/lib/kward/tools/list_directory.rb +21 -0
- data/lib/kward/tools/read_file.rb +30 -0
- data/lib/kward/tools/read_skill.rb +27 -0
- data/lib/kward/tools/registry.rb +117 -0
- data/lib/kward/tools/run_shell_command.rb +28 -0
- data/lib/kward/tools/search/code.rb +445 -0
- data/lib/kward/tools/search/web.rb +747 -0
- data/lib/kward/tools/tool_call.rb +87 -0
- data/lib/kward/tools/web_search.rb +48 -0
- data/lib/kward/tools/write_file.rb +29 -0
- data/lib/kward/transcript_export.rb +40 -0
- data/lib/kward/version.rb +4 -0
- data/lib/kward/workspace.rb +377 -0
- data/lib/kward.rb +6 -0
- data/lib/main.rb +3 -0
- metadata +232 -0
|
@@ -0,0 +1,747 @@
|
|
|
1
|
+
require "cgi"
|
|
2
|
+
require "json"
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "nokogiri"
|
|
5
|
+
require "uri"
|
|
6
|
+
require_relative "../../config_files"
|
|
7
|
+
|
|
8
|
+
module Kward
|
|
9
|
+
class WebSearch
|
|
10
|
+
# Result-count limits: default per query, hard ceiling, and queries accepted per call.
DEFAULT_MAX_RESULTS = 5
MAX_MAX_RESULTS = 20
MAX_QUERIES = 4

# Size budgets for the formatted output, model-provider replies, answers and excerpts.
MAX_OUTPUT_BYTES = 8192
MODEL_PROVIDER_MAX_TOKENS = 512
MAX_ANSWER_CHARS = 2000
MAX_EXCERPT_CHARS = 300

# Open/read timeout (seconds) used by NetHttpClient.
HTTP_TIMEOUT_SECONDS = 10

# Provider endpoints.
DUCKDUCKGO_URL = "https://html.duckduckgo.com/html/"
EXA_MCP_URL = "https://mcp.exa.ai/mcp"
EXA_ANSWER_URL = "https://api.exa.ai/answer"
EXA_SEARCH_URL = "https://api.exa.ai/search"
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
DEFAULT_GEMINI_MODEL = "gemini-2.5-flash"

# SearXNG base URLs used when the caller does not supply its own list.
PUBLIC_SEARXNG_INSTANCES = %w[
  https://searx.be
  https://search.inetol.net
  https://searx.tiekoetter.com
].freeze

# Provider names accepted from tool arguments and configuration.
PROVIDERS = ["auto", "exa", "perplexity", "gemini", "legacy", "duckduckgo"].freeze

# One search hit.
Result = Struct.new(:title, :url, :excerpt, :provider, keyword_init: true)
# The outcome of one provider call: optional answer text, hits, and an optional fallback note.
SearchResponse = Struct.new(:answer, :results, :provider, :note, keyword_init: true)
|
|
34
|
+
|
|
35
|
+
# Builds a search client.
#
# @param http_client [#get, #post, #post_json] transport used for every provider request
# @param searxng_instances [Array<String>] SearXNG base URLs, tried in the given order
# @param max_output_bytes [Integer] byte budget applied to the formatted output
# @param config [Hash, nil] preloaded configuration; when nil it is loaded on first use
def initialize(
  http_client: NetHttpClient.new,
  searxng_instances: PUBLIC_SEARXNG_INSTANCES,
  max_output_bytes: MAX_OUTPUT_BYTES,
  config: nil
)
  @config = config
  @max_output_bytes = max_output_bytes
  @searxng_instances = searxng_instances
  @http_client = http_client
end
|
|
41
|
+
|
|
42
|
+
# Reports whether the tool should be offered.
# It is available unless the "enabled" setting resolves to an explicit boolean.
#
# @return [Boolean]
def available?
  flag = boolean_config_value("enabled")
  flag.nil? ? true : flag
end
|
|
48
|
+
|
|
49
|
+
# Runs every requested query and renders the outcome as Markdown-style text.
#
# @param args [Hash] tool arguments; string or symbol keys, with camelCase aliases
#   accepted for the recency and domain filters
# @return [String] the formatted report, or a message starting with "Error:" when the
#   arguments are invalid or no query produced anything
def search(args)
  queries = args_value(args, "queries")
  unless valid_queries?(queries)
    return "Error: queries must be an array with 1-#{MAX_QUERIES} strings"
  end

  requested_limit = args_value(args, "max_results") || args_value(args, "num_results")
  max_results = bounded_max_results(requested_limit)
  requested_provider = args_value(args, "provider") || config_value("provider") || "auto"
  provider = normalize_provider(requested_provider)
  return "Error: provider must be one of: #{PROVIDERS.join(", ")}" unless provider

  recency = args_value(args, "recency_filter") || args_value(args, "recencyFilter")
  domains = args_value(args, "domain_filter") || args_value(args, "domainFilter")
  options = {
    max_results: max_results,
    recency_filter: normalize_recency(recency),
    domain_filter: normalize_domain_filter(domains),
    provider: provider
  }

  failures = []
  any_results = false
  sections = queries.map do |query|
    response, error = search_query(query, options)
    if successful_response?(response)
      any_results = true
    elsif error
      # Only failed queries are listed in the final error message.
      failures << "#{query}: #{error}"
    end
    format_query_results(query, response, error)
  end
  sections.unshift("# Web search")

  return truncate_output(sections.join("\n\n")) if any_results

  bullet_list = failures.map { |failure| "- #{failure}" }.join("\n")
  "Error: web_search found no results\n#{bullet_list}".strip
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
# Tries each provider in fallback order until one returns an answer or results.
#
# @param query [String]
# @param options [Hash] normalized options built by #search
# @return [Array(SearchResponse, String), Array(nil, String)] the first successful
#   response plus a "; "-joined note about providers that failed before it (nil when
#   none did), or nil plus the joined errors when every provider failed
def search_query(query, options)
  errors = []
  provider_order(options[:provider]).each do |provider|
    response =
      case provider
      when "exa" then exa_search(query, options)
      when "perplexity" then perplexity_search(query, options)
      when "gemini" then gemini_search(query, options)
      when "legacy" then legacy_search(query, options)
      end
    if successful_response?(response)
      return [response, errors.empty? ? nil : errors.join("; ")]
    end

    errors << "#{provider}: no results"
  rescue StandardError => e
    # Provider failures are recorded, with secrets masked, and the next provider is tried.
    errors << "#{provider}: #{redact_secrets(e.message)}"
  end

  [nil, errors.join("; ")]
end
|
|
107
|
+
|
|
108
|
+
# Expands a provider name into the ordered list of backends to try.
#
# "auto" starts with Exa, adds Perplexity and Gemini only when model providers are
# allowed and the matching API key is set, and always ends with the legacy scrapers.
# "duckduckgo" is an alias for the legacy path; any other name is tried on its own.
#
# @param provider [String]
# @return [Array<String>]
def provider_order(provider)
  return ["legacy"] if provider == "duckduckgo"
  return [provider] unless provider == "auto"

  model_providers =
    if allow_model_provider_fallback?
      %w[perplexity gemini].select { |name| api_key(name) }
    else
      []
    end
  ["exa", *model_providers, "legacy"]
end
|
|
124
|
+
|
|
125
|
+
# Searches Exa, using the keyed REST API when a key is configured and the keyless
# MCP endpoint otherwise.
#
# @raise [StandardError] when the MCP endpoint fails, either directly or as the fallback
def exa_search(query, options)
  key = api_key("exa")
  key ? exa_api_search(query, options, key) : exa_mcp_search(query, options)
rescue StandardError
  # Without a key the failure came from MCP itself, so nothing is left to try.
  raise if key.nil?

  # A configured key should not make the no-key path worse; fall back to Exa MCP.
  exa_mcp_search(query, options)
end
|
|
136
|
+
|
|
137
|
+
# Searches through the Exa MCP "web_search_exa" tool and parses its text payload.
#
# @return [SearchResponse] with an answer stitched together from the result excerpts
def exa_mcp_search(query, options)
  limit = options[:max_results]
  arguments = {
    "query" => enriched_query(query, options),
    "numResults" => limit,
    "livecrawl" => "fallback",
    "type" => "auto",
    "contextMaxCharacters" => 3000
  }
  results = parse_exa_mcp_results(call_exa_mcp("web_search_exa", arguments), limit)
  SearchResponse.new(answer: answer_from_results(results), results: results, provider: "exa")
end
|
|
151
|
+
|
|
152
|
+
# Invokes an Exa MCP tool over JSON-RPC and returns its first text content item.
#
# @param tool_name [String]
# @param arguments [Hash] tool arguments, sent as-is
# @return [String] non-blank text
# @raise [RuntimeError] on a non-2xx status, an unparseable body, a JSON-RPC error,
#   a tool-level error, or blank content
def call_exa_mcp(tool_name, arguments)
  rpc_request = {
    "jsonrpc" => "2.0",
    "id" => 1,
    "method" => "tools/call",
    "params" => { "name" => tool_name, "arguments" => arguments }
  }
  response = @http_client.post_json(
    EXA_MCP_URL,
    body: rpc_request,
    headers: { "Accept" => "application/json, text/event-stream", "Content-Type" => "application/json" }
  )
  raise "Exa MCP failed with HTTP #{response.code}" unless success?(response)

  parsed = parse_mcp_rpc_response(response.body.to_s)
  raise "Exa MCP returned an empty response" unless parsed

  rpc_error = parsed["error"]
  raise "Exa MCP error: #{rpc_error["message"] || "unknown error"}" if rpc_error.is_a?(Hash)

  # Picks the "text" field of the first text-typed content item, or nil.
  first_text = lambda do |content|
    Array(content).find { |item| item.is_a?(Hash) && item["type"] == "text" }.to_h["text"]
  end

  result = parsed["result"]
  if result.is_a?(Hash) && result["isError"]
    message = first_text.call(result["content"]).to_s
    raise(message.empty? ? "Exa MCP returned an error" : message)
  end

  text = first_text.call(result.to_h["content"]).to_s
  raise "Exa MCP returned empty content" if text.strip.empty?

  text
end
|
|
185
|
+
|
|
186
|
+
# Extracts the JSON-RPC envelope from an MCP reply.
#
# "data:" lines are tried first, in order, as in a server-sent-event stream. If none
# of them holds an envelope, the whole body is parsed as a single JSON document.
#
# @param body [String]
# @return [Hash, nil] the first Hash carrying a "result" or "error" key, else nil
def parse_mcp_rpc_response(body)
  envelope = ->(value) { value.is_a?(Hash) && (value.key?("result") || value.key?("error")) }

  body.each_line do |raw_line|
    line = raw_line.strip
    next unless line.start_with?("data:")

    payload = line.delete_prefix("data:").strip
    next if payload.empty? || payload == "[DONE]"

    begin
      event = JSON.parse(payload)
    rescue JSON::ParserError
      # A malformed event is skipped; later events may still be valid.
      next
    end
    return event if envelope.call(event)
  end

  document = JSON.parse(body)
  envelope.call(document) ? document : nil
rescue JSON::ParserError
  nil
end
|
|
205
|
+
|
|
206
|
+
# Parses the plain-text listing returned by Exa MCP into results.
#
# The listing is split into records at each line starting with "Title: ". A record
# needs a "URL: " line to count. Its excerpt is whatever follows "Text: ", or failing
# that the "Highlights:" section, minus a trailing "---" separator.
#
# @param text [String]
# @param max_results [Integer]
# @return [Array<Result>] at most max_results entries
def parse_exa_mcp_results(text, max_results)
  records = text.split(/(?=^Title: )/).map(&:strip).reject(&:empty?)
  results = records.each_with_object([]) do |record, found|
    url = record[/^URL: (.+)/, 1].to_s.strip
    next if url.empty?

    title = record[/^Title: (.+)/, 1].to_s.strip
    text_at = record.index("\nText: ")
    highlights = text_at ? nil : record.match(/\nHighlights:\s*\n/)
    body =
      if text_at
        # 7 == "\nText: ".length
        record[(text_at + 7)..].to_s.strip
      elsif highlights
        record[highlights.end(0)..].to_s.strip
      else
        ""
      end
    body = body.sub(/\n---\s*\z/, "").strip
    found << Result.new(
      title: title.empty? ? url : title,
      url: url,
      excerpt: truncate_text(body, MAX_EXCERPT_CHARS),
      provider: "exa"
    )
  end
  results.first(max_results)
end
|
|
225
|
+
|
|
226
|
+
# Chooses between the two keyed Exa endpoints.
# The answer endpoint is used only for an unfiltered request with the default result
# count; any recency filter, domain filter or custom count goes to structured search.
def exa_api_search(query, options, key)
  plain_request = !options[:recency_filter] &&
                  options[:domain_filter].none? &&
                  options[:max_results] == DEFAULT_MAX_RESULTS
  return exa_api_answer_search(query, key) if plain_request

  exa_api_structured_search(query, options, key)
end
|
|
233
|
+
|
|
234
|
+
# Calls the Exa answer endpoint and maps its citations to results.
#
# @return [SearchResponse] answer truncated to MAX_ANSWER_CHARS, with citations as sources
# @raise [RuntimeError] on a non-2xx status (message carries the first 300 body characters)
# @raise [JSON::ParserError] when the body is not valid JSON
def exa_api_answer_search(query, key)
  request_headers = { "x-api-key" => key, "Content-Type" => "application/json" }
  response = @http_client.post_json(
    EXA_ANSWER_URL,
    body: { "query" => query, "text" => true },
    headers: request_headers
  )
  unless success?(response)
    raise "Exa API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  payload = JSON.parse(response.body.to_s)
  sources = results_from_exa_records(Array(payload["citations"]), DEFAULT_MAX_RESULTS)
  SearchResponse.new(
    answer: truncate_text(payload["answer"], MAX_ANSWER_CHARS),
    results: sources,
    provider: "exa"
  )
end
|
|
246
|
+
|
|
247
|
+
# Calls the Exa search endpoint with result count, domain and recency constraints.
#
# @return [SearchResponse] with an answer stitched together from the result excerpts
# @raise [RuntimeError] on a non-2xx status (message carries the first 300 body characters)
# @raise [JSON::ParserError] when the body is not valid JSON
def exa_api_structured_search(query, options, key)
  limit = options[:max_results]
  recency = options[:recency_filter]

  body = {
    "query" => query,
    "type" => "auto",
    "numResults" => limit,
    "contents" => { "text" => { "maxCharacters" => 3000 }, "highlights" => true }
  }
  body.update(exa_domain_filters(options[:domain_filter]))
  body["startPublishedDate"] = recency_start_date(recency) if recency

  response = @http_client.post_json(
    EXA_SEARCH_URL,
    body: body,
    headers: { "x-api-key" => key, "Content-Type" => "application/json" }
  )
  unless success?(response)
    raise "Exa API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  payload = JSON.parse(response.body.to_s)
  results = results_from_exa_records(Array(payload["results"]), limit)
  SearchResponse.new(answer: answer_from_results(results), results: results, provider: "exa")
end
|
|
268
|
+
|
|
269
|
+
# Converts Exa API records (search results or answer citations) into results.
#
# Non-Hash records and records without a URL are skipped. The limit is applied after
# that filtering, so unusable leading records no longer use up result slots.
# The excerpt comes from "text", else the joined "highlights", else "snippet".
#
# @param records [Array] raw records from the API
# @param max_results [Integer]
# @return [Array<Result>] at most max_results entries
def results_from_exa_records(records, max_results)
  usable = records.lazy.filter_map do |record|
    next unless record.is_a?(Hash)

    url = record["url"].to_s
    next if url.empty?

    text =
      if record["text"].is_a?(String)
        record["text"]
      elsif record["highlights"].is_a?(Array)
        record["highlights"].join(" ")
      else
        record["snippet"].to_s
      end
    title = record["title"].to_s
    Result.new(
      title: title.empty? ? url : clean_text(title),
      url: url,
      excerpt: truncate_text(clean_text(text), MAX_EXCERPT_CHARS),
      provider: "exa"
    )
  end
  # Lazy evaluation stops converting records once the limit is reached.
  usable.first(max_results)
end
|
|
291
|
+
|
|
292
|
+
# Asks Perplexity chat completions and maps the reply's citations to results.
#
# Citations may be bare URL strings or Hashes with "url", "title" and "snippet".
# Untitled sources are labelled "Source N", where N is the citation's 1-based position.
#
# @return [SearchResponse]
# @raise [RuntimeError] when no API key is configured or the API answers non-2xx
# @raise [JSON::ParserError] when the body is not valid JSON
def perplexity_search(query, options)
  key = api_key("perplexity")
  raise "Perplexity API key not configured" unless key

  recency = options[:recency_filter]
  domains = options[:domain_filter]
  body = {
    "model" => config_value("perplexity_model") || "sonar",
    "messages" => [{ "role" => "user", "content" => query }],
    "max_tokens" => MODEL_PROVIDER_MAX_TOKENS,
    "return_related_questions" => false
  }
  body["search_recency_filter"] = recency if recency
  body["search_domain_filter"] = domains.first(20) unless domains.empty?

  response = @http_client.post_json(
    PERPLEXITY_API_URL,
    body: body,
    headers: { "Authorization" => "Bearer #{key}", "Content-Type" => "application/json" }
  )
  unless success?(response)
    raise "Perplexity API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  data = JSON.parse(response.body.to_s)
  answer = truncate_text(Array(data["choices"]).first.to_h.dig("message", "content"), MAX_ANSWER_CHARS)
  citations = Array(data["citations"]).first(options[:max_results])
  results = citations.each.with_index(1).filter_map do |citation, position|
    placeholder = "Source #{position}"
    case citation
    when String
      Result.new(title: placeholder, url: citation, excerpt: "", provider: "perplexity")
    when Hash
      url = citation["url"].to_s
      next if url.empty?

      title = citation["title"].to_s
      Result.new(
        title: title.empty? ? placeholder : title,
        url: url,
        excerpt: truncate_text(citation["snippet"], MAX_EXCERPT_CHARS),
        provider: "perplexity"
      )
    end
  end
  SearchResponse.new(answer: answer, results: results, provider: "perplexity")
end
|
|
324
|
+
|
|
325
|
+
# Asks Gemini with the google_search tool enabled and maps grounding chunks to results.
#
# @return [SearchResponse] answer is the candidate's text parts joined by newlines
# @raise [RuntimeError] when no API key is configured or the API answers non-2xx
# @raise [JSON::ParserError] when the body is not valid JSON
def gemini_search(query, options)
  key = api_key("gemini")
  raise "Gemini API key not configured" unless key

  prompt = enriched_query(query, options)
  model = config_value("gemini_model") || DEFAULT_GEMINI_MODEL
  # NOTE(review): the key is sent as a query parameter, so it is part of the request URL.
  endpoint = "#{GEMINI_API_BASE}/models/#{CGI.escape(model)}:generateContent?key=#{CGI.escape(key)}"
  request_body = {
    "contents" => [{ "parts" => [{ "text" => prompt }] }],
    "tools" => [{ "google_search" => {} }]
  }
  response = @http_client.post_json(
    endpoint,
    body: request_body,
    headers: { "Content-Type" => "application/json" }
  )
  unless success?(response)
    raise "Gemini API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  candidate = Array(JSON.parse(response.body.to_s)["candidates"]).first.to_h
  answer_parts = Array(candidate.dig("content", "parts")).map { |part| part.to_h["text"] }.compact
  answer = truncate_text(answer_parts.join("\n"), MAX_ANSWER_CHARS)

  chunks = Array(candidate.dig("groundingMetadata", "groundingChunks")).first(options[:max_results])
  results = chunks.filter_map do |chunk|
    web = chunk.to_h["web"].to_h
    url = web["uri"].to_s
    next if url.empty?

    title = web["title"].to_s
    Result.new(title: title.empty? ? url : title, url: url, excerpt: "", provider: "gemini")
  end
  SearchResponse.new(answer: answer, results: results, provider: "gemini")
end
|
|
354
|
+
|
|
355
|
+
# Searches via the scraping backends (DuckDuckGo, then SearXNG).
#
# @return [SearchResponse] with an empty answer; the provider is taken from the first
#   hit, and the note carries any error from a backend that was tried and skipped
# @raise [RuntimeError] with the combined backend errors when nothing was found
def legacy_search(query, options)
  results, error = legacy_search_query(
    query_with_domain_filter(query, options[:domain_filter]),
    options[:max_results],
    options[:recency_filter]
  )
  raise error if results.empty? && error

  provider = results.first&.provider || "legacy"
  SearchResponse.new(answer: "", results: results, provider: provider, note: error)
end
|
|
362
|
+
|
|
363
|
+
# Tries DuckDuckGo first and falls back to SearXNG when it fails or finds nothing.
#
# @return [Array(Array<Result>, String), Array(Array<Result>, nil)] the hits plus a
#   "; "-joined description of what went wrong along the way (nil when nothing did)
def legacy_search_query(query, max_results, recency_filter)
  duckduckgo_error =
    begin
      hits = duckduckgo_search(query, max_results, recency_filter)
      return [hits, nil] unless hits.empty?

      "DuckDuckGo returned no results"
    rescue StandardError => e
      e.message
    end

  searxng_results, searxng_error = searxng_search(query, max_results, recency_filter)
  combined = [duckduckgo_error, searxng_error].compact.join("; ")
  [searxng_results, combined.empty? ? nil : combined]
end
|
|
377
|
+
|
|
378
|
+
# Scrapes the DuckDuckGo HTML endpoint.
#
# Nodes without a link, a title or a URL are discarded before the limit is applied,
# so unusable leading nodes no longer shrink the result list.
#
# @param query [String]
# @param max_results [Integer]
# @param recency_filter [String, nil] normalized recency name, mapped to the "df" form field
# @return [Array<Result>] at most max_results entries
# @raise [RuntimeError] on a non-2xx status
def duckduckgo_search(query, max_results, recency_filter)
  form = { "q" => query, "kl" => "wt-wt" }
  form["df"] = duckduckgo_recency(recency_filter) if recency_filter
  response = @http_client.post(
    DUCKDUCKGO_URL,
    form: form,
    headers: browser_headers("text/html")
  )
  raise "DuckDuckGo search failed with HTTP #{response.code}" unless success?(response)

  document = Nokogiri::HTML(response.body.to_s)
  results = document.css("div.result").filter_map do |node|
    link = node.at_css("a.result__a") || node.at_css("h2 a") || node.at_css("a[href]")
    next unless link

    snippet = node.at_css("a.result__snippet") || node.at_css(".result__snippet")
    result = Result.new(
      title: clean_text(link.text),
      url: clean_result_url(link["href"].to_s),
      excerpt: clean_text(snippet&.text),
      provider: "duckduckgo"
    )
    result unless result.title.empty? || result.url.empty?
  end
  results.first(max_results)
end
|
|
401
|
+
|
|
402
|
+
# Queries the configured SearXNG instances in order and stops at the first with results.
#
# @return [Array(Array<Result>, nil), Array(Array, String)] the hits and nil, or an
#   empty list plus the "; "-joined failure of every instance
def searxng_search(query, max_results, recency_filter)
  failures = @searxng_instances.each_with_object([]) do |instance, errors|
    hits = searxng_instance_search(instance, query, max_results, recency_filter)
    return [hits, nil] unless hits.empty?

    errors << "#{instance} returned no results"
  rescue StandardError => e
    errors << "#{instance}: #{e.message}"
  end

  [[], failures.join("; ")]
end
|
|
418
|
+
|
|
419
|
+
# Queries one SearXNG instance, preferring its JSON API and falling back to HTML scraping.
#
# @return [Array<Result>] non-empty results
# @raise [RuntimeError] "<json failure>; <html failure>" when neither mode yields results
def searxng_instance_search(instance, query, max_results, recency_filter)
  json_error =
    begin
      json_hits = searxng_json_search(instance, query, max_results, recency_filter)
      return json_hits unless json_hits.empty?

      "SearXNG JSON search returned no results"
    rescue StandardError => e
      e.message
    end

  html_error =
    begin
      html_hits = searxng_html_search(instance, query, max_results, recency_filter)
      return html_hits unless html_hits.empty?

      "SearXNG HTML search returned no results"
    rescue StandardError => e
      e.message
    end

  raise "#{json_error}; #{html_error}"
end
|
|
438
|
+
|
|
439
|
+
# Queries a SearXNG instance through its JSON output format.
#
# @return [Array<Result>]
# @raise [RuntimeError] on a non-2xx status
# @raise [JSON::ParserError] when the body is not valid JSON
def searxng_json_search(instance, query, max_results, recency_filter)
  params = { q: query, format: "json" }
  params.store(:time_range, recency_filter) if recency_filter

  endpoint = searxng_search_uri(instance, params).to_s
  response = @http_client.get(endpoint, headers: { "Accept" => "application/json" })
  raise "SearXNG search failed with HTTP #{response.code}" unless success?(response)

  payload = JSON.parse(response.body.to_s)
  results_from_records(Array(payload["results"]), max_results)
end
|
|
449
|
+
|
|
450
|
+
# Scrapes a SearXNG instance's HTML results page.
#
# Each result node with a link becomes a title/url/content record, and the records
# go through the same conversion as JSON results.
#
# @return [Array<Result>]
# @raise [RuntimeError] on a non-2xx status
def searxng_html_search(instance, query, max_results, recency_filter)
  params = { q: query }
  params.store(:time_range, recency_filter) if recency_filter

  endpoint = searxng_search_uri(instance, params).to_s
  response = @http_client.get(endpoint, headers: browser_headers("text/html"))
  raise "SearXNG HTML search failed with HTTP #{response.code}" unless success?(response)

  nodes = Nokogiri::HTML(response.body.to_s).css("article.result, div.result")
  records = nodes.filter_map do |node|
    link = node.at_css("h3 a, a[href]")
    next unless link

    { "title" => link.text, "url" => link["href"], "content" => node.at_css(".content, p")&.text }
  end
  results_from_records(records, max_results)
end
|
|
470
|
+
|
|
471
|
+
# Builds the "<instance>/search?<params>" URI for a SearXNG instance.
#
# @param instance [String] base URL, with or without a trailing slash
# @param params [Hash] query parameters, form-encoded
# @return [URI]
def searxng_search_uri(instance, params)
  base = instance.end_with?("/") ? instance : "#{instance}/"
  URI.join(base, "search").tap do |endpoint|
    endpoint.query = URI.encode_www_form(params)
  end
end
|
|
476
|
+
|
|
477
|
+
# Converts raw SearXNG records into results.
#
# Records that cannot be converted are dropped before the limit is applied, so
# unusable leading records no longer shrink the result list.
#
# @param records [Array] raw record Hashes
# @param max_results [Integer]
# @return [Array<Result>] at most max_results entries
def results_from_records(records, max_results)
  # Lazy evaluation stops converting records once the limit is reached.
  records.lazy.filter_map { |record| result_from_hash(record, "searxng") }.first(max_results)
end
|
|
482
|
+
|
|
483
|
+
# Converts one raw record into a Result.
#
# @param record [Object] expected to be a Hash with "title", "url" and one of
#   "content", "snippet" or "description"
# @param provider [String] provider label stored on the result
# @return [Result, nil] nil for non-Hash input or when the title or URL is blank
def result_from_hash(record, provider)
  return nil unless record.is_a?(Hash)

  title = clean_text(record["title"].to_s)
  url = clean_result_url(record["url"].to_s)
  return nil if title.empty? || url.empty?

  raw_excerpt = record["content"] || record["snippet"] || record["description"]
  Result.new(
    title: title,
    url: url,
    excerpt: truncate_text(clean_text(raw_excerpt.to_s), MAX_EXCERPT_CHARS),
    provider: provider
  )
end
|
|
493
|
+
|
|
494
|
+
# Renders one query's outcome as a text section.
#
# @param query [String]
# @param response [SearchResponse, nil]
# @param error [String, nil] failures collected while trying providers
# @return [String] a heading followed by either "No results. <error>" or the optional
#   fallback note, the answer, and the numbered sources
def format_query_results(query, response, error)
  heading = "## Query: #{query}"
  notes = [error, response&.note].compact.reject(&:empty?).join("; ")
  return [heading, "No results. #{error}"].join("\n") unless successful_response?(response)

  lines = [heading]
  lines << "Provider fallback note: #{notes}" unless notes.empty?

  answer = response.answer.to_s.strip
  has_answer = !answer.empty?
  lines.push("Provider: #{response.provider}", "Answer:", answer) if has_answer

  sources = response.results || []
  # The "Sources:" label is only needed to separate the list from an answer.
  lines << "Sources:" if has_answer && !sources.empty?
  sources.each.with_index(1) do |result, position|
    lines << "#{position}. #{result.title}"
    lines << " URL: #{result.url}"
    lines << " Provider: #{result.provider}"
    lines << " Excerpt: #{result.excerpt}" unless result.excerpt.to_s.empty?
  end
  lines.join("\n")
end
|
|
522
|
+
|
|
523
|
+
# Builds a pseudo-answer from result excerpts for providers that return only hits.
#
# @param results [Array<Result>]
# @return [String] one "excerpt / Source: title (url)" paragraph per result that has
#   an excerpt, separated by blank lines; empty when no result has one
def answer_from_results(results)
  with_excerpt = results.reject { |result| result.excerpt.to_s.strip.empty? }
  paragraphs = with_excerpt.map do |result|
    "#{result.excerpt.to_s.strip}\nSource: #{result.title} (#{result.url})"
  end
  paragraphs.join("\n\n")
end
|
|
531
|
+
|
|
532
|
+
# Tells whether a response carries anything worth showing.
#
# @param response [SearchResponse, nil]
# @return [Boolean, nil] true when there is non-blank answer text or at least one
#   result; the falsy input itself when no response was given
def successful_response?(response)
  return response unless response

  has_answer = !response.answer.to_s.strip.empty?
  has_answer || !Array(response.results).empty?
end
|
|
535
|
+
|
|
536
|
+
# Normalizes a requested result count.
#
# @param value [#to_i, nil] raw value from the tool arguments
# @return [Integer] DEFAULT_MAX_RESULTS when the value is zero, negative or
#   non-numeric, otherwise the value capped at MAX_MAX_RESULTS
def bounded_max_results(value)
  requested = value.to_i
  requested = DEFAULT_MAX_RESULTS unless requested.positive?
  requested > MAX_MAX_RESULTS ? MAX_MAX_RESULTS : requested
end
|
|
541
|
+
|
|
542
|
+
# Validates the "queries" argument.
#
# @param queries [Object]
# @return [Boolean] true for an Array of 1..MAX_QUERIES non-blank Strings
def valid_queries?(queries)
  return false unless queries.is_a?(Array)
  return false unless (1..MAX_QUERIES).cover?(queries.length)

  queries.none? { |query| !query.is_a?(String) || query.strip.empty? }
end
|
|
545
|
+
|
|
546
|
+
# Reads a tool argument by name, accepting string or symbol keys.
#
# @param args [Object] the argument Hash; anything else yields nil
# @param key [String]
# @return [Object, nil] the string-keyed value if truthy, else the symbol-keyed one
def args_value(args, key)
  args.is_a?(Hash) ? (args[key] || args[key.to_sym]) : nil
end
|
|
551
|
+
|
|
552
|
+
# Tells whether an HTTP response has a 2xx status.
#
# @param response [#code] status may be a String or an Integer
# @return [Boolean]
def success?(response)
  (200..299).cover?(response.code.to_i)
end
|
|
555
|
+
|
|
556
|
+
# Collapses every whitespace run to a single space and trims both ends.
#
# @param text [#to_s, nil]
# @return [String]
def clean_text(text)
  collapsed = text.to_s.gsub(/\s+/, " ")
  collapsed.strip
end
|
|
559
|
+
|
|
560
|
+
# Trims text and shortens it to a character budget.
#
# @param text [#to_s, nil]
# @param max_chars [Integer]
# @return [String] the stripped text, or its first max_chars characters (trailing
#   whitespace removed) followed by a truncation notice on a new line
def truncate_text(text, max_chars)
  value = text.to_s.strip
  if value.length > max_chars
    "#{value[0, max_chars].rstrip}\n... truncated to #{max_chars} characters"
  else
    value
  end
end
|
|
566
|
+
|
|
567
|
+
# Normalizes a scraped result link.
#
# DuckDuckGo redirect links (host "duckduckgo.com", path "/l/") are unwrapped to the
# target carried in their "uddg" parameter. Anything else, including text that does
# not parse as a URI, is returned stripped but otherwise unchanged.
#
# @param url [#to_s]
# @return [String]
def clean_result_url(url)
  text = url.to_s.strip
  begin
    uri = URI.parse(text)
  rescue URI::InvalidURIError
    return text
  end
  return text unless uri.host == "duckduckgo.com" && uri.path == "/l/"

  target = URI.decode_www_form(uri.query.to_s).to_h["uddg"].to_s
  target.empty? ? text : target
end
|
|
578
|
+
|
|
579
|
+
# Request headers that imitate a desktop Chrome page navigation, for HTML scraping.
#
# @param accept [String] value for the Accept header
# @return [Hash{String => String}]
def browser_headers(accept)
  user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
  fetch_metadata = {
    "Sec-Fetch-Dest" => "document",
    "Sec-Fetch-Mode" => "navigate",
    "Sec-Fetch-Site" => "none",
    "Sec-Fetch-User" => "?1"
  }
  {
    "Accept" => accept,
    "Accept-Language" => "en-US,en;q=0.9",
    "User-Agent" => user_agent
  }.merge(fetch_metadata)
end
|
|
590
|
+
|
|
591
|
+
# Caps the formatted output at the configured byte budget.
#
# The cut is made on a byte boundary. A multi-byte character severed by the cut is
# dropped rather than replaced with U+FFFD, so the kept text never exceeds the budget
# and never ends in a stray replacement character. The notice is appended after it.
#
# @param output [String]
# @return [String] the output unchanged when it fits, otherwise its leading bytes
#   followed by a truncation notice on a new line
def truncate_output(output)
  return output if output.bytesize <= @max_output_bytes

  # scrub("") removes the invalid bytes left by a mid-character cut.
  truncated = output.byteslice(0, @max_output_bytes).to_s.scrub("")
  "#{truncated}\n... truncated to #{@max_output_bytes} bytes"
end
|
|
597
|
+
|
|
598
|
+
# Returns the configuration, loading it through ConfigFiles.read_config on first use.
# A failed load is remembered as an empty Hash.
#
# @return [Hash]
def config
  @config ||= ConfigFiles.read_config
rescue StandardError
  @config = {}
end
|
|
605
|
+
|
|
606
|
+
# Returns the web-search section of the configuration.
# The first truthy value among "web_search", "webSearch", "web_research" and
# "webResearch" is used.
#
# @return [Hash] the section, or an empty Hash when it is missing or not a Hash
def web_config
  section_names = %w[web_search webSearch web_research webResearch]
  section = section_names.lazy.map { |name| config[name] }.find(&:itself) || {}
  section.is_a?(Hash) ? section : {}
end
|
|
610
|
+
|
|
611
|
+
# Looks up a web-search setting under every supported spelling.
#
# Search order, first present key wins (even when its value is nil):
#   1. the web-search section: snake_case, then camelCase
#   2. top level: "web_search_<key>", "web_research_<key>", snake_case, camelCase
#
# @param key [String, Symbol] snake_case setting name
# @return [Object, nil]
def config_value(key)
  snake = key.to_s
  camel = snake.gsub(/_([a-z])/) { |match| match[1].upcase }

  section = web_config
  [snake, camel].each do |name|
    return section[name] if section.key?(name)
  end

  root = config
  ["web_search_#{snake}", "web_research_#{snake}", snake, camel].each do |name|
    return root[name] if root.key?(name)
  end

  nil
end
|
|
625
|
+
|
|
626
|
+
# Reads a setting as a tri-state boolean.
#
# @param key [String]
# @return [Boolean, nil] real booleans pass through; "1/true/yes/on" and
#   "0/false/no/off" (any case, surrounding whitespace ignored) are converted;
#   anything else, including a missing setting, is nil
def boolean_config_value(key)
  value = config_value(key)
  case value
  when true, false
    value
  else
    case value.to_s.strip.downcase
    when "1", "true", "yes", "on" then true
    when "0", "false", "no", "off" then false
    end
  end
end
|
|
636
|
+
|
|
637
|
+
# Whether "auto" may fall back to model providers (Perplexity, Gemini).
# This is opt-in: only an explicit true for "allow_model_providers" enables it.
#
# @return [Boolean]
def allow_model_provider_fallback?
  opt_in = boolean_config_value("allow_model_providers")
  opt_in == true
end
|
|
640
|
+
|
|
641
|
+
# Resolves a provider's API key.
# The "<PROVIDER>_API_KEY" environment variable wins over the "<provider>_api_key" setting.
#
# @param provider [String] e.g. "exa"
# @return [String, nil] the trimmed key, or nil when neither source holds a non-blank value
def api_key(provider)
  from_env = ENV["#{provider.upcase}_API_KEY"].to_s.strip
  return from_env unless from_env.empty?

  from_config = config_value("#{provider}_api_key").to_s.strip
  from_config unless from_config.empty?
end
|
|
649
|
+
|
|
650
|
+
# Masks credentials in an error message before it is shown.
# Every configured provider key is replaced literally, then any "key=<value>" query
# parameter and any "Bearer <token>" are masked.
#
# @param message [#to_s]
# @return [String] a new string; the input is not modified
def redact_secrets(message)
  secrets = %w[exa perplexity gemini].filter_map { |provider| api_key(provider) }.reject(&:empty?)
  masked = secrets.reduce(message.to_s.dup) { |text, secret| text.gsub(secret, "[REDACTED]") }
  masked
    .gsub(/key=([^\s&]+)/, "key=[REDACTED]")
    .gsub(/Bearer\s+[^\s]+/, "Bearer [REDACTED]")
end
|
|
660
|
+
|
|
661
|
+
# Canonicalizes a provider name.
#
# @param value [#to_s]
# @return [String, nil] the trimmed, lower-cased name when it is listed in PROVIDERS
def normalize_provider(value)
  candidate = value.to_s.strip.downcase
  candidate if PROVIDERS.include?(candidate)
end
|
|
665
|
+
|
|
666
|
+
# Canonicalizes a recency filter.
#
# @param value [#to_s]
# @return [String, nil] "day", "week", "month" or "year"; nil for anything else
def normalize_recency(value)
  candidate = value.to_s.strip.downcase
  candidate if ["day", "week", "month", "year"].include?(candidate)
end
|
|
670
|
+
|
|
671
|
+
# Coerces the domain filter argument into a list of non-blank, trimmed strings.
#
# @param value [Array, String, nil]
# @return [Array<String>]
def normalize_domain_filter(value)
  Array(value).map { |domain| domain.to_s.strip }.reject(&:empty?)
end
|
|
677
|
+
|
|
678
|
+
# Folds the domain and recency filters into the query text, for providers that have
# no structured filter parameters.
#
# @param query [String]
# @param options [Hash] reads :domain_filter and :recency_filter
# @return [String] the query with site: operators and, when a recency filter is set,
#   a phrase such as "past week" appended
def enriched_query(query, options)
  parts = [query_with_domain_filter(query, options[:domain_filter])]
  recency = options[:recency_filter]
  if recency
    phrases = { "day" => "past 24 hours", "week" => "past week", "month" => "past month", "year" => "past year" }
    parts << phrases[recency]
  end
  parts.join(" ")
end
|
|
686
|
+
|
|
687
|
+
# Appends site: operators for the domain filter to a query.
#
# A leading "-" marks an exclusion and becomes "-site:<domain>". A bare "-" carries
# no domain and is skipped, so it no longer emits a dangling "-site:" operator.
#
# @param query [String]
# @param domain_filter [Array<String>] normalized domain entries
# @return [String] the query itself when no usable entry exists, otherwise the query
#   followed by the operators, space-separated
def query_with_domain_filter(query, domain_filter)
  terms = domain_filter.filter_map do |domain|
    if domain.start_with?("-")
      excluded = domain[1..]
      "-site:#{excluded}" unless excluded.empty?
    else
      "site:#{domain}"
    end
  end
  return query if terms.empty?

  [query, *terms].join(" ")
end
|
|
695
|
+
|
|
696
|
+
# Splits the domain filter into Exa's include and exclude lists.
#
# @param domain_filter [Array<String>] entries starting with "-" are exclusions
# @return [Hash] "includeDomains" and/or "excludeDomains", each present only when non-empty
def exa_domain_filters(domain_filter)
  excluded, included = domain_filter.partition { |domain| domain.start_with?("-") }
  excluded = excluded.map { |domain| domain[1..] }.reject(&:empty?)

  filters = {}
  filters["includeDomains"] = included unless included.empty?
  filters["excludeDomains"] = excluded unless excluded.empty?
  filters
end
|
|
704
|
+
|
|
705
|
+
# Converts a recency filter into the UTC timestamp where the window starts.
#
# The timestamp is formatted with core Time#strftime, which gives the same
# "YYYY-MM-DDTHH:MM:SSZ" text as Time#iso8601 but needs no `require "time"`.
#
# @param filter [String] "day", "week", "month" (30 days) or "year" (365 days);
#   any other value yields the current time
# @return [String] ISO 8601 timestamp in UTC
def recency_start_date(filter)
  days = { "day" => 1, "week" => 7, "month" => 30, "year" => 365 }.fetch(filter, 0)
  (Time.now.utc - (days * 86_400)).strftime("%Y-%m-%dT%H:%M:%SZ")
end
|
|
709
|
+
|
|
710
|
+
# Maps a recency filter to the one-letter code sent in DuckDuckGo's "df" form field.
#
# @param filter [String]
# @return [String, nil] "d", "w", "m" or "y"; nil for an unknown filter
def duckduckgo_recency(filter)
  case filter
  when "day" then "d"
  when "week" then "w"
  when "month" then "m"
  when "year" then "y"
  end
end
|
|
713
|
+
|
|
714
|
+
# Default transport: a thin Net::HTTP wrapper that opens one connection per request
# and returns only the status code and body.
class NetHttpClient
  # Minimal response value: status code and body as returned by Net::HTTP.
  Response = Struct.new(:code, :body, keyword_init: true)

  # Performs a GET request.
  #
  # @param url [String]
  # @param headers [Hash{String => String}]
  # @return [Response]
  def get(url, headers: {})
    request(url, Net::HTTP::Get, headers: headers)
  end

  # Performs a POST request with a form-encoded body.
  #
  # @param url [String]
  # @param form [Hash] form fields
  # @param headers [Hash{String => String}]
  # @return [Response]
  def post(url, form:, headers: {})
    request(url, Net::HTTP::Post, headers: headers) { |outgoing| outgoing.set_form_data(form) }
  end

  # Performs a POST request whose body is the JSON encoding of `body`.
  # The Content-Type header is not set here; callers pass it in `headers`.
  #
  # @param url [String]
  # @param body [Object] JSON-serializable payload
  # @param headers [Hash{String => String}]
  # @return [Response]
  def post_json(url, body:, headers: {})
    request(url, Net::HTTP::Post, headers: headers) { |outgoing| outgoing.body = JSON.generate(body) }
  end

  private

  # Opens a connection (TLS for https URLs) with open and read timeouts, builds the
  # request, yields it so the caller can attach a body, and sends it.
  def request(url, request_class, headers: {})
    uri = URI.parse(url)
    connection_options = {
      use_ssl: uri.scheme == "https",
      open_timeout: HTTP_TIMEOUT_SECONDS,
      read_timeout: HTTP_TIMEOUT_SECONDS
    }
    Net::HTTP.start(uri.host, uri.port, **connection_options) do |http|
      outgoing = request_class.new(uri)
      headers.each { |name, value| outgoing[name] = value }
      yield outgoing if block_given?
      reply = http.request(outgoing)
      Response.new(code: reply.code, body: reply.body)
    end
  end
end
|
|
746
|
+
end
|
|
747
|
+
end
|