kward 0.66.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +9 -0
  3. data/CHANGELOG.md +12 -0
  4. data/Gemfile +8 -0
  5. data/Gemfile.lock +90 -0
  6. data/LICENSE +21 -0
  7. data/README.md +101 -0
  8. data/Rakefile +20 -0
  9. data/doc/authentication.md +105 -0
  10. data/doc/code-search.md +56 -0
  11. data/doc/configuration.md +310 -0
  12. data/doc/extensibility.md +186 -0
  13. data/doc/getting-started.md +127 -0
  14. data/doc/memory.md +192 -0
  15. data/doc/plugins.md +223 -0
  16. data/doc/releasing.md +36 -0
  17. data/doc/rpc.md +635 -0
  18. data/doc/usage.md +179 -0
  19. data/doc/web-search.md +28 -0
  20. data/exe/kward +5 -0
  21. data/kward.gemspec +33 -0
  22. data/lib/kward/agent.rb +234 -0
  23. data/lib/kward/ansi.rb +276 -0
  24. data/lib/kward/auth/file.rb +11 -0
  25. data/lib/kward/auth/github_oauth.rb +222 -0
  26. data/lib/kward/auth/openai_oauth.rb +323 -0
  27. data/lib/kward/auth/openrouter_api_key.rb +40 -0
  28. data/lib/kward/cancellation.rb +54 -0
  29. data/lib/kward/cli.rb +2122 -0
  30. data/lib/kward/clipboard.rb +84 -0
  31. data/lib/kward/compactor.rb +998 -0
  32. data/lib/kward/config_files.rb +564 -0
  33. data/lib/kward/conversation.rb +148 -0
  34. data/lib/kward/events.rb +13 -0
  35. data/lib/kward/export_path.rb +28 -0
  36. data/lib/kward/image_attachments.rb +331 -0
  37. data/lib/kward/markdown_transcript.rb +72 -0
  38. data/lib/kward/memory/manager.rb +652 -0
  39. data/lib/kward/message_access.rb +42 -0
  40. data/lib/kward/model/chat_invocation.rb +23 -0
  41. data/lib/kward/model/client.rb +875 -0
  42. data/lib/kward/model/context_overflow.rb +55 -0
  43. data/lib/kward/model/context_usage.rb +104 -0
  44. data/lib/kward/model/model_info.rb +188 -0
  45. data/lib/kward/model/retry_message.rb +11 -0
  46. data/lib/kward/model/stream_parser.rb +205 -0
  47. data/lib/kward/pan/index.html.erb +143 -0
  48. data/lib/kward/pan/server.rb +397 -0
  49. data/lib/kward/plugin_registry.rb +327 -0
  50. data/lib/kward/private_file.rb +18 -0
  51. data/lib/kward/prompt_interface.rb +2437 -0
  52. data/lib/kward/prompts/commands.rb +50 -0
  53. data/lib/kward/prompts/templates.rb +60 -0
  54. data/lib/kward/prompts.rb +58 -0
  55. data/lib/kward/resources/avatar_kward_logo.rb +48 -0
  56. data/lib/kward/resources/pixel_logo.rb +230 -0
  57. data/lib/kward/rpc/auth_manager.rb +265 -0
  58. data/lib/kward/rpc/config_manager.rb +58 -0
  59. data/lib/kward/rpc/prompt_bridge.rb +104 -0
  60. data/lib/kward/rpc/redactor.rb +47 -0
  61. data/lib/kward/rpc/server.rb +639 -0
  62. data/lib/kward/rpc/session_manager.rb +1122 -0
  63. data/lib/kward/rpc/tool_event_normalizer.rb +68 -0
  64. data/lib/kward/rpc/tool_metadata.rb +80 -0
  65. data/lib/kward/rpc/transcript_normalizer.rb +307 -0
  66. data/lib/kward/rpc/transport.rb +58 -0
  67. data/lib/kward/session_diff.rb +125 -0
  68. data/lib/kward/session_store.rb +493 -0
  69. data/lib/kward/skills/registry.rb +76 -0
  70. data/lib/kward/starter_pack_installer.rb +110 -0
  71. data/lib/kward/steering.rb +56 -0
  72. data/lib/kward/telemetry/logger.rb +195 -0
  73. data/lib/kward/telemetry/stats.rb +466 -0
  74. data/lib/kward/tools/ask_user_question.rb +107 -0
  75. data/lib/kward/tools/base.rb +45 -0
  76. data/lib/kward/tools/code_search.rb +65 -0
  77. data/lib/kward/tools/edit_file.rb +41 -0
  78. data/lib/kward/tools/list_directory.rb +21 -0
  79. data/lib/kward/tools/read_file.rb +30 -0
  80. data/lib/kward/tools/read_skill.rb +27 -0
  81. data/lib/kward/tools/registry.rb +117 -0
  82. data/lib/kward/tools/run_shell_command.rb +28 -0
  83. data/lib/kward/tools/search/code.rb +445 -0
  84. data/lib/kward/tools/search/web.rb +747 -0
  85. data/lib/kward/tools/tool_call.rb +87 -0
  86. data/lib/kward/tools/web_search.rb +48 -0
  87. data/lib/kward/tools/write_file.rb +29 -0
  88. data/lib/kward/transcript_export.rb +40 -0
  89. data/lib/kward/version.rb +4 -0
  90. data/lib/kward/workspace.rb +377 -0
  91. data/lib/kward.rb +6 -0
  92. data/lib/main.rb +3 -0
  93. metadata +232 -0
@@ -0,0 +1,747 @@
1
+ require "cgi"
2
+ require "json"
3
+ require "net/http"
4
+ require "nokogiri"
5
+ require "uri"
6
+ require_relative "../../config_files"
7
+
8
+ module Kward
9
+ class WebSearch
10
# Result-count limits: the default when the caller gives none, and the hard cap.
DEFAULT_MAX_RESULTS = 5
MAX_MAX_RESULTS = 20
# Largest number of queries accepted by a single search call.
MAX_QUERIES = 4
# Byte budget for the formatted tool output (8 KiB).
MAX_OUTPUT_BYTES = 8_192
# max_tokens sent to the Perplexity chat-completions request.
MODEL_PROVIDER_MAX_TOKENS = 512
# Character limits applied to provider answers and per-result excerpts.
MAX_ANSWER_CHARS = 2_000
MAX_EXCERPT_CHARS = 300
# Open/read timeout used by NetHttpClient, in seconds.
HTTP_TIMEOUT_SECONDS = 10

# Provider endpoints.
DUCKDUCKGO_URL = "https://html.duckduckgo.com/html/"
EXA_MCP_URL = "https://mcp.exa.ai/mcp"
EXA_ANSWER_URL = "https://api.exa.ai/answer"
EXA_SEARCH_URL = "https://api.exa.ai/search"
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
DEFAULT_GEMINI_MODEL = "gemini-2.5-flash"

# SearXNG instances tried in order when DuckDuckGo yields nothing.
PUBLIC_SEARXNG_INSTANCES = %w[
  https://searx.be
  https://search.inetol.net
  https://searx.tiekoetter.com
].freeze

# Accepted values for the "provider" argument / config entry.
PROVIDERS = ["auto", "exa", "perplexity", "gemini", "legacy", "duckduckgo"].freeze

# One search hit.
Result = Struct.new(:title, :url, :excerpt, :provider, keyword_init: true)
# What a provider returned for one query: optional answer text, hits, and an optional fallback note.
SearchResponse = Struct.new(:answer, :results, :provider, :note, keyword_init: true)
34
+
35
# Builds a searcher.
#
# http_client       - object the search methods call #get, #post and #post_json on.
# searxng_instances - base URLs tried in order by the SearXNG fallback.
# max_output_bytes  - byte cap applied to the formatted output of #search.
# config            - configuration hash; when nil it is loaded lazily by #config.
def initialize(
  http_client: NetHttpClient.new,
  searxng_instances: PUBLIC_SEARXNG_INSTANCES,
  max_output_bytes: MAX_OUTPUT_BYTES,
  config: nil
)
  @config = config
  @max_output_bytes = max_output_bytes
  @searxng_instances = searxng_instances
  @http_client = http_client
end
41
+
42
# Whether the tool is enabled. The "enabled" config entry wins when it parses
# as a boolean; otherwise the tool is available.
def available?
  flag = boolean_config_value("enabled")
  flag.nil? ? true : flag
end
48
+
49
# Runs every query in args["queries"] and returns one formatted text report.
#
# args - Hash (string or symbol keys) with "queries" and optional
#        "max_results"/"num_results", "provider",
#        "recency_filter"/"recencyFilter", "domain_filter"/"domainFilter".
#
# Returns the report string (truncated to the configured byte budget), or a
# string starting with "Error:" when the arguments are invalid or no query
# produced anything.
def search(args)
  queries = args_value(args, "queries")
  unless valid_queries?(queries)
    return "Error: queries must be an array with 1-#{MAX_QUERIES} strings"
  end

  requested_count = args_value(args, "max_results") || args_value(args, "num_results")
  max_results = bounded_max_results(requested_count)
  requested_provider = args_value(args, "provider") || config_value("provider") || "auto"
  provider = normalize_provider(requested_provider)
  return "Error: provider must be one of: #{PROVIDERS.join(", ")}" unless provider

  recency = args_value(args, "recency_filter") || args_value(args, "recencyFilter")
  domains = args_value(args, "domain_filter") || args_value(args, "domainFilter")
  options = {
    max_results: max_results,
    recency_filter: normalize_recency(recency),
    domain_filter: normalize_domain_filter(domains),
    provider: provider
  }

  sections = ["# Web search"]
  failures = []
  any_results = false

  queries.each do |query|
    response, error = search_query(query, options)
    if successful_response?(response)
      any_results = true
    elsif error
      failures << "#{query}: #{error}"
    end
    sections << format_query_results(query, response, error)
  end

  return truncate_output(sections.join("\n\n")) if any_results

  bullet_list = failures.map { |failure| "- #{failure}" }.join("\n")
  "Error: web_search found no results\n#{bullet_list}".strip
end
81
+
82
+ private
83
+
84
# Tries each provider in the chain for one query until one succeeds.
#
# Returns [response, note]: on success, note is nil or the joined errors of
# the providers that were tried first; on total failure, response is nil and
# note lists every provider error (with secrets redacted).
def search_query(query, options)
  backends = {
    "exa" => :exa_search,
    "perplexity" => :perplexity_search,
    "gemini" => :gemini_search,
    "legacy" => :legacy_search
  }
  problems = []

  provider_order(options[:provider]).each do |provider|
    backend = backends[provider]
    # A provider without a backend yields nil and is reported as "no results".
    response = backend ? send(backend, query, options) : nil
    if successful_response?(response)
      note = problems.empty? ? nil : problems.join("; ")
      return [response, note]
    end

    problems << "#{provider}: no results"
  rescue StandardError => e
    problems << "#{provider}: #{redact_secrets(e.message)}"
  end

  [nil, problems.join("; ")]
end
107
+
108
# Expands a provider name into the ordered list of backends to try.
# "auto" means Exa first, then (only when model-provider fallback is allowed)
# Perplexity and Gemini if they have API keys, then the legacy scrapers.
# "duckduckgo" is an alias for the legacy chain.
def provider_order(provider)
  return ["legacy"] if provider == "duckduckgo"
  return [provider] unless provider == "auto"

  chain = ["exa"]
  if allow_model_provider_fallback?
    %w[perplexity gemini].each { |name| chain << name if api_key(name) }
  end
  chain << "legacy"
end
124
+
125
# Searches Exa: through the keyed REST API when an API key is available,
# otherwise through the Exa MCP endpoint. If the keyed path raises, the MCP
# endpoint is tried instead; without a key, errors propagate.
def exa_search(query, options)
  key = api_key("exa")
  return exa_mcp_search(query, options) unless key

  exa_api_search(query, options, key)
rescue StandardError
  # Nothing to fall back to when the no-key path itself failed.
  raise if key.nil?

  exa_mcp_search(query, options)
end
136
+
137
# Searches through the Exa MCP "web_search_exa" tool and wraps the parsed hits
# in a SearchResponse whose answer is assembled from the hit excerpts.
def exa_mcp_search(query, options)
  limit = options[:max_results]
  tool_arguments = {
    "query" => enriched_query(query, options),
    "numResults" => limit,
    "livecrawl" => "fallback",
    "type" => "auto",
    "contextMaxCharacters" => 3000
  }
  raw_text = call_exa_mcp("web_search_exa", tool_arguments)
  results = parse_exa_mcp_results(raw_text, limit)
  SearchResponse.new(answer: answer_from_results(results), results: results, provider: "exa")
end
151
+
152
# Sends a JSON-RPC "tools/call" request to the Exa MCP endpoint and returns
# the first text content item of the result.
#
# Raises RuntimeError on a non-2xx status, an unparseable/empty envelope, a
# JSON-RPC error, a result flagged isError, or empty text content.
def call_exa_mcp(tool_name, arguments)
  rpc_request = {
    "jsonrpc" => "2.0",
    "id" => 1,
    "method" => "tools/call",
    "params" => { "name" => tool_name, "arguments" => arguments }
  }
  rpc_headers = {
    "Accept" => "application/json, text/event-stream",
    "Content-Type" => "application/json"
  }
  response = @http_client.post_json(EXA_MCP_URL, body: rpc_request, headers: rpc_headers)
  raise "Exa MCP failed with HTTP #{response.code}" unless success?(response)

  envelope = parse_mcp_rpc_response(response.body.to_s)
  raise "Exa MCP returned an empty response" unless envelope

  rpc_error = envelope["error"]
  raise "Exa MCP error: #{rpc_error["message"] || "unknown error"}" if rpc_error.is_a?(Hash)

  # Picks the "text" of the first content item whose type is "text" (nil if none).
  first_text = lambda do |content|
    Array(content).find { |item| item.is_a?(Hash) && item["type"] == "text" }.to_h["text"]
  end

  result = envelope["result"]
  if result.is_a?(Hash) && result["isError"]
    detail = first_text.call(result["content"]).to_s
    raise(detail.empty? ? "Exa MCP returned an error" : detail)
  end

  text = first_text.call(result.to_h["content"]).to_s
  raise "Exa MCP returned empty content" if text.strip.empty?

  text
end
185
+
186
# Extracts the JSON-RPC envelope from an MCP response body.
#
# The body is first scanned as an event stream: the first "data:" line whose
# JSON is a Hash with a "result" or "error" key wins. If no such line exists,
# the whole body is parsed as one JSON document.
#
# Returns the envelope Hash, or nil when nothing usable is found.
def parse_mcp_rpc_response(body)
  envelope = ->(value) { value.is_a?(Hash) && (value.key?("result") || value.key?("error")) }

  body.each_line do |raw_line|
    line = raw_line.strip
    next unless line.start_with?("data:")

    payload = line.delete_prefix("data:").strip
    next if payload.empty? || payload == "[DONE]"

    event = begin
      JSON.parse(payload)
    rescue JSON::ParserError
      next
    end
    return event if envelope.call(event)
  end

  document = JSON.parse(body)
  document if envelope.call(document)
rescue JSON::ParserError
  nil
end
205
+
206
# Parses the plain-text listing returned by the Exa MCP search tool.
#
# The text is split into blocks that each start with a "Title: " line. A block
# needs a "URL: " line to count; its excerpt is whatever follows "Text: " or,
# failing that, the "Highlights:" line, minus a trailing "---" separator.
#
# Returns at most max_results Result structs.
def parse_exa_mcp_results(text, max_results)
  text_marker = "\nText: "
  entries = []

  text.split(/(?=^Title: )/).each do |chunk|
    block = chunk.strip
    next if block.empty?

    url = block[/^URL: (.+)/, 1].to_s.strip
    next if url.empty?

    title = block[/^Title: (.+)/, 1].to_s.strip
    text_at = block.index(text_marker)
    body =
      if text_at
        block[(text_at + text_marker.length)..].to_s.strip
      elsif (highlights = block.match(/\nHighlights:\s*\n/))
        block[highlights.end(0)..].to_s.strip
      else
        ""
      end
    body = body.sub(/\n---\s*\z/, "").strip

    entries << Result.new(
      title: title.empty? ? url : title,
      url: url,
      excerpt: truncate_text(body, MAX_EXCERPT_CHARS),
      provider: "exa"
    )
  end

  entries.first(max_results)
end
225
+
226
# Picks the keyed Exa endpoint: the structured search when any filter or a
# non-default result count is requested, the answer endpoint otherwise.
def exa_api_search(query, options, key)
  customized = options[:recency_filter] ||
               options[:domain_filter].any? ||
               options[:max_results] != DEFAULT_MAX_RESULTS
  return exa_api_answer_search(query, key) unless customized

  exa_api_structured_search(query, options, key)
end
233
+
234
# Calls the Exa answer endpoint and returns its answer text plus citations.
# Raises RuntimeError on a non-2xx status (including up to 300 characters of the body).
def exa_api_answer_search(query, key)
  auth_headers = { "x-api-key" => key, "Content-Type" => "application/json" }
  response = @http_client.post_json(
    EXA_ANSWER_URL,
    body: { "query" => query, "text" => true },
    headers: auth_headers
  )
  unless success?(response)
    raise "Exa API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  payload = JSON.parse(response.body.to_s)
  citations = results_from_exa_records(Array(payload["citations"]), DEFAULT_MAX_RESULTS)
  SearchResponse.new(
    answer: truncate_text(payload["answer"], MAX_ANSWER_CHARS),
    results: citations,
    provider: "exa"
  )
end
246
+
247
# Calls the Exa search endpoint with the result count, domain filters and
# recency cut-off from options; the answer is assembled from the hit excerpts.
# Raises RuntimeError on a non-2xx status (including up to 300 characters of the body).
def exa_api_structured_search(query, options, key)
  limit = options[:max_results]
  payload = {
    "query" => query,
    "type" => "auto",
    "numResults" => limit,
    "contents" => { "text" => { "maxCharacters" => 3000 }, "highlights" => true }
  }
  payload.update(exa_domain_filters(options[:domain_filter]))
  recency = options[:recency_filter]
  payload["startPublishedDate"] = recency_start_date(recency) if recency

  response = @http_client.post_json(
    EXA_SEARCH_URL,
    body: payload,
    headers: { "x-api-key" => key, "Content-Type" => "application/json" }
  )
  unless success?(response)
    raise "Exa API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  records = Array(JSON.parse(response.body.to_s)["results"])
  results = results_from_exa_records(records, limit)
  SearchResponse.new(answer: answer_from_results(results), results: results, provider: "exa")
end
268
+
269
# Converts Exa API records into Result structs.
#
# Only the first max_results records are considered; non-Hash records and
# records without a URL are dropped. The excerpt comes from "text", else the
# joined "highlights", else "snippet".
def results_from_exa_records(records, max_results)
  records.first(max_results).each_with_object([]) do |record, collected|
    next unless record.is_a?(Hash)

    url = record["url"].to_s
    next if url.empty?

    body = record["text"]
    unless body.is_a?(String)
      highlights = record["highlights"]
      body = highlights.is_a?(Array) ? highlights.join(" ") : record["snippet"].to_s
    end

    heading = record["title"].to_s
    collected << Result.new(
      title: heading.empty? ? url : clean_text(heading),
      url: url,
      excerpt: truncate_text(clean_text(body), MAX_EXCERPT_CHARS),
      provider: "exa"
    )
  end
end
291
+
292
# Asks the Perplexity chat-completions API and returns its answer plus citations.
#
# Citations may be plain URL strings or hashes with "url"/"title"/"snippet";
# anything else is skipped. Raises RuntimeError when no API key is configured
# or the request does not return a 2xx status.
def perplexity_search(query, options)
  key = api_key("perplexity")
  raise "Perplexity API key not configured" unless key

  recency = options[:recency_filter]
  domains = options[:domain_filter]
  payload = {
    "model" => config_value("perplexity_model") || "sonar",
    "messages" => [{ "role" => "user", "content" => query }],
    "max_tokens" => MODEL_PROVIDER_MAX_TOKENS,
    "return_related_questions" => false
  }
  payload["search_recency_filter"] = recency if recency
  payload["search_domain_filter"] = domains.first(20) unless domains.empty?

  response = @http_client.post_json(
    PERPLEXITY_API_URL,
    body: payload,
    headers: { "Authorization" => "Bearer #{key}", "Content-Type" => "application/json" }
  )
  unless success?(response)
    raise "Perplexity API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}"
  end

  data = JSON.parse(response.body.to_s)
  message_text = Array(data["choices"]).first.to_h.dig("message", "content")
  answer = truncate_text(message_text, MAX_ANSWER_CHARS)

  sources = Array(data["citations"]).first(options[:max_results])
  results = sources.each.with_index(1).filter_map do |citation, position|
    placeholder = "Source #{position}"
    if citation.is_a?(String)
      Result.new(title: placeholder, url: citation, excerpt: "", provider: "perplexity")
    elsif citation.is_a?(Hash) && citation["url"].to_s != ""
      heading = citation["title"].to_s
      Result.new(
        title: heading.empty? ? placeholder : heading,
        url: citation["url"].to_s,
        excerpt: truncate_text(citation["snippet"], MAX_EXCERPT_CHARS),
        provider: "perplexity"
      )
    end
  end

  SearchResponse.new(answer: answer, results: results, provider: "perplexity")
end
324
+
325
# Asks the Gemini generateContent API (with the google_search tool enabled)
# and returns its answer text plus the grounding sources.
#
# The API key travels in the "x-goog-api-key" request header rather than in
# the URL query string, so it cannot leak through request URLs that end up in
# logs, proxies or exception messages.
#
# Raises RuntimeError when no API key is configured or the request does not
# return a 2xx status.
def gemini_search(query, options)
  key = api_key("gemini")
  raise "Gemini API key not configured" unless key

  prompt = enriched_query(query, options)
  model = config_value("gemini_model") || DEFAULT_GEMINI_MODEL
  response = @http_client.post_json(
    "#{GEMINI_API_BASE}/models/#{CGI.escape(model)}:generateContent",
    body: {
      "contents" => [{ "parts" => [{ "text" => prompt }] }],
      "tools" => [{ "google_search" => {} }]
    },
    headers: { "Content-Type" => "application/json", "x-goog-api-key" => key }
  )
  raise "Gemini API failed with HTTP #{response.code}: #{response.body.to_s[0, 300]}" unless success?(response)

  data = JSON.parse(response.body.to_s)
  candidate = Array(data["candidates"]).first.to_h
  # The answer is every text part of the first candidate, one per line.
  answer_text = Array(candidate.dig("content", "parts")).map { |part| part.to_h["text"] }.compact.join("\n")
  answer = truncate_text(answer_text, MAX_ANSWER_CHARS)

  chunks = Array(candidate.dig("groundingMetadata", "groundingChunks"))
  results = chunks.first(options[:max_results]).filter_map do |chunk|
    web = chunk.to_h["web"].to_h
    url = web["uri"].to_s
    next if url.empty?

    Result.new(title: web["title"].to_s.empty? ? url : web["title"].to_s, url: url, excerpt: "", provider: "gemini")
  end
  SearchResponse.new(answer: answer, results: results, provider: "gemini")
end
354
+
355
# Runs the keyless DuckDuckGo/SearXNG chain, expressing domain filters as
# site: terms in the query. Raises the collected error text when nothing was
# found; otherwise returns a SearchResponse with an empty answer whose note
# carries the errors of any backend that failed first.
def legacy_search(query, options)
  scoped_query = query_with_domain_filter(query, options[:domain_filter])
  hits, note = legacy_search_query(scoped_query, options[:max_results], options[:recency_filter])
  raise note if hits.empty? && note

  origin = hits.first&.provider || "legacy"
  SearchResponse.new(answer: "", results: hits, provider: origin, note: note)
end
362
+
363
# Tries DuckDuckGo first and falls back to the SearXNG instances.
#
# Returns [results, error]: error is nil when DuckDuckGo answered; otherwise
# it joins the DuckDuckGo failure with any SearXNG failure. Blank fragments
# are skipped so an empty SearXNG error (for example when no instances are
# configured) does not leave a dangling "; " in the message.
def legacy_search_query(query, max_results, recency_filter)
  begin
    duckduckgo_results = duckduckgo_search(query, max_results, recency_filter)
    return [duckduckgo_results, nil] unless duckduckgo_results.empty?

    duckduckgo_error = "DuckDuckGo returned no results"
  rescue StandardError => e
    duckduckgo_error = e.message
  end

  searxng_results, searxng_error = searxng_search(query, max_results, recency_filter)
  error = [duckduckgo_error, searxng_error].map(&:to_s).reject(&:empty?).join("; ")
  [searxng_results, error.empty? ? nil : error]
end
377
+
378
# Scrapes the DuckDuckGo HTML endpoint.
#
# Returns up to max_results Result structs. Nodes without a link, title or
# URL are discarded before the limit is applied, so unusable nodes at the top
# of the page no longer reduce the number of results returned.
#
# Raises RuntimeError on a non-2xx status.
def duckduckgo_search(query, max_results, recency_filter)
  form = { "q" => query, "kl" => "wt-wt" }
  form["df"] = duckduckgo_recency(recency_filter) if recency_filter
  response = @http_client.post(
    DUCKDUCKGO_URL,
    form: form,
    headers: browser_headers("text/html")
  )
  raise "DuckDuckGo search failed with HTTP #{response.code}" unless success?(response)

  document = Nokogiri::HTML(response.body.to_s)
  usable = document.css("div.result").filter_map do |node|
    link = node.at_css("a.result__a") || node.at_css("h2 a") || node.at_css("a[href]")
    next unless link

    result = Result.new(
      title: clean_text(link.text),
      url: clean_result_url(link["href"].to_s),
      excerpt: clean_text((node.at_css("a.result__snippet") || node.at_css(".result__snippet"))&.text),
      provider: "duckduckgo"
    )
    result unless result.title.empty? || result.url.empty?
  end
  usable.first(max_results)
end
401
+
402
# Queries the configured SearXNG instances in order and stops at the first
# one that returns results.
#
# Returns [results, nil] on success, or [[], errors] where errors joins one
# message per instance with "; ".
def searxng_search(query, max_results, recency_filter)
  failures = @searxng_instances.each_with_object([]) do |instance, memo|
    found = searxng_instance_search(instance, query, max_results, recency_filter)
    return [found, nil] unless found.empty?

    memo << "#{instance} returned no results"
  rescue StandardError => e
    memo << "#{instance}: #{e.message}"
  end

  [[], failures.join("; ")]
end
418
+
419
# Searches one SearXNG instance: the JSON API first, then the HTML page.
# Returns the first non-empty result list; raises RuntimeError combining both
# failure messages when neither attempt produced results.
def searxng_instance_search(instance, query, max_results, recency_filter)
  json_error = begin
    json_hits = searxng_json_search(instance, query, max_results, recency_filter)
    return json_hits unless json_hits.empty?

    "SearXNG JSON search returned no results"
  rescue StandardError => e
    e.message
  end

  begin
    html_hits = searxng_html_search(instance, query, max_results, recency_filter)
    return html_hits unless html_hits.empty?

    raise "SearXNG HTML search returned no results"
  rescue StandardError => e
    raise "#{json_error}; #{e.message}"
  end
end
438
+
439
# Queries a SearXNG instance with format=json and converts the "results"
# array into Result structs. Raises RuntimeError on a non-2xx status;
# JSON parse errors propagate.
def searxng_json_search(instance, query, max_results, recency_filter)
  params = { q: query, format: "json" }
  params[:time_range] = recency_filter if recency_filter

  endpoint = searxng_search_uri(instance, params).to_s
  response = @http_client.get(endpoint, headers: { "Accept" => "application/json" })
  raise "SearXNG search failed with HTTP #{response.code}" unless success?(response)

  payload = JSON.parse(response.body.to_s)
  results_from_records(Array(payload["results"]), max_results)
end
449
+
450
# Scrapes the HTML results page of a SearXNG instance and converts every
# result node that contains a link into a Result struct.
# Raises RuntimeError on a non-2xx status.
def searxng_html_search(instance, query, max_results, recency_filter)
  params = { q: query }
  params[:time_range] = recency_filter if recency_filter

  endpoint = searxng_search_uri(instance, params).to_s
  response = @http_client.get(endpoint, headers: browser_headers("text/html"))
  raise "SearXNG HTML search failed with HTTP #{response.code}" unless success?(response)

  page = Nokogiri::HTML(response.body.to_s)
  records = page.css("article.result, div.result").filter_map do |node|
    link = node.at_css("h3 a, a[href]")
    next unless link

    { "title" => link.text, "url" => link["href"], "content" => node.at_css(".content, p")&.text }
  end
  results_from_records(records, max_results)
end
470
+
471
# Builds the "<instance>/search?<params>" URI for a SearXNG instance,
# adding the trailing slash to the base URL when it is missing.
def searxng_search_uri(instance, params)
  base = instance.end_with?("/") ? instance : "#{instance}/"
  URI.join(base, "search").tap do |uri|
    uri.query = URI.encode_www_form(params)
  end
end
476
+
477
# Converts SearXNG records into Result structs and returns the first
# max_results usable ones.
#
# Unusable records (those result_from_hash rejects) are skipped before the
# limit is applied, so they no longer use up result slots. The lazy
# enumerator stops converting once enough results were collected.
def results_from_records(records, max_results)
  records.lazy.filter_map { |record| result_from_hash(record, "searxng") }.first(max_results)
end
482
+
483
# Builds a Result from a record hash with "title", "url" and an excerpt under
# "content", "snippet" or "description". Returns nil for non-Hash records and
# for records whose cleaned title or URL is empty.
def result_from_hash(record, provider)
  return nil unless record.is_a?(Hash)

  raw_excerpt = record["content"] || record["snippet"] || record["description"]
  fields = {
    title: clean_text(record["title"].to_s),
    url: clean_result_url(record["url"].to_s),
    excerpt: truncate_text(clean_text(raw_excerpt.to_s), MAX_EXCERPT_CHARS),
    provider: provider
  }
  return nil if fields[:title].empty? || fields[:url].empty?

  Result.new(**fields)
end
493
+
494
# Renders the report section for one query: a heading, an optional provider
# fallback note, the provider answer (if any) and the numbered sources.
# An unsuccessful response renders as "No results." followed by the error.
def format_query_results(query, response, error)
  heading = "## Query: #{query}"
  note = [error, response&.note].compact.reject(&:empty?).join("; ")
  return [heading, "No results. #{error}"].join("\n") unless successful_response?(response)

  out = [heading]
  out << "Provider fallback note: #{note}" unless note.empty?

  answer = response.answer.to_s.strip
  has_answer = !answer.empty?
  out.push("Provider: #{response.provider}", "Answer:", answer) if has_answer

  sources = response.results || []
  out << "Sources:" if has_answer && !sources.empty?
  sources.each.with_index(1) do |source, position|
    out << "#{position}. #{source.title}"
    out << " URL: #{source.url}"
    out << " Provider: #{source.provider}"
    out << " Excerpt: #{source.excerpt}" unless source.excerpt.to_s.empty?
  end

  out.join("\n")
end
522
+
523
# Builds an answer text from result excerpts: one paragraph per result that
# has an excerpt, each followed by a "Source: title (url)" line.
def answer_from_results(results)
  paragraphs = results.each_with_object([]) do |result, memo|
    excerpt = result.excerpt.to_s.strip
    memo << "#{excerpt}\nSource: #{result.title} (#{result.url})" unless excerpt.empty?
  end
  paragraphs.join("\n\n")
end
531
+
532
# Truthy when the response carries a non-blank answer or at least one result.
# A nil/false response is returned as-is.
def successful_response?(response)
  return response unless response

  has_answer = !response.answer.to_s.strip.empty?
  has_answer || !Array(response.results).empty?
end
535
+
536
# Normalises the requested result count.
#
# value - whatever the caller supplied (Integer, numeric String, nil, ...).
#
# Returns an Integer between 1 and MAX_MAX_RESULTS. Missing, non-positive or
# non-numeric values (including objects without #to_i such as arrays or
# booleans, and NaN/Infinity floats) fall back to DEFAULT_MAX_RESULTS instead
# of raising.
def bounded_max_results(value)
  requested = value.respond_to?(:to_i) ? value.to_i : 0
  requested = DEFAULT_MAX_RESULTS unless requested.positive?
  [requested, MAX_MAX_RESULTS].min
rescue FloatDomainError
  DEFAULT_MAX_RESULTS
end
541
+
542
# True when queries is an Array of 1..MAX_QUERIES non-blank Strings.
def valid_queries?(queries)
  return false unless queries.is_a?(Array)
  return false unless (1..MAX_QUERIES).cover?(queries.length)

  queries.none? { |query| !query.is_a?(String) || query.strip.empty? }
end
545
+
546
# Looks up key in args under its String form first, then its Symbol form.
# Returns nil when args is not a Hash.
def args_value(args, key)
  return nil unless args.is_a?(Hash)

  string_hit = args[key]
  string_hit || args[key.to_sym]
end
551
+
552
# True when the response status code, read as an integer, is 2xx.
def success?(response)
  (200..299).cover?(response.code.to_i)
end
555
+
556
# Collapses every whitespace run into one space and trims both ends.
# nil becomes "".
def clean_text(text)
  collapsed = text.to_s.gsub(/\s+/, " ")
  collapsed.strip
end
559
+
560
# Trims text and, when it is longer than max_chars characters, cuts it to
# that length (dropping trailing whitespace) and appends a truncation notice
# on a new line.
def truncate_text(text, max_chars)
  value = text.to_s.strip
  if value.length > max_chars
    head = value[0, max_chars].rstrip
    "#{head}\n... truncated to #{max_chars} characters"
  else
    value
  end
end
566
+
567
# Trims a result URL and unwraps DuckDuckGo redirect links
# (duckduckgo.com/l/?uddg=<target>) to their target.
# URLs that cannot be parsed are returned trimmed but otherwise unchanged.
def clean_result_url(url)
  text = url.to_s.strip
  uri = URI.parse(text)
  return text unless uri.host == "duckduckgo.com" && uri.path == "/l/"

  target = URI.decode_www_form(uri.query.to_s).to_h["uddg"].to_s
  target.empty? ? text : target
rescue URI::InvalidURIError
  text
end
578
+
579
# Request headers that imitate a desktop Chrome navigation; accept becomes
# the Accept header.
def browser_headers(accept)
  user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
  fetch_metadata = {
    "Sec-Fetch-Dest" => "document",
    "Sec-Fetch-Mode" => "navigate",
    "Sec-Fetch-Site" => "none",
    "Sec-Fetch-User" => "?1"
  }
  {
    "Accept" => accept,
    "Accept-Language" => "en-US,en;q=0.9",
    "User-Agent" => user_agent
  }.merge(fetch_metadata)
end
590
+
591
# Caps the formatted output at @max_output_bytes bytes and appends a
# truncation notice when anything was cut.
#
# The byte slice can end in the middle of a multi-byte character. Those
# dangling bytes are dropped rather than replaced: the default scrub
# replacement (U+FFFD) is three bytes long, so it would both show a garbage
# glyph and push the kept text past the byte budget.
def truncate_output(output)
  return output if output.bytesize <= @max_output_bytes

  truncated = output.byteslice(0, @max_output_bytes).to_s.scrub("")
  "#{truncated}\n... truncated to #{@max_output_bytes} bytes"
end
597
+
598
# The configuration in use: the value given to the constructor, or the
# result of ConfigFiles.read_config loaded on first use. Any StandardError
# while loading yields (and caches) an empty Hash.
def config
  @config ||= ConfigFiles.read_config
rescue StandardError
  @config = {}
end
605
+
606
# The web-search section of the configuration: the first truthy value among
# the "web_search", "webSearch", "web_research" and "webResearch" keys.
# Returns {} when none is set or the value is not a Hash.
def web_config
  %w[web_search webSearch web_research webResearch].each do |section_name|
    section = config[section_name]
    return(section.is_a?(Hash) ? section : {}) if section
  end
  {}
end
610
+
611
# Resolves a web-search setting. Lookup order: the web-search section under
# the snake_case then camelCase name, then the top-level config under
# "web_search_<key>", "web_research_<key>", the snake_case name and the
# camelCase name. Returns nil when the key is absent everywhere.
def config_value(key)
  snake = key.to_s
  camel = snake.gsub(/_([a-z])/) { Regexp.last_match(1).upcase }

  [snake, camel].each do |name|
    return web_config[name] if web_config.key?(name)
  end

  ["web_search_#{snake}", "web_research_#{snake}", snake, camel].each do |name|
    return config[name] if config.key?(name)
  end

  nil
end
625
+
626
# Reads a setting as a boolean. true/false pass through; the strings
# 1/true/yes/on and 0/false/no/off (any case, surrounding whitespace ignored)
# are converted; anything else, including a missing value, yields nil.
def boolean_config_value(key)
  raw = config_value(key)
  return raw if raw == true || raw == false

  case raw.to_s.strip.downcase
  when "1", "true", "yes", "on" then true
  when "0", "false", "no", "off" then false
  end
end
636
+
637
# True only when the "allow_model_providers" setting is explicitly enabled.
def allow_model_provider_fallback?
  setting = boolean_config_value("allow_model_providers")
  setting == true
end
640
+
641
# Finds the API key for a provider: the <PROVIDER>_API_KEY environment
# variable first, then the "<provider>_api_key" setting. Values are trimmed;
# returns nil when both are blank.
def api_key(provider)
  from_env = ENV["#{provider.upcase}_API_KEY"].to_s.strip
  return from_env unless from_env.empty?

  from_config = config_value("#{provider}_api_key").to_s.strip
  from_config unless from_config.empty?
end
649
+
650
# Returns a copy of message with credentials masked: every configured
# Exa/Perplexity/Gemini API key, any "key=<value>" parameter and any
# "Bearer <token>" are replaced by "[REDACTED]".
def redact_secrets(message)
  text = message.to_s
  %w[exa perplexity gemini].each do |provider|
    secret = api_key(provider)
    next unless secret && !secret.empty?

    text = text.gsub(secret, "[REDACTED]")
  end

  text
    .gsub(/key=([^\s&]+)/, "key=[REDACTED]")
    .gsub(/Bearer\s+[^\s]+/, "Bearer [REDACTED]")
end
660
+
661
# Lower-cases and trims a provider name; returns it when it is one of
# PROVIDERS, nil otherwise.
def normalize_provider(value)
  candidate = value.to_s.strip.downcase
  candidate if PROVIDERS.include?(candidate)
end
665
+
666
# Lower-cases and trims a recency filter; returns it when it is "day",
# "week", "month" or "year", nil otherwise.
def normalize_recency(value)
  candidate = value.to_s.strip.downcase
  candidate if %w[day week month year].include?(candidate)
end
670
+
671
# Coerces the domain filter into an Array of trimmed, non-empty Strings.
def normalize_domain_filter(value)
  Array(value).map { |domain| domain.to_s.strip }.reject(&:empty?)
end
677
+
678
# Builds the query text for providers that take free text only: the query
# with site: terms for the domain filter, followed by a phrase describing
# the recency filter when one is set.
def enriched_query(query, options)
  recency_phrases = {
    "day" => "past 24 hours",
    "week" => "past week",
    "month" => "past month",
    "year" => "past year"
  }
  recency = options[:recency_filter]

  parts = [query_with_domain_filter(query, options[:domain_filter])]
  parts.push(recency_phrases[recency]) if recency
  parts.join(" ")
end
686
+
687
# Appends a site: term per domain to the query; a domain prefixed with "-"
# becomes a -site: exclusion. The query is returned untouched when the
# filter is empty.
def query_with_domain_filter(query, domain_filter)
  return query if domain_filter.empty?

  site_terms = domain_filter.map do |domain|
    if domain.start_with?("-")
      "-site:#{domain[1..]}"
    else
      "site:#{domain}"
    end
  end
  [query, *site_terms].join(" ")
end
695
+
696
# Splits the domain filter into the Exa request fields "includeDomains" and
# "excludeDomains" (domains prefixed with "-", prefix removed). A field is
# omitted when its list is empty.
def exa_domain_filters(domain_filter)
  negated, included = domain_filter.partition { |domain| domain.start_with?("-") }
  excluded = negated.map { |domain| domain[1..] }.reject(&:empty?)

  {}.tap do |filters|
    filters["includeDomains"] = included unless included.empty?
    filters["excludeDomains"] = excluded unless excluded.empty?
  end
end
704
+
705
# Converts a recency filter into the UTC timestamp that many days back
# ("day" = 1, "week" = 7, "month" = 30, "year" = 365; anything else = now),
# formatted as an ISO 8601 string such as "2024-05-01T12:00:00Z".
#
# The string is built with core Time#strftime. Time#iso8601 is provided by
# the "time" standard library on Ruby versions before 3.4, and this file does
# not require it, so calling it could raise NoMethodError depending on what
# else happened to be loaded.
def recency_start_date(filter)
  days = { "day" => 1, "week" => 7, "month" => 30, "year" => 365 }.fetch(filter, 0)
  (Time.now.utc - (days * 86_400)).strftime("%Y-%m-%dT%H:%M:%SZ")
end
709
+
710
# Maps a recency filter to the one-letter value sent as DuckDuckGo's "df"
# form field; nil for anything else.
def duckduckgo_recency(filter)
  case filter
  when "day" then "d"
  when "week" then "w"
  when "month" then "m"
  when "year" then "y"
  end
end
713
+
714
# Minimal Net::HTTP wrapper used as the default HTTP client. Every call opens
# its own connection (TLS for https URLs) with HTTP_TIMEOUT_SECONDS as open
# and read timeout, and returns a Response holding the status code and body.
class NetHttpClient
  Response = Struct.new(:code, :body, keyword_init: true)

  # Performs a GET request.
  def get(url, headers: {})
    request(url, Net::HTTP::Get, headers: headers)
  end

  # Performs a POST request with a form-encoded body built from form.
  def post(url, form:, headers: {})
    request(url, Net::HTTP::Post, headers: headers) { |outgoing| outgoing.set_form_data(form) }
  end

  # Performs a POST request whose body is body serialised as JSON.
  def post_json(url, body:, headers: {})
    request(url, Net::HTTP::Post, headers: headers) { |outgoing| outgoing.body = JSON.generate(body) }
  end

  private

  # Builds the request, applies the headers, lets the optional block finish
  # it (set the body), sends it and wraps the reply.
  def request(url, request_class, headers: {})
    uri = URI.parse(url)
    connection = {
      use_ssl: uri.scheme == "https",
      open_timeout: HTTP_TIMEOUT_SECONDS,
      read_timeout: HTTP_TIMEOUT_SECONDS
    }

    Net::HTTP.start(uri.host, uri.port, connection) do |http|
      outgoing = request_class.new(uri)
      headers.each_pair { |name, value| outgoing[name] = value }
      yield outgoing if block_given?

      reply = http.request(outgoing)
      Response.new(code: reply.code, body: reply.body)
    end
  end
end
746
+ end
747
+ end