brave_search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +27 -0
- data/README.md +179 -0
- data/Rakefile +8 -0
- data/brave_search.gemspec +41 -0
- data/dummy_app/.dockerignore +45 -0
- data/dummy_app/.github/dependabot.yml +12 -0
- data/dummy_app/.github/workflows/ci.yml +66 -0
- data/dummy_app/.rubocop.yml +8 -0
- data/dummy_app/Dockerfile +69 -0
- data/dummy_app/Gemfile +48 -0
- data/dummy_app/README.md +24 -0
- data/dummy_app/Rakefile +6 -0
- data/dummy_app/app/controllers/application_controller.rb +2 -0
- data/dummy_app/app/controllers/concerns/.keep +0 -0
- data/dummy_app/app/controllers/searches_controller.rb +117 -0
- data/dummy_app/app/jobs/application_job.rb +7 -0
- data/dummy_app/app/mailers/application_mailer.rb +4 -0
- data/dummy_app/app/models/application_record.rb +3 -0
- data/dummy_app/app/models/concerns/.keep +0 -0
- data/dummy_app/app/views/layouts/mailer.html.erb +13 -0
- data/dummy_app/app/views/layouts/mailer.text.erb +1 -0
- data/dummy_app/bin/brakeman +7 -0
- data/dummy_app/bin/dev +2 -0
- data/dummy_app/bin/docker-entrypoint +14 -0
- data/dummy_app/bin/rails +4 -0
- data/dummy_app/bin/rake +4 -0
- data/dummy_app/bin/rubocop +8 -0
- data/dummy_app/bin/setup +34 -0
- data/dummy_app/bin/thrust +5 -0
- data/dummy_app/config/application.rb +32 -0
- data/dummy_app/config/boot.rb +4 -0
- data/dummy_app/config/cable.yml +10 -0
- data/dummy_app/config/database.yml +41 -0
- data/dummy_app/config/environment.rb +5 -0
- data/dummy_app/config/environments/development.rb +70 -0
- data/dummy_app/config/environments/production.rb +86 -0
- data/dummy_app/config/environments/test.rb +53 -0
- data/dummy_app/config/initializers/brave_search.rb +10 -0
- data/dummy_app/config/initializers/cors.rb +16 -0
- data/dummy_app/config/initializers/filter_parameter_logging.rb +8 -0
- data/dummy_app/config/initializers/inflections.rb +16 -0
- data/dummy_app/config/locales/en.yml +31 -0
- data/dummy_app/config/puma.rb +41 -0
- data/dummy_app/config/routes.rb +16 -0
- data/dummy_app/config/storage.yml +34 -0
- data/dummy_app/config.ru +6 -0
- data/dummy_app/db/seeds.rb +9 -0
- data/dummy_app/lib/tasks/.keep +0 -0
- data/dummy_app/public/robots.txt +1 -0
- data/dummy_app/script/.keep +0 -0
- data/dummy_app/test/controllers/.keep +0 -0
- data/dummy_app/test/fixtures/files/.keep +0 -0
- data/dummy_app/test/integration/.keep +0 -0
- data/dummy_app/test/mailers/.keep +0 -0
- data/dummy_app/test/models/.keep +0 -0
- data/dummy_app/test/test_helper.rb +15 -0
- data/dummy_app/vendor/.keep +0 -0
- data/example.rb +32 -0
- data/lib/brave_search/async_client.rb +52 -0
- data/lib/brave_search/client.rb +140 -0
- data/lib/brave_search/configuration.rb +21 -0
- data/lib/brave_search/exporter.rb +43 -0
- data/lib/brave_search/exporters/base.rb +23 -0
- data/lib/brave_search/exporters/csv.rb +32 -0
- data/lib/brave_search/exporters/json.rb +25 -0
- data/lib/brave_search/exporters/xlsx.rb +47 -0
- data/lib/brave_search/jobs/export_job.rb +40 -0
- data/lib/brave_search/jobs/pdf_download_job.rb +38 -0
- data/lib/brave_search/pdf_downloader.rb +46 -0
- data/lib/brave_search/railtie.rb +15 -0
- data/lib/brave_search/results.rb +93 -0
- data/lib/brave_search/storage/s3.rb +47 -0
- data/lib/brave_search/storage.rb +21 -0
- data/lib/brave_search/summarizer.rb +38 -0
- data/lib/brave_search/summary_result.rb +76 -0
- data/lib/brave_search/version.rb +5 -0
- data/lib/brave_search.rb +38 -0
- data/lib/generators/brave_search/install_generator.rb +44 -0
- data/test_with_real_api.rb +69 -0
- metadata +248 -0
File without changes
|
data/example.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative "lib/brave_search"
|
5
|
+
|
6
|
+
# Basic usage
puts "BraveSearch Ruby Gem Example"
puts "=" * 40

# Configure. The key is read from the environment with no placeholder
# fallback: a fake key would satisfy the client's "key present" check, so the
# AuthenticationError branch below (and its setup hint) could never run.
BraveSearch.configure do |config|
  config.api_key = ENV.fetch("BRAVE_API_KEY", nil)
  config.timeout = 30
end

begin
  # Client.new raises BraveSearch::AuthenticationError when no key is configured.
  BraveSearch::Client.new
  puts "Client initialized successfully!"

  # NOTE: The calls below need a real API key. To try them, assign the client
  # created above to a variable first (client = BraveSearch::Client.new).
  # results = client.search(q: "ruby programming", count: 5)
  # puts "Found #{results[:web][:results].length} results"
rescue BraveSearch::AuthenticationError => e
  puts "Error: #{e.message}"
  puts "Set BRAVE_API_KEY environment variable with your API key from https://brave.com/search/api/"
rescue StandardError => e
  puts "Error: #{e.message}"
end

puts "\nTo use with a real API key:"
puts "export BRAVE_API_KEY=your_key_here"
puts "ruby example.rb"
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "async"
|
4
|
+
|
5
|
+
module BraveSearch
  # Client variant whose search methods run inside `Async` blocks.
  #
  # Every overridden method returns the value of its `Async` call instead of a
  # Results object; #concurrent_search calls `wait` on such values to collect
  # the underlying results.
  class AsyncClient < Client
    # Runs Client#search inside an Async block.
    def search(q:, count: 10, **options)
      Async { super(q: q, count: count, **options) }
    end

    # Runs Client#news_search inside an Async block.
    def news_search(q:, count: 10, **options)
      Async { super(q: q, count: count, **options) }
    end

    # Runs Client#video_search inside an Async block.
    def video_search(q:, count: 10, **options)
      Async { super(q: q, count: count, **options) }
    end

    # Runs Client#image_search inside an Async block.
    def image_search(q:, count: 10, **options)
      Async { super(q: q, count: count, **options) }
    end

    # Issues one "/web/search" request per entry of +queries+, each in its own
    # nested Async block, then waits for all of them.
    #
    # queries - a collection whose entries are either a String (used as the
    #           `q` parameter) or a Hash of keyword parameters for build_params.
    #
    # Returns the outer Async value; its block evaluates to the list of waited
    # results, one BraveSearch::Results per query in input order.
    def concurrent_search(queries)
      Async do
        pending = queries.map do |query|
          Async do
            # A bare String is shorthand for { q: "..." }.
            keywords = query.is_a?(String) ? { q: query } : query
            reply = make_request("/web/search", build_params(**keywords))
            BraveSearch::Results.new(handle_response(reply))
          end
        end

        pending.map(&:wait)
      end
    end
  end
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "httparty"
|
4
|
+
require "json"
|
5
|
+
|
6
|
+
module BraveSearch
  # Synchronous client for the Brave Search API, built on HTTParty.
  #
  # The API key is sent in the "X-Subscription-Token" header of every request.
  # Search-style endpoints wrap the parsed JSON body in a Results object;
  # #suggest and #spellcheck return the parsed body itself.
  class Client
    include HTTParty

    # api_key - key to use; falls back to BraveSearch.config.api_key.
    #
    # Raises AuthenticationError when no key is available. A blank key is
    # treated like a missing one, since it can never authenticate.
    #
    # Side effect: sets base_uri and default_timeout on the class from
    # BraveSearch.config each time an instance is created.
    def initialize(api_key: nil)
      @api_key = api_key || BraveSearch.config.api_key
      raise AuthenticationError, "API key is required" if @api_key.to_s.strip.empty?

      self.class.base_uri BraveSearch.config.base_url
      self.class.default_timeout BraveSearch.config.timeout
    end

    # Web search. Extra keyword options are sent as query parameters.
    def search(q:, count: 10, **options)
      fetch_results("/web/search", q: q, count: count, **options)
    end

    # Runs a web search and downloads the PDFs linked from its results.
    #
    # Extra keyword options are forwarded to #search; PdfDownloadJob passes its
    # search options through this method, so they must be accepted here.
    # The optional block is handed to Results#download_pdfs as progress callback.
    def search_and_download_pdfs(q:, count: 10, storage: nil, folder: "pdfs", **options, &progress_callback)
      results = search(q: q, count: count, **options)
      results.download_pdfs(storage: storage, folder: folder, &progress_callback)
    end

    # Runs a web search and exports the results in +format+.
    #
    # Uploads through +storage+ when both +storage+ and +key+ are given;
    # otherwise returns the in-memory export.
    def search_and_export(q:, format:, storage: nil, key: nil, count: 10, **options)
      results = search(q: q, count: count, **options)

      if storage && key
        results.export_to_storage(format: format, storage: storage, key: key)
      else
        results.export(format: format)
      end
    end

    # Enqueues Jobs::ExportJob. Returns nil without enqueuing anything when
    # ActiveJob is not defined.
    def search_and_export_async(q:, format:, storage_config: nil, key: nil, **options)
      return unless defined?(ActiveJob)

      Jobs::ExportJob.perform_later(
        query: q,
        format: format,
        storage_config: storage_config,
        key: key,
        **options
      )
    end

    # Enqueues Jobs::PdfDownloadJob. Returns nil without enqueuing anything
    # when ActiveJob is not defined.
    def search_and_download_pdfs_async(q:, storage_config: nil, folder: "pdfs", **options)
      return unless defined?(ActiveJob)

      Jobs::PdfDownloadJob.perform_later(
        query: q,
        storage_config: storage_config,
        folder: folder,
        **options
      )
    end

    # News search.
    def news_search(q:, count: 10, **options)
      fetch_results("/news/search", q: q, count: count, **options)
    end

    # Video search.
    def video_search(q:, count: 10, **options)
      fetch_results("/videos/search", q: q, count: count, **options)
    end

    # Image search.
    def image_search(q:, count: 10, **options)
      fetch_results("/images/search", q: q, count: count, **options)
    end

    # Query suggestions; returns the parsed response body.
    def suggest(q:, **options)
      params = build_params(q: q, **options)
      handle_response(make_request("/suggest/search", params))
    end

    # Spellcheck; returns the parsed response body.
    def spellcheck(q:, **options)
      params = build_params(q: q, **options)
      handle_response(make_request("/spellcheck", params))
    end

    # Memoized Summarizer bound to this client.
    def summarizer
      @summarizer ||= Summarizer.new(self)
    end

    private

    # Shared body of the search endpoints: request +endpoint+ with the given
    # parameters and wrap the parsed response in Results.
    def fetch_results(endpoint, **params)
      response = make_request(endpoint, build_params(**params))
      Results.new(handle_response(response))
    end

    # Builds the parameter hash; +count+ is only included when truthy.
    def build_params(q:, count: nil, **options)
      params = { q: q }
      params[:count] = count if count
      params.merge(options)
    end

    # Performs the HTTP call. GET sends +params+ as the query string, POST
    # sends them as a JSON body.
    #
    # Raises ArgumentError for any other +method+.
    def make_request(endpoint, params, method: :get)
      headers = {
        "X-Subscription-Token" => @api_key,
        "Accept" => "application/json"
      }

      case method
      when :get
        self.class.get(endpoint, { query: params, headers: headers })
      when :post
        headers["Content-Type"] = "application/json"
        self.class.post(endpoint, { body: params.to_json, headers: headers })
      else
        raise ArgumentError, "Unsupported HTTP method: #{method}"
      end
    end

    # Maps the HTTP status to a parsed body (200) or a gem error:
    # 401 AuthenticationError, 429 RateLimitError, 402 QuotaExceededError,
    # anything else Error.
    def handle_response(response)
      case response.code
      when 200
        JSON.parse(response.body, symbolize_names: true)
      when 401
        raise AuthenticationError, "Invalid API key"
      when 429
        raise RateLimitError, "Rate limit exceeded"
      when 402
        raise QuotaExceededError, "Quota exceeded"
      else
        raise Error, "HTTP #{response.code}: #{response.message}"
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BraveSearch
  # Holds the gem-wide settings, pre-populated with defaults.
  class Configuration
    attr_accessor :api_key,
                  :base_url,
                  :timeout,
                  :retry_attempts,
                  :storage_provider,
                  :storage_bucket,
                  :storage_endpoint

    # Seeds every setting. The API key and the storage bucket are read from
    # the BRAVE_API_KEY and BRAVE_SEARCH_BUCKET environment variables; the
    # bucket falls back to "brave-search-downloads" when the variable is unset.
    def initialize
      @api_key = ENV["BRAVE_API_KEY"]
      @base_url = "https://api.search.brave.com/res/v1"
      @timeout = 30
      @retry_attempts = 3
      @storage_provider = :aws
      @storage_bucket = ENV["BRAVE_SEARCH_BUCKET"] || "brave-search-downloads"
      @storage_endpoint = nil
    end

    # Builds a storage backend via Storage.for from the configured provider,
    # bucket and endpoint; +options+ are passed along as extra keywords.
    def storage(**options)
      settings = { bucket: storage_bucket, endpoint: storage_endpoint }
      Storage.for(storage_provider, **settings, **options)
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BraveSearch
  # Factory and abstract base for result exporters.
  class Exporter
    # Returns a new exporter for +format+, loading its file on first use.
    #
    # format - :json, :csv or :xlsx. String names ("csv") are accepted as
    #          well and are converted to the Symbol form before matching.
    #
    # Raises ArgumentError for any other value.
    def self.for(format)
      normalized = format.respond_to?(:to_sym) ? format.to_sym : format

      case normalized
      when :json
        require_relative "exporters/json"
        Exporters::Json.new
      when :csv
        require_relative "exporters/csv"
        Exporters::Csv.new
      when :xlsx
        require_relative "exporters/xlsx"
        Exporters::Xlsx.new
      else
        raise ArgumentError, "Unknown export format: #{format}"
      end
    end

    # Abstract: subclasses return a Hash with :content and :size.
    #
    # Raises NotImplementedError unless overridden.
    def export(results)
      raise NotImplementedError, "Subclasses must implement #export"
    end

    # Exports +results+ and uploads the content through +storage+.
    #
    # storage - object responding to upload(content, key:) whose return value
    #           is indexed with :url.
    # key     - destination key handed to the storage object.
    #
    # Returns a Hash with :key, :url (from the upload result), :size (from the
    # export result) and :format.
    def export_to_storage(results, storage:, key:)
      export_result = export(results)
      upload_result = storage.upload(export_result[:content], key: key)

      {
        key: key,
        url: upload_result[:url],
        size: export_result[:size],
        format: format_name
      }
    end

    private

    # Lower-cased last segment of the class name, e.g. "csv" for Exporters::Csv.
    def format_name
      self.class.name.split("::").last.downcase
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Time#iso8601 (used in build_metadata) comes from the "time" standard library
# on Ruby versions where it is not built in, so load it explicitly instead of
# relying on another library having required it.
require "time"

module BraveSearch
  module Exporters
    # Shared helpers for the concrete exporters.
    class Base < BraveSearch::Exporter
      private

      # Guards the exporters' input.
      #
      # Raises ArgumentError when +results+ is nil or does not respond to
      # web_results.
      def validate_results(results)
        raise ArgumentError, "Results cannot be nil" if results.nil?
        raise ArgumentError, "Results must respond to web_results" unless results.respond_to?(:web_results)
      end

      # Describes an export: ISO 8601 timestamp of the call, the results'
      # query and count, and this exporter's format name.
      def build_metadata(results)
        {
          exported_at: Time.now.iso8601,
          query: results.query,
          total_results: results.count,
          format: format_name
        }
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
require_relative "base"
|
5
|
+
|
6
|
+
module BraveSearch
  module Exporters
    # Exports web results as CSV text.
    class Csv < Base
      # Returns a Hash with the CSV text under :content and its byte size
      # under :size. Raises ArgumentError when validate_results rejects the
      # input.
      def export(results)
        validate_results(results)

        csv_text = generate_csv(results)
        { content: csv_text, size: csv_text.bytesize }
      end

      private

      # One header row followed by one title/url/description row per web result.
      def generate_csv(results)
        columns = %i[title url description]

        CSV.generate do |table|
          table << columns.map(&:to_s)
          results.web_results.each do |entry|
            table << columns.map { |column| entry[column] }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require_relative "base"
|
5
|
+
|
6
|
+
module BraveSearch
  module Exporters
    # Exports the full result hash, plus export metadata, as pretty JSON.
    class Json < Base
      # Returns a Hash with the JSON text under :content and its byte size
      # under :size. The document is results.to_h with a :metadata entry
      # merged in. Raises ArgumentError when validate_results rejects the input.
      def export(results)
        validate_results(results)

        payload = results.to_h.merge(metadata: build_metadata(results))
        serialized = JSON.pretty_generate(payload)

        { content: serialized, size: serialized.bytesize }
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "caxlsx"
|
4
|
+
require_relative "base"
|
5
|
+
|
6
|
+
module BraveSearch
  module Exporters
    # Exports web results as an XLSX workbook built with Axlsx.
    class Xlsx < Base
      # Returns a Hash with the workbook bytes under :content and their byte
      # size under :size. Raises ArgumentError when validate_results rejects
      # the input.
      def export(results)
        validate_results(results)

        workbook_bytes = generate_xlsx(results)
        { content: workbook_bytes, size: workbook_bytes.bytesize }
      end

      private

      # Builds a workbook with a "Search Results" sheet (header row plus one
      # row per web result) and a "Metadata" sheet, then reads it into a string.
      def generate_xlsx(results)
        package = Axlsx::Package.new
        book = package.workbook

        book.add_worksheet(name: "Search Results") do |sheet|
          sheet.add_row %w[Title URL Description]

          results.web_results.each do |entry|
            sheet.add_row [entry[:title], entry[:url], entry[:description]]
          end
        end

        add_metadata_sheet(book, results)
        package.to_stream.read
      end

      # Adds a two-column "Metadata" sheet: a humanised property name
      # ("exported_at" becomes "Exported at") next to its value.
      def add_metadata_sheet(workbook, results)
        workbook.add_worksheet(name: "Metadata") do |sheet|
          sheet.add_row %w[Property Value]

          build_metadata(results).each do |property, value|
            label = property.to_s.tr("_", " ").capitalize
            sheet.add_row [label, value]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BraveSearch
  module Jobs
    # Background job that runs a search and exports the results.
    class ExportJob < ActiveJob::Base
      queue_as :brave_search

      # query          - search query, passed to the client as `q`.
      # format         - export format; converted with to_sym before use.
      # storage_config - optional Hash with :provider and, optionally,
      #                  :options for BraveSearch::Storage.for.
      # key            - storage key for the upload.
      # search_options - forwarded to Client#search_and_export.
      #
      # Uploads only when both storage_config and key are given; otherwise the
      # export stays in memory. Returns the export result in both cases.
      def perform(query:, format:, storage_config: nil, key: nil, **search_options)
        client = BraveSearch::Client.new

        if storage_config && key
          storage = build_storage(storage_config)
          result = client.search_and_export(
            q: query,
            format: format.to_sym,
            storage: storage,
            key: key,
            **search_options
          )

          Rails.logger.info "Export completed: #{result[:url]}"
        else
          result = client.search_and_export(q: query, format: format.to_sym, **search_options)
          Rails.logger.info "Export completed locally: #{result[:size]} bytes"
        end

        result
      end

      private

      # Builds the storage backend described by +config+. A config without
      # :options is treated as having none, instead of failing on `**nil`.
      def build_storage(config)
        BraveSearch::Storage.for(
          config[:provider],
          **(config[:options] || {})
        )
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BraveSearch
  module Jobs
    # Background job that runs a search and downloads the PDFs it links to.
    class PdfDownloadJob < ActiveJob::Base
      queue_as :brave_search

      # query          - search query, passed to the client as `q`.
      # storage_config - optional Hash with :provider and, optionally,
      #                  :options for BraveSearch::Storage.for.
      # folder         - key prefix for the downloaded files.
      # search_options - forwarded to Client#search_and_download_pdfs.
      #
      # Returns whatever Client#search_and_download_pdfs returns.
      def perform(query:, storage_config: nil, folder: "pdfs", **search_options)
        client = BraveSearch::Client.new
        storage = storage_config ? build_storage(storage_config) : nil

        # Lambdas enforce their arity, so +url+ is optional: the callback then
        # works whether it is invoked with (current, total) or with
        # (current, total, url).
        progress_callback = lambda do |current, total, url = nil|
          Rails.logger.info "Downloading PDFs: #{current}/#{total} - #{url}"
        end

        result = client.search_and_download_pdfs(
          q: query,
          storage: storage,
          folder: folder,
          **search_options,
          &progress_callback
        )

        # The download call returns a list with one entry per PDF URL, not a
        # Hash, so it cannot be indexed with :files. Entries without a value
        # are left out of the count.
        downloaded = Array(result).compact.size
        Rails.logger.info "PDF download completed: #{downloaded} files"
        result
      end

      private

      # Builds the storage backend described by +config+. A config without
      # :options is treated as having none, instead of failing on `**nil`.
      def build_storage(config)
        BraveSearch::Storage.for(
          config[:provider],
          **(config[:options] || {})
        )
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "concurrent-ruby"
|
4
|
+
|
5
|
+
module BraveSearch
  # Downloads PDFs into a storage backend, one Concurrent::Future per URL.
  class PdfDownloader
    # storage - object responding to download(url, key:); defaults to the
    #           :aws backend on the BRAVE_SEARCH_BUCKET bucket.
    def initialize(storage: nil)
      @storage = storage || default_storage
    end

    # Stores the document at +url+ under "<folder>/<filename>".
    #
    # Returns whatever the storage object's download returns.
    def download(url, folder: "pdfs")
      key = "#{folder}/#{extract_filename(url)}"
      @storage.download(url, key: key)
    end

    # Downloads every URL in +urls+ and reports progress after each one.
    #
    # The optional block is called as (completed_count, total, url). A strict
    # lambda declaring fewer parameters only receives as many arguments as it
    # declares, so existing two-parameter callbacks keep working.
    #
    # Returns the futures' values in the order of +urls+.
    def batch_download(urls, folder: "pdfs", &progress_callback)
      total = urls.length
      completed = Concurrent::AtomicFixnum.new(0)

      futures = urls.map do |url|
        Concurrent::Future.execute do
          result = download(url, folder: folder)
          notify_progress(progress_callback, completed.increment, total, url)
          result
        end
      end

      futures.map(&:value)
    end

    private

    # Invokes the progress callback, trimming the arguments for lambdas that
    # declare exactly zero, one or two parameters (plain blocks ignore extras).
    def notify_progress(callback, current, total, url)
      return unless callback

      args = [current, total, url]
      args = args.first(callback.arity) if callback.lambda? && callback.arity.between?(0, 2)
      callback.call(*args)
    end

    def default_storage
      Storage.for(:aws, bucket: ENV.fetch("BRAVE_SEARCH_BUCKET", "brave-search-downloads"))
    end

    # Last path segment of +url+, or "document.pdf" when the URL has no usable
    # one. File.basename("/") is "/", so a root path needs the same fallback
    # as an empty path; otherwise the key would become "<folder>//".
    def extract_filename(url)
      filename = File.basename(URI(url).path.to_s)
      filename.empty? || filename == "/" ? "document.pdf" : filename
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BraveSearch
  # Rails integration: copies the API key from the application credentials
  # into the gem configuration.
  class Railtie < Rails::Railtie
    initializer "brave_search.configure" do |app|
      app.config.to_prepare do
        credential = Rails.application.credentials.brave_api_key

        # Leave the existing configuration untouched when no credential is set.
        BraveSearch.configure { |config| config.api_key = credential } if credential
      end
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BraveSearch
  # Wrapper around a parsed API response (a Hash with symbol keys) that adds
  # typed accessors plus export, download and summary helpers.
  class Results
    attr_reader :raw_data, :query

    # data - the parsed response; its :query entry is exposed as #query.
    def initialize(data)
      @raw_data = data
      @query = data[:query]
    end

    # Entries under web -> results, or an empty list.
    def web_results
      @raw_data.dig(:web, :results) || []
    end

    # Entries under news -> results, or an empty list.
    def news_results
      @raw_data.dig(:news, :results) || []
    end

    # Entries under videos -> results, or an empty list.
    def video_results
      @raw_data.dig(:videos, :results) || []
    end

    # Entries under images -> results, or an empty list.
    def image_results
      @raw_data.dig(:images, :results) || []
    end

    # Entries under mixed -> locations, or an empty list.
    def locations
      @raw_data.dig(:mixed, :locations) || []
    end

    # Value under mixed -> infobox, or nil.
    def infobox
      @raw_data.dig(:mixed, :infobox)
    end

    # Value under :spell, or nil.
    def spell
      @raw_data[:spell]
    end

    # True when there are no web, news, video or image results.
    def empty?
      web_results.empty? && news_results.empty? && video_results.empty? && image_results.empty?
    end

    # Combined number of web, news, video and image results.
    def count
      web_results.length + news_results.length + video_results.length + image_results.length
    end

    # Support pattern matching (Ruby 3+).
    #
    # keys - the keys the pattern asks for, or nil when every key is needed.
    #
    # Returns only the requested entries when +keys+ is given and the whole
    # response otherwise. (The slice used to be computed and then discarded.)
    def deconstruct_keys(keys)
      keys ? @raw_data.slice(*keys) : @raw_data
    end

    # Convert to hash for easy access
    def to_h
      @raw_data
    end

    def [](key)
      @raw_data[key]
    end

    # URLs of web results whose :url ends with ".pdf".
    def pdf_urls
      web_results.filter_map { |result| result[:url] if result[:url]&.end_with?(".pdf") }
    end

    # Downloads every URL from #pdf_urls through a PdfDownloader; the optional
    # block is passed on as its progress callback.
    def download_pdfs(storage: nil, folder: "pdfs", &progress_callback)
      downloader = PdfDownloader.new(storage: storage)
      downloader.batch_download(pdf_urls, folder: folder, &progress_callback)
    end

    # Exports these results with the exporter registered for +format+.
    def export(format:)
      Exporter.for(format).export(self)
    end

    # Exports these results and uploads them through +storage+ under +key+.
    def export_to_storage(format:, storage:, key:)
      Exporter.for(format).export_to_storage(self, storage: storage, key: key)
    end

    # Asks +client+'s summarizer to search and summarize this query, falling
    # back to "search results" when the response carried no query.
    def summarize_with(client)
      client.summarizer.search_and_summarize(q: @query || "search results")
    end

    # Titles and descriptions of web and news results, joined with spaces.
    def all_text_content
      (web_results + news_results)
        .flat_map { |entry| [entry[:title], entry[:description]].compact }
        .join(" ")
    end
  end
end
|