duck_search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +13 -0
- data/lib/duck_search/client.rb +96 -0
- data/lib/duck_search/errors.rb +15 -0
- data/lib/duck_search/result.rb +3 -0
- data/lib/duck_search.rb +10 -0
- metadata +141 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b30f0ccc51c4d05deef7e46119c461ab3f4d1267c7ba617e7385f24aa61ea4c4
|
|
4
|
+
data.tar.gz: eb98b71458dea2141e75467a942bf5ccf5cbfb3c72c3165cfd20af81ad9cfa7e
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: e512526190e909700a253240d032a77db2502301eb1599328c4f1bd4b46047f67b5ef3339c6db68e3e557fe7741b6b85f126f56c316c4b4a1afa683e3aa0b4ab
|
|
7
|
+
data.tar.gz: 34d234bab472e6e3db41deda79047f841cf3d443ab01c11b1c27948b0b450810be8f6acd582ff95bb0aebd569bad096b6e678b82775faf9610db12e7e8d95f1a
|
data/README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# DuckSearch
|
|
2
|
+
|
|
3
|
+
Lightweight DuckDuckGo HTML search client. Fetches and parses DuckDuckGo's no-JS HTML search endpoint, returning titles, snippets, and decoded URLs. No API key required.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
client = DuckSearch::Client.new
|
|
9
|
+
results = client.search("FG0326 specifications")
|
|
10
|
+
results.each do |r|
|
|
11
|
+
puts "#{r.title} - #{r.url}"
|
|
12
|
+
end
|
|
13
|
+
```
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
require "uri"
|
|
2
|
+
require "cgi"
|
|
3
|
+
require "faraday/retry"
|
|
4
|
+
|
|
5
|
+
module DuckSearch
  # Client for DuckDuckGo's HTML search endpoint.
  #
  # Sends a form-encoded POST to BASE_URL + SEARCH_PATH through a memoized
  # Faraday connection, parses the returned HTML with Nokogiri and returns at
  # most RESULT_CAP DuckSearch::Result objects.
  class Client
    BASE_URL = "https://html.duckduckgo.com"
    SEARCH_PATH = "/html"
    DEFAULT_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    # Maximum number of results returned by #search.
    RESULT_CAP = 5

    attr_reader :proxy, :timeout, :open_timeout, :user_agent

    # @param proxy [String, nil] proxy handed to Faraday; no proxy is set when nil
    # @param timeout [Numeric] value assigned to Faraday's `options.timeout`
    # @param open_timeout [Numeric] value assigned to Faraday's `options.open_timeout`
    # @param user_agent [String] value sent in the User-Agent request header
    def initialize(proxy: nil, timeout: 15, open_timeout: 10, user_agent: DEFAULT_USER_AGENT)
      @proxy = proxy
      @timeout = timeout
      @open_timeout = open_timeout
      @user_agent = user_agent
    end

    # Runs a search and returns the parsed results.
    #
    # @param query [String] the search terms, sent as the `q` form field
    # @return [Array<DuckSearch::Result>] at most RESULT_CAP results
    # @raise [HttpError] when the response is not successful (carries the
    #   status) or when Faraday raises (no status available)
    def search(query)
      response = connection.post(SEARCH_PATH) do |req|
        req.headers["User-Agent"] = user_agent
        req.headers["DNT"] = "1"
        req.headers["Content-Type"] = "application/x-www-form-urlencoded"
        # NOTE(review): b/kf/kh/kp/k1 are presumably DuckDuckGo settings
        # parameters; confirm their meaning against DuckDuckGo's docs.
        req.body = URI.encode_www_form(
          q: query,
          b: "",
          kf: "-1",
          kh: "1",
          kp: "1",
          k1: "-1"
        )
      end

      unless response.success?
        raise HttpError.new("DuckDuckGo returned HTTP #{response.status}",
                            status: response.status,
                            url: endpoint_url)
      end

      parse_html(response.body)
    rescue Faraday::Error => e
      # HttpError raised above is not a Faraday::Error, so it passes through
      # this handler untouched.
      raise HttpError.new("DuckDuckGo connection failed: #{e.message}",
                          url: endpoint_url)
    end

    private

    # Full URL of the search endpoint, reported in HttpError#url.
    def endpoint_url
      "#{BASE_URL}#{SEARCH_PATH}"
    end

    # Lazily builds and memoizes the Faraday connection, configured with the
    # optional proxy, the retry middleware for POST requests and the timeouts.
    def connection
      @connection ||= Faraday.new(url: BASE_URL) do |f|
        f.proxy = proxy if proxy
        f.request :retry, max: 2, interval: 0.5, backoff_factor: 2,
                          retry_statuses: [429, 500, 502, 503, 504],
                          methods: [:post]
        f.options.timeout = timeout
        f.options.open_timeout = open_timeout
        f.adapter Faraday.default_adapter
      end
    end

    # Extracts results from the HTML of a results page.
    #
    # Every `.result` node that contains a `.result__a` anchor becomes one
    # Result; nodes without an anchor are skipped.
    #
    # @param html_body [String, nil] raw response body
    # @return [Array<DuckSearch::Result>] at most RESULT_CAP results; empty
    #   when the body is nil or blank
    def parse_html(html_body)
      return [] if html_body.nil? || html_body.strip.empty?

      doc = Nokogiri::HTML(html_body)

      results = doc.css(".result").filter_map do |node|
        anchor = node.at_css(".result__a")
        next unless anchor

        DuckSearch::Result.new(
          title: anchor.text.strip,
          description: node.at_css(".result__snippet")&.text&.strip || "",
          url: clean_url(anchor["href"])
        )
      end

      results.first(RESULT_CAP)
    end

    # Resolves a result link to its target URL.
    #
    # Links carrying a `uddg` query parameter are unwrapped to that
    # parameter's value; any other link is returned stripped of surrounding
    # whitespace.
    #
    # @param href [String, nil] the anchor's href attribute
    # @return [String, nil] the target URL, or nil when href is nil or blank
    def clean_url(href)
      link = href.to_s.strip
      return nil if link.empty?
      return link unless link.include?("uddg=")

      # Hrefs that do not start with "http" get an "https:" prefix so that
      # URI.parse sees a scheme (e.g. for protocol-relative "//host/..." links).
      absolute = link.start_with?("http") ? link : "https:#{link}"
      query = URI.parse(absolute).query || ""

      # decode_www_form already percent-decodes the value. Decoding it a
      # second time would turn a literal "+" into a space and "%2F" into "/".
      _, target = URI.decode_www_form(query).find { |key, _| key == "uddg" }
      target || link
    rescue URI::InvalidURIError, ArgumentError
      # ArgumentError is what decode_www_form raises on malformed %-encoding;
      # fall back to the link as received rather than aborting the parse.
      link
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module DuckSearch
  # Base class for the errors defined by this library.
  class Error < StandardError; end

  # Error describing a failed HTTP request, optionally carrying the response
  # status and the requested URL.
  class HttpError < Error
    attr_reader :status, :url

    # @param message [String, nil] explicit message; a default is built when nil
    # @param status [Integer, nil] HTTP status of the failed response, if any
    # @param url [String, nil] URL that was requested, if known
    def initialize(message = nil, status: nil, url: nil)
      @status = status
      @url = url
      super(message || default_message)
    end

    private

    # Default message. The "(HTTP ...)" suffix is left out when no status is
    # known, so the text never ends in an empty "(HTTP )".
    def default_message
      status ? "DuckDuckGo request failed (HTTP #{status})" : "DuckDuckGo request failed"
    end
  end

  # Error class for parsing failures.
  class ParseError < Error; end
end
|
data/lib/duck_search.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: duck_search
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Wenmar Pro
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: faraday
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '2.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '2.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: faraday-retry
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '2.0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '2.0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: nokogiri
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '1.16'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '1.16'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: minitest
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '5.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '5.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: rake
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '13.0'
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '13.0'
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: webmock
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - "~>"
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '3.0'
|
|
89
|
+
type: :development
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - "~>"
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '3.0'
|
|
96
|
+
- !ruby/object:Gem::Dependency
|
|
97
|
+
name: irb
|
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
|
99
|
+
requirements:
|
|
100
|
+
- - "~>"
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: 1.18.0
|
|
103
|
+
type: :development
|
|
104
|
+
prerelease: false
|
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
+
requirements:
|
|
107
|
+
- - "~>"
|
|
108
|
+
- !ruby/object:Gem::Version
|
|
109
|
+
version: 1.18.0
|
|
110
|
+
description: Fetches and parses DuckDuckGo's no-JS HTML search results. Returns titles,
|
|
111
|
+
snippets, and URLs. No API key required.
|
|
112
|
+
executables: []
|
|
113
|
+
extensions: []
|
|
114
|
+
extra_rdoc_files: []
|
|
115
|
+
files:
|
|
116
|
+
- README.md
|
|
117
|
+
- lib/duck_search.rb
|
|
118
|
+
- lib/duck_search/client.rb
|
|
119
|
+
- lib/duck_search/errors.rb
|
|
120
|
+
- lib/duck_search/result.rb
|
|
121
|
+
licenses:
|
|
122
|
+
- MIT
|
|
123
|
+
metadata: {}
|
|
124
|
+
rdoc_options: []
|
|
125
|
+
require_paths:
|
|
126
|
+
- lib
|
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - ">="
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: '3.2'
|
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
|
+
requirements:
|
|
134
|
+
- - ">="
|
|
135
|
+
- !ruby/object:Gem::Version
|
|
136
|
+
version: '0'
|
|
137
|
+
requirements: []
|
|
138
|
+
rubygems_version: 4.0.10
|
|
139
|
+
specification_version: 4
|
|
140
|
+
summary: Lightweight DuckDuckGo HTML search client
|
|
141
|
+
test_files: []
|