duck_search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b30f0ccc51c4d05deef7e46119c461ab3f4d1267c7ba617e7385f24aa61ea4c4
4
+ data.tar.gz: eb98b71458dea2141e75467a942bf5ccf5cbfb3c72c3165cfd20af81ad9cfa7e
5
+ SHA512:
6
+ metadata.gz: e512526190e909700a253240d032a77db2502301eb1599328c4f1bd4b46047f67b5ef3339c6db68e3e557fe7741b6b85f126f56c316c4b4a1afa683e3aa0b4ab
7
+ data.tar.gz: 34d234bab472e6e3db41deda79047f841cf3d443ab01c11b1c27948b0b450810be8f6acd582ff95bb0aebd569bad096b6e678b82775faf9610db12e7e8d95f1a
data/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # DuckSearch
2
+
3
+ Lightweight DuckDuckGo HTML search client. Fetches and parses DuckDuckGo's no-JS HTML search endpoint, returning titles, snippets, and decoded URLs. No API key required.
4
+
5
+ ## Usage
6
+
7
+ ```ruby
8
+ client = DuckSearch::Client.new
9
+ results = client.search("FG0326 specifications")
10
+ results.each do |r|
11
+ puts "#{r.title} - #{r.url}"
12
+ end
13
+ ```
data/lib/duck_search/client.rb ADDED
@@ -0,0 +1,96 @@
1
+ require "uri"
2
+ require "cgi"
3
+ require "faraday/retry"
4
+
5
module DuckSearch
  # Client for DuckDuckGo's HTML search endpoint (BASE_URL + SEARCH_PATH).
  #
  # A search POSTs a form-encoded query, parses the returned HTML with
  # Nokogiri and yields at most RESULT_CAP DuckSearch::Result objects.
  class Client
    BASE_URL = "https://html.duckduckgo.com".freeze
    SEARCH_PATH = "/html".freeze
    DEFAULT_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".freeze
    # Maximum number of results returned by #search.
    RESULT_CAP = 5

    attr_reader :proxy, :timeout, :open_timeout, :user_agent

    # @param proxy [Object, nil] handed to Faraday as the connection proxy when truthy
    # @param timeout [Numeric] assigned to the Faraday connection's `timeout` option
    # @param open_timeout [Numeric] assigned to the Faraday connection's `open_timeout` option
    # @param user_agent [String] value of the User-Agent request header
    def initialize(proxy: nil, timeout: 15, open_timeout: 10, user_agent: DEFAULT_USER_AGENT)
      @proxy = proxy
      @timeout = timeout
      @open_timeout = open_timeout
      @user_agent = user_agent
    end

    # Runs a search and returns the parsed results.
    #
    # @param query [String] search terms, sent as the `q` form field
    # @return [Array<DuckSearch::Result>] at most RESULT_CAP results
    # @raise [DuckSearch::HttpError] on a non-success HTTP status (with
    #   `status` set) or when Faraday raises (with `status` nil)
    def search(query)
      response = connection.post(SEARCH_PATH) do |req|
        req.headers["User-Agent"] = user_agent
        req.headers["DNT"] = "1"
        req.headers["Content-Type"] = "application/x-www-form-urlencoded"
        # NOTE(review): b/kf/kh/kp/k1 are presumably DuckDuckGo settings
        # parameters; their exact meaning is not established by this file.
        req.body = URI.encode_www_form(
          q: query,
          b: "",
          kf: "-1",
          kh: "1",
          kp: "1",
          k1: "-1"
        )
      end

      unless response.success?
        raise HttpError.new("DuckDuckGo returned HTTP #{response.status}",
                            status: response.status,
                            url: endpoint_url)
      end

      parse_html(response.body)
    rescue Faraday::Error => e
      # Transport-level failures are wrapped so callers only need to rescue
      # DuckSearch errors.
      raise HttpError.new("DuckDuckGo connection failed: #{e.message}",
                          url: endpoint_url)
    end

    private

    # Absolute URL of the search endpoint, used in error reports.
    def endpoint_url
      "#{BASE_URL}#{SEARCH_PATH}"
    end

    # Lazily builds and memoizes the Faraday connection. POST requests are
    # retried up to twice on 429/500/502/503/504 with a 0.5s base interval
    # and a backoff factor of 2.
    def connection
      @connection ||= Faraday.new(url: BASE_URL) do |f|
        f.proxy = proxy if proxy
        f.request :retry, max: 2, interval: 0.5, backoff_factor: 2,
                          retry_statuses: [429, 500, 502, 503, 504],
                          methods: [:post]
        f.options.timeout = timeout
        f.options.open_timeout = open_timeout
        f.adapter Faraday.default_adapter
      end
    end

    # Extracts results from a response body.
    #
    # Every `.result` node that contains a `.result__a` anchor becomes a
    # Result; nodes without an anchor are skipped. A missing snippet yields an
    # empty description, and a missing/blank href yields a nil url.
    #
    # @param html_body [String, nil]
    # @return [Array<DuckSearch::Result>] at most RESULT_CAP entries
    def parse_html(html_body)
      return [] if html_body.nil? || html_body.strip.empty?

      doc = Nokogiri::HTML(html_body)

      results = doc.css(".result").filter_map do |node|
        anchor = node.at_css(".result__a")
        next unless anchor

        snippet_node = node.at_css(".result__snippet")

        DuckSearch::Result.new(
          title: anchor.text.strip,
          description: snippet_node&.text&.strip || "",
          url: clean_url(anchor["href"])
        )
      end

      results.first(RESULT_CAP)
    end

    # Resolves a result link to its destination URL.
    #
    # Links that carry the destination in a `uddg` query parameter are
    # unwrapped; any other link is returned stripped of surrounding
    # whitespace.
    #
    # @param href [String, nil]
    # @return [String, nil] nil when href is nil or blank
    def clean_url(href)
      return nil if href.nil? || href.strip.empty?

      link = href.strip
      return link unless link.include?("uddg=")

      # Hrefs that do not start with "http" (e.g. "//host/l/?uddg=...") get
      # an "https:" prefix so URI.parse can split off the query string.
      absolute = link.start_with?("http") ? link : "https:#{link}"
      query = URI.parse(absolute).query || ""

      # decode_www_form already percent-decodes the value. Decoding it a
      # second time would corrupt targets that legitimately contain "%xx"
      # or "+" (e.g. "%2520" would collapse to a space).
      _, target = URI.decode_www_form(query).find { |key, _| key == "uddg" }
      target || link
    rescue URI::InvalidURIError
      # Unparseable link: hand back the stripped href rather than failing the
      # whole search.
      href.strip
    end
  end
end
data/lib/duck_search/errors.rb ADDED
@@ -0,0 +1,15 @@
1
module DuckSearch
  # Base class for every error defined by this library.
  class Error < StandardError; end

  # Raised for failed HTTP exchanges. Carries the HTTP status (nil when no
  # status was supplied) and the URL that was being requested.
  class HttpError < Error
    attr_reader :status, :url

    # @param message [String, nil] explicit message; a default is built when nil
    # @param status [Integer, nil] HTTP status code, if one is known
    # @param url [String, nil] URL of the failed request
    def initialize(message = nil, status: nil, url: nil)
      @status = status
      @url = url
      super(message || default_message)
    end

    private

    # Default message; the "(HTTP nnn)" suffix is only added when a status is
    # known, so a status-less error does not read "(HTTP )".
    def default_message
      base = "DuckDuckGo request failed"
      status ? "#{base} (HTTP #{status})" : base
    end
  end

  # NOTE(review): presumably intended for result-parsing failures; confirm
  # where (if anywhere) it is raised.
  class ParseError < Error; end
end
data/lib/duck_search/result.rb ADDED
@@ -0,0 +1,3 @@
1
module DuckSearch
  # A single search hit with three fields: title, description and url.
  # Instances are built with keyword arguments only, e.g.
  #   Result.new(title: "...", description: "...", url: "...")
  Result = Struct.new(*%i[title description url], keyword_init: true)
end
data/lib/duck_search.rb ADDED
@@ -0,0 +1,10 @@
1
+ require "faraday"
2
+ require "nokogiri"
3
+
4
+ require_relative "duck_search/errors"
5
+ require_relative "duck_search/result"
6
+ require_relative "duck_search/client"
7
+
8
module DuckSearch
  # Version of the gem. Frozen so the shared constant string cannot be
  # mutated in place by callers.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: duck_search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Wenmar Pro
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: faraday
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '2.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: faraday-retry
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '2.0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '2.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: nokogiri
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '1.16'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '1.16'
54
+ - !ruby/object:Gem::Dependency
55
+ name: minitest
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '5.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '5.0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: rake
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '13.0'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '13.0'
82
+ - !ruby/object:Gem::Dependency
83
+ name: webmock
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '3.0'
96
+ - !ruby/object:Gem::Dependency
97
+ name: irb
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: 1.18.0
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 1.18.0
110
+ description: Fetches and parses DuckDuckGo's no-JS HTML search results. Returns titles,
111
+ snippets, and URLs. No API key required.
112
+ executables: []
113
+ extensions: []
114
+ extra_rdoc_files: []
115
+ files:
116
+ - README.md
117
+ - lib/duck_search.rb
118
+ - lib/duck_search/client.rb
119
+ - lib/duck_search/errors.rb
120
+ - lib/duck_search/result.rb
121
+ licenses:
122
+ - MIT
123
+ metadata: {}
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '3.2'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubygems_version: 4.0.10
139
+ specification_version: 4
140
+ summary: Lightweight DuckDuckGo HTML search client
141
+ test_files: []