ruby-web-search 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 43274af7b83bb539b1fe626b071703383557dee7
4
+ data.tar.gz: 3912a6170ba16659fd750aef7b9de5be2dc197ad
5
+ SHA512:
6
+ metadata.gz: 4ded64c95d7103196627d4380fe10f0721e5324f6e9d1c51f972923f5ed6d88822c70a4ebf39ec4fd3641eb186b8eb75bc7159f829c161564d677b41c3cf0fe5
7
+ data.tar.gz: db41a3c20a6706793a494fb8831b32c2d0e7407381c67e013ae492beeb4fe0da4efbeac66a43db306265642b0ad5f8bf751d374a915ccfb18c28778d46263fd0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Matt Aimonetti
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Ruby Web Search
2
+
3
+ This gem allows you to query the Google search engine from Ruby.
4
+ So far, only Google is supported.
5
+
6
+
7
+ Simple example on how to query Google:
8
+
9
+ >> require 'ruby-web-search'
10
+ => true
11
+ >> response = RubyWebSearch::Google.search(:query => "Natalie Portman")
12
+ >> response.results
13
+ => [{:content=>"<b>Natalie Portman</b>, Star Wars, Phantom Menace, Attack of the Clones, Amidala, Leon, Professional, Where The Heart Is, Anywhere But Here, Seagull, Heat, <b>...</b>", :title=>"Natalie Portman . Com - News", :url=>"http://www.natalieportman.com/", :domain=>"www.natalieportman.com", :cache_url=>"http://www.google.com/search?q=cache:9hGoJVGBJ2sJ:www.natalieportman.com"}, {:content=>"<b>Natalie Portman</b> was born on June 9th, 1981 in Jerusalem, Israel, as the... Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, <b>...</b>", :title=>"Natalie Portman", :url=>"http://www.imdb.com/name/nm0000204/", :domain=>"www.imdb.com", :cache_url=>"http://www.google.com/search?q=cache:JLzGjsYYdlkJ:www.imdb.com"}, {:content=>"<b>Natalie Portman</b> (Hebrew: \327\240\327\230\327\234\327\231 \327\244\327\225\327\250\327\230\327\236\327\237\342\200\216; born <b>Natalie</b> Hershlag June 9, 1981) is an Israeli-American actress. <b>Portman</b> began her career in the early 1990s, <b>...</b>", :title=>"Natalie Portman - Wikipedia, the free encyclopedia", :url=>"http://en.wikipedia.org/wiki/Natalie_Portman", :domain=>"en.wikipedia.org", :cache_url=>"http://www.google.com/search?q=cache:32A4VEkC23gJ:en.wikipedia.org"}, {:content=>"Aug 30, 2008 <b>...</b> media on Miss <b>Portman</b>. You may recognize <b>Natalie</b> for her roles in <b>....</b> is in in no way affiliated with <b>Natalie Portman</b> or her management. <b>...</b>", :title=>"Natalie Portman ORG ++{natalie-p.org} | your premiere NATALIE ...", :url=>"http://www.natalie-p.org/", :domain=>"www.natalie-p.org", :cache_url=>"http://www.google.com/search?q=cache:wv-CVcMW2SEJ:www.natalie-p.org"}]
14
+
15
+ A Google search returns a Response instance. Call `results` on the response to get the array of results.
16
+ A Result is a simple hash object with a few keys available:
17
+
18
+ * title Title of the result
19
+ * url Url of the result
20
+ * domain Root url of the result
21
+ * content Snippet of the result content
22
+ * cache\_url Google cache url
23
+
24
+
25
+ By default, only the top 4 results are retrieved. You can specify the exact number of results you want by passing the size argument.
26
+ RubyWebSearch::Google.search(:query => "Natalie Portman", :size => 10)
27
+
28
+ ## TODO
29
+
30
+ * Full support of the google api
31
+ * support more search engines (Yahoo, live etc...)
32
+
33
+ ## Experimentations
34
+
35
+ Here are some benchmarks. It looks like running multiple concurrent threads is often not worth it:
36
+ http://gist.github.com/45350
37
+ warmed up jruby benchmarks
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'rubygems/specification'
4
+ require 'date'
5
+ require 'spec/rake/spectask'
6
+
7
# Gem identity, shared by the specification and the tasks below.
GEM         = "ruby-web-search"
GEM_VERSION = "0.0.2"
AUTHOR      = "Matt Aimonetti"
EMAIL       = "mattaimonetti@gmail.com"
HOMEPAGE    = "http://merbist.com"
SUMMARY     = "A Ruby gem that provides a way to retrieve search results via the main search engines using Ruby"

# Gem specification used by the packaging and gemspec-generation tasks.
spec = Gem::Specification.new do |s|
  s.name             = GEM
  s.version          = GEM_VERSION
  s.platform         = Gem::Platform::RUBY
  s.has_rdoc         = true
  s.extra_rdoc_files = ["LICENSE"]
  s.summary          = SUMMARY
  s.description      = s.summary
  s.author           = AUTHOR
  s.email            = EMAIL
  s.homepage         = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "curb"
  s.add_dependency "json"

  s.require_path = 'lib'
  s.autorequire  = GEM
  s.files        = ["LICENSE", "README.markdown", "Rakefile"] + Dir["{lib,spec}/**/*"]
end

# Running bare `rake` runs the specs.
task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts  = ["-fs", "--color"]
end

# Defines the :package task, which the :install task below depends on.
Rake::GemPackageTask.new(spec) { |pkg| pkg.gem_spec = spec }

desc "install the gem locally"
task :install => :package do
  sh "sudo gem install pkg/#{GEM}-#{GEM_VERSION}"
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") { |file| file.puts spec.to_ruby }
end
@@ -0,0 +1,68 @@
1
+ require 'net/http'
2
+
3
# Small Net::HTTP-backed stand-in for the part of the curb gem's
# Curl::Easy interface that this library calls.
module Curl
  # Error classes named after curb's. #perform and #http_post wrap every
  # StandardError they hit in HttpError.
  module Err
    class CurlError < RuntimeError; end
    class GotNothingError < CurlError; end
    class ConnectionFailedError < CurlError; end
    class TimeoutError < CurlError; end
    class HttpError < CurlError; end
  end

  class Easy
    attr_accessor :timeout, :url, :body_str, :headers, :conn

    # ==== Params
    # url<String>:: address to request (may be assigned later through #url=)
    #
    # Yields the new instance when a block is given, so callers can configure
    # it inline, e.g. Curl::Easy.new(url) { |c| c.headers["Referer"] = "..." }.
    def initialize(url = nil)
      @url = url
      @headers = {}
      @body_str = nil
      yield self if block_given?
    end

    #Not yet implemented.. only needed for importing from LibraryThing
    def header_str
      ""
    end

    # Builds an instance for +url+, yields it for configuration, performs a
    # GET and returns the instance (the body is available through #body_str).
    #
    #Curl::Easy.perform("http://old-xisbn.oclc.org/xid/isbn/1234").body_str
    #Curl::Easy.perform("http://old-xisbn.oclc.org/xid/isbn/1234").header_str
    def self.perform(url)
      c = self.new(url)
      yield(c) if block_given?
      c.perform
      c
    end

    # Alias of .perform.
    def self.http_get(url, &block)
      perform(url, &block)
    end

    # Builds an instance for +url+, yields it for configuration, POSTs
    # +options+ and returns the instance.
    #
    #Curl::Easy.http_post("http://foo.com", {"img_url" => url}) { |r| r.headers = 'Content-Type: text/json' }.body_str)
    def self.http_post(url, options = {})
      c = self.new(url)
      yield(c) if block_given?
      c.http_post(options)
      c
    end

    # Issues a GET for #url, sending #headers, and stores the response body in
    # #body_str.
    #
    # ==== Raises
    # Curl::Err::HttpError:: for any StandardError raised while parsing the
    #   url or talking to the server.
    def perform
      uri = URI.parse(url)
      res = Net::HTTP.start(uri.host, uri.port, connection_options(uri)) { |http|
        http.request(Net::HTTP::Get.new(uri.request_uri, request_headers))
      }
      @body_str = res.body
    rescue => e
      raise ::Curl::Err::HttpError, e.message
    end

    # POSTs +options+ to #url and stores the response body in #body_str.
    # A Hash is sent form-encoded; anything else is sent as its string form.
    #
    # ==== Raises
    # Curl::Err::HttpError:: for any StandardError raised while parsing the
    #   url or talking to the server.
    def http_post(options = {})
      uri = URI.parse(url)
      payload = options.is_a?(Hash) ? URI.encode_www_form(options) : options.to_s
      # Net::HTTP#post returns only the response object on Ruby 1.9+, so the
      # body has to be read from it rather than from a second return value.
      res = Net::HTTP.start(uri.host, uri.port, connection_options(uri)) { |http|
        http.post(uri.request_uri, payload, request_headers)
      }
      @body_str = res.body
    rescue => e
      raise ::Curl::Err::HttpError, e.message
    end

    private

    # Options for Net::HTTP.start: TLS for https URLs, plus #timeout (when
    # set) as both the open and the read timeout.
    def connection_options(uri)
      opts = { :use_ssl => (uri.scheme == "https") }
      if timeout
        opts[:open_timeout] = timeout
        opts[:read_timeout] = timeout
      end
      opts
    end

    # #headers as a Hash suitable for Net::HTTP. A single "Name: value"
    # string (the form shown in the http_post example above) is accepted too.
    def request_headers
      case headers
      when Hash
        headers
      when String
        name, value = headers.split(/:\s*/, 2)
        value ? { name => value } : {}
      else
        {}
      end
    end
  end
end
@@ -0,0 +1,544 @@
1
+ require 'rubygems'
2
+ require 'cgi'
3
+ require 'json'
4
+
5
+ # begin
6
+ # gem 'curb'
7
+ # require 'curb'
8
+ # rescue LoadError
9
+ require File.join(File.dirname(__FILE__), 'curbemu')
10
+ # end
11
+
12
+
13
+ $RUBY_WEB_SEARCH_DEBUG = true
14
+
15
+ class RubyWebSearch
16
+
17
+ # http://code.google.com/apis/ajaxsearch/documentation/reference.html
18
# Client for the Google AJAX Search API.
class Google

  # Builds a Query from +options+ (see Query#initialize) and runs it with one
  # thread per result page. Returns the Response.
  def self.search(options={})
    Query.new(options).execute
  end

  # Same as .search, but the pages are fetched one after the other.
  def self.unthreaded_search(options={})
    Query.new(options).execute_unthreaded
  end

  class Query
    attr_accessor :query, :start_index, :result_size, :filter, :country_code, :language_code, :global
    attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
    attr_accessor :size, :cursor, :custom_request_url, :response

    class Error < StandardError; end

    SEARCH_BASE_URLS = {  :web    => "http://ajax.googleapis.com/ajax/services/search/web",
                          :local  => "http://ajax.googleapis.com/ajax/services/search/local",
                          :video  => "http://ajax.googleapis.com/ajax/services/search/video",
                          :blog   => "http://ajax.googleapis.com/ajax/services/search/blogs",
                          :news   => "http://ajax.googleapis.com/ajax/services/search/news",
                          :book   => "http://ajax.googleapis.com/ajax/services/search/books",
                          :image  => "http://ajax.googleapis.com/ajax/services/search/images",
                          :patent => "http://ajax.googleapis.com/ajax/services/search/patent"
                       }

    # Offset step between two pages: google limits us to 8 responses per request.
    PAGE_SIZE = 8

    #
    # You can overwrite the query building process by passing the request url to use.
    #
    # ==== Params
    #   query<String>
    #   start_index<Integer>
    #   size<Integer>           number of results default: 4
    #   filter
    #   country_code<String>    2 letters language code for the country you want
    #                           to limit to
    #   language_code<String>   (Web only)
    #   safe_search<String>     active, moderate or off. Default: active (web only)
    #   custom_search_engine_id<String> optional argument supplying the unique id for
    #                           the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
    #                           (web only)
    #   custom_request_url<String> complete request url; when given, every other
    #                           option is ignored and exactly one request is made
    #
    # ==== Raises
    #   Google::Query::Error    when neither :query nor :custom_request_url is given
    #
    def initialize(options={})
      if options[:custom_request_url]
        # Earlier versions read the url from :request_url by mistake; that key
        # is still honoured so existing callers keep working.
        @custom_request_url = options[:request_url] || options[:custom_request_url]
      else
        @query = options[:query]
        raise Error, "You need to pass a query" unless @query
        @cursor = options[:start_index] || 0
        @start_index = @cursor
        @result_size = options[:result_size]
        @filter = options[:filter]
        @type = options[:type] || :web
        @country_code = options[:country_code]
        @language_code = options[:language_code] ? "lang_#{options[:language_code]}" : nil
        @safe_search = options[:safe_search]
        @custom_search_engine_id = options[:custom_search_engine_id]
        @version = options[:version] || "1.0"
        @referer = options[:referer] || "http://github.com/mattetti/"
        @size = options[:size] || 4
        @global = options[:global]
        @result_size = "large" if size > 4 # increase the result set size to avoid making too many requests
        @size = 8 if (@result_size == "large" && size < 8)
      end
      @response ||= Response.new(:query => (query || custom_request_url), :size => size)
    end

    # Returns the url of the next request (the custom url when one was given)
    # and remembers it in #request_url.
    def build_request
      return custom_request_url if custom_request_url

      start_param = cursor > 0 ? "&start=#{cursor}" : ""
      @request_url = "#{search_url}#{start_param}#{locale_params}"

      puts request_url if $RUBY_WEB_SEARCH_DEBUG
      request_url
    end

    # Returns the urls of every page needed to collect #size results.
    # Advances #cursor by PAGE_SIZE for each url produced.
    def build_requests
      return [custom_request_url] if custom_request_url

      # create an array of requests based on the fact that google limits
      # us to 8 responses per request but let us use a cursor
      requests = Array.new((size / PAGE_SIZE.to_f).ceil) do
        url = "#{search_url}#{locale_params}&start=#{cursor}"
        @cursor += PAGE_SIZE
        url
      end

      puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
      requests
    end

    # Makes the request to Google
    # if a larger set was requested than what is returned,
    # more requests are made until the correct amount is available.
    #
    # Stops as soon as a request adds no result, so a failing or exhausted
    # search returns what was collected instead of requesting forever.
    # Returns the Response.
    def execute_unthreaded
      loop do
        collected = response.results.size
        response.process(fetch_json(build_request))
        fetched = response.results.size - collected

        break if custom_request_url || fetched <= 0 || response.results.size >= size
        # The next page starts right after the last result received, matching
        # the zero-based offsets used by build_requests.
        puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
        @cursor += fetched
      end
      response.limit(size)
    end

    # Makes the requests to Google, one thread per page (see build_requests),
    # and merges the pages in request order. Returns the Response.
    def execute
      threads = build_requests.map do |req|
        Thread.new { fetch_json(req) }
      end
      threads.each do |t|
        response.process(t.value)
      end
      response.limit(size)
    end

    private

    # Base url plus the parameters every request carries.
    def search_url
      url = "#{SEARCH_BASE_URLS[type]}?v=#{version}&q=#{CGI.escape(query)}"
      result_size ? "#{url}&rsz=#{result_size}" : url
    end

    # Optional language / country / global parameters, already "&"-prefixed.
    def locale_params
      params = []
      params << "&lr=#{language_code}" if language_code
      params << "&hl=#{country_code}" if country_code
      params << "&gl=#{global}" if global
      params.join
    end

    # GETs +url+ with the Referer header set and returns the decoded JSON,
    # or an empty Hash when the body is empty.
    def fetch_json(url)
      curl = ::Curl::Easy.new(url)
      # Set directly rather than in a block to Easy.new, so the header is
      # recorded whether or not the Curl implementation yields from new.
      curl.headers["Referer"] = referer
      curl.perform
      body = curl.body_str
      # JSON.parse instead of JSON.load: the payload comes from the network.
      (body.nil? || body.empty?) ? {} : JSON.parse(body)
    end

  end #of Query


  # Accumulates the results of one or more raw API responses.
  class Response
    attr_reader :results, :status, :query, :size, :estimated_result_count

    def initialize(google_raw_response={})
      @results = []
      process(google_raw_response) unless google_raw_response.empty?
    end

    # Merges +google_raw_response+ into this response and returns self.
    #
    # The hash is either the decoded API payload (string keys) or the
    # {:query, :size} metadata passed by Query#initialize. Results are only
    # appended when the payload reports responseStatus 200.
    def process(google_raw_response={})
      raw = google_raw_response.is_a?(Hash) ? google_raw_response : {}
      @query    ||= raw[:query]
      @size     ||= raw[:size]
      @results  ||= []
      @status     = raw["responseStatus"]

      data = raw["responseData"]
      if data && status == 200
        @estimated_result_count ||= data["cursor"]["estimatedResultCount"] if data["cursor"]
        @results += Array(data["results"]).map do |r|
          {
            :title      => r["titleNoFormatting"],
            :url        => r["unescapedUrl"],
            :cache_url  => r["cacheUrl"],
            :content    => r["content"],
            :domain     => r["visibleUrl"]
          }
        end
      end
      self
    end

    # Keeps only the first +req_size+ results (all of them when +req_size+ is
    # nil) and returns self.
    def limit(req_size)
      @results ||= []
      @results = @results[0...req_size] unless req_size.nil?
      self
    end

  end #of Response

end #of Google
198
+
199
+ # http://developer.yahoo.com/search/boss/
200
# Client for the Yahoo BOSS web search API.
class Yahoo

  # Builds a Query from +options+ (see Query#initialize) and runs it with one
  # thread per result page. Returns the Response.
  def self.search(options={})
    Query.new(options).execute
  end

  # Same as .search, but the pages are fetched one after the other.
  def self.unthreaded_search(options={})
    Query.new(options).execute_unthreaded
  end

  class Query
    attr_accessor :query, :start_index, :filter, :country_code, :language_code
    attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
    attr_accessor :size, :cursor, :custom_request_url, :response, :api_key

    class Error < StandardError; end

    SEARCH_BASE_URLS = {  :web    => "http://boss.yahooapis.com/ysearch/web",
                       }

    # limiting to 10 responses per request
    PAGE_SIZE = 10

    #
    # You can overwrite the query building process by passing the request url to use.
    #
    # ==== Params
    #   query<String>
    #   api_key<String>
    #   start_index<Integer>
    #   size<Integer>           number of results default: 10
    #   filter
    #   country_code<String>    2 letters language code for the country you want
    #                           to limit to
    #   language_code<String>   (Web only)
    #   safe_search<String>     active, moderate or off. Default: active (web only)
    #   custom_search_engine_id<String> optional argument supplying the unique id for
    #                           the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
    #                           (web only)
    #   custom_request_url<String> complete request url; when given, every other
    #                           option is ignored and exactly one request is made
    #
    # ==== Raises
    #   Yahoo::Query::Error     when :query or :api_key is missing (and no
    #                           :custom_request_url is given)
    #
    def initialize(options={})
      if options[:custom_request_url]
        # Earlier versions read the url from :request_url by mistake; that key
        # is still honoured so existing callers keep working.
        @custom_request_url = options[:request_url] || options[:custom_request_url]
      else
        @query = options[:query]
        raise Error, "You need to pass a query" unless @query
        @cursor = options[:start_index] || 0
        @start_index = @cursor
        @filter = options[:filter]
        @type = options[:type] || :web
        @country_code = options[:country_code]
        @language_code = options[:language_code]
        @safe_search = options[:safe_search]
        @custom_search_engine_id = options[:custom_search_engine_id]
        @version = options[:version] || "1"
        @referer = options[:referer] || "http://github.com/mattetti/"
        @api_key = options[:api_key]
        raise Error, "You need to pass an api key" unless @api_key
        @size = options[:size] || 10
      end
      @response ||= Response.new(:query => (query || custom_request_url), :size => size)
    end

    # Returns the url of the next request (the custom url when one was given)
    # and remembers it in #request_url.
    def build_request
      return custom_request_url if custom_request_url

      count_param = size ? "&count=#{size}" : ""
      start_param = cursor > 0 ? "&start=#{cursor}" : ""
      @request_url = "#{search_url}#{count_param}#{start_param}#{locale_params}"

      puts request_url if $RUBY_WEB_SEARCH_DEBUG
      request_url
    end

    # Returns the urls of every page needed to collect #size results.
    # Each page asks for at most PAGE_SIZE results (the last one only for
    # what is still missing) and #cursor advances by PAGE_SIZE per url.
    def build_requests
      return [custom_request_url] if custom_request_url

      remaining = size
      requests = Array.new((size / PAGE_SIZE.to_f).ceil) do
        count = [remaining, PAGE_SIZE].min
        remaining -= count
        url = "#{search_url}&count=#{count}#{locale_params}"
        url << "&start=#{cursor}" if cursor > 0
        @cursor += PAGE_SIZE
        url
      end

      puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
      requests
    end

    # Makes the request to Yahoo
    # if a larger set was requested than what is returned,
    # more requests are made until the correct amount is available.
    #
    # Stops as soon as a request adds no result, so a failing or exhausted
    # search returns what was collected instead of requesting forever.
    # Returns the Response.
    def execute_unthreaded
      loop do
        collected = response.results.size
        response.process(fetch_json(build_request))
        fetched = response.results.size - collected

        break if custom_request_url || fetched <= 0 || response.results.size >= size
        # The next page starts right after the last result received, matching
        # the zero-based offsets used by build_requests.
        puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
        @cursor += fetched
      end
      response.limit(size)
    end

    # Makes the requests to Yahoo, one thread per page (see build_requests),
    # and merges the pages in request order. Returns the Response.
    def execute
      threads = build_requests.map do |req|
        Thread.new { fetch_json(req) }
      end
      threads.each do |t|
        response.process(t.value)
      end
      response.limit(size)
    end

    private

    # Base url plus the parameters every request carries.
    def search_url
      "#{SEARCH_BASE_URLS[type]}/v#{version}/#{CGI.escape(query)}?appid=#{api_key}"
    end

    # Language / region parameters; only sent when both are set.
    def locale_params
      (language_code && country_code) ? "&lang=#{language_code}&region=#{country_code}" : ""
    end

    # GETs +url+ with the Referer header set and returns the decoded JSON,
    # or an empty Hash when the body is empty.
    def fetch_json(url)
      curl = ::Curl::Easy.new(url)
      # Set directly rather than in a block to Easy.new, so the header is
      # recorded whether or not the Curl implementation yields from new.
      curl.headers["Referer"] = referer
      curl.perform
      body = curl.body_str
      # JSON.parse instead of JSON.load: the payload comes from the network.
      (body.nil? || body.empty?) ? {} : JSON.parse(body)
    end

  end #of Query


  # Accumulates the results of one or more raw API responses.
  class Response
    attr_reader :results, :status, :query, :size, :estimated_result_count

    def initialize(google_raw_response={})
      @results = []
      process(google_raw_response) unless google_raw_response.empty?
    end

    # Merges +google_raw_response+ into this response and returns self.
    #
    # The hash is either the decoded API payload (string keys) or the
    # {:query, :size} metadata passed by Query#initialize. Results are only
    # appended when the payload reports responsecode 200.
    def process(google_raw_response={})
      raw = google_raw_response.is_a?(Hash) ? google_raw_response : {}
      @query    ||= raw[:query]
      @size     ||= raw[:size]
      @results  ||= []

      ysearch = raw["ysearchresponse"]
      @status = ysearch["responsecode"].to_i if ysearch
      if ysearch && ysearch["resultset_web"] && status == 200
        # Must be the instance variable: a bare local assignment here would
        # leave the estimated_result_count reader nil.
        @estimated_result_count ||= ysearch["totalhits"]
        @results += ysearch["resultset_web"].map do |r|
          {
            :title      => r["title"],
            :url        => r["clickurl"],
            :cache_url  => r["cacheUrl"],
            :content    => r["abstract"],
            :domain     => r["url"]
          }
        end
      end
      self
    end

    # Keeps only the first +req_size+ results (all of them when +req_size+ is
    # nil) and returns self.
    def limit(req_size)
      @results ||= []
      @results = @results[0...req_size] unless req_size.nil?
      self
    end

  end #of Response

end #of Yahoo
369
+
370
+ # http://www.bing.com/developers
371
# Client for the Bing (live.net) JSON web search API.
class Bing

  # Builds a Query from +options+ (see Query#initialize) and runs it with one
  # thread per result page. Returns the Response.
  def self.search(options={})
    Query.new(options).execute
  end

  # Same as .search, but the pages are fetched one after the other.
  def self.unthreaded_search(options={})
    Query.new(options).execute_unthreaded
  end

  class Query
    attr_accessor :query, :start_index, :filter, :country_code, :language_code
    attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
    attr_accessor :size, :cursor, :custom_request_url, :response, :api_key

    class Error < StandardError; end

    SEARCH_BASE_URLS = {  :web    => "http://api.search.live.net/json.aspx?sources=web",
                       }

    # limiting to 10 responses per request
    PAGE_SIZE = 10

    #
    # You can overwrite the query building process by passing the request url to use.
    #
    # ==== Params
    #   query<String>
    #   api_key<String>
    #   start_index<Integer>
    #   size<Integer>           number of results default: 10
    #   filter
    #   country_code<String>    2 letters language code for the country you want
    #                           to limit to
    #   language_code<String>   (Web only)
    #   safe_search<String>     active, moderate or off. Default: active (web only)
    #   custom_search_engine_id<String> optional argument supplying the unique id for
    #                           the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
    #                           (web only)
    #   custom_request_url<String> complete request url; when given, every other
    #                           option is ignored and exactly one request is made
    #
    # ==== Raises
    #   Bing::Query::Error      when :query or :api_key is missing (and no
    #                           :custom_request_url is given)
    #
    def initialize(options={})
      if options[:custom_request_url]
        # Earlier versions read the url from :request_url by mistake; that key
        # is still honoured so existing callers keep working.
        @custom_request_url = options[:request_url] || options[:custom_request_url]
      else
        @query = options[:query]
        raise Error, "You need to pass a query" unless @query
        @cursor = options[:start_index] || 0
        @start_index = @cursor
        @filter = options[:filter]
        @type = options[:type] || :web
        @country_code = options[:country_code]
        @language_code = options[:language_code]
        @safe_search = options[:safe_search]
        @custom_search_engine_id = options[:custom_search_engine_id]
        @version = options[:version] || "1"
        @referer = options[:referer] || "http://github.com/mattetti/"
        @api_key = options[:api_key]
        raise Error, "You need to pass an api key" unless @api_key
        @size = options[:size] || 10
      end
      @response ||= Response.new(:query => (query || custom_request_url), :size => size)
    end

    # Returns the url of the next request (the custom url when one was given)
    # and remembers it in #request_url.
    def build_request
      return custom_request_url if custom_request_url

      count_param  = size ? "&web.count=#{size}" : ""
      offset_param = cursor > 0 ? "&web.offset=#{cursor}" : ""
      @request_url = "#{search_url}#{count_param}#{offset_param}#{locale_params}"

      puts request_url if $RUBY_WEB_SEARCH_DEBUG
      request_url
    end

    # Returns the urls of every page needed to collect #size results.
    # Each page asks for at most PAGE_SIZE results (the last one only for
    # what is still missing) and #cursor advances by PAGE_SIZE per url.
    def build_requests
      return [custom_request_url] if custom_request_url

      remaining = size
      requests = Array.new((size / PAGE_SIZE.to_f).ceil) do
        count = [remaining, PAGE_SIZE].min
        remaining -= count
        url = "#{search_url}&web.count=#{count}#{locale_params}"
        url << "&web.offset=#{cursor}" if cursor > 0
        @cursor += PAGE_SIZE
        url
      end

      puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
      requests
    end

    # Makes the request to Bing
    # if a larger set was requested than what is returned,
    # more requests are made until the correct amount is available.
    #
    # Stops as soon as a request adds no result, so a failing or exhausted
    # search returns what was collected instead of requesting forever.
    # Returns the Response.
    def execute_unthreaded
      loop do
        collected = response.results.size
        response.process(fetch_json(build_request))
        fetched = response.results.size - collected

        break if custom_request_url || fetched <= 0 || response.results.size >= size
        # The next page starts right after the last result received, matching
        # the zero-based offsets used by build_requests.
        puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
        @cursor += fetched
      end
      response.limit(size)
    end

    # Makes the requests to Bing, one thread per page (see build_requests),
    # and merges the pages in request order. Returns the Response.
    def execute
      threads = build_requests.map do |req|
        Thread.new { fetch_json(req) }
      end
      threads.each do |t|
        response.process(t.value)
      end
      response.limit(size)
    end

    private

    # Base url plus the parameters every request carries.
    def search_url
      "#{SEARCH_BASE_URLS[type]}&query=#{CGI.escape(query)}&appid=#{api_key}"
    end

    # Market parameter; only sent when both language and country are set.
    def locale_params
      (language_code && country_code) ? "&market=#{language_code}-#{country_code}" : ""
    end

    # GETs +url+ with the Referer header set and returns the decoded JSON,
    # or an empty Hash when the body is empty.
    def fetch_json(url)
      curl = ::Curl::Easy.new(url)
      # Set directly rather than in a block to Easy.new, so the header is
      # recorded whether or not the Curl implementation yields from new.
      curl.headers["Referer"] = referer
      curl.perform
      body = curl.body_str
      # JSON.parse instead of JSON.load: the payload comes from the network.
      (body.nil? || body.empty?) ? {} : JSON.parse(body)
    end

  end #of Query


  # Accumulates the results of one or more raw API responses.
  class Response
    attr_reader :results, :status, :query, :size, :estimated_result_count

    def initialize(google_raw_response={})
      @results = []
      process(google_raw_response) unless google_raw_response.empty?
    end

    # Merges +google_raw_response+ into this response and returns self.
    #
    # The hash is either the decoded API payload (string keys) or the
    # {:query, :size} metadata passed by Query#initialize. Results are
    # appended whenever the payload contains SearchResponse/Web/Results.
    def process(google_raw_response={})
      raw = google_raw_response.is_a?(Hash) ? google_raw_response : {}
      @query    ||= raw[:query]
      @size     ||= raw[:size]
      @results  ||= []
      # NOTE(review): status is hard-coded; nothing in the payload is
      # inspected to detect an error response.
      @status     = 200

      web = raw["SearchResponse"] && raw["SearchResponse"]["Web"]
      if web && web["Results"] && status == 200
        # Must be the instance variable: a bare local assignment here would
        # leave the estimated_result_count reader nil.
        @estimated_result_count ||= web["Total"]
        @results += web["Results"].map do |r|
          {
            :title      => r["Title"],
            :url        => r["Url"],
            :cache_url  => r["CacheUrl"],
            :content    => r["Description"],
            :domain     => r["DisplayUrl"]
          }
        end
      end
      self
    end

    # Keeps only the first +req_size+ results (all of them when +req_size+ is
    # nil) and returns self.
    def limit(req_size)
      @results ||= []
      @results = @results[0...req_size] unless req_size.nil?
      self
    end

  end #of Response

end #of Bing
543
+
544
+ end
@@ -0,0 +1,88 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ $RUBY_WEB_SEARCH_DEBUG = true
3
+
4
# Specs for RubyWebSearch::Google.unthreaded_search (pages fetched one after
# the other). Every group performs a real search in its before(:all) hook.
describe "ruby-web-search" do

  describe "Google search" do

    describe "simple format" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman")
      end

      it "should return a RubyWebSeach::Google::Response " do
        @response.should be_an_instance_of(RubyWebSearch::Google::Response)
      end

      it "should have results" do
        @response.results.should be_an_instance_of(Array)
        @response.results.first.should be_an_instance_of(Hash)
      end

      it "should have 4 results (small request set size)" do
        @response.results.size.should == 4
      end

      describe "results" do
        before(:all) do
          @results = @response.results
        end

        it "should have a title" do
          @results.first[:title].should be_an_instance_of(String)
          @results.first[:title].size.should > 3
        end

        it "should have an url" do
          @results.first[:url].should be_an_instance_of(String)
          @results.first[:url].size.should > 3
        end

        it "should have a cache url" do
          @results.first[:cache_url].should be_an_instance_of(String)
          @results.first[:cache_url].size.should > 3
        end

        it "should have content" do
          @results.first[:content].should be_an_instance_of(String)
          @results.first[:content].size.should > 15
        end

        it "should have a domain" do
          @results.first[:domain].should be_an_instance_of(String)
          @results.first[:domain].size.should > 7
          @results.first[:url].should include(@response.results.first[:domain])
        end
      end
    end

    describe "large result set" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman", :result_size => "large")
      end

      it "should have 8 results" do
        @response.results.size.should == 8
      end
    end

    describe "custom size result set" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman", :size => 24)
        @results = @response.results
      end

      it "should have exactly 24 results" do
        @results.size.should == 24
      end

      it "should have 24 unique results" do
        # Compare every url with every other one, and do it without removing
        # anything from the results shared with the other examples.
        urls = @results.map { |result| result[:url] }
        urls.uniq.size.should == 24
      end
    end
  end

end
@@ -0,0 +1,92 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ $RUBY_WEB_SEARCH_DEBUG = true
3
+
4
# Specs for RubyWebSearch::Google.search (one thread per result page).
# Every group performs a real search in its before(:all) hook.
describe "ruby-web-search" do

  describe "Google search" do

    describe "simple format" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman")
      end

      it "should return a RubyWebSeach::Google::Response " do
        @response.should be_an_instance_of(RubyWebSearch::Google::Response)
      end

      it "should have results" do
        @response.results.should be_an_instance_of(Array)
        @response.results.first.should be_an_instance_of(Hash)
      end

      it "should have 4 results (small request set size)" do
        @response.results.size.should == 4
      end

      it "should have a non nil estimated_result_count" do
        @response.estimated_result_count.should_not == nil
      end

      describe "results" do
        before(:all) do
          @results = @response.results
        end

        it "should have a title" do
          @results.first[:title].should be_an_instance_of(String)
          @results.first[:title].size.should > 3
        end

        it "should have an url" do
          @results.first[:url].should be_an_instance_of(String)
          @results.first[:url].size.should > 3
        end

        it "should have a cache url" do
          @results.first[:cache_url].should be_an_instance_of(String)
          @results.first[:cache_url].size.should > 3
        end

        it "should have content" do
          @results.first[:content].should be_an_instance_of(String)
          @results.first[:content].size.should > 15
        end

        it "should have a domain" do
          @results.first[:domain].should be_an_instance_of(String)
          @results.first[:domain].size.should > 7
          @results.first[:url].should include(@response.results.first[:domain])
        end
      end
    end

    describe "large result set" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman", :result_size => "large")
      end

      it "should have 8 results" do
        @response.results.size.should == 8
      end
    end

    describe "custom size result set" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman", :size => 24)
        @results = @response.results
      end

      it "should have exactly 24 results" do
        @results.size.should == 24
      end

      it "should have 24 unique results" do
        # Compare every url with every other one, and do it without removing
        # anything from the results shared with the other examples.
        urls = @results.map { |result| result[:url] }
        urls.uniq.size.should == 24
      end
    end
  end

end
@@ -0,0 +1,3 @@
1
+ $TESTING=true
2
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
3
+ require 'ruby-web-search'
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-web-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Matt Aimonetti
8
+ autorequire: ruby-web-search
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: A (very old) Ruby gem that provides a way to retrieve search results
28
+ via the main search engines using Ruby
29
+ email: mattaimonetti@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files:
33
+ - LICENSE
34
+ files:
35
+ - LICENSE
36
+ - README.markdown
37
+ - Rakefile
38
+ - lib/curbemu.rb
39
+ - lib/ruby-web-search.rb
40
+ - spec/ruby-web-search-unthreaded.rb
41
+ - spec/ruby-web-search_spec.rb
42
+ - spec/spec_helper.rb
43
+ homepage: http://merbist.com
44
+ licenses: []
45
+ metadata: {}
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 2.2.2
63
+ signing_key:
64
+ specification_version: 4
65
+ summary: A (very old) Ruby gem that provides a way to retrieve search results via
66
+ the main search engines using Ruby
67
+ test_files: []