ruby-web-search 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 43274af7b83bb539b1fe626b071703383557dee7
4
+ data.tar.gz: 3912a6170ba16659fd750aef7b9de5be2dc197ad
5
+ SHA512:
6
+ metadata.gz: 4ded64c95d7103196627d4380fe10f0721e5324f6e9d1c51f972923f5ed6d88822c70a4ebf39ec4fd3641eb186b8eb75bc7159f829c161564d677b41c3cf0fe5
7
+ data.tar.gz: db41a3c20a6706793a494fb8831b32c2d0e7407381c67e013ae492beeb4fe0da4efbeac66a43db306265642b0ad5f8bf751d374a915ccfb18c28778d46263fd0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Matt Aimonetti
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,37 @@
1
+ # Ruby Web Search
2
+
3
+ This gem allows you to query the Google search engine from Ruby.
4
+ So far, only Google is supported.
5
+
6
+
7
+ Simple example on how to query Google:
8
+
9
+ >> require 'ruby-web-search'
10
+ => true
11
+ >> response = RubyWebSearch::Google.search(:query => "Natalie Portman")
12
+ >> response.results
13
+ => [{:content=>"<b>Natalie Portman</b>, Star Wars, Phantom Menace, Attack of the Clones, Amidala, Leon, Professional, Where The Heart Is, Anywhere But Here, Seagull, Heat, <b>...</b>", :title=>"Natalie Portman . Com - News", :url=>"http://www.natalieportman.com/", :domain=>"www.natalieportman.com", :cache_url=>"http://www.google.com/search?q=cache:9hGoJVGBJ2sJ:www.natalieportman.com"}, {:content=>"<b>Natalie Portman</b> was born on June 9th, 1981 in Jerusalem, Israel, as the... Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, <b>...</b>", :title=>"Natalie Portman", :url=>"http://www.imdb.com/name/nm0000204/", :domain=>"www.imdb.com", :cache_url=>"http://www.google.com/search?q=cache:JLzGjsYYdlkJ:www.imdb.com"}, {:content=>"<b>Natalie Portman</b> (Hebrew: \327\240\327\230\327\234\327\231 \327\244\327\225\327\250\327\230\327\236\327\237\342\200\216; born <b>Natalie</b> Hershlag June 9, 1981) is an Israeli-American actress. <b>Portman</b> began her career in the early 1990s, <b>...</b>", :title=>"Natalie Portman - Wikipedia, the free encyclopedia", :url=>"http://en.wikipedia.org/wiki/Natalie_Portman", :domain=>"en.wikipedia.org", :cache_url=>"http://www.google.com/search?q=cache:32A4VEkC23gJ:en.wikipedia.org"}, {:content=>"Aug 30, 2008 <b>...</b> media on Miss <b>Portman</b>. You may recognize <b>Natalie</b> for her roles in <b>....</b> is in in no way affiliated with <b>Natalie Portman</b> or her management. <b>...</b>", :title=>"Natalie Portman ORG ++{natalie-p.org} | your premiere NATALIE ...", :url=>"http://www.natalie-p.org/", :domain=>"www.natalie-p.org", :cache_url=>"http://www.google.com/search?q=cache:wv-CVcMW2SEJ:www.natalie-p.org"}]
14
+
15
+ A Google search returns a Response instance. Call `results` on the response to get the array of results.
16
+ A Result is a simple hash object with a few keys available:
17
+
18
+ * title Title of the result
19
+ * url Url of the result
20
+ * domain Root url of the result
21
+ * content Snippet of the result content
22
+ * cache\_url Google cache url
23
+
24
+
25
+ By default, only the top 4 results are retrieved; you can specify the exact number of results you want by passing the `:size` option.
26
+ RubyWebSearch::Google.search(:query => "Natalie Portman", :size => 10)
27
+
28
+ ## TODO
29
+
30
+ * Full support of the google api
31
+ * Support more search engines (Yahoo, Live, etc.)
32
+
33
+ ## Experimentations
34
+
35
+ Here are some benchmarks; it looks like running multiple concurrent threads is often not worth it:
36
+ http://gist.github.com/45350
37
+ warmed up jruby benchmarks
@@ -0,0 +1,58 @@
1
require 'rubygems'
require 'rake/gempackagetask'
require 'rubygems/specification'
require 'date'
require 'spec/rake/spectask'

# Gem identity, shared by the specification and the tasks below.
GEM = "ruby-web-search"
GEM_VERSION = "0.0.2"
AUTHOR = "Matt Aimonetti"
EMAIL = "mattaimonetti@gmail.com"
HOMEPAGE = "http://merbist.com"
SUMMARY = "A Ruby gem that provides a way to retrieve search results via the main search engines using Ruby"

spec = Gem::Specification.new do |s|
  s.name = GEM
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["LICENSE"]
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE

  # Uncomment this to add a dependency
  # s.add_dependency "curb"
  s.add_dependency "json"

  s.require_path = 'lib'
  s.autorequire = GEM
  s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec}/**/*")
end

task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
  t.spec_opts = %w(-fs --color)
end


Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

desc "install the gem locally"
task :install => [:package] do
  # Name the built package file explicitly (with its .gem extension) so the
  # install does not depend on RubyGems guessing the file from a bare prefix.
  sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}.gem}
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end
@@ -0,0 +1,68 @@
1
+ require 'net/http'
2
+
3
# Minimal pure-Ruby stand-in for the subset of the curb gem's Curl API that
# this library uses, implemented on top of Net::HTTP.
module Curl
  # Error classes named after the ones curb exposes. Only HttpError is raised
  # by this emulation.
  module Err
    class CurlError < RuntimeError; end
    class GotNothingError < CurlError; end
    class ConnectionFailedError < CurlError; end
    class TimeoutError < CurlError; end
    class HttpError < CurlError; end
  end

  # Holds one request (url, headers, timeout) and the body of its response.
  class Easy
    attr_accessor :timeout, :url, :body_str, :headers, :conn

    # url:: optional target URL; it can also be assigned later via #url=.
    # Yields the new instance when a block is given, so callers can configure
    # it inline (e.g. set a Referer header) before performing the request.
    def initialize(url = nil)
      @url = url
      @headers = {}
      @body_str = nil
      yield(self) if block_given?
    end

    #Not yet implemented.. only needed for importing from LibraryThing
    def header_str
      ""
    end

    #Curl::Easy.perform("http://old-xisbn.oclc.org/xid/isbn/1234").body_str
    #Curl::Easy.perform("http://old-xisbn.oclc.org/xid/isbn/1234").header_str
    #
    # Builds an instance, yields it for configuration, issues a GET and
    # returns the instance.
    def self.perform(url)
      c = new(url)
      yield(c) if block_given?
      c.perform
      c
    end

    # Same behaviour as ::perform.
    def self.http_get(url, &block)
      perform(url, &block)
    end

    #Curl::Easy.http_post("http://foo.com", {"img_url" => url}) { |r| r.headers = 'Content-Type: text/json' }.body_str)
    #
    # Builds an instance, yields it for configuration, issues a POST with
    # +options+ as the payload and returns the instance.
    def self.http_post(url, options = {})
      c = new(url)
      yield(c) if block_given?
      c.http_post(options)
      c
    end

    # Issues a GET request to #url, sending #headers, and stores the response
    # body in #body_str.
    #
    # Raises Curl::Err::HttpError for any failure (invalid URL, connection
    # problem, ...), carrying the original error message.
    def perform
      uri = URI.parse(url)
      res = connection(uri).start do |http|
        http.request(Net::HTTP::Get.new(uri.request_uri, request_headers))
      end
      @body_str = res.body
    rescue => e
      raise ::Curl::Err::HttpError, e.message
    end

    # Issues a POST request to #url and stores the response body in #body_str.
    #
    # options:: a Hash is sent form-encoded; anything else is sent as its
    #           string representation.
    #
    # Raises Curl::Err::HttpError for any failure.
    def http_post(options = {})
      uri = URI.parse(url)
      payload = options.is_a?(Hash) ? URI.encode_www_form(options) : options.to_s
      # Net::HTTP#post returns only the response object on Ruby >= 1.9, so the
      # body has to be read from it rather than from a second return value.
      resp = connection(uri).post(uri.request_uri, payload, request_headers)
      @body_str = resp.body
    rescue => e
      raise ::Curl::Err::HttpError, e.message
    end

    private

    # Returns an unstarted Net::HTTP for +uri+, with TLS enabled for https
    # URLs and #timeout applied when one was set.
    def connection(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true if uri.scheme == "https"
      if timeout
        http.open_timeout = timeout
        http.read_timeout = timeout
      end
      http
    end

    # Net::HTTP needs a Hash of headers. Callers may have assigned a single
    # "Name: value" string (see the ::http_post example above), so that form
    # is converted; anything else unusable yields no headers.
    def request_headers
      return headers if headers.is_a?(Hash)
      name, value = headers.to_s.split(/:\s*/, 2)
      name && value ? { name => value } : {}
    end
  end
end
@@ -0,0 +1,544 @@
1
+ require 'rubygems'
2
+ require 'cgi'
3
+ require 'json'
4
+
5
+ # begin
6
+ # gem 'curb'
7
+ # require 'curb'
8
+ # rescue LoadError
9
+ require File.join(File.dirname(__FILE__), 'curbemu')
10
+ # end
11
+
12
+
13
# Debug output (request URLs are printed to stdout) is off unless the caller
# assigned $RUBY_WEB_SEARCH_DEBUG before requiring this file.
$RUBY_WEB_SEARCH_DEBUG = false unless defined?($RUBY_WEB_SEARCH_DEBUG)
14
+
15
+ class RubyWebSearch
16
+
17
+ # http://code.google.com/apis/ajaxsearch/documentation/reference.html
18
+ class Google
19
+
20
# Builds a Google::Query from +options+ and runs it with Query#execute;
# returns whatever #execute returns.
def self.search(options={})
  ::RubyWebSearch::Google::Query.new(options).execute
end
24
+
25
# Builds a Google::Query from +options+ and runs it with
# Query#execute_unthreaded; returns whatever that call returns.
def self.unthreaded_search(options={})
  ::RubyWebSearch::Google::Query.new(options).execute_unthreaded
end
29
+
30
# Builds the request URL(s) for a Google AJAX search and collects the parsed
# results into a Response.
class Query
  attr_accessor :query, :start_index, :result_size, :filter, :country_code, :language_code, :global
  attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
  attr_accessor :size, :cursor, :custom_request_url, :response

  class Error < StandardError; end

  SEARCH_BASE_URLS = { :web    => "http://ajax.googleapis.com/ajax/services/search/web",
                       :local  => "http://ajax.googleapis.com/ajax/services/search/local",
                       :video  => "http://ajax.googleapis.com/ajax/services/search/video",
                       :blog   => "http://ajax.googleapis.com/ajax/services/search/blogs",
                       :news   => "http://ajax.googleapis.com/ajax/services/search/news",
                       :book   => "http://ajax.googleapis.com/ajax/services/search/books",
                       :image  => "http://ajax.googleapis.com/ajax/services/search/images",
                       :patent => "http://ajax.googleapis.com/ajax/services/search/patent"
                     }.freeze

  # Number of results one request returns when the "large" result size is used.
  PAGE_SIZE = 8

  #
  # You can overwrite the query building process by passing the request url to use.
  #
  # ==== Params
  # query<String>
  # start_index<Integer>
  # size<Integer> number of results default: 4 (8 when result_size is "large")
  # filter
  # country_code<String> 2 letters language code for the country you want
  # to limit to
  # language_code<String> (Web only)
  # safe_search<String> active, moderate or off. Default: active (web only)
  # custom_search_engine_id<String> optional argument supplying the unique id for
  # the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
  # (web only)
  # custom_request_url<String> complete request URL to use instead of building one
  #
  # Raises Query::Error when neither a query nor a usable custom URL is given.
  #
  def initialize(options={})
    @cursor = options[:start_index] || 0
    if options[:custom_request_url]
      # The URL used to be read from :request_url only; accept it from either
      # key so that passing it as :custom_request_url works as well.
      @custom_request_url = [options[:request_url], options[:custom_request_url]].find { |u| u.is_a?(String) }
      raise Error, "You need to pass the custom request url as a String" unless @custom_request_url
    else
      @query = options[:query]
      raise Error, "You need to pass a query" unless @query
      @result_size = options[:result_size]
      @filter = options[:filter]
      @type = options[:type] || :web
      @country_code = options[:country_code]
      @language_code = options[:language_code] ? "lang_#{options[:language_code]}" : nil
      @safe_search = options[:safe_search]
      @custom_search_engine_id = options[:custom_search_engine_id]
      @version = options[:version] || "1.0"
      @referer = options[:referer] || "http://github.com/mattetti/"
      # An explicit :size is honoured as given; only the default depends on
      # the requested result set size.
      @size = options[:size] || (@result_size == "large" ? PAGE_SIZE : 4)
      @global = options[:global]
      @result_size = "large" if @size > 4 # increase the result set size to avoid making too many requests
    end
    @response ||= Response.new(:query => (query || custom_request_url), :size => size)
  end

  # Returns the URL of the next request, starting at the current cursor.
  def build_request
    return custom_request_url if custom_request_url

    @request_url = base_request_url
    @request_url << "&rsz=#{result_size}" if result_size
    @request_url << "&start=#{cursor}" if cursor > 0
    @request_url << locale_params

    puts request_url if $RUBY_WEB_SEARCH_DEBUG
    request_url
  end

  # Returns the URLs of every request needed to collect +size+ results.
  # Advances the cursor by one page per URL built.
  def build_requests
    return [custom_request_url] if custom_request_url

    # create an array of requests based on the fact that google limits
    # us to 8 responses per request but let us use a cursor
    requests = Array.new((size / PAGE_SIZE.to_f).ceil) do
      url = base_request_url
      url << "&rsz=#{result_size}" if result_size
      url << locale_params
      url << "&start=#{cursor}"
      @cursor += PAGE_SIZE
      url
    end

    puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
    requests
  end

  # Makes the request to Google
  # if a larger set was requested than what is returned,
  # more requests are made until the correct amount is available
  # or a request brings back no new result.
  # Returns the Response.
  def execute_unthreaded
    @curl_request ||= ::Curl::Easy.new
    @curl_request.headers["Referer"] = referer if referer

    loop do
      known = response.results.size
      @curl_request.url = build_request
      @curl_request.perform
      response.process(JSON.load(@curl_request.body_str) || {})

      fetched = response.results.size - known
      # The next request starts right after the results just received.
      @cursor += fetched
      break if custom_request_url || fetched.zero? || response.results.size >= size
      puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
    end

    limited_response
  end

  # Makes the requests to Google, one thread per request, and merges the
  # results in request order. Returns the Response.
  def execute
    threads = build_requests.map do |req|
      Thread.new do
        curl_request = ::Curl::Easy.new(req)
        curl_request.headers["Referer"] = referer if referer
        curl_request.perform
        JSON.load(curl_request.body_str) || {}
      end
    end
    threads.each { |t| response.process(t.value) }
    limited_response
  end

  private

  # Endpoint, API version and escaped query shared by every request.
  def base_request_url
    "#{SEARCH_BASE_URLS[type]}?v=#{version}&q=#{CGI.escape(query)}"
  end

  # Optional language / country / geolocation parameters.
  def locale_params
    [("&lr=#{language_code}" if language_code),
     ("&hl=#{country_code}" if country_code),
     ("&gl=#{global}" if global)].compact.join
  end

  # Custom URL queries have no requested size, so nothing is trimmed then.
  def limited_response
    size ? response.limit(size) : response
  end

end #of Query
163
+
164
+
165
# Accumulates the results of one or more parsed Google JSON responses.
class Response
  attr_reader :results, :status, :query, :size, :estimated_result_count

  # google_raw_response:: either a seed hash (:query, :size) or a parsed
  #                       Google JSON response; ignored when empty.
  def initialize(google_raw_response={})
    @results = []
    process(google_raw_response) unless google_raw_response.empty?
  end

  # Records the status of +google_raw_response+ and, when it is a successful
  # (200) response, appends its results as hashes with the keys
  # :title, :url, :cache_url, :content and :domain.
  # Returns self.
  def process(google_raw_response={})
    raw = google_raw_response || {}
    @query ||= raw[:query]
    @size ||= raw[:size]
    @results ||= []
    @status = raw["responseStatus"]

    data = raw["responseData"]
    return self unless data && status == 200

    page_cursor = data["cursor"]
    @estimated_result_count ||= page_cursor["estimatedResultCount"] if page_cursor
    @results += Array(data["results"]).map do |r|
      {
        :title => r["titleNoFormatting"],
        :url => r["unescapedUrl"],
        :cache_url => r["cacheUrl"],
        :content => r["content"],
        :domain => r["visibleUrl"]
      }
    end
    self
  end

  # Keeps only the first +req_size+ results (all of them when +req_size+ is
  # nil). Returns self.
  def limit(req_size)
    @results = @results[0...req_size] if req_size
    self
  end

end #of Response
196
+
197
+ end #of Google
198
+
199
+ # http://developer.yahoo.com/search/boss/
200
+ class Yahoo
201
+
202
# Builds a Yahoo::Query from +options+ and runs it with Query#execute;
# returns whatever #execute returns.
def self.search(options={})
  ::RubyWebSearch::Yahoo::Query.new(options).execute
end
206
+
207
# Builds a Yahoo::Query from +options+ and runs it with
# Query#execute_unthreaded; returns whatever that call returns.
def self.unthreaded_search(options={})
  ::RubyWebSearch::Yahoo::Query.new(options).execute_unthreaded
end
211
+
212
# Builds the request URL(s) for a Yahoo BOSS web search and collects the
# parsed results into a Response.
class Query
  attr_accessor :query, :start_index, :filter, :country_code, :language_code
  attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
  attr_accessor :size, :cursor, :custom_request_url, :response, :api_key

  class Error < StandardError; end

  SEARCH_BASE_URLS = { :web => "http://boss.yahooapis.com/ysearch/web",
                     }.freeze

  # Number of results requested per page when the query is split into
  # several concurrent requests.
  PAGE_SIZE = 10

  #
  # You can overwrite the query building process by passing the request url to use.
  #
  # ==== Params
  # query<String>
  # api_key<String>
  # start_index<Integer>
  # size<Integer> number of results default: 10
  # filter
  # country_code<String> 2 letters language code for the country you want
  # to limit to
  # language_code<String> (Web only)
  # safe_search<String> active, moderate or off. Default: active (web only)
  # custom_search_engine_id<String> optional argument supplying the unique id for
  # the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
  # (web only)
  # custom_request_url<String> complete request URL to use instead of building one
  #
  # Raises Query::Error when the query, the api key or a usable custom URL is missing.
  #
  def initialize(options={})
    @cursor = options[:start_index] || 0
    if options[:custom_request_url]
      # The URL used to be read from :request_url only; accept it from either
      # key so that passing it as :custom_request_url works as well.
      @custom_request_url = [options[:request_url], options[:custom_request_url]].find { |u| u.is_a?(String) }
      raise Error, "You need to pass the custom request url as a String" unless @custom_request_url
    else
      @query = options[:query]
      raise Error, "You need to pass a query" unless @query
      @filter = options[:filter]
      @type = options[:type] || :web
      @country_code = options[:country_code]
      @language_code = options[:language_code]
      @safe_search = options[:safe_search]
      @custom_search_engine_id = options[:custom_search_engine_id]
      @version = options[:version] || "1"
      @referer = options[:referer] || "http://github.com/mattetti/"
      @api_key = options[:api_key]
      raise Error, "You need to pass an api key" unless @api_key
      @size = options[:size] || 10
    end
    @response ||= Response.new(:query => (query || custom_request_url), :size => size)
  end

  # Returns the URL of the next request, starting at the current cursor.
  def build_request
    return custom_request_url if custom_request_url

    @request_url = base_request_url
    @request_url << "&count=#{size}" if size
    @request_url << "&start=#{cursor}" if cursor > 0
    @request_url << locale_params

    puts request_url if $RUBY_WEB_SEARCH_DEBUG
    request_url
  end

  # Returns the URLs of every request needed to collect +size+ results.
  # Advances the cursor by one page per URL built.
  def build_requests
    return [custom_request_url] if custom_request_url

    # limiting to 10 responses per request; each page asks for at most one
    # page worth of results so that consecutive pages do not overlap
    per_page = [size, PAGE_SIZE].min
    requests = Array.new((size / PAGE_SIZE.to_f).ceil) do
      url = base_request_url
      url << "&count=#{per_page}"
      url << locale_params
      url << "&start=#{cursor}" if cursor > 0
      @cursor += PAGE_SIZE
      url
    end

    puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
    requests
  end

  # Makes the request to Yahoo
  # if a larger set was requested than what is returned,
  # more requests are made until the correct amount is available
  # or a request brings back no new result.
  # Returns the Response.
  def execute_unthreaded
    @curl_request ||= ::Curl::Easy.new
    @curl_request.headers["Referer"] = referer if referer

    loop do
      known = response.results.size
      @curl_request.url = build_request
      @curl_request.perform
      response.process(JSON.load(@curl_request.body_str) || {})

      fetched = response.results.size - known
      # The next request starts right after the results just received.
      @cursor += fetched
      break if custom_request_url || fetched.zero? || response.results.size >= size
      puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
    end

    limited_response
  end

  # Makes the requests to Yahoo, one thread per request, and merges the
  # results in request order. Returns the Response.
  def execute
    threads = build_requests.map do |req|
      Thread.new do
        curl_request = ::Curl::Easy.new(req)
        curl_request.headers["Referer"] = referer if referer
        curl_request.perform
        JSON.load(curl_request.body_str) || {}
      end
    end
    threads.each { |t| response.process(t.value) }
    limited_response
  end

  private

  # Endpoint, API version, escaped query and application id shared by every request.
  def base_request_url
    "#{SEARCH_BASE_URLS[type]}/v#{version}/#{CGI.escape(query)}?appid=#{api_key}"
  end

  # Language and region are only sent when both are known.
  def locale_params
    language_code && country_code ? "&lang=#{language_code}&region=#{country_code}" : ""
  end

  # Custom URL queries have no requested size, so nothing is trimmed then.
  def limited_response
    size ? response.limit(size) : response
  end

end #of Query
334
+
335
+
336
# Accumulates the results of one or more parsed Yahoo BOSS JSON responses.
class Response
  attr_reader :results, :status, :query, :size, :estimated_result_count

  # google_raw_response:: either a seed hash (:query, :size) or a parsed
  #                       Yahoo JSON response; ignored when empty.
  def initialize(google_raw_response={})
    @results = []
    process(google_raw_response) unless google_raw_response.empty?
  end

  # Records the response code of +google_raw_response+ and, when it is a
  # successful (200) response carrying web results, appends them as hashes
  # with the keys :title, :url, :cache_url, :content and :domain.
  # Returns self.
  def process(google_raw_response={})
    raw = google_raw_response || {}
    @query ||= raw[:query]
    @size ||= raw[:size]
    @results ||= []

    payload = raw["ysearchresponse"]
    @status = payload["responsecode"].to_i if payload
    hits = payload && payload["resultset_web"]
    return self unless hits && status == 200

    # Stored in the instance variable so that the reader exposes it.
    @estimated_result_count ||= payload["totalhits"]
    @results += hits.map do |r|
      {
        :title => r["title"],
        :url => r["clickurl"],
        :cache_url => r["cacheUrl"],
        :content => r["abstract"],
        :domain => r["url"]
      }
    end
    self
  end

  # Keeps only the first +req_size+ results (all of them when +req_size+ is
  # nil). Returns self.
  def limit(req_size)
    @results = @results[0...req_size] if req_size
    self
  end

end #of Response
367
+
368
+ end #of Yahoo
369
+
370
+ # http://www.bing.com/developers
371
+ class Bing
372
+
373
# Builds a Bing::Query from +options+ and runs it with Query#execute;
# returns whatever #execute returns.
def self.search(options={})
  ::RubyWebSearch::Bing::Query.new(options).execute
end
377
+
378
# Builds a Bing::Query from +options+ and runs it with
# Query#execute_unthreaded; returns whatever that call returns.
def self.unthreaded_search(options={})
  ::RubyWebSearch::Bing::Query.new(options).execute_unthreaded
end
382
+
383
# Builds the request URL(s) for a Bing (Live Search JSON API) web search and
# collects the parsed results into a Response.
class Query
  attr_accessor :query, :start_index, :filter, :country_code, :language_code
  attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
  attr_accessor :size, :cursor, :custom_request_url, :response, :api_key

  class Error < StandardError; end

  SEARCH_BASE_URLS = { :web => "http://api.search.live.net/json.aspx?sources=web",
                     }.freeze

  # Number of results requested per page when the query is split into
  # several concurrent requests.
  PAGE_SIZE = 10

  #
  # You can overwrite the query building process by passing the request url to use.
  #
  # ==== Params
  # query<String>
  # api_key<String>
  # start_index<Integer>
  # size<Integer> number of results default: 10
  # filter
  # country_code<String> 2 letters language code for the country you want
  # to limit to
  # language_code<String> (Web only)
  # safe_search<String> active, moderate or off. Default: active (web only)
  # custom_search_engine_id<String> optional argument supplying the unique id for
  # the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
  # (web only)
  # custom_request_url<String> complete request URL to use instead of building one
  #
  # Raises Query::Error when the query, the api key or a usable custom URL is missing.
  #
  def initialize(options={})
    @cursor = options[:start_index] || 0
    if options[:custom_request_url]
      # The URL used to be read from :request_url only; accept it from either
      # key so that passing it as :custom_request_url works as well.
      @custom_request_url = [options[:request_url], options[:custom_request_url]].find { |u| u.is_a?(String) }
      raise Error, "You need to pass the custom request url as a String" unless @custom_request_url
    else
      @query = options[:query]
      raise Error, "You need to pass a query" unless @query
      @filter = options[:filter]
      @type = options[:type] || :web
      @country_code = options[:country_code]
      @language_code = options[:language_code]
      @safe_search = options[:safe_search]
      @custom_search_engine_id = options[:custom_search_engine_id]
      @version = options[:version] || "1"
      @referer = options[:referer] || "http://github.com/mattetti/"
      @api_key = options[:api_key]
      raise Error, "You need to pass an api key" unless @api_key
      @size = options[:size] || 10
    end
    @response ||= Response.new(:query => (query || custom_request_url), :size => size)
  end

  # Returns the URL of the next request, starting at the current cursor.
  def build_request
    return custom_request_url if custom_request_url

    @request_url = base_request_url
    @request_url << "&web.count=#{size}" if size
    @request_url << "&web.offset=#{cursor}" if cursor > 0
    @request_url << locale_params

    puts request_url if $RUBY_WEB_SEARCH_DEBUG
    request_url
  end

  # Returns the URLs of every request needed to collect +size+ results.
  # Advances the cursor by one page per URL built.
  def build_requests
    return [custom_request_url] if custom_request_url

    # limiting to 10 responses per request; each page asks for at most one
    # page worth of results so that consecutive pages do not overlap
    per_page = [size, PAGE_SIZE].min
    requests = Array.new((size / PAGE_SIZE.to_f).ceil) do
      url = base_request_url
      url << "&web.count=#{per_page}"
      url << locale_params
      url << "&web.offset=#{cursor}" if cursor > 0
      @cursor += PAGE_SIZE
      url
    end

    puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
    requests
  end

  # Makes the request to Bing
  # if a larger set was requested than what is returned,
  # more requests are made until the correct amount is available
  # or a request brings back no new result.
  # Returns the Response.
  def execute_unthreaded
    @curl_request ||= ::Curl::Easy.new
    @curl_request.headers["Referer"] = referer if referer

    loop do
      known = response.results.size
      @curl_request.url = build_request
      @curl_request.perform
      response.process(JSON.load(@curl_request.body_str) || {})

      fetched = response.results.size - known
      # The next request starts right after the results just received.
      @cursor += fetched
      break if custom_request_url || fetched.zero? || response.results.size >= size
      puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
    end

    limited_response
  end

  # Makes the requests to Bing, one thread per request, and merges the
  # results in request order. Returns the Response.
  def execute
    threads = build_requests.map do |req|
      Thread.new do
        curl_request = ::Curl::Easy.new(req)
        curl_request.headers["Referer"] = referer if referer
        curl_request.perform
        JSON.load(curl_request.body_str) || {}
      end
    end
    threads.each { |t| response.process(t.value) }
    limited_response
  end

  private

  # Endpoint, escaped query and application id shared by every request.
  def base_request_url
    "#{SEARCH_BASE_URLS[type]}&query=#{CGI.escape(query)}&appid=#{api_key}"
  end

  # The market is only sent when both language and country are known.
  def locale_params
    language_code && country_code ? "&market=#{language_code}-#{country_code}" : ""
  end

  # Custom URL queries have no requested size, so nothing is trimmed then.
  def limited_response
    size ? response.limit(size) : response
  end

end #of Query
505
+
506
+
507
# Accumulates the results of one or more parsed Bing JSON responses.
class Response
  attr_reader :results, :status, :query, :size, :estimated_result_count

  # google_raw_response:: either a seed hash (:query, :size) or a parsed
  #                       Bing JSON response; ignored when empty.
  def initialize(google_raw_response={})
    @results = []
    process(google_raw_response) unless google_raw_response.empty?
  end

  # Appends the web results found in +google_raw_response+ as hashes with the
  # keys :title, :url, :cache_url, :content and :domain.
  # The status is always recorded as 200: no status field is read from the
  # response.
  # Returns self.
  def process(google_raw_response={})
    raw = google_raw_response || {}
    @query ||= raw[:query]
    @size ||= raw[:size]
    @results ||= []
    @status = 200

    web = raw["SearchResponse"] && raw["SearchResponse"]["Web"]
    hits = web && web["Results"]
    return self unless hits

    # Stored in the instance variable so that the reader exposes it.
    @estimated_result_count ||= web["Total"]
    @results += hits.map do |r|
      {
        :title => r["Title"],
        :url => r["Url"],
        :cache_url => r["CacheUrl"],
        :content => r["Description"],
        :domain => r["DisplayUrl"]
      }
    end
    self
  end

  # Keeps only the first +req_size+ results (all of them when +req_size+ is
  # nil). Returns self.
  def limit(req_size)
    @results = @results[0...req_size] if req_size
    self
  end

end #of Response
541
+
542
+ end #of Bing
543
+
544
+ end
@@ -0,0 +1,88 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ $RUBY_WEB_SEARCH_DEBUG = true
3
+
4
# Exercises RubyWebSearch::Google.unthreaded_search.
describe "ruby-web-search" do

  describe "Google search" do

    describe "simple format" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman")
      end

      it "should return a RubyWebSearch::Google::Response" do
        @response.should be_an_instance_of(RubyWebSearch::Google::Response)
      end

      it "should have results" do
        @response.results.should be_an_instance_of(Array)
        @response.results.first.should be_an_instance_of(Hash)
      end

      it "should have 4 results (small request set size)" do
        @response.results.size.should == 4
      end

      describe "results" do
        before(:all) do
          @results = @response.results
        end

        it "should have a title" do
          @results.first[:title].should be_an_instance_of(String)
          @results.first[:title].size.should > 3
        end

        it "should have an url" do
          @results.first[:url].should be_an_instance_of(String)
          @results.first[:url].size.should > 3
        end

        it "should have a cache url" do
          @results.first[:cache_url].should be_an_instance_of(String)
          @results.first[:cache_url].size.should > 3
        end

        it "should have content" do
          @results.first[:content].should be_an_instance_of(String)
          @results.first[:content].size.should > 15
        end

        it "should have a domain" do
          @results.first[:domain].should be_an_instance_of(String)
          @results.first[:domain].size.should > 7
          @results.first[:url].should include(@response.results.first[:domain])
        end
      end
    end

    describe "large result set" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman", :result_size => "large")
      end

      it "should have 8 results" do
        @response.results.size.should == 8
      end
    end

    describe "custom size result set" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman", :size => 24)
        @results = @response.results
      end

      it "should have exactly 24 results" do
        @results.size.should == 24
      end

      it "should have 24 unique results" do
        # Compare every URL with every other one, without modifying the
        # results shared between the examples.
        urls = @results.map { |result| result[:url] }
        urls.uniq.size.should == urls.size
      end
    end
  end

end
@@ -0,0 +1,92 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ $RUBY_WEB_SEARCH_DEBUG = true
3
+
4
# Exercises RubyWebSearch::Google.search (threaded execution).
describe "ruby-web-search" do

  describe "Google search" do

    describe "simple format" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman")
      end

      it "should return a RubyWebSearch::Google::Response" do
        @response.should be_an_instance_of(RubyWebSearch::Google::Response)
      end

      it "should have results" do
        @response.results.should be_an_instance_of(Array)
        @response.results.first.should be_an_instance_of(Hash)
      end

      it "should have 4 results (small request set size)" do
        @response.results.size.should == 4
      end

      it "should have a non nil estimated_result_count" do
        @response.estimated_result_count.should_not == nil
      end

      describe "results" do
        before(:all) do
          @results = @response.results
        end

        it "should have a title" do
          @results.first[:title].should be_an_instance_of(String)
          @results.first[:title].size.should > 3
        end

        it "should have an url" do
          @results.first[:url].should be_an_instance_of(String)
          @results.first[:url].size.should > 3
        end

        it "should have a cache url" do
          @results.first[:cache_url].should be_an_instance_of(String)
          @results.first[:cache_url].size.should > 3
        end

        it "should have content" do
          @results.first[:content].should be_an_instance_of(String)
          @results.first[:content].size.should > 15
        end

        it "should have a domain" do
          @results.first[:domain].should be_an_instance_of(String)
          @results.first[:domain].size.should > 7
          @results.first[:url].should include(@response.results.first[:domain])
        end
      end
    end

    describe "large result set" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman", :result_size => "large")
      end

      it "should have 8 results" do
        @response.results.size.should == 8
      end
    end

    describe "custom size result set" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman", :size => 24)
        @results = @response.results
      end

      it "should have exactly 24 results" do
        @results.size.should == 24
      end

      it "should have 24 unique results" do
        # Compare every URL with every other one, without modifying the
        # results shared between the examples.
        urls = @results.map { |result| result[:url] }
        urls.uniq.size.should == urls.size
      end
    end
  end

end
@@ -0,0 +1,3 @@
1
+ $TESTING=true
2
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
3
+ require 'ruby-web-search'
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-web-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Matt Aimonetti
8
+ autorequire: ruby-web-search
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: A (very old) Ruby gem that provides a way to retrieve search results
28
+ via the main search engines using Ruby
29
+ email: mattaimonetti@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files:
33
+ - LICENSE
34
+ files:
35
+ - LICENSE
36
+ - README.markdown
37
+ - Rakefile
38
+ - lib/curbemu.rb
39
+ - lib/ruby-web-search.rb
40
+ - spec/ruby-web-search-unthreaded.rb
41
+ - spec/ruby-web-search_spec.rb
42
+ - spec/spec_helper.rb
43
+ homepage: http://merbist.com
44
+ licenses: []
45
+ metadata: {}
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 2.2.2
63
+ signing_key:
64
+ specification_version: 4
65
+ summary: A (very old) Ruby gem that provides a way to retrieve search results via
66
+ the main search engines using Ruby
67
+ test_files: []