edavis10-ruby-web-search 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Matt Aimonetti
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,37 @@
1
+ # Ruby Web Search
2
+
3
+ This gem allows you to query the Google search engine from Ruby.
4
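+ Google is the primary engine; Yahoo and Bing clients (`RubyWebSearch::Yahoo`, `RubyWebSearch::Bing`) are also included and require an `:api_key` option.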
5
+
6
+
7
+ Simple example on how to query Google:
8
+
9
+ >> require 'ruby-web-search'
10
+ => true
11
+ >> response = RubyWebSearch::Google.search(:query => "Natalie Portman")
12
+ >> response.results
13
+ => [{:content=>"<b>Natalie Portman</b>, Star Wars, Phantom Menace, Attack of the Clones, Amidala, Leon, Professional, Where The Heart Is, Anywhere But Here, Seagull, Heat, <b>...</b>", :title=>"Natalie Portman . Com - News", :url=>"http://www.natalieportman.com/", :domain=>"www.natalieportman.com", :cache_url=>"http://www.google.com/search?q=cache:9hGoJVGBJ2sJ:www.natalieportman.com"}, {:content=>"<b>Natalie Portman</b> was born on June 9th, 1981 in Jerusalem, Israel, as the... Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, <b>...</b>", :title=>"Natalie Portman", :url=>"http://www.imdb.com/name/nm0000204/", :domain=>"www.imdb.com", :cache_url=>"http://www.google.com/search?q=cache:JLzGjsYYdlkJ:www.imdb.com"}, {:content=>"<b>Natalie Portman</b> (Hebrew: \327\240\327\230\327\234\327\231 \327\244\327\225\327\250\327\230\327\236\327\237\342\200\216; born <b>Natalie</b> Hershlag June 9, 1981) is an Israeli-American actress. <b>Portman</b> began her career in the early 1990s, <b>...</b>", :title=>"Natalie Portman - Wikipedia, the free encyclopedia", :url=>"http://en.wikipedia.org/wiki/Natalie_Portman", :domain=>"en.wikipedia.org", :cache_url=>"http://www.google.com/search?q=cache:32A4VEkC23gJ:en.wikipedia.org"}, {:content=>"Aug 30, 2008 <b>...</b> media on Miss <b>Portman</b>. You may recognize <b>Natalie</b> for her roles in <b>....</b> is in in no way affiliated with <b>Natalie Portman</b> or her management. <b>...</b>", :title=>"Natalie Portman ORG ++{natalie-p.org} | your premiere NATALIE ...", :url=>"http://www.natalie-p.org/", :domain=>"www.natalie-p.org", :cache_url=>"http://www.google.com/search?q=cache:wv-CVcMW2SEJ:www.natalie-p.org"}]
14
+
15
+ A Google search returns a Response instance. Call `results` on the response to get the array of results.
16
+ A Result is a simple hash object with a few keys available:
17
+
18
+ * title Title of the result
19
+ * url Url of the result
20
+ * domain Root url of the result
21
+ * content Snippet of the result content
22
+ * cache\_url Google cache url
23
+
24
+
25
+ By default, only the top 4 results are retrieved; you can request an exact number of results by passing the `:size` option.
26
+ RubyWebSearch::Google.search(:query => "Natalie Portman", :size => 10)
27
+
28
+ ## TODO
29
+
30
+ * Full support of the google api
31
+ * support more search engines (Yahoo, live etc...)
32
+
33
+ ## Experimentations
34
+
35
+ Here are some benchmarks; it looks like running multiple concurrent threads is often not worth it:
36
+ http://gist.github.com/45350
37
+ warmed up jruby benchmarks
data/Rakefile ADDED
@@ -0,0 +1,58 @@
require 'rubygems'
require 'rake/gempackagetask'
require 'rubygems/specification'
require 'date'
require 'spec/rake/spectask'

# Gem identity, shared by the specification and by the tasks below.
GEM = "edavis10-ruby-web-search"
GEM_VERSION = "0.0.2"
AUTHOR = "Matt Aimonetti"
EMAIL = "mattaimonetti@gmail.com"
HOMEPAGE = "http://merbist.com"
SUMMARY = "A Ruby gem that provides a way to retrieve search results via the main search engines using Ruby"

# Specification used both to package the gem and to generate the gemspec file.
spec = Gem::Specification.new do |gem|
  gem.name             = GEM
  gem.version          = GEM_VERSION
  gem.platform         = Gem::Platform::RUBY
  gem.has_rdoc         = true
  gem.extra_rdoc_files = ["LICENSE"]
  gem.summary          = SUMMARY
  gem.description      = gem.summary
  gem.author           = AUTHOR
  gem.email            = EMAIL
  gem.homepage         = HOMEPAGE

  # curb is optional (the library falls back to lib/curbemu.rb), so it is
  # left commented out:
  # gem.add_dependency "curb"
  gem.add_dependency "json"

  gem.require_path = 'lib'
  gem.autorequire  = GEM
  gem.files        = ["LICENSE", "README.markdown", "Rakefile"] + Dir["{lib,spec}/**/*"]
end

# `rake` with no arguments runs the specs.
task :default => :spec

desc "Run specs"
Spec::Rake::SpecTask.new do |runner|
  runner.spec_files = FileList['spec/**/*_spec.rb']
  runner.spec_opts  = ["-fs", "--color"]
end

# Defines the :package task that builds the gem under pkg/.
Rake::GemPackageTask.new(spec) do |package|
  package.gem_spec = spec
end

desc "install the gem locally"
task :install => [:package] do
  sh "sudo gem install pkg/#{GEM}-#{GEM_VERSION}"
end

desc "create a gemspec file"
task :make_spec do
  File.open("#{GEM}.gemspec", "w") { |gemspec| gemspec.puts spec.to_ruby }
end
data/lib/curbemu.rb ADDED
@@ -0,0 +1,68 @@
require 'net/http'
require 'uri'

# Minimal pure-Ruby stand-in for the part of the `curb` gem API that this
# library uses, implemented on top of Net::HTTP.
module Curl
  # Error classes named after the ones curb exposes. Only HttpError is
  # raised by this emulation.
  module Err
    class CurlError < RuntimeError; end
    class GotNothingError < CurlError; end
    class ConnectionFailedError < CurlError; end
    class TimeoutError < CurlError; end
    class HttpError < CurlError; end
  end

  # A single HTTP request/response pair.
  #
  # timeout::  optional open/read timeout in seconds (nil keeps Net::HTTP defaults)
  # url::      target URL as a String
  # body_str:: response body of the last request, nil before any request
  # headers::  request headers; a Hash, or a "Name: value" String
  class Easy
    attr_accessor :timeout, :url, :body_str, :headers, :conn

    # Builds a request for +url+. When a block is given the new instance is
    # yielded to it so callers can configure headers before performing.
    def initialize(url = nil)
      @url = url
      @headers = {}
      @body_str = nil
      yield self if block_given?
    end

    # Not yet implemented.. only needed for importing from LibraryThing
    def header_str
      ""
    end

    # Curl::Easy.perform("http://old-xisbn.oclc.org/xid/isbn/1234").body_str
    # Curl::Easy.perform("http://old-xisbn.oclc.org/xid/isbn/1234").header_str
    #
    # Creates a request, yields it for configuration, performs a GET and
    # returns the request object.
    def self.perform(url)
      c = new(url)
      yield(c) if block_given?
      c.perform
      c
    end

    # Same contract as Easy.perform.
    def self.http_get(url)
      c = new(url)
      yield(c) if block_given?
      c.perform
      c
    end

    # Curl::Easy.http_post("http://foo.com", {"img_url" => url}) { |r| r.headers = 'Content-Type: text/json' }.body_str
    #
    # Creates a request, yields it for configuration, POSTs +options+ and
    # returns the request object.
    def self.http_post(url, options = {})
      c = new(url)
      yield(c) if block_given?
      c.http_post(options)
      c
    end

    # Issues a GET for +url+, sending +headers+, and stores the response body
    # in +body_str+.
    #
    # Raises Curl::Err::HttpError for any StandardError raised while parsing
    # the URL or talking to the server.
    def perform
      uri = URI.parse(url)
      res = connection_for(uri).start do |http|
        http.request(Net::HTTP::Get.new(uri.request_uri, request_headers))
      end
      @body_str = res.body
    rescue => e
      raise ::Curl::Err::HttpError, e.message
    end

    # POSTs +options+ to +url+ and stores the response body in +body_str+.
    # A Hash is sent form-encoded; anything else is sent as its string form.
    #
    # Raises Curl::Err::HttpError on any StandardError (see #perform).
    def http_post(options = {})
      uri = URI.parse(url)
      payload = options.is_a?(Hash) ? URI.encode_www_form(options) : options.to_s
      # Net::HTTP#post returns only the response object on current Rubies, so
      # the body is read from it rather than from a second return value.
      resp = connection_for(uri).start do |http|
        http.post(uri.request_uri, payload, request_headers)
      end
      @body_str = resp.body
    rescue => e
      raise ::Curl::Err::HttpError, e.message
    end

    private

    # Unstarted Net::HTTP session for +uri+ with TLS and timeouts applied.
    def connection_for(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true if uri.scheme == 'https'
      if timeout
        http.open_timeout = timeout
        http.read_timeout = timeout
      end
      http
    end

    # Headers as the Hash Net::HTTP expects; "Name: value" strings (one per
    # line) are parsed, anything unparsable is dropped.
    def request_headers
      return headers if headers.is_a?(Hash)
      headers.to_s.split(/\r?\n/).each_with_object({}) do |line, parsed|
        name, value = line.split(':', 2)
        parsed[name.strip] = value.strip if value
      end
    end
  end
end
@@ -0,0 +1,541 @@
1
+ require 'rubygems'
2
+ require 'cgi'
3
+ require 'json'
4
+
5
+ # begin
6
+ # gem 'curb'
7
+ # require 'curb'
8
+ # rescue LoadError
9
+ require File.join(File.dirname(__FILE__), 'curbemu')
10
+ # end
11
+
12
+
13
+ $RUBY_WEB_SEARCH_DEBUG = true
14
+
15
+ class RubyWebSearch
16
+
17
+ # http://code.google.com/apis/ajaxsearch/documentation/reference.html
18
+ class Google
19
+
20
+ def self.search(options={})
21
+ query = ::RubyWebSearch::Google::Query.new(options)
22
+ query.execute
23
+ end
24
+
25
+ def self.unthreaded_search(options={})
26
+ query = ::RubyWebSearch::Google::Query.new(options)
27
+ query.execute_unthreaded
28
+ end
29
+
30
+ class Query
31
+ attr_accessor :query, :start_index, :result_size, :filter, :country_code, :language_code
32
+ attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
33
+ attr_accessor :size, :cursor, :custom_request_url, :response
34
+
35
+ class Error < StandardError; end
36
+
37
+ SEARCH_BASE_URLS = { :web => "http://ajax.googleapis.com/ajax/services/search/web",
38
+ :local => "http://ajax.googleapis.com/ajax/services/search/local",
39
+ :video => "http://ajax.googleapis.com/ajax/services/search/video",
40
+ :blog => "http://ajax.googleapis.com/ajax/services/search/blogs",
41
+ :news => "http://ajax.googleapis.com/ajax/services/search/news",
42
+ :book => "http://ajax.googleapis.com/ajax/services/search/books",
43
+ :image => "http://ajax.googleapis.com/ajax/services/search/images",
44
+ :patent => "http://ajax.googleapis.com/ajax/services/search/patent"
45
+ }
46
+
47
+ #
48
+ # You can overwrite the query building process by passing the request url to use.
49
+ #
50
+ # ==== Params
51
+ # query<String>
52
+ # start_index<Integer>
53
+ # size<Integer> number of results default: 4
54
+ # filter
55
+ # country_code<String> 2 letters language code for the country you want
56
+ # to limit to
57
+ # language_code<String> (Web only)
58
+ # safe_search<String> active, moderate or off. Default: active (web only)
59
+ # custom_search_engine_id<String> optional argument supplying the unique id for
60
+ # the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
61
+ # (web only)
62
+ #
63
+ def initialize(options={})
64
+ if options[:custom_request_url]
65
+ @custom_request_url = options[:request_url]
66
+ else
67
+ @query = options[:query]
68
+ raise Google::Query::Error, "You need to pass a query" unless @query
69
+ @cursor = options[:start_index] || 0
70
+ @result_size = options[:result_size]
71
+ @filter = options[:filter]
72
+ @type = options[:type] || :web
73
+ @country_code = options[:country_code]
74
+ @language_code = options[:language_code] ? "lang_#{options[:language_code]}" : nil
75
+ @safe_search = options[:safe_search]
76
+ @custom_search_engine_id = options[:custom_search_engine_id]
77
+ @version = options[:version] || "1.0"
78
+ @referer = options[:referer] || "http://github.com/mattetti/"
79
+ @size = options[:size] || 4
80
+ @result_size = "large" if size > 4 # increase the result set size to avoid making too many requests
81
+ @size = 8 if (@result_size == "large" && size < 8)
82
+ end
83
+ @response ||= Response.new(:query => (query || custom_request_url), :size => size)
84
+ end
85
+
86
+ def build_request
87
+ if custom_request_url
88
+ custom_request_url
89
+ else
90
+ @request_url = "#{SEARCH_BASE_URLS[type]}?v=#{version}&q=#{CGI.escape(query)}"
91
+ @request_url << "&rsz=#{result_size}" if result_size
92
+ @request_url << "&start=#{cursor}" if cursor > 0
93
+ @request_url << "&lr=#{language_code}" if language_code
94
+ @request_url << "&hl=#{country_code}" if country_code
95
+
96
+ puts request_url if $RUBY_WEB_SEARCH_DEBUG
97
+ request_url
98
+ end
99
+ end
100
+
101
+ def build_requests
102
+ if custom_request_url
103
+ requests = [custom_request_url]
104
+ else
105
+ requests = []
106
+ # create an array of requests based on the fact that google limits
107
+ # us to 8 responses per request but let us use a cursor
108
+ (size / 8.to_f).ceil.times do |n|
109
+ url = "#{SEARCH_BASE_URLS[type]}?v=#{version}&q=#{CGI.escape(query)}"
110
+ url << "&rsz=#{result_size}" if result_size
111
+ url << "&lr=#{language_code}" if language_code
112
+ url << "&hl=#{country_code}" if country_code
113
+ url << "&start=#{cursor}"
114
+ @cursor += 8
115
+ requests << url
116
+ end
117
+
118
+ puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
119
+ requests
120
+ end
121
+ end
122
+
123
+ # Makes the request to Google
124
+ # if a larger set was requested than what is returned,
125
+ # more requests are made until the correct amount is available
126
+ def execute_unthreaded
127
+ @curl_request ||= ::Curl::Easy.new(){ |curl| curl.headers["Referer"] = referer }
128
+ @curl_request.url = build_request
129
+ @curl_request.perform
130
+ results = JSON.load(@curl_request.body_str)
131
+
132
+ response.process(results)
133
+ @cursor = response.results.size - 1
134
+ if ((cursor + 1) < size && custom_request_url.nil?)
135
+ puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
136
+ execute_unthreaded
137
+ else
138
+ response.limit(size)
139
+ end
140
+ end
141
+
142
+ # Makes the request to Google
143
+ # if a larger set was requested than what is returned,
144
+ # more requests are made until the correct amount is available
145
+ def execute
146
+ threads = build_requests.map do |req|
147
+ Thread.new do
148
+ curl_request = ::Curl::Easy.new(req){ |curl| curl.headers["Referer"] = referer }
149
+ curl_request.perform
150
+ JSON.load(curl_request.body_str)
151
+ end
152
+ end
153
+ threads.each do |t|
154
+ response.process(t.value)
155
+ end
156
+ response.limit(size)
157
+ end
158
+
159
+ end #of Query
160
+
161
+
162
+ class Response
163
+ attr_reader :results, :status, :query, :size, :estimated_result_count
164
+ def initialize(google_raw_response={})
165
+ process(google_raw_response) unless google_raw_response.empty?
166
+ end
167
+
168
+ def process(google_raw_response={})
169
+ @query ||= google_raw_response[:query]
170
+ @size ||= google_raw_response[:size]
171
+ @results ||= []
172
+ @status = google_raw_response["responseStatus"]
173
+ if google_raw_response["responseData"] && status && status == 200
174
+ estimated_result_count ||= google_raw_response["cursor"]["estimatedResultCount"] if google_raw_response["cursor"]
175
+ @results += google_raw_response["responseData"]["results"].map do |r|
176
+ {
177
+ :title => r["titleNoFormatting"],
178
+ :url => r["unescapedUrl"],
179
+ :cache_url => r["cacheUrl"],
180
+ :content => r["content"],
181
+ :domain => r["visibleUrl"]
182
+ }
183
+ end
184
+ end
185
+
186
+ def limit(req_size)
187
+ @results = @results[0...req_size]
188
+ self
189
+ end
190
+
191
+ end
192
+ end #of Response
193
+
194
+ end #of Google
195
+
196
+ # http://developer.yahoo.com/search/boss/
197
+ class Yahoo
198
+
199
+ def self.search(options={})
200
+ query = ::RubyWebSearch::Yahoo::Query.new(options)
201
+ query.execute
202
+ end
203
+
204
+ def self.unthreaded_search(options={})
205
+ query = ::RubyWebSearch::Yahoo::Query.new(options)
206
+ query.execute_unthreaded
207
+ end
208
+
209
+ class Query
210
+ attr_accessor :query, :start_index, :filter, :country_code, :language_code
211
+ attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
212
+ attr_accessor :size, :cursor, :custom_request_url, :response, :api_key
213
+
214
+ class Error < StandardError; end
215
+
216
+ SEARCH_BASE_URLS = { :web => "http://boss.yahooapis.com/ysearch/web",
217
+ }
218
+
219
+ #
220
+ # You can overwrite the query building process by passing the request url to use.
221
+ #
222
+ # ==== Params
223
+ # query<String>
224
+ # api_key<String>
225
+ # start_index<Integer>
226
+ # size<Integer> number of results default: 10
227
+ # filter
228
+ # country_code<String> 2 letters language code for the country you want
229
+ # to limit to
230
+ # language_code<String> (Web only)
231
+ # safe_search<String> active, moderate or off. Default: active (web only)
232
+ # custom_search_engine_id<String> optional argument supplying the unique id for
233
+ # the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
234
+ # (web only)
235
+ #
236
+ def initialize(options={})
237
+ if options[:custom_request_url]
238
+ @custom_request_url = options[:request_url]
239
+ else
240
+ @query = options[:query]
241
+ raise Yahoo::Query::Error, "You need to pass a query" unless @query
242
+ @cursor = options[:start_index] || 0
243
+ @filter = options[:filter]
244
+ @type = options[:type] || :web
245
+ @country_code = options[:country_code]
246
+ @language_code = options[:language_code]
247
+ @safe_search = options[:safe_search]
248
+ @custom_search_engine_id = options[:custom_search_engine_id]
249
+ @version = options[:version] || "1"
250
+ @referer = options[:referer] || "http://github.com/mattetti/"
251
+ @api_key = options[:api_key]
252
+ raise Yahoo::Query::Error, "You need to pass an api key" unless @api_key
253
+ @size = options[:size] || 10
254
+ end
255
+ @response ||= Response.new(:query => (query || custom_request_url), :size => size)
256
+ end
257
+
258
+ def build_request
259
+ if custom_request_url
260
+ custom_request_url
261
+ else
262
+ @request_url = "#{SEARCH_BASE_URLS[type]}/v#{version}/#{CGI.escape(query)}"
263
+ @request_url << "?appid=#{api_key}"
264
+ @request_url << "&count=#{size}" if size
265
+ @request_url << "&start=#{cursor}" if cursor > 0
266
+ @request_url << "&lang=#{language_code}&region=#{country_code}" if language_code && country_code
267
+
268
+ puts request_url if $RUBY_WEB_SEARCH_DEBUG
269
+ request_url
270
+ end
271
+ end
272
+
273
+ def build_requests
274
+ if custom_request_url
275
+ requests = [custom_request_url]
276
+ else
277
+ requests = []
278
+ # limiting to 10 responses per request
279
+ (size / 10.to_f).ceil.times do |n|
280
+ url = "#{SEARCH_BASE_URLS[type]}/v#{version}/#{CGI.escape(query)}"
281
+ url << "?appid=#{api_key}"
282
+ url << "&count=#{size}" if size
283
+ url << "&lang=#{language_code}&region=#{country_code}" if language_code && country_code
284
+ url << "&start=#{cursor}" if cursor > 0
285
+ @cursor += 10
286
+ requests << url
287
+ end
288
+
289
+ puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
290
+ requests
291
+ end
292
+ end
293
+
294
+ # Makes the request to Google
295
+ # if a larger set was requested than what is returned,
296
+ # more requests are made until the correct amount is available
297
+ def execute_unthreaded
298
+ @curl_request ||= ::Curl::Easy.new(){ |curl| curl.headers["Referer"] = referer }
299
+ @curl_request.url = build_request
300
+ @curl_request.perform
301
+ results = JSON.load(@curl_request.body_str)
302
+
303
+ response.process(results)
304
+ @cursor = response.results.size - 1
305
+ if ((cursor + 1) < size && custom_request_url.nil?)
306
+ puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
307
+ execute_unthreaded
308
+ else
309
+ response.limit(size)
310
+ end
311
+ end
312
+
313
+ # Makes the request to Google
314
+ # if a larger set was requested than what is returned,
315
+ # more requests are made until the correct amount is available
316
+ def execute
317
+ threads = build_requests.map do |req|
318
+ Thread.new do
319
+ curl_request = ::Curl::Easy.new(req){ |curl| curl.headers["Referer"] = referer }
320
+ curl_request.perform
321
+ JSON.load(curl_request.body_str)
322
+ end
323
+ end
324
+ threads.each do |t|
325
+ response.process(t.value)
326
+ end
327
+ response.limit(size)
328
+ end
329
+
330
+ end #of Query
331
+
332
+
333
+ class Response
334
+ attr_reader :results, :status, :query, :size, :estimated_result_count
335
+ def initialize(google_raw_response={})
336
+ process(google_raw_response) unless google_raw_response.empty?
337
+ end
338
+
339
+ def process(google_raw_response={})
340
+ @query ||= google_raw_response[:query]
341
+ @size ||= google_raw_response[:size]
342
+ @results ||= []
343
+ @status = google_raw_response["ysearchresponse"]["responsecode"].to_i if google_raw_response["ysearchresponse"]
344
+ if google_raw_response["ysearchresponse"] && google_raw_response["ysearchresponse"]["resultset_web"] && status && status == 200
345
+ estimated_result_count ||= google_raw_response["ysearchresponse"]["totalhits"]
346
+ @results += google_raw_response["ysearchresponse"]["resultset_web"].map do |r|
347
+ {
348
+ :title => r["title"],
349
+ :url => r["clickurl"],
350
+ :cache_url => r["cacheUrl"],
351
+ :content => r["abstract"],
352
+ :domain => r["url"]
353
+ }
354
+ end
355
+ end
356
+
357
+ def limit(req_size)
358
+ @results = @results[0...req_size]
359
+ self
360
+ end
361
+
362
+ end
363
+ end #of Response
364
+
365
+ end #of Yahoo
366
+
367
+ # http://www.bing.com/developers
368
+ class Bing
369
+
370
+ def self.search(options={})
371
+ query = ::RubyWebSearch::Bing::Query.new(options)
372
+ query.execute
373
+ end
374
+
375
+ def self.unthreaded_search(options={})
376
+ query = ::RubyWebSearch::Bing::Query.new(options)
377
+ query.execute_unthreaded
378
+ end
379
+
380
+ class Query
381
+ attr_accessor :query, :start_index, :filter, :country_code, :language_code
382
+ attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
383
+ attr_accessor :size, :cursor, :custom_request_url, :response, :api_key
384
+
385
+ class Error < StandardError; end
386
+
387
+ SEARCH_BASE_URLS = { :web => "http://api.search.live.net/json.aspx?sources=web",
388
+ }
389
+
390
+ #
391
+ # You can overwrite the query building process by passing the request url to use.
392
+ #
393
+ # ==== Params
394
+ # query<String>
395
+ # api_key<String>
396
+ # start_index<Integer>
397
+ # size<Integer> number of results default: 10
398
+ # filter
399
+ # country_code<String> 2 letters language code for the country you want
400
+ # to limit to
401
+ # language_code<String> (Web only)
402
+ # safe_search<String> active, moderate or off. Default: active (web only)
403
+ # custom_search_engine_id<String> optional argument supplying the unique id for
404
+ # the Custom Search Engine that should be used for the request (e.g., 000455696194071821846:reviews).
405
+ # (web only)
406
+ #
407
+ def initialize(options={})
408
+ if options[:custom_request_url]
409
+ @custom_request_url = options[:request_url]
410
+ else
411
+ @query = options[:query]
412
+ raise Bing::Query::Error, "You need to pass a query" unless @query
413
+ @cursor = options[:start_index] || 0
414
+ @filter = options[:filter]
415
+ @type = options[:type] || :web
416
+ @country_code = options[:country_code]
417
+ @language_code = options[:language_code]
418
+ @safe_search = options[:safe_search]
419
+ @custom_search_engine_id = options[:custom_search_engine_id]
420
+ @version = options[:version] || "1"
421
+ @referer = options[:referer] || "http://github.com/mattetti/"
422
+ @api_key = options[:api_key]
423
+ raise Bing::Query::Error, "You need to pass an api key" unless @api_key
424
+ @size = options[:size] || 10
425
+ end
426
+ @response ||= Response.new(:query => (query || custom_request_url), :size => size)
427
+ end
428
+
429
+ def build_request
430
+ if custom_request_url
431
+ custom_request_url
432
+ else
433
+ @request_url = "#{SEARCH_BASE_URLS[type]}&query=#{CGI.escape(query)}"
434
+ @request_url << "&appid=#{api_key}"
435
+ @request_url << "&web.count=#{size}" if size
436
+ @request_url << "&web.offset=#{cursor}" if cursor > 0
437
+ @request_url << "&market=#{language_code}-#{country_code}" if language_code && country_code
438
+
439
+ puts request_url if $RUBY_WEB_SEARCH_DEBUG
440
+ request_url
441
+ end
442
+ end
443
+
444
+ def build_requests
445
+ if custom_request_url
446
+ requests = [custom_request_url]
447
+ else
448
+ requests = []
449
+ # limiting to 10 responses per request
450
+ (size / 10.to_f).ceil.times do |n|
451
+ url = "#{SEARCH_BASE_URLS[type]}&query=#{CGI.escape(query)}"
452
+ url << "&appid=#{api_key}"
453
+ url << "&web.count=#{size}" if size
454
+ url << "&market=#{language_code}-#{country_code}" if language_code && country_code
455
+ url << "&web.offset=#{cursor}" if cursor > 0
456
+ @cursor += 10
457
+ requests << url
458
+ end
459
+
460
+ puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
461
+ requests
462
+ end
463
+ end
464
+
465
+ # Makes the request to Google
466
+ # if a larger set was requested than what is returned,
467
+ # more requests are made until the correct amount is available
468
+ def execute_unthreaded
469
+ @curl_request ||= ::Curl::Easy.new(){ |curl| curl.headers["Referer"] = referer }
470
+ @curl_request.url = build_request
471
+ @curl_request.perform
472
+ results = JSON.load(@curl_request.body_str)
473
+
474
+ response.process(results)
475
+ @cursor = response.results.size - 1
476
+ if ((cursor + 1) < size && custom_request_url.nil?)
477
+ puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
478
+ execute_unthreaded
479
+ else
480
+ response.limit(size)
481
+ end
482
+ end
483
+
484
+ # Makes the request to Google
485
+ # if a larger set was requested than what is returned,
486
+ # more requests are made until the correct amount is available
487
+ def execute
488
+ threads = build_requests.map do |req|
489
+ Thread.new do
490
+ curl_request = ::Curl::Easy.new(req){ |curl| curl.headers["Referer"] = referer }
491
+ curl_request.perform
492
+ JSON.load(curl_request.body_str)
493
+ end
494
+ end
495
+ threads.each do |t|
496
+ response.process(t.value)
497
+ end
498
+ response.limit(size)
499
+ end
500
+
501
+ end #of Query
502
+
503
+
504
+ class Response
505
+ attr_reader :results, :status, :query, :size, :estimated_result_count
506
+ def initialize(google_raw_response={})
507
+ process(google_raw_response) unless google_raw_response.empty?
508
+ end
509
+
510
+ def process(google_raw_response={})
511
+ @query ||= google_raw_response[:query]
512
+ @size ||= google_raw_response[:size]
513
+ @results ||= []
514
+ @status = 200
515
+ if google_raw_response["SearchResponse"] &&
516
+ google_raw_response["SearchResponse"]["Web"] &&
517
+ google_raw_response["SearchResponse"]["Web"]["Results"] &&
518
+ status && status == 200
519
+ estimated_result_count ||= google_raw_response["SearchResponse"]["Web"]["Total"]
520
+ @results += google_raw_response["SearchResponse"]["Web"]["Results"].map do |r|
521
+ {
522
+ :title => r["Title"],
523
+ :url => r["Url"],
524
+ :cache_url => r["CacheUrl"],
525
+ :content => r["Description"],
526
+ :domain => r["DisplayUrl"]
527
+ }
528
+ end
529
+ end
530
+
531
+ def limit(req_size)
532
+ @results = @results[0...req_size]
533
+ self
534
+ end
535
+
536
+ end
537
+ end #of Response
538
+
539
+ end #of Bing
540
+
541
+ end
@@ -0,0 +1,88 @@
require File.dirname(__FILE__) + '/spec_helper'
$RUBY_WEB_SEARCH_DEBUG = true

# Specs for the sequential (unthreaded) Google search path. Nothing is
# stubbed: each group issues real requests through the library.
describe "ruby-web-search" do

  describe "Google search" do

    describe "simple format" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman")
      end

      it "should return a RubyWebSearch::Google::Response" do
        @response.should be_an_instance_of(RubyWebSearch::Google::Response)
      end

      it "should have results" do
        @response.results.should be_an_instance_of(Array)
        @response.results.first.should be_an_instance_of(Hash)
      end

      it "should have 4 results (small request set size)" do
        @response.results.size.should == 4
      end

      describe "results" do
        before(:all) do
          @results = @response.results
        end

        it "should have a title" do
          @results.first[:title].should be_an_instance_of(String)
          @results.first[:title].size.should > 3
        end

        it "should have an url" do
          @results.first[:url].should be_an_instance_of(String)
          @results.first[:url].size.should > 3
        end

        it "should have a cache url" do
          @results.first[:cache_url].should be_an_instance_of(String)
          @results.first[:cache_url].size.should > 3
        end

        it "should have content" do
          @results.first[:content].should be_an_instance_of(String)
          @results.first[:content].size.should > 15
        end

        it "should have a domain" do
          @results.first[:domain].should be_an_instance_of(String)
          @results.first[:domain].size.should > 7
          @results.first[:url].should include(@response.results.first[:domain])
        end
      end
    end

    describe "large result set" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman", :result_size => "large")
      end

      it "should have 8 results" do
        @response.results.size.should == 8
      end
    end

    describe "custom size result set" do
      before(:all) do
        @response = RubyWebSearch::Google.unthreaded_search(:query => "Natalie Portman", :size => 24)
        @results = @response.results
      end

      it "should have exactly 24 results" do
        @results.size.should == 24
      end

      it "should have 24 unique results" do
        # @results is shared by every example in this group (before(:all)),
        # so it is destructured instead of shifted to leave it intact.
        # NOTE: only the first result is compared against the others.
        first, *others = @results
        others.each do |result|
          first[:url].should_not == result[:url]
        end
      end
    end
  end

end
@@ -0,0 +1,88 @@
require File.dirname(__FILE__) + '/spec_helper'
$RUBY_WEB_SEARCH_DEBUG = true

# Specs for the default (threaded) Google search path. Nothing is stubbed:
# each group issues real requests through the library.
describe "ruby-web-search" do

  describe "Google search" do

    describe "simple format" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman")
      end

      it "should return a RubyWebSearch::Google::Response" do
        @response.should be_an_instance_of(RubyWebSearch::Google::Response)
      end

      it "should have results" do
        @response.results.should be_an_instance_of(Array)
        @response.results.first.should be_an_instance_of(Hash)
      end

      it "should have 4 results (small request set size)" do
        @response.results.size.should == 4
      end

      describe "results" do
        before(:all) do
          @results = @response.results
        end

        it "should have a title" do
          @results.first[:title].should be_an_instance_of(String)
          @results.first[:title].size.should > 3
        end

        it "should have an url" do
          @results.first[:url].should be_an_instance_of(String)
          @results.first[:url].size.should > 3
        end

        it "should have a cache url" do
          @results.first[:cache_url].should be_an_instance_of(String)
          @results.first[:cache_url].size.should > 3
        end

        it "should have content" do
          @results.first[:content].should be_an_instance_of(String)
          @results.first[:content].size.should > 15
        end

        it "should have a domain" do
          @results.first[:domain].should be_an_instance_of(String)
          @results.first[:domain].size.should > 7
          @results.first[:url].should include(@response.results.first[:domain])
        end
      end
    end

    describe "large result set" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman", :result_size => "large")
      end

      it "should have 8 results" do
        @response.results.size.should == 8
      end
    end

    describe "custom size result set" do
      before(:all) do
        @response = RubyWebSearch::Google.search(:query => "Natalie Portman", :size => 24)
        @results = @response.results
      end

      it "should have exactly 24 results" do
        @results.size.should == 24
      end

      it "should have 24 unique results" do
        # @results is shared by every example in this group (before(:all)),
        # so it is destructured instead of shifted to leave it intact.
        # NOTE: only the first result is compared against the others.
        first, *others = @results
        others.each do |result|
          first[:url].should_not == result[:url]
        end
      end
    end
  end

end
@@ -0,0 +1,3 @@
1
+ $TESTING=true
2
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
3
+ require 'ruby-web-search'
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: edavis10-ruby-web-search
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Matt Aimonetti
14
+ autorequire: edavis10-ruby-web-search
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-03 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: json
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: A Ruby gem that provides a way to retrieve search results via the main search engines using Ruby
36
+ email: mattaimonetti@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ files:
44
+ - LICENSE
45
+ - README.markdown
46
+ - Rakefile
47
+ - lib/ruby-web-search.rb
48
+ - lib/curbemu.rb
49
+ - spec/ruby-web-search-unthreaded.rb
50
+ - spec/spec_helper.rb
51
+ - spec/ruby-web-search_spec.rb
52
+ has_rdoc: true
53
+ homepage: http://merbist.com
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.7
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: A Ruby gem that provides a way to retrieve search results via the main search engines using Ruby
86
+ test_files: []
87
+