mlangenberg-googlesearch 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/googlesearch.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'googlesearch'
3
- s.version = '0.0.4'
3
+ s.version = '0.0.5'
4
4
  s.date = '2009-01-14'
5
5
  s.platform = Gem::Platform::RUBY
6
6
  s.has_rdoc = false
@@ -11,9 +11,12 @@ Gem::Specification.new do |s|
11
11
  s.files = [ "googlesearch.gemspec",
12
12
  "lib/googlesearch.rb",
13
13
  "lib/googlesearch/search_response.rb",
14
- "lib/googlesearch/search_result.rb"]
14
+ "lib/googlesearch/search_result.rb",
15
+ "lib/googlesearch/search_page.rb"]
15
16
  s.test_files = ["spec/google_search_spec.rb",
16
17
  "spec/search_response_spec.rb",
18
+ "spec/search_result_spec.rb",
19
+ "spec/search_page_spec.rb",
17
20
  "spec/spec_helper.rb",
18
21
  "spec/result_example.xml"]
19
22
  s.add_dependency("nokogiri", [">= 1.1.1"])
data/lib/googlesearch.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  require 'rubygems'
2
+ require 'enumerator'
2
3
  require 'googlesearch/search_response'
3
4
  require 'googlesearch/search_result'
5
+ require 'googlesearch/search_page'
4
6
  require 'open-uri'
5
7
  require 'nokogiri'
6
8
 
@@ -10,7 +12,7 @@ class GoogleSearch
10
12
  attr_reader :response
11
13
 
12
14
  def initialize(o = {}, search_request = SearchRequest.new)
13
- @response = SearchResponse.new search_request.get("http://www.google.com/search?&q=#{o[:q]}&client=google-csbe&output=xml_no_dtd&cx=#{o[:cx]}&cr=lang_#{o[:cr]}&lr=lang_#{o[:lr]}&start=#{o[:start]}&num=#{o[:num]}")
15
+ @response = SearchResponse.new( search_request.get("http://www.google.com/search?&q=#{o[:q]}&client=google-csbe&output=xml_no_dtd&cx=#{o[:cx]}&cr=lang_#{o[:cr]}&lr=lang_#{o[:lr]}&start=#{o[:start]}&num=#{o[:num]}"), o[:num])
14
16
  end
15
17
  end
16
18
 
@@ -0,0 +1,15 @@
1
# One page of a paginated result set.
#
# index is zero-based. results_per_page is coerced with #to_i so that a page
# size supplied as a String (e.g. "10") behaves like the Integer 10 instead of
# raising a TypeError in #start_index.
class SearchPage
  def initialize(index, results_per_page)
    @index = index
    @results_per_page = results_per_page.to_i
  end

  # Page number for display: the zero-based index plus one.
  def page_number
    @index + 1
  end

  # Index multiplied by the page size (0 for the first page).
  def start_index
    @index * @results_per_page
  end
end
@@ -1,11 +1,20 @@
1
1
  class SearchResponse
2
- attr_reader :total_server_time, :total_number_of_results, :results, :index_of_first_result, :index_of_last_result
3
- def initialize(xml)
2
+ attr_reader :total_server_time, :total_number_of_results, :results, :index_of_first_result, :index_of_last_result, :requested_number_of_search_results
3
+ def initialize(xml, requested_number_of_search_results = 10)
4
4
  doc = Nokogiri::XML(xml)
5
5
  @total_server_time = doc.root.xpath('TM').text.to_f
6
6
  @index_of_first_result = doc.root.xpath('RES/@SN').text.to_i
7
7
  @index_of_last_result = doc.root.xpath('RES/@EN').text.to_i
8
8
  @total_number_of_results = doc.root.xpath('RES/M').text.to_i
9
9
  @results = doc.root.xpath('RES//R').map { |res_doc| SearchResult.new(res_doc) }
10
+ @requested_number_of_search_results = requested_number_of_search_results
11
+ end
12
+
13
+ def number_of_pages
14
+ (total_number_of_results/requested_number_of_search_results.to_f).round
15
+ end
16
+
17
+ def pages
18
+ @pages ||= number_of_pages.enum_for(:times).collect { |page_index| SearchPage.new(page_index, requested_number_of_search_results) }
10
19
  end
11
20
  end
@@ -7,7 +7,7 @@ describe GoogleSearch do
7
7
  'http://www.google.com/search?&q=adres&client=google-csbe&output=xml_no_dtd&cx=unique-cse-id&cr=lang_nl&lr=lang_nl&start=0&num=10'
8
8
  ).and_return('wat_xml')
9
9
 
10
- SearchResponse.should_receive(:new).with('wat_xml').and_return('search-response-object')
10
+ SearchResponse.should_receive(:new).with('wat_xml', 10).and_return('search-response-object')
11
11
  search = GoogleSearch.new({:cx => 'unique-cse-id', :q => 'adres', :cr => 'nl', :lr => 'nl', :start => 0, :num => 10 }, request_mock)
12
12
  search.response.should == 'search-response-object'
13
13
  end
@@ -1,23 +1,88 @@
1
1
  <?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
2
2
  <!DOCTYPE GSP SYSTEM "google.dtd">
3
3
  <GSP VER="3.2">
4
- <TM>0.458025</TM><Q>intercodam</Q>
5
- <PARAM name="q" value="intercodam" original_value="intercodam"/>
6
- <PARAM name="client" value="google-csbe" original_value="google-csbe"/>
7
- <PARAM name="output" value="xml" original_value="xml"/>
8
- <PARAM name="cx" value="unique-cse-id" original_value="unique-cse-id"/>
9
- <Context><title>Shopr</title></Context><RES SN="1" EN="5">
10
- <M>520</M>
11
- <FI/><XT/>
12
- <R N="1"><U>http://i3.shop-r.nl/language/nl/pages/39</U><UE>http://i3.shop-r.nl/language/nl/pages/39</UE><T>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V.</T><RK>0</RK><S>&lt;b&gt;Intercodam&lt;/b&gt; B.V.. Amstel 135 (vlak naast theater Carre) 1018 EN Amsterdam &lt;b&gt;...&lt;/b&gt; &lt;br&gt; &lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V. 2008. Aenean eget mi. Fusce mattis est id diam. &lt;b&gt;...&lt;/b&gt;</S><LANG>nl</LANG><Label>_cse_4a4weggqfu8</Label><HAS><L/><C SZ="7k" CID="sleD4FLqURwJ"/><RT/></HAS></R>
13
-
14
- <R N="2"><U>http://i3.shop-r.nl/language/nl/</U><UE>http://i3.shop-r.nl/language/nl/</UE><T>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V.</T><RK>0</RK><S>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V. 2008. Aenean eget mi. Fusce mattis est id diam. Phasellus &lt;br&gt; faucibus interdum sapien. Duis quis nunc. Sed enim.</S><LANG>nl</LANG><Label>_cse_4a4weggqfu8</Label><HAS><L/><C SZ="6k" CID="zvDK6Ic-qNMJ"/><RT/></HAS></R>
15
-
16
- <R N="3"><U>http://i3.shop-r.nl/language/nl/pages/41</U><UE>http://i3.shop-r.nl/language/nl/pages/41</UE><T>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V.</T><RK>0</RK><S>De &amp;#39;NV &lt;b&gt;Intercodam&lt;/b&gt;&amp;#39; is opgericht op 16 september 1919 en handelde in de meest &lt;br&gt; uiteenlopende producten. In 1972 werd deze NV omgezet in een Beheer B.V. &lt;b&gt;...&lt;/b&gt;</S><LANG>nl</LANG><Label>_cse_4a4weggqfu8</Label><HAS><L/><C SZ="6k" CID="7FB2NBtfU-UJ"/><RT/></HAS></R>
17
-
18
- <R N="4" MIME="application/pdf"><U>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/REFIN%20ARDENNES.pdf</U><UE>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/REFIN%2520ARDENNES.pdf</UE><T>refin - ardennes</T><RK>0</RK><S>&lt;b&gt;INTERCODAM&lt;/b&gt;. refin - ardennes. maart 2008. &lt;b&gt;Intercodam&lt;/b&gt; Tegels BV. Amstel 135 (&lt;br&gt; naast theater Carré). 1018 EN Amsterdam. tel 020-6225115. fax 020-6243354 &lt;b&gt;...&lt;/b&gt;</S><LANG>nl</LANG><Label>_cse_4a4weggqfu8</Label><HAS><L/><C SZ="" CID="3gRs0rPYzpYJ"/><RT/></HAS></R>
19
-
20
- <R N="5" MIME="application/pdf"><U>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/CAESAR%20MORE.pdf</U><UE>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/CAESAR%2520MORE.pdf</UE><T>caesar - more</T><RK>0</RK><S>&lt;b&gt;INTERCODAM&lt;/b&gt;. caesar - more. maart 2008. &lt;b&gt;Intercodam&lt;/b&gt; Tegels BV. Amstel 135 (naast &lt;br&gt; theater Carré). 1018 EN Amsterdam. tel 020-6225115. fax 020-6243354 &lt;b&gt;...&lt;/b&gt;</S><LANG>en</LANG><Label>_cse_4a4weggqfu8</Label><HAS><L/><C SZ="" CID="jzBwnKAFGckJ"/><RT/></HAS></R>
21
-
22
- </RES>
4
+ <TM>0.458025</TM>
5
+ <Q>intercodam</Q>
6
+ <PARAM name="q" value="intercodam" original_value="intercodam"/>
7
+ <PARAM name="client" value="google-csbe" original_value="google-csbe"/>
8
+ <PARAM name="output" value="xml" original_value="xml"/>
9
+ <PARAM name="cx" value="unique-cse-id" original_value="unique-cse-id"/>
10
+ <Context>
11
+ <title>Shopr</title>
12
+ </Context>
13
+ <RES SN="1" EN="5">
14
+ <M>23</M>
15
+ <FI/>
16
+ <XT/>
17
+ <R N="1">
18
+ <U>http://i3.shop-r.nl/language/nl/pages/39</U>
19
+ <UE>http://i3.shop-r.nl/language/nl/pages/39</UE>
20
+ <T>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V.</T>
21
+ <RK>0</RK>
22
+ <S>&lt;b&gt;Intercodam&lt;/b&gt; B.V.. Amstel 135 (vlak naast theater Carre) 1018 EN Amsterdam &lt;b&gt;...&lt;/b&gt; &lt;br&gt; &lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V. 2008. Aenean eget mi. Fusce mattis est id diam. &lt;b&gt;...&lt;/b&gt;</S>
23
+ <LANG>nl</LANG>
24
+ <Label>_cse_4a4weggqfu8</Label>
25
+ <HAS>
26
+ <L/>
27
+ <C SZ="7k" CID="sleD4FLqURwJ"/>
28
+ <RT/>
29
+ </HAS>
30
+ </R>
31
+ <R N="2">
32
+ <U>http://i3.shop-r.nl/language/nl/</U>
33
+ <UE>http://i3.shop-r.nl/language/nl/</UE>
34
+ <T>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V.</T>
35
+ <RK>0</RK>
36
+ <S>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V. 2008. Aenean eget mi. Fusce mattis est id diam. Phasellus &lt;br&gt; faucibus interdum sapien. Duis quis nunc. Sed enim.</S>
37
+ <LANG>nl</LANG>
38
+ <Label>_cse_4a4weggqfu8</Label>
39
+ <HAS>
40
+ <L/>
41
+ <C SZ="6k" CID="zvDK6Ic-qNMJ"/>
42
+ <RT/>
43
+ </HAS>
44
+ </R>
45
+ <R N="3">
46
+ <U>http://i3.shop-r.nl/language/nl/pages/41</U>
47
+ <UE>http://i3.shop-r.nl/language/nl/pages/41</UE>
48
+ <T>&lt;b&gt;Intercodam&lt;/b&gt; Tegels B.V.</T>
49
+ <RK>0</RK>
50
+ <S>De &amp;#39;NV &lt;b&gt;Intercodam&lt;/b&gt;&amp;#39; is opgericht op 16 september 1919 en handelde in de meest &lt;br&gt; uiteenlopende producten. In 1972 werd deze NV omgezet in een Beheer B.V. &lt;b&gt;...&lt;/b&gt;</S>
51
+ <LANG>nl</LANG>
52
+ <Label>_cse_4a4weggqfu8</Label>
53
+ <HAS>
54
+ <L/>
55
+ <C SZ="6k" CID="7FB2NBtfU-UJ"/>
56
+ <RT/>
57
+ </HAS>
58
+ </R>
59
+ <R N="4" MIME="application/pdf">
60
+ <U>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/REFIN%20ARDENNES.pdf</U>
61
+ <UE>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/REFIN%2520ARDENNES.pdf</UE>
62
+ <T>refin - ardennes</T>
63
+ <RK>0</RK>
64
+ <S>&lt;b&gt;INTERCODAM&lt;/b&gt;. refin - ardennes. maart 2008. &lt;b&gt;Intercodam&lt;/b&gt; Tegels BV. Amstel 135 (&lt;br&gt; naast theater Carré). 1018 EN Amsterdam. tel 020-6225115. fax 020-6243354 &lt;b&gt;...&lt;/b&gt;</S>
65
+ <LANG>nl</LANG>
66
+ <Label>_cse_4a4weggqfu8</Label>
67
+ <HAS>
68
+ <L/>
69
+ <C SZ="" CID="3gRs0rPYzpYJ"/>
70
+ <RT/>
71
+ </HAS>
72
+ </R>
73
+ <R N="5" MIME="application/pdf">
74
+ <U>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/CAESAR%20MORE.pdf</U>
75
+ <UE>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/CAESAR%2520MORE.pdf</UE>
76
+ <T>caesar - more</T>
77
+ <RK>0</RK>
78
+ <S>&lt;b&gt;INTERCODAM&lt;/b&gt;. caesar - more. maart 2008. &lt;b&gt;Intercodam&lt;/b&gt; Tegels BV. Amstel 135 (naast &lt;br&gt; theater Carré). 1018 EN Amsterdam. tel 020-6225115. fax 020-6243354 &lt;b&gt;...&lt;/b&gt;</S>
79
+ <LANG>en</LANG>
80
+ <Label>_cse_4a4weggqfu8</Label>
81
+ <HAS>
82
+ <L/>
83
+ <C SZ="" CID="jzBwnKAFGckJ"/>
84
+ <RT/>
85
+ </HAS>
86
+ </R>
87
+ </RES>
23
88
  </GSP>
@@ -0,0 +1,18 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
# Checks how SearchPage turns a zero-based index into a page number and a
# start index.
describe SearchPage do
  it "should have page number of 1" do
    SearchPage.new(0, 10).page_number.should == 1
  end

  it "should have a start index of 0 when the index is 0" do
    SearchPage.new(0, 10).start_index.should == 0
  end

  it "should have a start index of 10 when the index is 1 and the requested number of results is 10" do
    SearchPage.new(1, 10).start_index.should == 10
  end
end
@@ -3,8 +3,8 @@ require File.join(File.dirname(__FILE__), 'spec_helper')
3
3
  describe SearchResponse do
4
4
 
5
5
  setup do
6
- example_xml = File.read(File.join(File.dirname(__FILE__), 'result_example.xml'))
7
- @response = SearchResponse.new(example_xml)
6
+ @example_xml = File.read(File.join(File.dirname(__FILE__), 'result_example.xml'))
7
+ @response = SearchResponse.new(@example_xml, 5)
8
8
  end
9
9
 
10
10
  it "shoud be able to return total server time in seconds" do
@@ -12,7 +12,7 @@ describe SearchResponse do
12
12
  end
13
13
 
14
14
  it "should be able to return total number of results" do
15
- @response.total_number_of_results.should == 520
15
+ @response.total_number_of_results.should == 23
16
16
  end
17
17
 
18
18
  it "should be able to return the index of the first search result returned in the result set" do
@@ -23,7 +23,25 @@ describe SearchResponse do
23
23
  @response.index_of_last_result.should == 5
24
24
  end
25
25
 
26
+ it "should return 5 for the requested number of search results" do
27
+ @response.requested_number_of_search_results.should == 5
28
+ end
29
+
30
+ it "should return 10 for the requested number of search result as default" do
31
+ @response = SearchResponse.new(@example_xml)
32
+ @response.requested_number_of_search_results.should == 10
33
+ end
34
+
26
35
  it "should be able to return an array of SearchResult objects" do
27
36
  @response.results.map { |r| r.class }.should == [SearchResult]*5
28
37
  end
29
- end
38
+
39
+ it "should known that there are five pages with results" do
40
+ @response.number_of_pages.should == 5
41
+ end
42
+
43
+ it "should provide high level pagination" do
44
+ @response.pages[0].page_number.should == 1
45
+ @response.pages[0].start_index.should == 0
46
+ end
47
+ end
@@ -0,0 +1,36 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_helper')
2
+
3
# Checks the fields SearchResult exposes for each result parsed from the
# result_example.xml fixture.
describe SearchResult do

  setup do
    fixture_path = File.join(File.dirname(__FILE__), 'result_example.xml')
    @results = SearchResponse.new(File.read(fixture_path)).results
  end

  it "should be able to return the first URL of the results" do
    @results.first.url.should == 'http://i3.shop-r.nl/language/nl/pages/39'
  end

  it "should be able to return the second index of the results" do
    second = @results[1]
    second.index.should == 2
  end

  it "should be able to return the last title of the results" do
    fifth = @results[4]
    fifth.title.should == 'caesar - more'
  end

  it "should be able to return the third text of the results" do
    third = @results[2]
    third.excerpt.should == %[De &#39;NV <b>Intercodam</b>&#39; is opgericht op 16 september 1919 en handelde in de meest <br> uiteenlopende producten. In 1972 werd deze NV omgezet in een Beheer B.V. <b>...</b>]
  end

  it "should be able to return the language of the result" do
    fourth, fifth = @results[3], @results[4]
    fourth.language.should == 'nl'
    fifth.language.should == 'en'
  end

  it "should be able to return the mime type if applicable" do
    # The first fixture result has no MIME attribute; the fifth is a PDF.
    @results.first.mime_type.should == nil
    @results[4].mime_type.should == 'application/pdf'
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mlangenberg-googlesearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene Heino
@@ -35,6 +35,7 @@ files:
35
35
  - lib/googlesearch.rb
36
36
  - lib/googlesearch/search_response.rb
37
37
  - lib/googlesearch/search_result.rb
38
+ - lib/googlesearch/search_page.rb
38
39
  has_rdoc: false
39
40
  homepage:
40
41
  post_install_message:
@@ -64,5 +65,7 @@ summary: Google CSE implementation
64
65
  test_files:
65
66
  - spec/google_search_spec.rb
66
67
  - spec/search_response_spec.rb
68
+ - spec/search_result_spec.rb
69
+ - spec/search_page_spec.rb
67
70
  - spec/spec_helper.rb
68
71
  - spec/result_example.xml