mlangenberg-googlesearch 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/googlesearch.gemspec +5 -2
- data/lib/googlesearch.rb +3 -1
- data/lib/googlesearch/search_page.rb +15 -0
- data/lib/googlesearch/search_response.rb +11 -2
- data/spec/google_search_spec.rb +1 -1
- data/spec/result_example.xml +84 -19
- data/spec/search_page_spec.rb +18 -0
- data/spec/search_response_spec.rb +22 -4
- data/spec/search_result_spec.rb +36 -0
- metadata +4 -1
data/googlesearch.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'googlesearch'
|
3
|
-
s.version = '0.0.4'
|
3
|
+
s.version = '0.0.5'
|
4
4
|
s.date = '2009-01-14'
|
5
5
|
s.platform = Gem::Platform::RUBY
|
6
6
|
s.has_rdoc = false
|
@@ -11,9 +11,12 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.files = [ "googlesearch.gemspec",
|
12
12
|
"lib/googlesearch.rb",
|
13
13
|
"lib/googlesearch/search_response.rb",
|
14
|
-
"lib/googlesearch/search_result.rb"]
|
14
|
+
"lib/googlesearch/search_result.rb",
|
15
|
+
"lib/googlesearch/search_page.rb"]
|
15
16
|
s.test_files = ["spec/google_search_spec.rb",
|
16
17
|
"spec/search_response_spec.rb",
|
18
|
+
"spec/search_result_spec.rb",
|
19
|
+
"spec/search_page_spec.rb",
|
17
20
|
"spec/spec_helper.rb",
|
18
21
|
"spec/result_example.xml"]
|
19
22
|
s.add_dependency("nokogiri", [">= 1.1.1"])
|
data/lib/googlesearch.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'enumerator'
|
2
3
|
require 'googlesearch/search_response'
|
3
4
|
require 'googlesearch/search_result'
|
5
|
+
require 'googlesearch/search_page'
|
4
6
|
require 'open-uri'
|
5
7
|
require 'nokogiri'
|
6
8
|
|
@@ -10,7 +12,7 @@ class GoogleSearch
|
|
10
12
|
attr_reader :response
|
11
13
|
|
12
14
|
def initialize(o = {}, search_request = SearchRequest.new)
|
13
|
-
@response = SearchResponse.new search_request.get("http://www.google.com/search?&q=#{o[:q]}&client=google-csbe&output=xml_no_dtd&cx=#{o[:cx]}&cr=lang_#{o[:cr]}&lr=lang_#{o[:lr]}&start=#{o[:start]}&num=#{o[:num]}")
|
15
|
+
@response = SearchResponse.new( search_request.get("http://www.google.com/search?&q=#{o[:q]}&client=google-csbe&output=xml_no_dtd&cx=#{o[:cx]}&cr=lang_#{o[:cr]}&lr=lang_#{o[:lr]}&start=#{o[:start]}&num=#{o[:num]}"), o[:num])
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
@@ -1,11 +1,20 @@
|
|
1
1
|
class SearchResponse
|
2
|
-
attr_reader :total_server_time, :total_number_of_results, :results, :index_of_first_result, :index_of_last_result
|
3
|
-
def initialize(xml)
|
2
|
+
attr_reader :total_server_time, :total_number_of_results, :results, :index_of_first_result, :index_of_last_result, :requested_number_of_search_results
|
3
|
+
def initialize(xml, requested_number_of_search_results = 10)
|
4
4
|
doc = Nokogiri::XML(xml)
|
5
5
|
@total_server_time = doc.root.xpath('TM').text.to_f
|
6
6
|
@index_of_first_result = doc.root.xpath('RES/@SN').text.to_i
|
7
7
|
@index_of_last_result = doc.root.xpath('RES/@EN').text.to_i
|
8
8
|
@total_number_of_results = doc.root.xpath('RES/M').text.to_i
|
9
9
|
@results = doc.root.xpath('RES//R').map { |res_doc| SearchResult.new(res_doc) }
|
10
|
+
@requested_number_of_search_results = requested_number_of_search_results
|
11
|
+
end
|
12
|
+
|
13
|
+
def number_of_pages
|
14
|
+
(total_number_of_results/requested_number_of_search_results.to_f).round
|
15
|
+
end
|
16
|
+
|
17
|
+
def pages
|
18
|
+
@pages ||= number_of_pages.enum_for(:times).collect { |page_index| SearchPage.new(page_index, requested_number_of_search_results) }
|
10
19
|
end
|
11
20
|
end
|
data/spec/google_search_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe GoogleSearch do
|
|
7
7
|
'http://www.google.com/search?&q=adres&client=google-csbe&output=xml_no_dtd&cx=unique-cse-id&cr=lang_nl&lr=lang_nl&start=0&num=10'
|
8
8
|
).and_return('wat_xml')
|
9
9
|
|
10
|
-
SearchResponse.should_receive(:new).with('wat_xml').and_return('search-response-object')
|
10
|
+
SearchResponse.should_receive(:new).with('wat_xml', 10).and_return('search-response-object')
|
11
11
|
search = GoogleSearch.new({:cx => 'unique-cse-id', :q => 'adres', :cr => 'nl', :lr => 'nl', :start => 0, :num => 10 }, request_mock)
|
12
12
|
search.response.should == 'search-response-object'
|
13
13
|
end
|
data/spec/result_example.xml
CHANGED
@@ -1,23 +1,88 @@
|
|
1
1
|
<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
|
2
2
|
<!DOCTYPE GSP SYSTEM "google.dtd">
|
3
3
|
<GSP VER="3.2">
|
4
|
-
<TM>0.458025</TM>
|
5
|
-
<
|
6
|
-
<PARAM name="
|
7
|
-
<PARAM name="
|
8
|
-
<PARAM name="
|
9
|
-
<
|
10
|
-
<
|
11
|
-
<
|
12
|
-
|
13
|
-
|
14
|
-
<
|
15
|
-
|
16
|
-
<
|
17
|
-
|
18
|
-
<
|
19
|
-
|
20
|
-
<
|
21
|
-
|
22
|
-
|
4
|
+
<TM>0.458025</TM>
|
5
|
+
<Q>intercodam</Q>
|
6
|
+
<PARAM name="q" value="intercodam" original_value="intercodam"/>
|
7
|
+
<PARAM name="client" value="google-csbe" original_value="google-csbe"/>
|
8
|
+
<PARAM name="output" value="xml" original_value="xml"/>
|
9
|
+
<PARAM name="cx" value="unique-cse-id" original_value="unique-cse-id"/>
|
10
|
+
<Context>
|
11
|
+
<title>Shopr</title>
|
12
|
+
</Context>
|
13
|
+
<RES SN="1" EN="5">
|
14
|
+
<M>23</M>
|
15
|
+
<FI/>
|
16
|
+
<XT/>
|
17
|
+
<R N="1">
|
18
|
+
<U>http://i3.shop-r.nl/language/nl/pages/39</U>
|
19
|
+
<UE>http://i3.shop-r.nl/language/nl/pages/39</UE>
|
20
|
+
<T><b>Intercodam</b> Tegels B.V.</T>
|
21
|
+
<RK>0</RK>
|
22
|
+
<S><b>Intercodam</b> B.V.. Amstel 135 (vlak naast theater Carre) 1018 EN Amsterdam <b>...</b> <br> <b>Intercodam</b> Tegels B.V. 2008. Aenean eget mi. Fusce mattis est id diam. <b>...</b></S>
|
23
|
+
<LANG>nl</LANG>
|
24
|
+
<Label>_cse_4a4weggqfu8</Label>
|
25
|
+
<HAS>
|
26
|
+
<L/>
|
27
|
+
<C SZ="7k" CID="sleD4FLqURwJ"/>
|
28
|
+
<RT/>
|
29
|
+
</HAS>
|
30
|
+
</R>
|
31
|
+
<R N="2">
|
32
|
+
<U>http://i3.shop-r.nl/language/nl/</U>
|
33
|
+
<UE>http://i3.shop-r.nl/language/nl/</UE>
|
34
|
+
<T><b>Intercodam</b> Tegels B.V.</T>
|
35
|
+
<RK>0</RK>
|
36
|
+
<S><b>Intercodam</b> Tegels B.V. 2008. Aenean eget mi. Fusce mattis est id diam. Phasellus <br> faucibus interdum sapien. Duis quis nunc. Sed enim.</S>
|
37
|
+
<LANG>nl</LANG>
|
38
|
+
<Label>_cse_4a4weggqfu8</Label>
|
39
|
+
<HAS>
|
40
|
+
<L/>
|
41
|
+
<C SZ="6k" CID="zvDK6Ic-qNMJ"/>
|
42
|
+
<RT/>
|
43
|
+
</HAS>
|
44
|
+
</R>
|
45
|
+
<R N="3">
|
46
|
+
<U>http://i3.shop-r.nl/language/nl/pages/41</U>
|
47
|
+
<UE>http://i3.shop-r.nl/language/nl/pages/41</UE>
|
48
|
+
<T><b>Intercodam</b> Tegels B.V.</T>
|
49
|
+
<RK>0</RK>
|
50
|
+
<S>De &#39;NV <b>Intercodam</b>&#39; is opgericht op 16 september 1919 en handelde in de meest <br> uiteenlopende producten. In 1972 werd deze NV omgezet in een Beheer B.V. <b>...</b></S>
|
51
|
+
<LANG>nl</LANG>
|
52
|
+
<Label>_cse_4a4weggqfu8</Label>
|
53
|
+
<HAS>
|
54
|
+
<L/>
|
55
|
+
<C SZ="6k" CID="7FB2NBtfU-UJ"/>
|
56
|
+
<RT/>
|
57
|
+
</HAS>
|
58
|
+
</R>
|
59
|
+
<R N="4" MIME="application/pdf">
|
60
|
+
<U>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/REFIN%20ARDENNES.pdf</U>
|
61
|
+
<UE>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/REFIN%2520ARDENNES.pdf</UE>
|
62
|
+
<T>refin - ardennes</T>
|
63
|
+
<RK>0</RK>
|
64
|
+
<S><b>INTERCODAM</b>. refin - ardennes. maart 2008. <b>Intercodam</b> Tegels BV. Amstel 135 (<br> naast theater Carré). 1018 EN Amsterdam. tel 020-6225115. fax 020-6243354 <b>...</b></S>
|
65
|
+
<LANG>nl</LANG>
|
66
|
+
<Label>_cse_4a4weggqfu8</Label>
|
67
|
+
<HAS>
|
68
|
+
<L/>
|
69
|
+
<C SZ="" CID="3gRs0rPYzpYJ"/>
|
70
|
+
<RT/>
|
71
|
+
</HAS>
|
72
|
+
</R>
|
73
|
+
<R N="5" MIME="application/pdf">
|
74
|
+
<U>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/CAESAR%20MORE.pdf</U>
|
75
|
+
<UE>http://i3.shop-r.nl/shops/i3.shop-r.nl/assets/CAESAR%2520MORE.pdf</UE>
|
76
|
+
<T>caesar - more</T>
|
77
|
+
<RK>0</RK>
|
78
|
+
<S><b>INTERCODAM</b>. caesar - more. maart 2008. <b>Intercodam</b> Tegels BV. Amstel 135 (naast <br> theater Carré). 1018 EN Amsterdam. tel 020-6225115. fax 020-6243354 <b>...</b></S>
|
79
|
+
<LANG>en</LANG>
|
80
|
+
<Label>_cse_4a4weggqfu8</Label>
|
81
|
+
<HAS>
|
82
|
+
<L/>
|
83
|
+
<C SZ="" CID="jzBwnKAFGckJ"/>
|
84
|
+
<RT/>
|
85
|
+
</HAS>
|
86
|
+
</R>
|
87
|
+
</RES>
|
23
88
|
</GSP>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'spec_helper')
|
2
|
+
|
3
|
+
describe SearchPage do
|
4
|
+
it "should have page number of 1" do
|
5
|
+
p = SearchPage.new(0, 10)
|
6
|
+
p.page_number.should == 1
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have a start index of 0 when the index is 0" do
|
10
|
+
p = SearchPage.new(0, 10)
|
11
|
+
p.start_index.should == 0
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should have a start index of 10 when the index is 1 and the requested number of results is 10" do
|
15
|
+
p = SearchPage.new(1, 10)
|
16
|
+
p.start_index.should == 10
|
17
|
+
end
|
18
|
+
end
|
@@ -3,8 +3,8 @@ require File.join(File.dirname(__FILE__), 'spec_helper')
|
|
3
3
|
describe SearchResponse do
|
4
4
|
|
5
5
|
setup do
|
6
|
-
example_xml = File.read(File.join(File.dirname(__FILE__), 'result_example.xml'))
|
7
|
-
@response = SearchResponse.new(example_xml)
|
6
|
+
@example_xml = File.read(File.join(File.dirname(__FILE__), 'result_example.xml'))
|
7
|
+
@response = SearchResponse.new(@example_xml, 5)
|
8
8
|
end
|
9
9
|
|
10
10
|
it "shoud be able to return total server time in seconds" do
|
@@ -12,7 +12,7 @@ describe SearchResponse do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should be able to return total number of results" do
|
15
|
-
@response.total_number_of_results.should ==
|
15
|
+
@response.total_number_of_results.should == 23
|
16
16
|
end
|
17
17
|
|
18
18
|
it "should be able to return the index of the first search result returned in the result set" do
|
@@ -23,7 +23,25 @@ describe SearchResponse do
|
|
23
23
|
@response.index_of_last_result.should == 5
|
24
24
|
end
|
25
25
|
|
26
|
+
it "should return 5 for the requested number of search results" do
|
27
|
+
@response.requested_number_of_search_results.should == 5
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should return 10 for the requested number of search result as default" do
|
31
|
+
@response = SearchResponse.new(@example_xml)
|
32
|
+
@response.requested_number_of_search_results.should == 10
|
33
|
+
end
|
34
|
+
|
26
35
|
it "should be able to return an array of SearchResult objects" do
|
27
36
|
@response.results.map { |r| r.class }.should == [SearchResult]*5
|
28
37
|
end
|
29
|
-
|
38
|
+
|
39
|
+
it "should known that there are five pages with results" do
|
40
|
+
@response.number_of_pages.should == 5
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should provide high level pagination" do
|
44
|
+
@response.pages[0].page_number.should == 1
|
45
|
+
@response.pages[0].start_index.should == 0
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'spec_helper')
|
2
|
+
|
3
|
+
describe SearchResult do
|
4
|
+
|
5
|
+
setup do
|
6
|
+
example_xml = File.read(File.join(File.dirname(__FILE__), 'result_example.xml'))
|
7
|
+
response = SearchResponse.new(example_xml)
|
8
|
+
@results = response.results
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should be able to return the first URL of the results" do
|
12
|
+
@results[0].url.should == 'http://i3.shop-r.nl/language/nl/pages/39'
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should be able to return the second index of the results" do
|
16
|
+
@results[1].index.should == 2
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should be able to return the last title of the results" do
|
20
|
+
@results[4].title.should == 'caesar - more'
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should be able to return the third text of the results" do
|
24
|
+
@results[2].excerpt.should == %[De 'NV <b>Intercodam</b>' is opgericht op 16 september 1919 en handelde in de meest <br> uiteenlopende producten. In 1972 werd deze NV omgezet in een Beheer B.V. <b>...</b>]
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should be able to return the language of the result" do
|
28
|
+
@results[3].language.should == 'nl'
|
29
|
+
@results[4].language.should == 'en'
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should be able to return the mime type if applicable" do
|
33
|
+
@results[0].mime_type.should == nil
|
34
|
+
@results[4].mime_type.should == 'application/pdf'
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mlangenberg-googlesearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rene Heino
|
@@ -35,6 +35,7 @@ files:
|
|
35
35
|
- lib/googlesearch.rb
|
36
36
|
- lib/googlesearch/search_response.rb
|
37
37
|
- lib/googlesearch/search_result.rb
|
38
|
+
- lib/googlesearch/search_page.rb
|
38
39
|
has_rdoc: false
|
39
40
|
homepage:
|
40
41
|
post_install_message:
|
@@ -64,5 +65,7 @@ summary: Google CSE implementation
|
|
64
65
|
test_files:
|
65
66
|
- spec/google_search_spec.rb
|
66
67
|
- spec/search_response_spec.rb
|
68
|
+
- spec/search_result_spec.rb
|
69
|
+
- spec/search_page_spec.rb
|
67
70
|
- spec/spec_helper.rb
|
68
71
|
- spec/result_example.xml
|