gscraper 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. data/.gitignore +8 -0
  2. data/.specopts +1 -0
  3. data/.yardopts +1 -0
  4. data/ChangeLog.md +122 -0
  5. data/Gemfile +25 -0
  6. data/{README.txt → README.md} +25 -24
  7. data/Rakefile +32 -10
  8. data/gscraper.gemspec +112 -0
  9. data/lib/gscraper.rb +0 -2
  10. data/lib/gscraper/extensions.rb +0 -2
  11. data/lib/gscraper/extensions/uri.rb +0 -2
  12. data/lib/gscraper/extensions/uri/http.rb +0 -2
  13. data/lib/gscraper/extensions/uri/query_params.rb +18 -5
  14. data/lib/gscraper/gscraper.rb +61 -70
  15. data/lib/gscraper/has_pages.rb +76 -20
  16. data/lib/gscraper/licenses.rb +0 -2
  17. data/lib/gscraper/page.rb +45 -16
  18. data/lib/gscraper/search.rb +0 -2
  19. data/lib/gscraper/search/ajax_query.rb +75 -22
  20. data/lib/gscraper/search/page.rb +328 -122
  21. data/lib/gscraper/search/query.rb +100 -7
  22. data/lib/gscraper/search/result.rb +27 -6
  23. data/lib/gscraper/search/search.rb +59 -9
  24. data/lib/gscraper/search/web_query.rb +120 -37
  25. data/lib/gscraper/sponsored_ad.rb +19 -6
  26. data/lib/gscraper/sponsored_links.rb +260 -92
  27. data/lib/gscraper/version.rb +2 -3
  28. data/spec/extensions/uri/query_params_spec.rb +8 -0
  29. data/spec/gscraper_spec.rb +9 -4
  30. data/spec/has_pages_examples.rb +0 -2
  31. data/spec/has_sponsored_links_examples.rb +2 -1
  32. data/spec/helpers/query.rb +3 -1
  33. data/spec/helpers/uri.rb +6 -4
  34. data/spec/page_has_results_examples.rb +0 -2
  35. data/spec/search/ajax_query_spec.rb +6 -11
  36. data/spec/search/page_has_results_examples.rb +0 -2
  37. data/spec/search/web_query_spec.rb +6 -11
  38. data/spec/spec_helper.rb +10 -4
  39. metadata +147 -54
  40. data/History.txt +0 -101
  41. data/Manifest.txt +0 -38
  42. data/tasks/spec.rb +0 -9
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,9 +16,9 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  module GScraper
24
- VERSION = '0.2.4'
22
+ # The version of GScraper
23
+ VERSION = '0.3.0'
25
24
  end
@@ -35,4 +35,12 @@ describe "URI::QueryParams" do
35
35
  @uri.query = 'u=3'
36
36
  @uri.query_params['u'].should == '3'
37
37
  end
38
+
39
+ it "should properly escape query param values" do
40
+ @uri.query_params['x'] = '1&2'
41
+ @uri.query_params['y'] = 'one=two'
42
+ @uri.query_params['z'] = '?'
43
+
44
+ @uri.to_s.should == "http://www.test.com/page.php?x=1%262&y=one%3Dtwo&z=%3F"
45
+ end
38
46
  end
@@ -1,8 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  require 'gscraper/gscraper'
4
+ require 'gscraper/version'
4
5
 
5
6
  describe "GScraper" do
7
+ it "should have a VERSION constant" do
8
+ GScraper.should be_const_defined('VERSION')
9
+ end
10
+
6
11
  describe "User-Agent support" do
7
12
  it "should have a default User-Agent string" do
8
13
  GScraper.user_agent.should_not be_nil
@@ -11,19 +16,19 @@ describe "GScraper" do
11
16
 
12
17
  describe "Proxy support" do
13
18
  it "should provide a :host key" do
14
- GScraper.proxy.has_key?(:host).should == true
19
+ GScraper.proxy.should have_key(:host)
15
20
  end
16
21
 
17
22
  it "should provide a :port key" do
18
- GScraper.proxy.has_key?(:port).should == true
23
+ GScraper.proxy.should have_key(:port)
19
24
  end
20
25
 
21
26
  it "should provide a :user key" do
22
- GScraper.proxy.has_key?(:user).should == true
27
+ GScraper.proxy.should have_key(:user)
23
28
  end
24
29
 
25
30
  it "should provide a :password key" do
26
- GScraper.proxy.has_key?(:password).should == true
31
+ GScraper.proxy.should have_key(:password)
27
32
  end
28
33
  end
29
34
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for "has Pages" do
4
-
5
4
  it "should have a first page" do
6
5
  @query.first_page.should_not be_nil
7
6
  end
@@ -15,5 +14,4 @@ shared_examples_for "has Pages" do
15
14
  pages.should_not be_nil
16
15
  pages.length.should == 2
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,8 @@
1
1
  require 'spec_helper'
2
+ require 'helpers/uri'
2
3
 
3
4
  shared_examples_for "has Sponsored Links" do
5
+ include Helpers
4
6
 
5
7
  it "should have ads" do
6
8
  @links.length.should_not == 0
@@ -41,5 +43,4 @@ shared_examples_for "has Sponsored Links" do
41
43
  uri_should_be_valid(url)
42
44
  end
43
45
  end
44
-
45
46
  end
@@ -1 +1,3 @@
1
- DEFAULT_QUERY = 'Ruby'
1
+ module Helpers
2
+ DEFAULT_QUERY = 'Ruby'
3
+ end
@@ -1,7 +1,9 @@
1
1
  require 'uri'
2
2
 
3
- def uri_should_be_valid(uri)
4
- uri.scheme.should_not be_nil
5
- uri.host.should_not be_nil
6
- uri.path.should_not be_nil
3
+ module Helpers
4
+ def uri_should_be_valid(uri)
5
+ uri.scheme.should_not be_nil
6
+ uri.host.should_not be_nil
7
+ uri.path.should_not be_nil
8
+ end
7
9
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for "Page has Results" do
4
-
5
4
  it "should have results" do
6
5
  @page.length.should_not == 0
7
6
  end
@@ -9,5 +8,4 @@ shared_examples_for "Page has Results" do
9
8
  it "should have the maximum amount of results per page" do
10
9
  @page.length.should == @query.results_per_page
11
10
  end
12
-
13
11
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'helpers/query'
2
3
  require 'has_pages_examples'
3
4
  require 'page_has_results_examples'
4
5
  require 'search/page_has_results_examples'
@@ -6,9 +7,10 @@ require 'search/page_has_results_examples'
6
7
  require 'gscraper/search/ajax_query'
7
8
 
8
9
  describe GScraper::Search::AJAXQuery do
10
+ include Helpers
9
11
 
10
12
  before(:all) do
11
- @query = GScraper::Search::AJAXQuery.new(:query => DEFAULT_QUERY)
13
+ @query = GScraper::Search::AJAXQuery.new(:query => Helpers::DEFAULT_QUERY)
12
14
  @page = @query.first_page
13
15
  end
14
16
 
@@ -17,7 +19,6 @@ describe GScraper::Search::AJAXQuery do
17
19
  it_should_behave_like "Page has Search Results"
18
20
 
19
21
  describe "Search URL" do
20
-
21
22
  before(:all) do
22
23
  @uri = @query.search_url
23
24
  end
@@ -57,7 +58,7 @@ describe GScraper::Search::AJAXQuery do
57
58
  end
58
59
 
59
60
  it "should have a 'q' query-param" do
60
- @uri.query_params['q'].should == DEFAULT_QUERY
61
+ @uri.query_params['q'].should == Helpers::DEFAULT_QUERY
61
62
  end
62
63
 
63
64
  it "should have a default 'sig' query-param" do
@@ -74,11 +75,9 @@ describe GScraper::Search::AJAXQuery do
74
75
  v = @uri.query_params['v']
75
76
  v.should == GScraper::Search::AJAXQuery::DEFAULT_VERSION
76
77
  end
77
-
78
78
  end
79
79
 
80
80
  describe "page specific URLs" do
81
-
82
81
  before(:all) do
83
82
  @uri = @query.page_url(2)
84
83
  end
@@ -86,17 +85,15 @@ describe GScraper::Search::AJAXQuery do
86
85
  it "should have a 'start' query-param" do
87
86
  @uri.query_params['start'].should == @query.results_per_page
88
87
  end
89
-
90
88
  end
91
89
 
92
90
  describe "queries from AJAX search URLs" do
93
-
94
91
  before(:all) do
95
92
  @version = '1.0'
96
93
  @language = 'en'
97
94
  @sig = '582c1116317355adf613a6a843f19ece'
98
95
  @key = 'notsupplied'
99
- @query = GScraper::Search::AJAXQuery.from_url("http://www.google.com/uds/GwebSearch?v=#{@version}&lstkp=0&rsz=large&hl=#{@language}&callback=google.search.WebSearch.RawCompletion&sig=#{@sig}&q=#{DEFAULT_QUERY}&gss=.com&context=0&key=#{@key}")
96
+ @query = GScraper::Search::AJAXQuery.from_url("http://www.google.com/uds/GwebSearch?v=#{@version}&lstkp=0&rsz=large&hl=#{@language}&callback=google.search.WebSearch.RawCompletion&sig=#{@sig}&q=#{Helpers::DEFAULT_QUERY}&gss=.com&context=0&key=#{@key}")
100
97
  end
101
98
 
102
99
  it "should have a version" do
@@ -116,9 +113,7 @@ describe GScraper::Search::AJAXQuery do
116
113
  end
117
114
 
118
115
  it "should have a query" do
119
- @query.query.should == DEFAULT_QUERY
116
+ @query.query.should == Helpers::DEFAULT_QUERY
120
117
  end
121
-
122
118
  end
123
-
124
119
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for "Page has Search Results" do
4
-
5
4
  it "should have incremental ranks" do
6
5
  ranks = @page.ranks
7
6
 
@@ -37,5 +36,4 @@ shared_examples_for "Page has Search Results" do
37
36
  it "should have atleast one cached URL" do
38
37
  @page.cached_urls.length.should_not == 0
39
38
  end
40
-
41
39
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'helpers/query'
2
3
  require 'has_pages_examples'
3
4
  require 'page_has_results_examples'
4
5
  require 'has_sponsored_links_examples'
@@ -7,9 +8,10 @@ require 'search/page_has_results_examples'
7
8
  require 'gscraper/search/web_query'
8
9
 
9
10
  describe GScraper::Search::WebQuery do
11
+ include Helpers
10
12
 
11
13
  before(:all) do
12
- @query = GScraper::Search::WebQuery.new(:query => DEFAULT_QUERY)
14
+ @query = GScraper::Search::WebQuery.new(:query => Helpers::DEFAULT_QUERY)
13
15
  @page = @query.first_page
14
16
  @links = @query.sponsored_links
15
17
  end
@@ -20,7 +22,6 @@ describe GScraper::Search::WebQuery do
20
22
  it_should_behave_like "has Sponsored Links"
21
23
 
22
24
  describe "Search URL" do
23
-
24
25
  before(:all) do
25
26
  @uri = @query.search_url
26
27
  end
@@ -30,17 +31,15 @@ describe GScraper::Search::WebQuery do
30
31
  end
31
32
 
32
33
  it "should have a 'q' query-param" do
33
- @uri.query_params['q'].should == DEFAULT_QUERY
34
+ @uri.query_params['q'].should == Helpers::DEFAULT_QUERY
34
35
  end
35
36
 
36
37
  it "should have a 'num' query-param" do
37
38
  @uri.query_params['num'].should == @query.results_per_page
38
39
  end
39
-
40
40
  end
41
41
 
42
42
  describe "page specific URLs" do
43
-
44
43
  before(:all) do
45
44
  @uri = @query.page_url(2)
46
45
  end
@@ -52,13 +51,11 @@ describe GScraper::Search::WebQuery do
52
51
  it "should have a 'sa' query-param" do
53
52
  @uri.query_params['sa'].should == 'N'
54
53
  end
55
-
56
54
  end
57
55
 
58
56
  describe "queries from Web search URLs" do
59
-
60
57
  before(:all) do
61
- @query = GScraper::Search::WebQuery.from_url("http://www.google.com/search?sa=N&start=0&q=#{DEFAULT_QUERY}&num=20")
58
+ @query = GScraper::Search::WebQuery.from_url("http://www.google.com/search?sa=N&start=0&q=#{Helpers::DEFAULT_QUERY}&num=20")
62
59
  end
63
60
 
64
61
  it "should have a results-per-page" do
@@ -66,13 +63,11 @@ describe GScraper::Search::WebQuery do
66
63
  end
67
64
 
68
65
  it "should have a query" do
69
- @query.query.should == DEFAULT_QUERY
66
+ @query.query.should == Helpers::DEFAULT_QUERY
70
67
  end
71
-
72
68
  end
73
69
 
74
70
  it "should have atleast one similar query URL" do
75
71
  @page.similar_urls.length.should_not == 0
76
72
  end
77
-
78
73
  end
@@ -1,6 +1,12 @@
1
1
  require 'rubygems'
2
- gem 'rspec', '>=1.1.3'
3
- require 'spec'
2
+ require 'bundler'
3
+
4
+ begin
5
+ Bundler.setup(:runtime, :test)
6
+ rescue Bundler::BundlerError => e
7
+ STDERR.puts e.message
8
+ STDERR.puts "Run `bundle install` to install missing gems"
9
+ exit e.status_code
10
+ end
4
11
 
5
- require 'helpers/query'
6
- require 'helpers/uri'
12
+ require 'spec'
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 3
8
+ - 0
9
+ version: 0.3.0
5
10
  platform: ruby
6
11
  authors:
7
12
  - Postmodern
@@ -9,106 +14,194 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-03-18 00:00:00 -07:00
17
+ date: 2010-07-02 00:00:00 -07:00
13
18
  default_executable:
14
19
  dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: json_pure
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 4
30
+ - 0
31
+ version: 1.4.0
32
+ type: :runtime
33
+ prerelease: false
34
+ version_requirements: *id001
15
35
  - !ruby/object:Gem::Dependency
16
36
  name: mechanize
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 1
44
+ - 0
45
+ - 0
46
+ version: 1.0.0
17
47
  type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
48
+ prerelease: false
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: bundler
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ segments:
58
+ - 0
59
+ - 9
60
+ - 19
61
+ version: 0.9.19
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: rake
67
+ requirement: &id004 !ruby/object:Gem::Requirement
68
+ none: false
20
69
  requirements:
21
- - - ">="
70
+ - - ~>
22
71
  - !ruby/object:Gem::Version
23
- version: 0.9.0
24
- version:
72
+ segments:
73
+ - 0
74
+ - 8
75
+ - 7
76
+ version: 0.8.7
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *id004
25
80
  - !ruby/object:Gem::Dependency
26
- name: hoe
81
+ name: jeweler
82
+ requirement: &id005 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ segments:
88
+ - 1
89
+ - 4
90
+ - 0
91
+ version: 1.4.0
27
92
  type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
93
+ prerelease: false
94
+ version_requirements: *id005
95
+ - !ruby/object:Gem::Dependency
96
+ name: rspec
97
+ requirement: &id006 !ruby/object:Gem::Requirement
98
+ none: false
30
99
  requirements:
31
- - - ">="
100
+ - - ~>
32
101
  - !ruby/object:Gem::Version
33
- version: 1.10.0
34
- version:
102
+ segments:
103
+ - 1
104
+ - 3
105
+ - 0
106
+ version: 1.3.0
107
+ type: :development
108
+ prerelease: false
109
+ version_requirements: *id006
35
110
  description: GScraper is a web-scraping interface to various Google Services.
36
- email:
37
- - postmodern.mod3@gmail.com
111
+ email: postmodern.mod3@gmail.com
38
112
  executables: []
39
113
 
40
114
  extensions: []
41
115
 
42
116
  extra_rdoc_files:
43
- - History.txt
44
- - COPYING.txt
45
- - Manifest.txt
46
- - README.txt
117
+ - ChangeLog.md
118
+ - README.md
47
119
  files:
48
- - History.txt
120
+ - .gitignore
121
+ - .specopts
122
+ - .yardopts
49
123
  - COPYING.txt
50
- - Manifest.txt
51
- - README.txt
124
+ - ChangeLog.md
125
+ - Gemfile
126
+ - README.md
52
127
  - Rakefile
53
- - lib/gscraper/extensions/uri/query_params.rb
54
- - lib/gscraper/extensions/uri/http.rb
55
- - lib/gscraper/extensions/uri.rb
128
+ - gscraper.gemspec
129
+ - lib/gscraper.rb
56
130
  - lib/gscraper/extensions.rb
131
+ - lib/gscraper/extensions/uri.rb
132
+ - lib/gscraper/extensions/uri/http.rb
133
+ - lib/gscraper/extensions/uri/query_params.rb
134
+ - lib/gscraper/gscraper.rb
135
+ - lib/gscraper/has_pages.rb
57
136
  - lib/gscraper/licenses.rb
58
137
  - lib/gscraper/page.rb
59
- - lib/gscraper/has_pages.rb
60
- - lib/gscraper/sponsored_ad.rb
61
- - lib/gscraper/sponsored_links.rb
62
- - lib/gscraper/search/result.rb
138
+ - lib/gscraper/search.rb
139
+ - lib/gscraper/search/ajax_query.rb
63
140
  - lib/gscraper/search/page.rb
64
141
  - lib/gscraper/search/query.rb
65
- - lib/gscraper/search/web_query.rb
66
- - lib/gscraper/search/ajax_query.rb
142
+ - lib/gscraper/search/result.rb
67
143
  - lib/gscraper/search/search.rb
68
- - lib/gscraper/search.rb
69
- - lib/gscraper/gscraper.rb
144
+ - lib/gscraper/search/web_query.rb
145
+ - lib/gscraper/sponsored_ad.rb
146
+ - lib/gscraper/sponsored_links.rb
70
147
  - lib/gscraper/version.rb
71
- - lib/gscraper.rb
72
- - tasks/spec.rb
73
- - spec/spec_helper.rb
74
- - spec/helpers/uri.rb
75
- - spec/helpers/query.rb
76
- - spec/extensions/uri/query_params_spec.rb
77
148
  - spec/extensions/uri/http_spec.rb
149
+ - spec/extensions/uri/query_params_spec.rb
150
+ - spec/gscraper_spec.rb
78
151
  - spec/has_pages_examples.rb
79
- - spec/page_has_results_examples.rb
80
152
  - spec/has_sponsored_links_examples.rb
153
+ - spec/helpers/query.rb
154
+ - spec/helpers/uri.rb
155
+ - spec/page_has_results_examples.rb
156
+ - spec/search/ajax_query_spec.rb
81
157
  - spec/search/page_has_results_examples.rb
82
158
  - spec/search/query_spec.rb
83
- - spec/search/ajax_query_spec.rb
84
159
  - spec/search/web_query_spec.rb
85
- - spec/gscraper_spec.rb
86
- has_rdoc: true
87
- homepage: http://gscraper.rubyforge.org/
160
+ - spec/spec_helper.rb
161
+ has_rdoc: yard
162
+ homepage: http://github.com/postmodern/gscraper
163
+ licenses:
164
+ - GPL-2
88
165
  post_install_message:
89
- rdoc_options:
90
- - --main
91
- - README.txt
166
+ rdoc_options: []
167
+
92
168
  require_paths:
93
169
  - lib
94
170
  required_ruby_version: !ruby/object:Gem::Requirement
171
+ none: false
95
172
  requirements:
96
173
  - - ">="
97
174
  - !ruby/object:Gem::Version
175
+ hash: 734300353
176
+ segments:
177
+ - 0
98
178
  version: "0"
99
- version:
100
179
  required_rubygems_version: !ruby/object:Gem::Requirement
180
+ none: false
101
181
  requirements:
102
182
  - - ">="
103
183
  - !ruby/object:Gem::Version
184
+ segments:
185
+ - 0
104
186
  version: "0"
105
- version:
106
187
  requirements: []
107
188
 
108
- rubyforge_project: gscraper
109
- rubygems_version: 1.3.1
189
+ rubyforge_project:
190
+ rubygems_version: 1.3.7
110
191
  signing_key:
111
- specification_version: 2
192
+ specification_version: 3
112
193
  summary: GScraper is a web-scraping interface to various Google Services.
113
- test_files: []
114
-
194
+ test_files:
195
+ - spec/extensions/uri/http_spec.rb
196
+ - spec/extensions/uri/query_params_spec.rb
197
+ - spec/gscraper_spec.rb
198
+ - spec/has_pages_examples.rb
199
+ - spec/has_sponsored_links_examples.rb
200
+ - spec/helpers/query.rb
201
+ - spec/helpers/uri.rb
202
+ - spec/page_has_results_examples.rb
203
+ - spec/search/ajax_query_spec.rb
204
+ - spec/search/page_has_results_examples.rb
205
+ - spec/search/query_spec.rb
206
+ - spec/search/web_query_spec.rb
207
+ - spec/spec_helper.rb