gscraper 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/.gitignore +8 -0
  2. data/.specopts +1 -0
  3. data/.yardopts +1 -0
  4. data/ChangeLog.md +122 -0
  5. data/Gemfile +25 -0
  6. data/{README.txt → README.md} +25 -24
  7. data/Rakefile +32 -10
  8. data/gscraper.gemspec +112 -0
  9. data/lib/gscraper.rb +0 -2
  10. data/lib/gscraper/extensions.rb +0 -2
  11. data/lib/gscraper/extensions/uri.rb +0 -2
  12. data/lib/gscraper/extensions/uri/http.rb +0 -2
  13. data/lib/gscraper/extensions/uri/query_params.rb +18 -5
  14. data/lib/gscraper/gscraper.rb +61 -70
  15. data/lib/gscraper/has_pages.rb +76 -20
  16. data/lib/gscraper/licenses.rb +0 -2
  17. data/lib/gscraper/page.rb +45 -16
  18. data/lib/gscraper/search.rb +0 -2
  19. data/lib/gscraper/search/ajax_query.rb +75 -22
  20. data/lib/gscraper/search/page.rb +328 -122
  21. data/lib/gscraper/search/query.rb +100 -7
  22. data/lib/gscraper/search/result.rb +27 -6
  23. data/lib/gscraper/search/search.rb +59 -9
  24. data/lib/gscraper/search/web_query.rb +120 -37
  25. data/lib/gscraper/sponsored_ad.rb +19 -6
  26. data/lib/gscraper/sponsored_links.rb +260 -92
  27. data/lib/gscraper/version.rb +2 -3
  28. data/spec/extensions/uri/query_params_spec.rb +8 -0
  29. data/spec/gscraper_spec.rb +9 -4
  30. data/spec/has_pages_examples.rb +0 -2
  31. data/spec/has_sponsored_links_examples.rb +2 -1
  32. data/spec/helpers/query.rb +3 -1
  33. data/spec/helpers/uri.rb +6 -4
  34. data/spec/page_has_results_examples.rb +0 -2
  35. data/spec/search/ajax_query_spec.rb +6 -11
  36. data/spec/search/page_has_results_examples.rb +0 -2
  37. data/spec/search/web_query_spec.rb +6 -11
  38. data/spec/spec_helper.rb +10 -4
  39. metadata +147 -54
  40. data/History.txt +0 -101
  41. data/Manifest.txt +0 -38
  42. data/tasks/spec.rb +0 -9
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,9 +16,9 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  module GScraper
24
- VERSION = '0.2.4'
22
+ # The version of GScraper
23
+ VERSION = '0.3.0'
25
24
  end
@@ -35,4 +35,12 @@ describe "URI::QueryParams" do
35
35
  @uri.query = 'u=3'
36
36
  @uri.query_params['u'].should == '3'
37
37
  end
38
+
39
+ it "should properly escape query param values" do
40
+ @uri.query_params['x'] = '1&2'
41
+ @uri.query_params['y'] = 'one=two'
42
+ @uri.query_params['z'] = '?'
43
+
44
+ @uri.to_s.should == "http://www.test.com/page.php?x=1%262&y=one%3Dtwo&z=%3F"
45
+ end
38
46
  end
@@ -1,8 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  require 'gscraper/gscraper'
4
+ require 'gscraper/version'
4
5
 
5
6
  describe "GScraper" do
7
+ it "should have a VERSION constant" do
8
+ GScraper.should be_const_defined('VERSION')
9
+ end
10
+
6
11
  describe "User-Agent support" do
7
12
  it "should have a default User-Agent string" do
8
13
  GScraper.user_agent.should_not be_nil
@@ -11,19 +16,19 @@ describe "GScraper" do
11
16
 
12
17
  describe "Proxy support" do
13
18
  it "should provide a :host key" do
14
- GScraper.proxy.has_key?(:host).should == true
19
+ GScraper.proxy.should have_key(:host)
15
20
  end
16
21
 
17
22
  it "should provide a :port key" do
18
- GScraper.proxy.has_key?(:port).should == true
23
+ GScraper.proxy.should have_key(:port)
19
24
  end
20
25
 
21
26
  it "should provide a :user key" do
22
- GScraper.proxy.has_key?(:user).should == true
27
+ GScraper.proxy.should have_key(:user)
23
28
  end
24
29
 
25
30
  it "should provide a :password key" do
26
- GScraper.proxy.has_key?(:password).should == true
31
+ GScraper.proxy.should have_key(:password)
27
32
  end
28
33
  end
29
34
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for "has Pages" do
4
-
5
4
  it "should have a first page" do
6
5
  @query.first_page.should_not be_nil
7
6
  end
@@ -15,5 +14,4 @@ shared_examples_for "has Pages" do
15
14
  pages.should_not be_nil
16
15
  pages.length.should == 2
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,8 @@
1
1
  require 'spec_helper'
2
+ require 'helpers/uri'
2
3
 
3
4
  shared_examples_for "has Sponsored Links" do
5
+ include Helpers
4
6
 
5
7
  it "should have ads" do
6
8
  @links.length.should_not == 0
@@ -41,5 +43,4 @@ shared_examples_for "has Sponsored Links" do
41
43
  uri_should_be_valid(url)
42
44
  end
43
45
  end
44
-
45
46
  end
@@ -1 +1,3 @@
1
- DEFAULT_QUERY = 'Ruby'
1
+ module Helpers
2
+ DEFAULT_QUERY = 'Ruby'
3
+ end
@@ -1,7 +1,9 @@
1
1
  require 'uri'
2
2
 
3
- def uri_should_be_valid(uri)
4
- uri.scheme.should_not be_nil
5
- uri.host.should_not be_nil
6
- uri.path.should_not be_nil
3
+ module Helpers
4
+ def uri_should_be_valid(uri)
5
+ uri.scheme.should_not be_nil
6
+ uri.host.should_not be_nil
7
+ uri.path.should_not be_nil
8
+ end
7
9
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for "Page has Results" do
4
-
5
4
  it "should have results" do
6
5
  @page.length.should_not == 0
7
6
  end
@@ -9,5 +8,4 @@ shared_examples_for "Page has Results" do
9
8
  it "should have the maximum amount of results per page" do
10
9
  @page.length.should == @query.results_per_page
11
10
  end
12
-
13
11
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'helpers/query'
2
3
  require 'has_pages_examples'
3
4
  require 'page_has_results_examples'
4
5
  require 'search/page_has_results_examples'
@@ -6,9 +7,10 @@ require 'search/page_has_results_examples'
6
7
  require 'gscraper/search/ajax_query'
7
8
 
8
9
  describe GScraper::Search::AJAXQuery do
10
+ include Helpers
9
11
 
10
12
  before(:all) do
11
- @query = GScraper::Search::AJAXQuery.new(:query => DEFAULT_QUERY)
13
+ @query = GScraper::Search::AJAXQuery.new(:query => Helpers::DEFAULT_QUERY)
12
14
  @page = @query.first_page
13
15
  end
14
16
 
@@ -17,7 +19,6 @@ describe GScraper::Search::AJAXQuery do
17
19
  it_should_behave_like "Page has Search Results"
18
20
 
19
21
  describe "Search URL" do
20
-
21
22
  before(:all) do
22
23
  @uri = @query.search_url
23
24
  end
@@ -57,7 +58,7 @@ describe GScraper::Search::AJAXQuery do
57
58
  end
58
59
 
59
60
  it "should have a 'q' query-param" do
60
- @uri.query_params['q'].should == DEFAULT_QUERY
61
+ @uri.query_params['q'].should == Helpers::DEFAULT_QUERY
61
62
  end
62
63
 
63
64
  it "should have a default 'sig' query-param" do
@@ -74,11 +75,9 @@ describe GScraper::Search::AJAXQuery do
74
75
  v = @uri.query_params['v']
75
76
  v.should == GScraper::Search::AJAXQuery::DEFAULT_VERSION
76
77
  end
77
-
78
78
  end
79
79
 
80
80
  describe "page specific URLs" do
81
-
82
81
  before(:all) do
83
82
  @uri = @query.page_url(2)
84
83
  end
@@ -86,17 +85,15 @@ describe GScraper::Search::AJAXQuery do
86
85
  it "should have a 'start' query-param" do
87
86
  @uri.query_params['start'].should == @query.results_per_page
88
87
  end
89
-
90
88
  end
91
89
 
92
90
  describe "queries from AJAX search URLs" do
93
-
94
91
  before(:all) do
95
92
  @version = '1.0'
96
93
  @language = 'en'
97
94
  @sig = '582c1116317355adf613a6a843f19ece'
98
95
  @key = 'notsupplied'
99
- @query = GScraper::Search::AJAXQuery.from_url("http://www.google.com/uds/GwebSearch?v=#{@version}&lstkp=0&rsz=large&hl=#{@language}&callback=google.search.WebSearch.RawCompletion&sig=#{@sig}&q=#{DEFAULT_QUERY}&gss=.com&context=0&key=#{@key}")
96
+ @query = GScraper::Search::AJAXQuery.from_url("http://www.google.com/uds/GwebSearch?v=#{@version}&lstkp=0&rsz=large&hl=#{@language}&callback=google.search.WebSearch.RawCompletion&sig=#{@sig}&q=#{Helpers::DEFAULT_QUERY}&gss=.com&context=0&key=#{@key}")
100
97
  end
101
98
 
102
99
  it "should have a version" do
@@ -116,9 +113,7 @@ describe GScraper::Search::AJAXQuery do
116
113
  end
117
114
 
118
115
  it "should have a query" do
119
- @query.query.should == DEFAULT_QUERY
116
+ @query.query.should == Helpers::DEFAULT_QUERY
120
117
  end
121
-
122
118
  end
123
-
124
119
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for "Page has Search Results" do
4
-
5
4
  it "should have incremental ranks" do
6
5
  ranks = @page.ranks
7
6
 
@@ -37,5 +36,4 @@ shared_examples_for "Page has Search Results" do
37
36
  it "should have atleast one cached URL" do
38
37
  @page.cached_urls.length.should_not == 0
39
38
  end
40
-
41
39
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'helpers/query'
2
3
  require 'has_pages_examples'
3
4
  require 'page_has_results_examples'
4
5
  require 'has_sponsored_links_examples'
@@ -7,9 +8,10 @@ require 'search/page_has_results_examples'
7
8
  require 'gscraper/search/web_query'
8
9
 
9
10
  describe GScraper::Search::WebQuery do
11
+ include Helpers
10
12
 
11
13
  before(:all) do
12
- @query = GScraper::Search::WebQuery.new(:query => DEFAULT_QUERY)
14
+ @query = GScraper::Search::WebQuery.new(:query => Helpers::DEFAULT_QUERY)
13
15
  @page = @query.first_page
14
16
  @links = @query.sponsored_links
15
17
  end
@@ -20,7 +22,6 @@ describe GScraper::Search::WebQuery do
20
22
  it_should_behave_like "has Sponsored Links"
21
23
 
22
24
  describe "Search URL" do
23
-
24
25
  before(:all) do
25
26
  @uri = @query.search_url
26
27
  end
@@ -30,17 +31,15 @@ describe GScraper::Search::WebQuery do
30
31
  end
31
32
 
32
33
  it "should have a 'q' query-param" do
33
- @uri.query_params['q'].should == DEFAULT_QUERY
34
+ @uri.query_params['q'].should == Helpers::DEFAULT_QUERY
34
35
  end
35
36
 
36
37
  it "should have a 'num' query-param" do
37
38
  @uri.query_params['num'].should == @query.results_per_page
38
39
  end
39
-
40
40
  end
41
41
 
42
42
  describe "page specific URLs" do
43
-
44
43
  before(:all) do
45
44
  @uri = @query.page_url(2)
46
45
  end
@@ -52,13 +51,11 @@ describe GScraper::Search::WebQuery do
52
51
  it "should have a 'sa' query-param" do
53
52
  @uri.query_params['sa'].should == 'N'
54
53
  end
55
-
56
54
  end
57
55
 
58
56
  describe "queries from Web search URLs" do
59
-
60
57
  before(:all) do
61
- @query = GScraper::Search::WebQuery.from_url("http://www.google.com/search?sa=N&start=0&q=#{DEFAULT_QUERY}&num=20")
58
+ @query = GScraper::Search::WebQuery.from_url("http://www.google.com/search?sa=N&start=0&q=#{Helpers::DEFAULT_QUERY}&num=20")
62
59
  end
63
60
 
64
61
  it "should have a results-per-page" do
@@ -66,13 +63,11 @@ describe GScraper::Search::WebQuery do
66
63
  end
67
64
 
68
65
  it "should have a query" do
69
- @query.query.should == DEFAULT_QUERY
66
+ @query.query.should == Helpers::DEFAULT_QUERY
70
67
  end
71
-
72
68
  end
73
69
 
74
70
  it "should have atleast one similar query URL" do
75
71
  @page.similar_urls.length.should_not == 0
76
72
  end
77
-
78
73
  end
@@ -1,6 +1,12 @@
1
1
  require 'rubygems'
2
- gem 'rspec', '>=1.1.3'
3
- require 'spec'
2
+ require 'bundler'
3
+
4
+ begin
5
+ Bundler.setup(:runtime, :test)
6
+ rescue Bundler::BundlerError => e
7
+ STDERR.puts e.message
8
+ STDERR.puts "Run `bundle install` to install missing gems"
9
+ exit e.status_code
10
+ end
4
11
 
5
- require 'helpers/query'
6
- require 'helpers/uri'
12
+ require 'spec'
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 3
8
+ - 0
9
+ version: 0.3.0
5
10
  platform: ruby
6
11
  authors:
7
12
  - Postmodern
@@ -9,106 +14,194 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-03-18 00:00:00 -07:00
17
+ date: 2010-07-02 00:00:00 -07:00
13
18
  default_executable:
14
19
  dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: json_pure
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 4
30
+ - 0
31
+ version: 1.4.0
32
+ type: :runtime
33
+ prerelease: false
34
+ version_requirements: *id001
15
35
  - !ruby/object:Gem::Dependency
16
36
  name: mechanize
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 1
44
+ - 0
45
+ - 0
46
+ version: 1.0.0
17
47
  type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
48
+ prerelease: false
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: bundler
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ segments:
58
+ - 0
59
+ - 9
60
+ - 19
61
+ version: 0.9.19
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: rake
67
+ requirement: &id004 !ruby/object:Gem::Requirement
68
+ none: false
20
69
  requirements:
21
- - - ">="
70
+ - - ~>
22
71
  - !ruby/object:Gem::Version
23
- version: 0.9.0
24
- version:
72
+ segments:
73
+ - 0
74
+ - 8
75
+ - 7
76
+ version: 0.8.7
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *id004
25
80
  - !ruby/object:Gem::Dependency
26
- name: hoe
81
+ name: jeweler
82
+ requirement: &id005 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ segments:
88
+ - 1
89
+ - 4
90
+ - 0
91
+ version: 1.4.0
27
92
  type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
93
+ prerelease: false
94
+ version_requirements: *id005
95
+ - !ruby/object:Gem::Dependency
96
+ name: rspec
97
+ requirement: &id006 !ruby/object:Gem::Requirement
98
+ none: false
30
99
  requirements:
31
- - - ">="
100
+ - - ~>
32
101
  - !ruby/object:Gem::Version
33
- version: 1.10.0
34
- version:
102
+ segments:
103
+ - 1
104
+ - 3
105
+ - 0
106
+ version: 1.3.0
107
+ type: :development
108
+ prerelease: false
109
+ version_requirements: *id006
35
110
  description: GScraper is a web-scraping interface to various Google Services.
36
- email:
37
- - postmodern.mod3@gmail.com
111
+ email: postmodern.mod3@gmail.com
38
112
  executables: []
39
113
 
40
114
  extensions: []
41
115
 
42
116
  extra_rdoc_files:
43
- - History.txt
44
- - COPYING.txt
45
- - Manifest.txt
46
- - README.txt
117
+ - ChangeLog.md
118
+ - README.md
47
119
  files:
48
- - History.txt
120
+ - .gitignore
121
+ - .specopts
122
+ - .yardopts
49
123
  - COPYING.txt
50
- - Manifest.txt
51
- - README.txt
124
+ - ChangeLog.md
125
+ - Gemfile
126
+ - README.md
52
127
  - Rakefile
53
- - lib/gscraper/extensions/uri/query_params.rb
54
- - lib/gscraper/extensions/uri/http.rb
55
- - lib/gscraper/extensions/uri.rb
128
+ - gscraper.gemspec
129
+ - lib/gscraper.rb
56
130
  - lib/gscraper/extensions.rb
131
+ - lib/gscraper/extensions/uri.rb
132
+ - lib/gscraper/extensions/uri/http.rb
133
+ - lib/gscraper/extensions/uri/query_params.rb
134
+ - lib/gscraper/gscraper.rb
135
+ - lib/gscraper/has_pages.rb
57
136
  - lib/gscraper/licenses.rb
58
137
  - lib/gscraper/page.rb
59
- - lib/gscraper/has_pages.rb
60
- - lib/gscraper/sponsored_ad.rb
61
- - lib/gscraper/sponsored_links.rb
62
- - lib/gscraper/search/result.rb
138
+ - lib/gscraper/search.rb
139
+ - lib/gscraper/search/ajax_query.rb
63
140
  - lib/gscraper/search/page.rb
64
141
  - lib/gscraper/search/query.rb
65
- - lib/gscraper/search/web_query.rb
66
- - lib/gscraper/search/ajax_query.rb
142
+ - lib/gscraper/search/result.rb
67
143
  - lib/gscraper/search/search.rb
68
- - lib/gscraper/search.rb
69
- - lib/gscraper/gscraper.rb
144
+ - lib/gscraper/search/web_query.rb
145
+ - lib/gscraper/sponsored_ad.rb
146
+ - lib/gscraper/sponsored_links.rb
70
147
  - lib/gscraper/version.rb
71
- - lib/gscraper.rb
72
- - tasks/spec.rb
73
- - spec/spec_helper.rb
74
- - spec/helpers/uri.rb
75
- - spec/helpers/query.rb
76
- - spec/extensions/uri/query_params_spec.rb
77
148
  - spec/extensions/uri/http_spec.rb
149
+ - spec/extensions/uri/query_params_spec.rb
150
+ - spec/gscraper_spec.rb
78
151
  - spec/has_pages_examples.rb
79
- - spec/page_has_results_examples.rb
80
152
  - spec/has_sponsored_links_examples.rb
153
+ - spec/helpers/query.rb
154
+ - spec/helpers/uri.rb
155
+ - spec/page_has_results_examples.rb
156
+ - spec/search/ajax_query_spec.rb
81
157
  - spec/search/page_has_results_examples.rb
82
158
  - spec/search/query_spec.rb
83
- - spec/search/ajax_query_spec.rb
84
159
  - spec/search/web_query_spec.rb
85
- - spec/gscraper_spec.rb
86
- has_rdoc: true
87
- homepage: http://gscraper.rubyforge.org/
160
+ - spec/spec_helper.rb
161
+ has_rdoc: yard
162
+ homepage: http://github.com/postmodern/gscraper
163
+ licenses:
164
+ - GPL-2
88
165
  post_install_message:
89
- rdoc_options:
90
- - --main
91
- - README.txt
166
+ rdoc_options: []
167
+
92
168
  require_paths:
93
169
  - lib
94
170
  required_ruby_version: !ruby/object:Gem::Requirement
171
+ none: false
95
172
  requirements:
96
173
  - - ">="
97
174
  - !ruby/object:Gem::Version
175
+ hash: 734300353
176
+ segments:
177
+ - 0
98
178
  version: "0"
99
- version:
100
179
  required_rubygems_version: !ruby/object:Gem::Requirement
180
+ none: false
101
181
  requirements:
102
182
  - - ">="
103
183
  - !ruby/object:Gem::Version
184
+ segments:
185
+ - 0
104
186
  version: "0"
105
- version:
106
187
  requirements: []
107
188
 
108
- rubyforge_project: gscraper
109
- rubygems_version: 1.3.1
189
+ rubyforge_project:
190
+ rubygems_version: 1.3.7
110
191
  signing_key:
111
- specification_version: 2
192
+ specification_version: 3
112
193
  summary: GScraper is a web-scraping interface to various Google Services.
113
- test_files: []
114
-
194
+ test_files:
195
+ - spec/extensions/uri/http_spec.rb
196
+ - spec/extensions/uri/query_params_spec.rb
197
+ - spec/gscraper_spec.rb
198
+ - spec/has_pages_examples.rb
199
+ - spec/has_sponsored_links_examples.rb
200
+ - spec/helpers/query.rb
201
+ - spec/helpers/uri.rb
202
+ - spec/page_has_results_examples.rb
203
+ - spec/search/ajax_query_spec.rb
204
+ - spec/search/page_has_results_examples.rb
205
+ - spec/search/query_spec.rb
206
+ - spec/search/web_query_spec.rb
207
+ - spec/spec_helper.rb