gscraper 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +1 -0
- data/ChangeLog.md +24 -2
- data/README.md +12 -7
- data/Rakefile +26 -29
- data/gemspec.yml +20 -0
- data/gscraper.gemspec +124 -109
- data/lib/gscraper.rb +1 -1
- data/lib/gscraper/gscraper.rb +24 -20
- data/lib/gscraper/has_pages.rb +1 -3
- data/lib/gscraper/hosts.rb +158 -0
- data/lib/gscraper/languages.rb +110 -0
- data/lib/gscraper/licenses.rb +4 -1
- data/lib/gscraper/page.rb +1 -3
- data/lib/gscraper/search.rb +1 -1
- data/lib/gscraper/search/ajax_query.rb +33 -34
- data/lib/gscraper/{extensions.rb → search/exceptions.rb} +2 -2
- data/lib/gscraper/{extensions/uri.rb → search/exceptions/blocked.rb} +10 -2
- data/lib/gscraper/search/page.rb +47 -67
- data/lib/gscraper/search/query.rb +90 -44
- data/lib/gscraper/search/result.rb +7 -9
- data/lib/gscraper/search/search.rb +2 -2
- data/lib/gscraper/search/web_query.rb +93 -101
- data/lib/gscraper/sponsored_ad.rb +3 -3
- data/lib/gscraper/sponsored_links.rb +1 -3
- data/lib/gscraper/version.rb +2 -2
- data/spec/languages_spec.rb +28 -0
- data/spec/search/ajax_query_spec.rb +2 -1
- data/spec/search/query_spec.rb +29 -0
- data/spec/search/web_query_spec.rb +21 -1
- data/spec/spec_helper.rb +2 -12
- metadata +107 -125
- data/.specopts +0 -1
- data/Gemfile +0 -25
- data/lib/gscraper/extensions/uri/http.rb +0 -31
- data/lib/gscraper/extensions/uri/query_params.rb +0 -109
- data/spec/extensions/uri/http_spec.rb +0 -9
- data/spec/extensions/uri/query_params_spec.rb +0 -46
data/lib/gscraper/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# GScraper - A web-scraping interface to various Google Services.
|
3
3
|
#
|
4
|
-
# Copyright (c) 2007-
|
4
|
+
# Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
5
|
#
|
6
6
|
# This program is free software; you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
@@ -20,5 +20,5 @@
|
|
20
20
|
|
21
21
|
module GScraper
|
22
22
|
# The version of GScraper
|
23
|
-
VERSION = '0.3.0'
|
23
|
+
VERSION = '0.4.0'
|
24
24
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'gscraper/languages'
|
3
|
+
|
4
|
+
describe GScraper::Languages do
|
5
|
+
it "should lookup the language for a locale" do
|
6
|
+
GScraper::Languages.find('es').should == 'es'
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should lookup the language for locale_country" do
|
10
|
+
GScraper::Languages.find('es_AR').should == 'es'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should lookup the language for a locale@alias" do
|
14
|
+
GScraper::Languages.find('en@quot').should == 'en'
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should map zh_CN* to zh-CN" do
|
18
|
+
GScraper::Languages.find('zh_CN').should == 'zh-CN'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should map zh_TW* to zh-TW" do
|
22
|
+
GScraper::Languages.find('zh_TW').should == 'zh-TW'
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should auto-detect the native language" do
|
26
|
+
GScraper::Languages.native.should_not be_nil
|
27
|
+
end
|
28
|
+
end
|
@@ -50,7 +50,8 @@ describe GScraper::Search::AJAXQuery do
|
|
50
50
|
|
51
51
|
it "should have a default 'hl' query-param" do
|
52
52
|
hl = @uri.query_params['hl']
|
53
|
-
|
53
|
+
|
54
|
+
hl.should_not be_empty
|
54
55
|
end
|
55
56
|
|
56
57
|
it "should have a default 'gss' query-param of '.com'" do
|
data/spec/search/query_spec.rb
CHANGED
@@ -3,6 +3,35 @@ require 'spec_helper'
|
|
3
3
|
require 'gscraper/search/query'
|
4
4
|
|
5
5
|
describe GScraper::Search::Query do
|
6
|
+
it "should have a default host of www.google.com" do
|
7
|
+
query = GScraper::Search::Query.new
|
8
|
+
|
9
|
+
query.search_host.should == 'www.google.com'
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should allow using alternate hosts" do
|
13
|
+
alternate_host = 'www.google.com.ar'
|
14
|
+
query = GScraper::Search::Query.new(
|
15
|
+
:search_host => alternate_host
|
16
|
+
)
|
17
|
+
|
18
|
+
query.search_host.should == alternate_host
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should use random hosts if load balancing is enabled" do
|
22
|
+
query = GScraper::Search::Query.new(:load_balance => true)
|
23
|
+
|
24
|
+
host1 = query.search_host
|
25
|
+
host2 = query.search_host
|
26
|
+
|
27
|
+
host1.should_not == host2
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should have a default language" do
|
31
|
+
query = GScraper::Search::Query.new
|
32
|
+
|
33
|
+
query.language.should_not be_nil
|
34
|
+
end
|
6
35
|
|
7
36
|
it "should support basic queries" do
|
8
37
|
expr = 'ruby -blog'
|
@@ -11,7 +11,9 @@ describe GScraper::Search::WebQuery do
|
|
11
11
|
include Helpers
|
12
12
|
|
13
13
|
before(:all) do
|
14
|
-
@query = GScraper::Search::WebQuery.new(
|
14
|
+
@query = GScraper::Search::WebQuery.new(
|
15
|
+
:query => Helpers::DEFAULT_QUERY
|
16
|
+
)
|
15
17
|
@page = @query.first_page
|
16
18
|
@links = @query.sponsored_links
|
17
19
|
end
|
@@ -30,6 +32,24 @@ describe GScraper::Search::WebQuery do
|
|
30
32
|
@uri.class.should == URI::HTTP
|
31
33
|
end
|
32
34
|
|
35
|
+
it "should have a default host of www.google.com" do
|
36
|
+
@uri.host.should == 'www.google.com'
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should allow using alternate hosts" do
|
40
|
+
other_host = 'www.google.com.ar'
|
41
|
+
other_query = GScraper::Search::WebQuery.new(
|
42
|
+
:search_host => other_host,
|
43
|
+
:query => Helpers::DEFAULT_QUERY
|
44
|
+
)
|
45
|
+
|
46
|
+
other_query.search_url.host.should == other_host
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should have a path of /search" do
|
50
|
+
@uri.path.should == '/search'
|
51
|
+
end
|
52
|
+
|
33
53
|
it "should have a 'q' query-param" do
|
34
54
|
@uri.query_params['q'].should == Helpers::DEFAULT_QUERY
|
35
55
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,12 +1,2 @@
|
|
1
|
-
|
2
|
-
require '
|
3
|
-
|
4
|
-
begin
|
5
|
-
Bundler.setup(:runtime, :test)
|
6
|
-
rescue Bundler::BundlerError => e
|
7
|
-
STDERR.puts e.message
|
8
|
-
STDERR.puts "Run `bundle install` to install missing gems"
|
9
|
-
exit e.status_code
|
10
|
-
end
|
11
|
-
|
12
|
-
require 'spec'
|
1
|
+
gem 'rspec', '~> 2.4'
|
2
|
+
require 'rspec'
|
metadata
CHANGED
@@ -1,142 +1,141 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: gscraper
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 3
|
8
|
-
- 0
|
9
|
-
version: 0.3.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Postmodern
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
name: json_pure
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
12
|
+
date: 2012-04-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: json
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
23
17
|
none: false
|
24
|
-
requirements:
|
18
|
+
requirements:
|
25
19
|
- - ~>
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
|
28
|
-
- 1
|
29
|
-
- 4
|
30
|
-
- 0
|
31
|
-
version: 1.4.0
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.6'
|
32
22
|
type: :runtime
|
33
23
|
prerelease: false
|
34
|
-
version_requirements:
|
35
|
-
|
36
|
-
|
37
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: uri-query_params
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
38
33
|
none: false
|
39
|
-
requirements:
|
34
|
+
requirements:
|
40
35
|
- - ~>
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
- 1
|
44
|
-
- 0
|
45
|
-
- 0
|
46
|
-
version: 1.0.0
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.5'
|
47
38
|
type: :runtime
|
48
39
|
prerelease: false
|
49
|
-
version_requirements:
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: bundler
|
52
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
41
|
none: false
|
54
|
-
requirements:
|
42
|
+
requirements:
|
55
43
|
- - ~>
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.5'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: mechanize
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '2.0'
|
54
|
+
type: :runtime
|
63
55
|
prerelease: false
|
64
|
-
version_requirements:
|
65
|
-
|
66
|
-
|
67
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rubygems-tasks
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
68
65
|
none: false
|
69
|
-
requirements:
|
66
|
+
requirements:
|
70
67
|
- - ~>
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
- 0
|
74
|
-
- 8
|
75
|
-
- 7
|
76
|
-
version: 0.8.7
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.1'
|
77
70
|
type: :development
|
78
71
|
prerelease: false
|
79
|
-
version_requirements:
|
80
|
-
|
81
|
-
|
82
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0.1'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rspec
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
83
81
|
none: false
|
84
|
-
requirements:
|
82
|
+
requirements:
|
85
83
|
- - ~>
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
|
88
|
-
- 1
|
89
|
-
- 4
|
90
|
-
- 0
|
91
|
-
version: 1.4.0
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '2.4'
|
92
86
|
type: :development
|
93
87
|
prerelease: false
|
94
|
-
version_requirements:
|
95
|
-
|
96
|
-
|
97
|
-
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '2.4'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: yard
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
98
97
|
none: false
|
99
|
-
requirements:
|
98
|
+
requirements:
|
100
99
|
- - ~>
|
101
|
-
- !ruby/object:Gem::Version
|
102
|
-
|
103
|
-
- 1
|
104
|
-
- 3
|
105
|
-
- 0
|
106
|
-
version: 1.3.0
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0.6'
|
107
102
|
type: :development
|
108
103
|
prerelease: false
|
109
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.6'
|
110
110
|
description: GScraper is a web-scraping interface to various Google Services.
|
111
111
|
email: postmodern.mod3@gmail.com
|
112
112
|
executables: []
|
113
|
-
|
114
113
|
extensions: []
|
115
|
-
|
116
|
-
|
114
|
+
extra_rdoc_files:
|
115
|
+
- COPYING.txt
|
117
116
|
- ChangeLog.md
|
118
117
|
- README.md
|
119
|
-
files:
|
118
|
+
files:
|
120
119
|
- .gitignore
|
121
|
-
- .specopts
|
120
|
+
- .rspec
|
122
121
|
- .yardopts
|
123
122
|
- COPYING.txt
|
124
123
|
- ChangeLog.md
|
125
|
-
- Gemfile
|
126
124
|
- README.md
|
127
125
|
- Rakefile
|
126
|
+
- gemspec.yml
|
128
127
|
- gscraper.gemspec
|
129
128
|
- lib/gscraper.rb
|
130
|
-
- lib/gscraper/extensions.rb
|
131
|
-
- lib/gscraper/extensions/uri.rb
|
132
|
-
- lib/gscraper/extensions/uri/http.rb
|
133
|
-
- lib/gscraper/extensions/uri/query_params.rb
|
134
129
|
- lib/gscraper/gscraper.rb
|
135
130
|
- lib/gscraper/has_pages.rb
|
131
|
+
- lib/gscraper/hosts.rb
|
132
|
+
- lib/gscraper/languages.rb
|
136
133
|
- lib/gscraper/licenses.rb
|
137
134
|
- lib/gscraper/page.rb
|
138
135
|
- lib/gscraper/search.rb
|
139
136
|
- lib/gscraper/search/ajax_query.rb
|
137
|
+
- lib/gscraper/search/exceptions.rb
|
138
|
+
- lib/gscraper/search/exceptions/blocked.rb
|
140
139
|
- lib/gscraper/search/page.rb
|
141
140
|
- lib/gscraper/search/query.rb
|
142
141
|
- lib/gscraper/search/result.rb
|
@@ -145,63 +144,46 @@ files:
|
|
145
144
|
- lib/gscraper/sponsored_ad.rb
|
146
145
|
- lib/gscraper/sponsored_links.rb
|
147
146
|
- lib/gscraper/version.rb
|
148
|
-
- spec/extensions/uri/http_spec.rb
|
149
|
-
- spec/extensions/uri/query_params_spec.rb
|
150
147
|
- spec/gscraper_spec.rb
|
151
148
|
- spec/has_pages_examples.rb
|
152
149
|
- spec/has_sponsored_links_examples.rb
|
153
150
|
- spec/helpers/query.rb
|
154
151
|
- spec/helpers/uri.rb
|
152
|
+
- spec/languages_spec.rb
|
155
153
|
- spec/page_has_results_examples.rb
|
156
154
|
- spec/search/ajax_query_spec.rb
|
157
155
|
- spec/search/page_has_results_examples.rb
|
158
156
|
- spec/search/query_spec.rb
|
159
157
|
- spec/search/web_query_spec.rb
|
160
158
|
- spec/spec_helper.rb
|
161
|
-
|
162
|
-
|
163
|
-
licenses:
|
159
|
+
homepage: https://github.com/postmodern/gscraper
|
160
|
+
licenses:
|
164
161
|
- GPL-2
|
165
162
|
post_install_message:
|
166
163
|
rdoc_options: []
|
167
|
-
|
168
|
-
require_paths:
|
164
|
+
require_paths:
|
169
165
|
- lib
|
170
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
166
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
171
167
|
none: false
|
172
|
-
requirements:
|
173
|
-
- -
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
|
176
|
-
|
177
|
-
- 0
|
178
|
-
version: "0"
|
179
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
|
+
requirements:
|
169
|
+
- - ! '>='
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: '0'
|
172
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
180
173
|
none: false
|
181
|
-
requirements:
|
182
|
-
- -
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
|
185
|
-
- 0
|
186
|
-
version: "0"
|
174
|
+
requirements:
|
175
|
+
- - ! '>='
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
version: '0'
|
187
178
|
requirements: []
|
188
|
-
|
189
179
|
rubyforge_project:
|
190
|
-
rubygems_version: 1.
|
180
|
+
rubygems_version: 1.8.23
|
191
181
|
signing_key:
|
192
182
|
specification_version: 3
|
193
|
-
summary:
|
194
|
-
test_files:
|
195
|
-
- spec/extensions/uri/http_spec.rb
|
196
|
-
- spec/extensions/uri/query_params_spec.rb
|
183
|
+
summary: Web-scraping interface to various Google Services.
|
184
|
+
test_files:
|
197
185
|
- spec/gscraper_spec.rb
|
198
|
-
- spec/has_pages_examples.rb
|
199
|
-
- spec/has_sponsored_links_examples.rb
|
200
|
-
- spec/helpers/query.rb
|
201
|
-
- spec/helpers/uri.rb
|
202
|
-
- spec/page_has_results_examples.rb
|
186
|
+
- spec/languages_spec.rb
|
203
187
|
- spec/search/ajax_query_spec.rb
|
204
|
-
- spec/search/page_has_results_examples.rb
|
205
188
|
- spec/search/query_spec.rb
|
206
189
|
- spec/search/web_query_spec.rb
|
207
|
-
- spec/spec_helper.rb
|