gscraper 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -0
- data/ChangeLog.md +24 -2
- data/README.md +12 -7
- data/Rakefile +26 -29
- data/gemspec.yml +20 -0
- data/gscraper.gemspec +124 -109
- data/lib/gscraper.rb +1 -1
- data/lib/gscraper/gscraper.rb +24 -20
- data/lib/gscraper/has_pages.rb +1 -3
- data/lib/gscraper/hosts.rb +158 -0
- data/lib/gscraper/languages.rb +110 -0
- data/lib/gscraper/licenses.rb +4 -1
- data/lib/gscraper/page.rb +1 -3
- data/lib/gscraper/search.rb +1 -1
- data/lib/gscraper/search/ajax_query.rb +33 -34
- data/lib/gscraper/{extensions.rb → search/exceptions.rb} +2 -2
- data/lib/gscraper/{extensions/uri.rb → search/exceptions/blocked.rb} +10 -2
- data/lib/gscraper/search/page.rb +47 -67
- data/lib/gscraper/search/query.rb +90 -44
- data/lib/gscraper/search/result.rb +7 -9
- data/lib/gscraper/search/search.rb +2 -2
- data/lib/gscraper/search/web_query.rb +93 -101
- data/lib/gscraper/sponsored_ad.rb +3 -3
- data/lib/gscraper/sponsored_links.rb +1 -3
- data/lib/gscraper/version.rb +2 -2
- data/spec/languages_spec.rb +28 -0
- data/spec/search/ajax_query_spec.rb +2 -1
- data/spec/search/query_spec.rb +29 -0
- data/spec/search/web_query_spec.rb +21 -1
- data/spec/spec_helper.rb +2 -12
- metadata +107 -125
- data/.specopts +0 -1
- data/Gemfile +0 -25
- data/lib/gscraper/extensions/uri/http.rb +0 -31
- data/lib/gscraper/extensions/uri/query_params.rb +0 -109
- data/spec/extensions/uri/http_spec.rb +0 -9
- data/spec/extensions/uri/query_params_spec.rb +0 -46
data/lib/gscraper/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# GScraper - A web-scraping interface to various Google Services.
|
3
3
|
#
|
4
|
-
# Copyright (c) 2007-
|
4
|
+
# Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
5
|
#
|
6
6
|
# This program is free software; you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
@@ -20,5 +20,5 @@
|
|
20
20
|
|
21
21
|
module GScraper
|
22
22
|
# The version of GScraper
|
23
|
-
VERSION = '0.3.0'
|
23
|
+
VERSION = '0.4.0'
|
24
24
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'gscraper/languages'
|
3
|
+
|
4
|
+
describe GScraper::Languages do
|
5
|
+
it "should lookup the language for a locale" do
|
6
|
+
GScraper::Languages.find('es').should == 'es'
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should lookup the language for locale_country" do
|
10
|
+
GScraper::Languages.find('es_AR').should == 'es'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should lookup the language for a locale@alias" do
|
14
|
+
GScraper::Languages.find('en@quot').should == 'en'
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should map zh_CN* to zh-CN" do
|
18
|
+
GScraper::Languages.find('zh_CN').should == 'zh-CN'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should map zh_TW* to zh-TW" do
|
22
|
+
GScraper::Languages.find('zh_TW').should == 'zh-TW'
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should auto-detect the native language" do
|
26
|
+
GScraper::Languages.native.should_not be_nil
|
27
|
+
end
|
28
|
+
end
|
@@ -50,7 +50,8 @@ describe GScraper::Search::AJAXQuery do
|
|
50
50
|
|
51
51
|
it "should have a default 'hl' query-param" do
|
52
52
|
hl = @uri.query_params['hl']
|
53
|
-
|
53
|
+
|
54
|
+
hl.should_not be_empty
|
54
55
|
end
|
55
56
|
|
56
57
|
it "should have a default 'gss' query-param of '.com'" do
|
data/spec/search/query_spec.rb
CHANGED
@@ -3,6 +3,35 @@ require 'spec_helper'
|
|
3
3
|
require 'gscraper/search/query'
|
4
4
|
|
5
5
|
describe GScraper::Search::Query do
|
6
|
+
it "should have a default host of www.google.com" do
|
7
|
+
query = GScraper::Search::Query.new
|
8
|
+
|
9
|
+
query.search_host.should == 'www.google.com'
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should allow using alternate hosts" do
|
13
|
+
alternate_host = 'www.google.com.ar'
|
14
|
+
query = GScraper::Search::Query.new(
|
15
|
+
:search_host => alternate_host
|
16
|
+
)
|
17
|
+
|
18
|
+
query.search_host.should == alternate_host
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should use random hosts if load balancing is enabled" do
|
22
|
+
query = GScraper::Search::Query.new(:load_balance => true)
|
23
|
+
|
24
|
+
host1 = query.search_host
|
25
|
+
host2 = query.search_host
|
26
|
+
|
27
|
+
host1.should_not == host2
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should have a default language" do
|
31
|
+
query = GScraper::Search::Query.new
|
32
|
+
|
33
|
+
query.language.should_not be_nil
|
34
|
+
end
|
6
35
|
|
7
36
|
it "should support basic queries" do
|
8
37
|
expr = 'ruby -blog'
|
@@ -11,7 +11,9 @@ describe GScraper::Search::WebQuery do
|
|
11
11
|
include Helpers
|
12
12
|
|
13
13
|
before(:all) do
|
14
|
-
@query = GScraper::Search::WebQuery.new(
|
14
|
+
@query = GScraper::Search::WebQuery.new(
|
15
|
+
:query => Helpers::DEFAULT_QUERY
|
16
|
+
)
|
15
17
|
@page = @query.first_page
|
16
18
|
@links = @query.sponsored_links
|
17
19
|
end
|
@@ -30,6 +32,24 @@ describe GScraper::Search::WebQuery do
|
|
30
32
|
@uri.class.should == URI::HTTP
|
31
33
|
end
|
32
34
|
|
35
|
+
it "should have a default host of www.google.com" do
|
36
|
+
@uri.host.should == 'www.google.com'
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should allow using alternate hosts" do
|
40
|
+
other_host = 'www.google.com.ar'
|
41
|
+
other_query = GScraper::Search::WebQuery.new(
|
42
|
+
:search_host => other_host,
|
43
|
+
:query => Helpers::DEFAULT_QUERY
|
44
|
+
)
|
45
|
+
|
46
|
+
other_query.search_url.host.should == other_host
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should have a path of /search" do
|
50
|
+
@uri.path.should == '/search'
|
51
|
+
end
|
52
|
+
|
33
53
|
it "should have a 'q' query-param" do
|
34
54
|
@uri.query_params['q'].should == Helpers::DEFAULT_QUERY
|
35
55
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,12 +1,2 @@
|
|
1
|
-
|
2
|
-
require '
|
3
|
-
|
4
|
-
begin
|
5
|
-
Bundler.setup(:runtime, :test)
|
6
|
-
rescue Bundler::BundlerError => e
|
7
|
-
STDERR.puts e.message
|
8
|
-
STDERR.puts "Run `bundle install` to install missing gems"
|
9
|
-
exit e.status_code
|
10
|
-
end
|
11
|
-
|
12
|
-
require 'spec'
|
1
|
+
gem 'rspec', '~> 2.4'
|
2
|
+
require 'rspec'
|
metadata
CHANGED
@@ -1,142 +1,141 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: gscraper
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 3
|
8
|
-
- 0
|
9
|
-
version: 0.3.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Postmodern
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
name: json_pure
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
12
|
+
date: 2012-04-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: json
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
23
17
|
none: false
|
24
|
-
requirements:
|
18
|
+
requirements:
|
25
19
|
- - ~>
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
|
28
|
-
- 1
|
29
|
-
- 4
|
30
|
-
- 0
|
31
|
-
version: 1.4.0
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.6'
|
32
22
|
type: :runtime
|
33
23
|
prerelease: false
|
34
|
-
version_requirements:
|
35
|
-
|
36
|
-
|
37
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: uri-query_params
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
38
33
|
none: false
|
39
|
-
requirements:
|
34
|
+
requirements:
|
40
35
|
- - ~>
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
- 1
|
44
|
-
- 0
|
45
|
-
- 0
|
46
|
-
version: 1.0.0
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.5'
|
47
38
|
type: :runtime
|
48
39
|
prerelease: false
|
49
|
-
version_requirements:
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: bundler
|
52
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
41
|
none: false
|
54
|
-
requirements:
|
42
|
+
requirements:
|
55
43
|
- - ~>
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.5'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: mechanize
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '2.0'
|
54
|
+
type: :runtime
|
63
55
|
prerelease: false
|
64
|
-
version_requirements:
|
65
|
-
|
66
|
-
|
67
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rubygems-tasks
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
68
65
|
none: false
|
69
|
-
requirements:
|
66
|
+
requirements:
|
70
67
|
- - ~>
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
- 0
|
74
|
-
- 8
|
75
|
-
- 7
|
76
|
-
version: 0.8.7
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.1'
|
77
70
|
type: :development
|
78
71
|
prerelease: false
|
79
|
-
version_requirements:
|
80
|
-
|
81
|
-
|
82
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0.1'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rspec
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
83
81
|
none: false
|
84
|
-
requirements:
|
82
|
+
requirements:
|
85
83
|
- - ~>
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
|
88
|
-
- 1
|
89
|
-
- 4
|
90
|
-
- 0
|
91
|
-
version: 1.4.0
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '2.4'
|
92
86
|
type: :development
|
93
87
|
prerelease: false
|
94
|
-
version_requirements:
|
95
|
-
|
96
|
-
|
97
|
-
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '2.4'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: yard
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
98
97
|
none: false
|
99
|
-
requirements:
|
98
|
+
requirements:
|
100
99
|
- - ~>
|
101
|
-
- !ruby/object:Gem::Version
|
102
|
-
|
103
|
-
- 1
|
104
|
-
- 3
|
105
|
-
- 0
|
106
|
-
version: 1.3.0
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0.6'
|
107
102
|
type: :development
|
108
103
|
prerelease: false
|
109
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.6'
|
110
110
|
description: GScraper is a web-scraping interface to various Google Services.
|
111
111
|
email: postmodern.mod3@gmail.com
|
112
112
|
executables: []
|
113
|
-
|
114
113
|
extensions: []
|
115
|
-
|
116
|
-
|
114
|
+
extra_rdoc_files:
|
115
|
+
- COPYING.txt
|
117
116
|
- ChangeLog.md
|
118
117
|
- README.md
|
119
|
-
files:
|
118
|
+
files:
|
120
119
|
- .gitignore
|
121
|
-
- .specopts
|
120
|
+
- .rspec
|
122
121
|
- .yardopts
|
123
122
|
- COPYING.txt
|
124
123
|
- ChangeLog.md
|
125
|
-
- Gemfile
|
126
124
|
- README.md
|
127
125
|
- Rakefile
|
126
|
+
- gemspec.yml
|
128
127
|
- gscraper.gemspec
|
129
128
|
- lib/gscraper.rb
|
130
|
-
- lib/gscraper/extensions.rb
|
131
|
-
- lib/gscraper/extensions/uri.rb
|
132
|
-
- lib/gscraper/extensions/uri/http.rb
|
133
|
-
- lib/gscraper/extensions/uri/query_params.rb
|
134
129
|
- lib/gscraper/gscraper.rb
|
135
130
|
- lib/gscraper/has_pages.rb
|
131
|
+
- lib/gscraper/hosts.rb
|
132
|
+
- lib/gscraper/languages.rb
|
136
133
|
- lib/gscraper/licenses.rb
|
137
134
|
- lib/gscraper/page.rb
|
138
135
|
- lib/gscraper/search.rb
|
139
136
|
- lib/gscraper/search/ajax_query.rb
|
137
|
+
- lib/gscraper/search/exceptions.rb
|
138
|
+
- lib/gscraper/search/exceptions/blocked.rb
|
140
139
|
- lib/gscraper/search/page.rb
|
141
140
|
- lib/gscraper/search/query.rb
|
142
141
|
- lib/gscraper/search/result.rb
|
@@ -145,63 +144,46 @@ files:
|
|
145
144
|
- lib/gscraper/sponsored_ad.rb
|
146
145
|
- lib/gscraper/sponsored_links.rb
|
147
146
|
- lib/gscraper/version.rb
|
148
|
-
- spec/extensions/uri/http_spec.rb
|
149
|
-
- spec/extensions/uri/query_params_spec.rb
|
150
147
|
- spec/gscraper_spec.rb
|
151
148
|
- spec/has_pages_examples.rb
|
152
149
|
- spec/has_sponsored_links_examples.rb
|
153
150
|
- spec/helpers/query.rb
|
154
151
|
- spec/helpers/uri.rb
|
152
|
+
- spec/languages_spec.rb
|
155
153
|
- spec/page_has_results_examples.rb
|
156
154
|
- spec/search/ajax_query_spec.rb
|
157
155
|
- spec/search/page_has_results_examples.rb
|
158
156
|
- spec/search/query_spec.rb
|
159
157
|
- spec/search/web_query_spec.rb
|
160
158
|
- spec/spec_helper.rb
|
161
|
-
|
162
|
-
|
163
|
-
licenses:
|
159
|
+
homepage: https://github.com/postmodern/gscraper
|
160
|
+
licenses:
|
164
161
|
- GPL-2
|
165
162
|
post_install_message:
|
166
163
|
rdoc_options: []
|
167
|
-
|
168
|
-
require_paths:
|
164
|
+
require_paths:
|
169
165
|
- lib
|
170
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
166
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
171
167
|
none: false
|
172
|
-
requirements:
|
173
|
-
- -
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
|
176
|
-
|
177
|
-
- 0
|
178
|
-
version: "0"
|
179
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
|
+
requirements:
|
169
|
+
- - ! '>='
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: '0'
|
172
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
180
173
|
none: false
|
181
|
-
requirements:
|
182
|
-
- -
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
|
185
|
-
- 0
|
186
|
-
version: "0"
|
174
|
+
requirements:
|
175
|
+
- - ! '>='
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
version: '0'
|
187
178
|
requirements: []
|
188
|
-
|
189
179
|
rubyforge_project:
|
190
|
-
rubygems_version: 1.
|
180
|
+
rubygems_version: 1.8.23
|
191
181
|
signing_key:
|
192
182
|
specification_version: 3
|
193
|
-
summary:
|
194
|
-
test_files:
|
195
|
-
- spec/extensions/uri/http_spec.rb
|
196
|
-
- spec/extensions/uri/query_params_spec.rb
|
183
|
+
summary: Web-scraping interface to various Google Services.
|
184
|
+
test_files:
|
197
185
|
- spec/gscraper_spec.rb
|
198
|
-
- spec/has_pages_examples.rb
|
199
|
-
- spec/has_sponsored_links_examples.rb
|
200
|
-
- spec/helpers/query.rb
|
201
|
-
- spec/helpers/uri.rb
|
202
|
-
- spec/page_has_results_examples.rb
|
186
|
+
- spec/languages_spec.rb
|
203
187
|
- spec/search/ajax_query_spec.rb
|
204
|
-
- spec/search/page_has_results_examples.rb
|
205
188
|
- spec/search/query_spec.rb
|
206
189
|
- spec/search/web_query_spec.rb
|
207
|
-
- spec/spec_helper.rb
|