gscraper 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +1 -0
- data/ChangeLog.md +24 -2
- data/README.md +12 -7
- data/Rakefile +26 -29
- data/gemspec.yml +20 -0
- data/gscraper.gemspec +124 -109
- data/lib/gscraper.rb +1 -1
- data/lib/gscraper/gscraper.rb +24 -20
- data/lib/gscraper/has_pages.rb +1 -3
- data/lib/gscraper/hosts.rb +158 -0
- data/lib/gscraper/languages.rb +110 -0
- data/lib/gscraper/licenses.rb +4 -1
- data/lib/gscraper/page.rb +1 -3
- data/lib/gscraper/search.rb +1 -1
- data/lib/gscraper/search/ajax_query.rb +33 -34
- data/lib/gscraper/{extensions.rb → search/exceptions.rb} +2 -2
- data/lib/gscraper/{extensions/uri.rb → search/exceptions/blocked.rb} +10 -2
- data/lib/gscraper/search/page.rb +47 -67
- data/lib/gscraper/search/query.rb +90 -44
- data/lib/gscraper/search/result.rb +7 -9
- data/lib/gscraper/search/search.rb +2 -2
- data/lib/gscraper/search/web_query.rb +93 -101
- data/lib/gscraper/sponsored_ad.rb +3 -3
- data/lib/gscraper/sponsored_links.rb +1 -3
- data/lib/gscraper/version.rb +2 -2
- data/spec/languages_spec.rb +28 -0
- data/spec/search/ajax_query_spec.rb +2 -1
- data/spec/search/query_spec.rb +29 -0
- data/spec/search/web_query_spec.rb +21 -1
- data/spec/spec_helper.rb +2 -12
- metadata +107 -125
- data/.specopts +0 -1
- data/Gemfile +0 -25
- data/lib/gscraper/extensions/uri/http.rb +0 -31
- data/lib/gscraper/extensions/uri/query_params.rb +0 -109
- data/spec/extensions/uri/http_spec.rb +0 -9
- data/spec/extensions/uri/query_params_spec.rb +0 -46
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format documentation
|
data/ChangeLog.md
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
### 0.4.0 / 2012-04-26
|
2
|
+
|
3
|
+
* Switched from Bundler to rubygems-tasks ~> 0.1.
|
4
|
+
* Switched from json_pure to json ~> 1.6.
|
5
|
+
* Require uri-query_params ~> 0.5.
|
6
|
+
* Require mechanize ~> 2.0.
|
7
|
+
* Added {GScraper::Search::Blocked}.
|
8
|
+
* Added {GScraper::Hosts}.
|
9
|
+
* Added {GScraper::Languages}.
|
10
|
+
* Added {GScraper::Search::Query#define}.
|
11
|
+
* Added `:load_balance` option to {GScraper::Search::Query#initialize}, which
|
12
|
+
will randomize {GScraper::Search::Query#search_host}.
|
13
|
+
* Allow `:all*` / `:with*` search options to accept String or Array values.
|
14
|
+
* Allow {GScraper::Search::WebQuery} and {GScraper::Search::AJAXQuery} to
|
15
|
+
submit queries to alternate domains via the `:search_host` option.
|
16
|
+
* Renamed `#occurrs_within`, `:occurrs_within` to `#occurs_within`,
|
17
|
+
`:occurs_within`, respectively in {GScraper::Search::WebQuery}.
|
18
|
+
* Prefer XPath over CSS-path expressions.
|
19
|
+
* Fixed XPath expressions in {GScraper::Search::WebQuery#page}
|
20
|
+
(thanks Jake Auswick and Ezekiel Templin).
|
21
|
+
* Fixed spelling errors.
|
22
|
+
|
1
23
|
### 0.3.0 / 2010-07-01
|
2
24
|
|
3
25
|
* Upgraded to mechanize ~> 1.0.0.
|
@@ -13,7 +35,7 @@
|
|
13
35
|
* Aliased {GScraper::Search::WebQuery#links_to=} to `link=`.
|
14
36
|
* Removed `GScraper.open_uri`.
|
15
37
|
* Removed `GScraper.open_page`.
|
16
|
-
* Fixed the escaping/unescaping of URL query params in
|
38
|
+
* Fixed the escaping/unescaping of URL query params in `URI::QueryParams`.
|
17
39
|
* Use `yield` instead of `block.call`, when possible.
|
18
40
|
* All enumerable methods now return an `Enumerator` object, if no block was
|
19
41
|
given.
|
@@ -57,7 +79,7 @@
|
|
57
79
|
### 0.1.8 / 2008-04-30
|
58
80
|
|
59
81
|
* Added the {GScraper.user_agent_alias=} method.
|
60
|
-
* Added
|
82
|
+
* Added `URI::HTTP::QueryParams` module.
|
61
83
|
* Changed license from MIT to GPL-2.
|
62
84
|
|
63
85
|
### 0.1.7 / 2008-04-28
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# GScraper
|
2
2
|
|
3
|
-
* [
|
4
|
-
* [
|
5
|
-
*
|
3
|
+
* [Source](https://github.com/postmodern/gscraper/)
|
4
|
+
* [Issues](https://github.com/postmodern/gscraper/issues)
|
5
|
+
* [Email](mailto:postmodern.mod3 at gmail.com)
|
6
6
|
|
7
7
|
## Description
|
8
8
|
|
@@ -18,11 +18,16 @@ GScraper is a web-scraping interface to various Google Services.
|
|
18
18
|
|
19
19
|
## Requirements
|
20
20
|
|
21
|
-
* [
|
21
|
+
* [json](http://flori.github.com/json/)
|
22
|
+
~> 1.6
|
23
|
+
* [uri-query_params](https://github.com/postmodern/uri-query_params#readme)
|
24
|
+
~> 0.5
|
25
|
+
* [mechanize](http://mechanize.rubyforge.org/mechanize/)
|
26
|
+
~> 2.0
|
22
27
|
|
23
28
|
## Install
|
24
29
|
|
25
|
-
$
|
30
|
+
$ gem install gscraper
|
26
31
|
|
27
32
|
## Examples
|
28
33
|
|
@@ -44,7 +49,7 @@ Queries from URLs:
|
|
44
49
|
|
45
50
|
q.query # => "ruby"
|
46
51
|
q.with_words # => "rails"
|
47
|
-
q.
|
52
|
+
q.occurs_within # => :title
|
48
53
|
q.rights # => :cc_by_nc
|
49
54
|
|
50
55
|
Getting the search results:
|
@@ -128,7 +133,7 @@ Setting the User-Agent globally:
|
|
128
133
|
|
129
134
|
GScraper - A web-scraping interface to various Google Services.
|
130
135
|
|
131
|
-
Copyright (c) 2007-
|
136
|
+
Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
132
137
|
|
133
138
|
This program is free software; you can redistribute it and/or modify
|
134
139
|
it under the terms of the GNU General Public License as published by
|
data/Rakefile
CHANGED
@@ -1,38 +1,35 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require '
|
2
|
+
require 'rake'
|
3
3
|
|
4
4
|
begin
|
5
|
-
|
6
|
-
|
7
|
-
STDERR.puts e.message
|
8
|
-
STDERR.puts "Run `bundle install` to install missing gems"
|
9
|
-
exit e.status_code
|
10
|
-
end
|
5
|
+
gem 'rubygems-tasks', '~> 0.1'
|
6
|
+
require 'rubygems/tasks'
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
Jeweler::Tasks.new do |gem|
|
17
|
-
gem.name = 'gscraper'
|
18
|
-
gem.version = GScraper::VERSION
|
19
|
-
gem.license = 'GPL-2'
|
20
|
-
gem.summary = %Q{GScraper is a web-scraping interface to various Google Services.}
|
21
|
-
gem.description = %Q{GScraper is a web-scraping interface to various Google Services.}
|
22
|
-
gem.email = 'postmodern.mod3@gmail.com'
|
23
|
-
gem.homepage = 'http://github.com/postmodern/gscraper'
|
24
|
-
gem.authors = ['Postmodern']
|
25
|
-
gem.has_rdoc = 'yard'
|
8
|
+
Gem::Tasks.new
|
9
|
+
rescue LoadError => e
|
10
|
+
warn e.message
|
11
|
+
warn "Run `gem install rubygems-tasks` to install 'rubygems/tasks'."
|
26
12
|
end
|
27
13
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
32
|
-
spec.spec_opts = ['--options', '.specopts']
|
33
|
-
end
|
14
|
+
begin
|
15
|
+
gem 'rspec', '~> 2.4'
|
16
|
+
require 'rspec/core/rake_task'
|
34
17
|
|
18
|
+
RSpec::Core::RakeTask.new
|
19
|
+
rescue LoadError => e
|
20
|
+
task :spec do
|
21
|
+
abort "Please run `gem install rspec` to install RSpec."
|
22
|
+
end
|
23
|
+
end
|
35
24
|
task :default => :spec
|
36
25
|
|
37
|
-
|
38
|
-
|
26
|
+
begin
|
27
|
+
gem 'yard', '~> 0.6.0'
|
28
|
+
require 'yard'
|
29
|
+
|
30
|
+
YARD::Rake::YardocTask.new
|
31
|
+
rescue LoadError => e
|
32
|
+
task :yard do
|
33
|
+
abort "Please run `gem install yard` to install YARD."
|
34
|
+
end
|
35
|
+
end
|
data/gemspec.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
name: gscraper
|
2
|
+
summary: Web-scraping interface to various Google Services.
|
3
|
+
description:
|
4
|
+
GScraper is a web-scraping interface to various Google Services.
|
5
|
+
|
6
|
+
license: GPL-2
|
7
|
+
authors: Postmodern
|
8
|
+
email: postmodern.mod3@gmail.com
|
9
|
+
homepage: https://github.com/postmodern/gscraper
|
10
|
+
has_yard: true
|
11
|
+
|
12
|
+
dependencies:
|
13
|
+
json: ~> 1.6
|
14
|
+
uri-query_params: ~> 0.5
|
15
|
+
mechanize: ~> 2.0
|
16
|
+
|
17
|
+
development_dependencies:
|
18
|
+
rubygems-tasks: ~> 0.1
|
19
|
+
rspec: ~> 2.4
|
20
|
+
yard: ~> 0.6
|
data/gscraper.gemspec
CHANGED
@@ -1,112 +1,127 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
".yardopts",
|
23
|
-
"COPYING.txt",
|
24
|
-
"ChangeLog.md",
|
25
|
-
"Gemfile",
|
26
|
-
"README.md",
|
27
|
-
"Rakefile",
|
28
|
-
"gscraper.gemspec",
|
29
|
-
"lib/gscraper.rb",
|
30
|
-
"lib/gscraper/extensions.rb",
|
31
|
-
"lib/gscraper/extensions/uri.rb",
|
32
|
-
"lib/gscraper/extensions/uri/http.rb",
|
33
|
-
"lib/gscraper/extensions/uri/query_params.rb",
|
34
|
-
"lib/gscraper/gscraper.rb",
|
35
|
-
"lib/gscraper/has_pages.rb",
|
36
|
-
"lib/gscraper/licenses.rb",
|
37
|
-
"lib/gscraper/page.rb",
|
38
|
-
"lib/gscraper/search.rb",
|
39
|
-
"lib/gscraper/search/ajax_query.rb",
|
40
|
-
"lib/gscraper/search/page.rb",
|
41
|
-
"lib/gscraper/search/query.rb",
|
42
|
-
"lib/gscraper/search/result.rb",
|
43
|
-
"lib/gscraper/search/search.rb",
|
44
|
-
"lib/gscraper/search/web_query.rb",
|
45
|
-
"lib/gscraper/sponsored_ad.rb",
|
46
|
-
"lib/gscraper/sponsored_links.rb",
|
47
|
-
"lib/gscraper/version.rb",
|
48
|
-
"spec/extensions/uri/http_spec.rb",
|
49
|
-
"spec/extensions/uri/query_params_spec.rb",
|
50
|
-
"spec/gscraper_spec.rb",
|
51
|
-
"spec/has_pages_examples.rb",
|
52
|
-
"spec/has_sponsored_links_examples.rb",
|
53
|
-
"spec/helpers/query.rb",
|
54
|
-
"spec/helpers/uri.rb",
|
55
|
-
"spec/page_has_results_examples.rb",
|
56
|
-
"spec/search/ajax_query_spec.rb",
|
57
|
-
"spec/search/page_has_results_examples.rb",
|
58
|
-
"spec/search/query_spec.rb",
|
59
|
-
"spec/search/web_query_spec.rb",
|
60
|
-
"spec/spec_helper.rb"
|
61
|
-
]
|
62
|
-
s.has_rdoc = %q{yard}
|
63
|
-
s.homepage = %q{http://github.com/postmodern/gscraper}
|
64
|
-
s.licenses = ["GPL-2"]
|
65
|
-
s.require_paths = ["lib"]
|
66
|
-
s.rubygems_version = %q{1.3.7}
|
67
|
-
s.summary = %q{GScraper is a web-scraping interface to various Google Services.}
|
68
|
-
s.test_files = [
|
69
|
-
"spec/extensions/uri/http_spec.rb",
|
70
|
-
"spec/extensions/uri/query_params_spec.rb",
|
71
|
-
"spec/gscraper_spec.rb",
|
72
|
-
"spec/has_pages_examples.rb",
|
73
|
-
"spec/has_sponsored_links_examples.rb",
|
74
|
-
"spec/helpers/query.rb",
|
75
|
-
"spec/helpers/uri.rb",
|
76
|
-
"spec/page_has_results_examples.rb",
|
77
|
-
"spec/search/ajax_query_spec.rb",
|
78
|
-
"spec/search/page_has_results_examples.rb",
|
79
|
-
"spec/search/query_spec.rb",
|
80
|
-
"spec/search/web_query_spec.rb",
|
81
|
-
"spec/spec_helper.rb"
|
82
|
-
]
|
83
|
-
|
84
|
-
if s.respond_to? :specification_version then
|
85
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
86
|
-
s.specification_version = 3
|
87
|
-
|
88
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
89
|
-
s.add_runtime_dependency(%q<json_pure>, ["~> 1.4.0"])
|
90
|
-
s.add_runtime_dependency(%q<mechanize>, ["~> 1.0.0"])
|
91
|
-
s.add_development_dependency(%q<bundler>, ["~> 0.9.19"])
|
92
|
-
s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
|
93
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
|
94
|
-
s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
|
95
|
-
else
|
96
|
-
s.add_dependency(%q<json_pure>, ["~> 1.4.0"])
|
97
|
-
s.add_dependency(%q<mechanize>, ["~> 1.0.0"])
|
98
|
-
s.add_dependency(%q<bundler>, ["~> 0.9.19"])
|
99
|
-
s.add_dependency(%q<rake>, ["~> 0.8.7"])
|
100
|
-
s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
|
101
|
-
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gemspec|
|
6
|
+
files = if File.directory?('.git')
|
7
|
+
`git ls-files`.split($/)
|
8
|
+
elsif File.directory?('.hg')
|
9
|
+
`hg manifest`.split($/)
|
10
|
+
elsif File.directory?('.svn')
|
11
|
+
`svn ls -R`.split($/).select { |path| File.file?(path) }
|
12
|
+
else
|
13
|
+
Dir['{**/}{.*,*}'].select { |path| File.file?(path) }
|
14
|
+
end
|
15
|
+
|
16
|
+
filter_files = lambda { |paths|
|
17
|
+
case paths
|
18
|
+
when Array
|
19
|
+
(files & paths)
|
20
|
+
when String
|
21
|
+
(files & Dir[paths])
|
102
22
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
23
|
+
}
|
24
|
+
|
25
|
+
version = {
|
26
|
+
:file => 'lib/gscraper/version.rb',
|
27
|
+
:constant => 'GScraper::VERSION'
|
28
|
+
}
|
29
|
+
|
30
|
+
defaults = {
|
31
|
+
'name' => File.basename(File.dirname(__FILE__)),
|
32
|
+
'files' => files,
|
33
|
+
'executables' => filter_files['bin/*'].map { |path| File.basename(path) },
|
34
|
+
'test_files' => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
|
35
|
+
'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
|
36
|
+
}
|
37
|
+
|
38
|
+
metadata = defaults.merge(YAML.load_file('gemspec.yml'))
|
39
|
+
|
40
|
+
gemspec.name = metadata.fetch('name',defaults[:name])
|
41
|
+
gemspec.version = if metadata['version']
|
42
|
+
metadata['version']
|
43
|
+
elsif File.file?(version[:file])
|
44
|
+
require File.join('.',version[:file])
|
45
|
+
eval(version[:constant])
|
46
|
+
end
|
47
|
+
|
48
|
+
gemspec.summary = metadata.fetch('summary',metadata['description'])
|
49
|
+
gemspec.description = metadata.fetch('description',metadata['summary'])
|
50
|
+
|
51
|
+
case metadata['license']
|
52
|
+
when Array
|
53
|
+
gemspec.licenses = metadata['license']
|
54
|
+
when String
|
55
|
+
gemspec.license = metadata['license']
|
56
|
+
end
|
57
|
+
|
58
|
+
case metadata['authors']
|
59
|
+
when Array
|
60
|
+
gemspec.authors = metadata['authors']
|
61
|
+
when String
|
62
|
+
gemspec.author = metadata['authors']
|
63
|
+
end
|
64
|
+
|
65
|
+
gemspec.email = metadata['email']
|
66
|
+
gemspec.homepage = metadata['homepage']
|
67
|
+
|
68
|
+
case metadata['require_paths']
|
69
|
+
when Array
|
70
|
+
gemspec.require_paths = metadata['require_paths']
|
71
|
+
when String
|
72
|
+
gemspec.require_path = metadata['require_paths']
|
73
|
+
end
|
74
|
+
|
75
|
+
gemspec.files = filter_files[metadata['files']]
|
76
|
+
|
77
|
+
gemspec.executables = metadata['executables']
|
78
|
+
gemspec.extensions = metadata['extensions']
|
79
|
+
|
80
|
+
if Gem::VERSION < '1.7.'
|
81
|
+
gemspec.default_executable = gemspec.executables.first
|
110
82
|
end
|
111
|
-
end
|
112
83
|
|
84
|
+
gemspec.test_files = filter_files[metadata['test_files']]
|
85
|
+
|
86
|
+
unless gemspec.files.include?('.document')
|
87
|
+
gemspec.extra_rdoc_files = metadata['extra_doc_files']
|
88
|
+
end
|
89
|
+
|
90
|
+
gemspec.post_install_message = metadata['post_install_message']
|
91
|
+
gemspec.requirements = metadata['requirements']
|
92
|
+
|
93
|
+
if gemspec.respond_to?(:required_ruby_version=)
|
94
|
+
gemspec.required_ruby_version = metadata['required_ruby_version']
|
95
|
+
end
|
96
|
+
|
97
|
+
if gemspec.respond_to?(:required_rubygems_version=)
|
98
|
+
# Read the RubyGems constraint from its own gemspec.yml key rather than reusing 'required_ruby_version'.
gemspec.required_rubygems_version = metadata['required_rubygems_version']
|
99
|
+
end
|
100
|
+
|
101
|
+
parse_versions = lambda { |versions|
|
102
|
+
case versions
|
103
|
+
when Array
|
104
|
+
versions.map { |v| v.to_s }
|
105
|
+
when String
|
106
|
+
versions.split(/,\s*/)
|
107
|
+
end
|
108
|
+
}
|
109
|
+
|
110
|
+
if metadata['dependencies']
|
111
|
+
metadata['dependencies'].each do |name,versions|
|
112
|
+
gemspec.add_dependency(name,parse_versions[versions])
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
if metadata['runtime_dependencies']
|
117
|
+
metadata['runtime_dependencies'].each do |name,versions|
|
118
|
+
gemspec.add_runtime_dependency(name,parse_versions[versions])
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
if metadata['development_dependencies']
|
123
|
+
metadata['development_dependencies'].each do |name,versions|
|
124
|
+
gemspec.add_development_dependency(name,parse_versions[versions])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
data/lib/gscraper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# GScraper - A web-scraping interface to various Google Services.
|
3
3
|
#
|
4
|
-
# Copyright (c) 2007-
|
4
|
+
# Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
5
|
#
|
6
6
|
# This program is free software; you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/gscraper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# GScraper - A web-scraping interface to various Google Services.
|
3
3
|
#
|
4
|
-
# Copyright (c) 2007-
|
4
|
+
# Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
5
|
#
|
6
6
|
# This program is free software; you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
@@ -21,7 +21,6 @@
|
|
21
21
|
require 'uri/http'
|
22
22
|
require 'mechanize'
|
23
23
|
require 'nokogiri'
|
24
|
-
require 'open-uri'
|
25
24
|
|
26
25
|
module GScraper
|
27
26
|
# Common proxy port.
|
@@ -32,8 +31,13 @@ module GScraper
|
|
32
31
|
#
|
33
32
|
# @return [Hash]
|
34
33
|
#
|
35
|
-
def
|
36
|
-
@@gscraper_proxy ||= {
|
34
|
+
def self.proxy
|
35
|
+
@@gscraper_proxy ||= {
|
36
|
+
:host => nil,
|
37
|
+
:port => COMMON_PROXY_PORT,
|
38
|
+
:user => nil,
|
39
|
+
:password => nil
|
40
|
+
}
|
37
41
|
end
|
38
42
|
|
39
43
|
#
|
@@ -54,13 +58,13 @@ module GScraper
|
|
54
58
|
# @option proxy_info [String] :password
|
55
59
|
# The password to login with.
|
56
60
|
#
|
57
|
-
def
|
58
|
-
if
|
61
|
+
def self.proxy_uri(proxy=self.proxy)
|
62
|
+
if proxy[:host]
|
59
63
|
return URI::HTTP.build(
|
60
|
-
:host
|
61
|
-
:port
|
62
|
-
:userinfo => "#{
|
63
|
-
:path
|
64
|
+
:host => proxy[:host],
|
65
|
+
:port => proxy[:port],
|
66
|
+
:userinfo => "#{proxy[:user]}:#{proxy[:password]}",
|
67
|
+
:path => '/'
|
64
68
|
)
|
65
69
|
end
|
66
70
|
end
|
@@ -70,7 +74,7 @@ module GScraper
|
|
70
74
|
#
|
71
75
|
# @return [Array<String>]
|
72
76
|
#
|
73
|
-
def
|
77
|
+
def self.user_agent_aliases
|
74
78
|
Mechanize::AGENT_ALIASES
|
75
79
|
end
|
76
80
|
|
@@ -79,8 +83,8 @@ module GScraper
|
|
79
83
|
#
|
80
84
|
# @return [String]
|
81
85
|
#
|
82
|
-
def
|
83
|
-
@@gscraper_user_agent ||=
|
86
|
+
def self.user_agent
|
87
|
+
@@gscraper_user_agent ||= self.user_agent_aliases['Windows IE 6']
|
84
88
|
end
|
85
89
|
|
86
90
|
#
|
@@ -92,7 +96,7 @@ module GScraper
|
|
92
96
|
# @return [String]
|
93
97
|
# The new User-Agent string.
|
94
98
|
#
|
95
|
-
def
|
99
|
+
def self.user_agent=(agent)
|
96
100
|
@@gscraper_user_agent = agent
|
97
101
|
end
|
98
102
|
|
@@ -105,8 +109,8 @@ module GScraper
|
|
105
109
|
# @return [String]
|
106
110
|
# The new User-Agent string.
|
107
111
|
#
|
108
|
-
def
|
109
|
-
@@gscraper_user_agent =
|
112
|
+
def self.user_agent_alias=(name)
|
113
|
+
@@gscraper_user_agent = self.user_agent_aliases[name.to_s]
|
110
114
|
end
|
111
115
|
|
112
116
|
#
|
@@ -143,18 +147,18 @@ module GScraper
|
|
143
147
|
# GScraper.web_agent(:user_agent_alias => 'Linux Mozilla')
|
144
148
|
# GScraper.web_agent(:user_agent => 'Google Bot')
|
145
149
|
#
|
146
|
-
def
|
150
|
+
def self.web_agent(options={})
|
147
151
|
agent = Mechanize.new
|
148
152
|
|
149
153
|
if options[:user_agent_alias]
|
150
154
|
agent.user_agent_alias = options[:user_agent_alias]
|
151
155
|
elsif options[:user_agent]
|
152
156
|
agent.user_agent = options[:user_agent]
|
153
|
-
elsif
|
154
|
-
agent.user_agent =
|
157
|
+
elsif user_agent
|
158
|
+
agent.user_agent = self.user_agent
|
155
159
|
end
|
156
160
|
|
157
|
-
proxy = (options[:proxy] ||
|
161
|
+
proxy = (options[:proxy] || self.proxy)
|
158
162
|
if proxy[:host]
|
159
163
|
agent.set_proxy(proxy[:host],proxy[:port],proxy[:user],proxy[:password])
|
160
164
|
end
|