gscraper 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -0
- data/ChangeLog.md +24 -2
- data/README.md +12 -7
- data/Rakefile +26 -29
- data/gemspec.yml +20 -0
- data/gscraper.gemspec +124 -109
- data/lib/gscraper.rb +1 -1
- data/lib/gscraper/gscraper.rb +24 -20
- data/lib/gscraper/has_pages.rb +1 -3
- data/lib/gscraper/hosts.rb +158 -0
- data/lib/gscraper/languages.rb +110 -0
- data/lib/gscraper/licenses.rb +4 -1
- data/lib/gscraper/page.rb +1 -3
- data/lib/gscraper/search.rb +1 -1
- data/lib/gscraper/search/ajax_query.rb +33 -34
- data/lib/gscraper/{extensions.rb → search/exceptions.rb} +2 -2
- data/lib/gscraper/{extensions/uri.rb → search/exceptions/blocked.rb} +10 -2
- data/lib/gscraper/search/page.rb +47 -67
- data/lib/gscraper/search/query.rb +90 -44
- data/lib/gscraper/search/result.rb +7 -9
- data/lib/gscraper/search/search.rb +2 -2
- data/lib/gscraper/search/web_query.rb +93 -101
- data/lib/gscraper/sponsored_ad.rb +3 -3
- data/lib/gscraper/sponsored_links.rb +1 -3
- data/lib/gscraper/version.rb +2 -2
- data/spec/languages_spec.rb +28 -0
- data/spec/search/ajax_query_spec.rb +2 -1
- data/spec/search/query_spec.rb +29 -0
- data/spec/search/web_query_spec.rb +21 -1
- data/spec/spec_helper.rb +2 -12
- metadata +107 -125
- data/.specopts +0 -1
- data/Gemfile +0 -25
- data/lib/gscraper/extensions/uri/http.rb +0 -31
- data/lib/gscraper/extensions/uri/query_params.rb +0 -109
- data/spec/extensions/uri/http_spec.rb +0 -9
- data/spec/extensions/uri/query_params_spec.rb +0 -46
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format documentation
|
data/ChangeLog.md
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
### 0.4.0 / 2012-04-26
|
2
|
+
|
3
|
+
* Switched from Bundler to rubygems-tasks ~> 0.1.
|
4
|
+
* Switched from json_pure to json ~> 1.6.
|
5
|
+
* Require uri-query_params ~> 0.5.
|
6
|
+
* Require mechanize ~> 2.0.
|
7
|
+
* Added {GScraper::Search::Blocked}.
|
8
|
+
* Added {GScraper::Hosts}.
|
9
|
+
* Added {GScraper::Languages}.
|
10
|
+
* Added {GScraper::Search::Query#define}.
|
11
|
+
* Added `:load_balance` option to {GScraper::Search::Query#initialize}, which
|
12
|
+
will randomize {GScraper::Search::Query#search_host}.
|
13
|
+
* Allow `:all*` / `:with*` search options to accept String or Array values.
|
14
|
+
* Allow {GScraper::Search::WebQuery} and {GScraper::Search::AJAXQuery} to
|
15
|
+
submit queries to alternate domains via the `:search_host` option.
|
16
|
+
* Renamed `#occurrs_within`, `:occurrs_within` to `#occurs_within`,
|
17
|
+
`:occurs_within`, respectively in {GScraper::Search::WebQuery}.
|
18
|
+
* Prefer XPath over CSS-path expressions.
|
19
|
+
* Fixed XPath expressions in {GScraper::Search::WebQuery#page}
|
20
|
+
(thanks Jake Auswick and Ezekiel Templin).
|
21
|
+
* Fixed spelling errors.
|
22
|
+
|
1
23
|
### 0.3.0 / 2010-07-01
|
2
24
|
|
3
25
|
* Upgraded to mechanize ~> 1.0.0.
|
@@ -13,7 +35,7 @@
|
|
13
35
|
* Aliased {GScraper::Search::WebQuery#links_to=} to `link=`.
|
14
36
|
* Removed `GScraper.open_uri`.
|
15
37
|
* Removed `GScraper.open_page`.
|
16
|
-
* Fixed the escaping/unescaping of URL query params in
|
38
|
+
* Fixed the escaping/unescaping of URL query params in `URI::QueryParams`.
|
17
39
|
* Use `yield` instead of `block.call`, when possible.
|
18
40
|
* All enumerable methods now return an `Enumerator` object, if no block was
|
19
41
|
given.
|
@@ -57,7 +79,7 @@
|
|
57
79
|
### 0.1.8 / 2008-04-30
|
58
80
|
|
59
81
|
* Added the {GScraper.user_agent_alias=} method.
|
60
|
-
* Added
|
82
|
+
* Added `URI::HTTP::QueryParams` module.
|
61
83
|
* Changed license from MIT to GPL-2.
|
62
84
|
|
63
85
|
### 0.1.7 / 2008-04-28
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# GScraper
|
2
2
|
|
3
|
-
* [
|
4
|
-
* [
|
5
|
-
*
|
3
|
+
* [Source](https://github.com/postmodern/gscraper/)
|
4
|
+
* [Issues](https://github.com/postmodern/gscraper/issues)
|
5
|
+
* [Email](mailto:postmodern.mod3 at gmail.com)
|
6
6
|
|
7
7
|
## Description
|
8
8
|
|
@@ -18,11 +18,16 @@ GScraper is a web-scraping interface to various Google Services.
|
|
18
18
|
|
19
19
|
## Requirements
|
20
20
|
|
21
|
-
* [
|
21
|
+
* [json](http://flori.github.com/json/)
|
22
|
+
~> 1.6
|
23
|
+
* [uri-query_params](https://github.com/postmodern/uri-query_params#readme)
|
24
|
+
~> 0.5
|
25
|
+
* [mechanize](http://mechanize.rubyforge.org/mechanize/)
|
26
|
+
~> 2.0
|
22
27
|
|
23
28
|
## Install
|
24
29
|
|
25
|
-
$
|
30
|
+
$ gem install gscraper
|
26
31
|
|
27
32
|
## Examples
|
28
33
|
|
@@ -44,7 +49,7 @@ Queries from URLs:
|
|
44
49
|
|
45
50
|
q.query # => "ruby"
|
46
51
|
q.with_words # => "rails"
|
47
|
-
q.
|
52
|
+
q.occurs_within # => :title
|
48
53
|
q.rights # => :cc_by_nc
|
49
54
|
|
50
55
|
Getting the search results:
|
@@ -128,7 +133,7 @@ Setting the User-Agent globally:
|
|
128
133
|
|
129
134
|
GScraper - A web-scraping interface to various Google Services.
|
130
135
|
|
131
|
-
Copyright (c) 2007-
|
136
|
+
Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
132
137
|
|
133
138
|
This program is free software; you can redistribute it and/or modify
|
134
139
|
it under the terms of the GNU General Public License as published by
|
data/Rakefile
CHANGED
@@ -1,38 +1,35 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require '
|
2
|
+
require 'rake'
|
3
3
|
|
4
4
|
begin
|
5
|
-
|
6
|
-
|
7
|
-
STDERR.puts e.message
|
8
|
-
STDERR.puts "Run `bundle install` to install missing gems"
|
9
|
-
exit e.status_code
|
10
|
-
end
|
5
|
+
gem 'rubygems-tasks', '~> 0.1'
|
6
|
+
require 'rubygems/tasks'
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
Jeweler::Tasks.new do |gem|
|
17
|
-
gem.name = 'gscraper'
|
18
|
-
gem.version = GScraper::VERSION
|
19
|
-
gem.license = 'GPL-2'
|
20
|
-
gem.summary = %Q{GScraper is a web-scraping interface to various Google Services.}
|
21
|
-
gem.description = %Q{GScraper is a web-scraping interface to various Google Services.}
|
22
|
-
gem.email = 'postmodern.mod3@gmail.com'
|
23
|
-
gem.homepage = 'http://github.com/postmodern/gscraper'
|
24
|
-
gem.authors = ['Postmodern']
|
25
|
-
gem.has_rdoc = 'yard'
|
8
|
+
Gem::Tasks.new
|
9
|
+
rescue LoadError => e
|
10
|
+
warn e.message
|
11
|
+
warn "Run `gem install rubygems-tasks` to install 'rubygems/tasks'."
|
26
12
|
end
|
27
13
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
32
|
-
spec.spec_opts = ['--options', '.specopts']
|
33
|
-
end
|
14
|
+
begin
|
15
|
+
gem 'rspec', '~> 2.4'
|
16
|
+
require 'rspec/core/rake_task'
|
34
17
|
|
18
|
+
RSpec::Core::RakeTask.new
|
19
|
+
rescue LoadError => e
|
20
|
+
task :spec do
|
21
|
+
abort "Please run `gem install rspec` to install RSpec."
|
22
|
+
end
|
23
|
+
end
|
35
24
|
task :default => :spec
|
36
25
|
|
37
|
-
|
38
|
-
|
26
|
+
begin
|
27
|
+
gem 'yard', '~> 0.6' # same constraint as the yard development dependency in gemspec.yml
|
28
|
+
require 'yard'
|
29
|
+
|
30
|
+
YARD::Rake::YardocTask.new
|
31
|
+
rescue LoadError => e
|
32
|
+
task :yard do
|
33
|
+
abort "Please run `gem install yard` to install YARD."
|
34
|
+
end
|
35
|
+
end
|
data/gemspec.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
name: gscraper
|
2
|
+
summary: Web-scraping interface to various Google Services.
|
3
|
+
description:
|
4
|
+
GScraper is a web-scraping interface to various Google Services.
|
5
|
+
|
6
|
+
license: GPL-2
|
7
|
+
authors: Postmodern
|
8
|
+
email: postmodern.mod3@gmail.com
|
9
|
+
homepage: https://github.com/postmodern/gscraper
|
10
|
+
has_yard: true
|
11
|
+
|
12
|
+
dependencies:
|
13
|
+
json: ~> 1.6
|
14
|
+
uri-query_params: ~> 0.5
|
15
|
+
mechanize: ~> 2.0
|
16
|
+
|
17
|
+
development_dependencies:
|
18
|
+
rubygems-tasks: ~> 0.1
|
19
|
+
rspec: ~> 2.4
|
20
|
+
yard: ~> 0.6
|
data/gscraper.gemspec
CHANGED
@@ -1,112 +1,127 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
".yardopts",
|
23
|
-
"COPYING.txt",
|
24
|
-
"ChangeLog.md",
|
25
|
-
"Gemfile",
|
26
|
-
"README.md",
|
27
|
-
"Rakefile",
|
28
|
-
"gscraper.gemspec",
|
29
|
-
"lib/gscraper.rb",
|
30
|
-
"lib/gscraper/extensions.rb",
|
31
|
-
"lib/gscraper/extensions/uri.rb",
|
32
|
-
"lib/gscraper/extensions/uri/http.rb",
|
33
|
-
"lib/gscraper/extensions/uri/query_params.rb",
|
34
|
-
"lib/gscraper/gscraper.rb",
|
35
|
-
"lib/gscraper/has_pages.rb",
|
36
|
-
"lib/gscraper/licenses.rb",
|
37
|
-
"lib/gscraper/page.rb",
|
38
|
-
"lib/gscraper/search.rb",
|
39
|
-
"lib/gscraper/search/ajax_query.rb",
|
40
|
-
"lib/gscraper/search/page.rb",
|
41
|
-
"lib/gscraper/search/query.rb",
|
42
|
-
"lib/gscraper/search/result.rb",
|
43
|
-
"lib/gscraper/search/search.rb",
|
44
|
-
"lib/gscraper/search/web_query.rb",
|
45
|
-
"lib/gscraper/sponsored_ad.rb",
|
46
|
-
"lib/gscraper/sponsored_links.rb",
|
47
|
-
"lib/gscraper/version.rb",
|
48
|
-
"spec/extensions/uri/http_spec.rb",
|
49
|
-
"spec/extensions/uri/query_params_spec.rb",
|
50
|
-
"spec/gscraper_spec.rb",
|
51
|
-
"spec/has_pages_examples.rb",
|
52
|
-
"spec/has_sponsored_links_examples.rb",
|
53
|
-
"spec/helpers/query.rb",
|
54
|
-
"spec/helpers/uri.rb",
|
55
|
-
"spec/page_has_results_examples.rb",
|
56
|
-
"spec/search/ajax_query_spec.rb",
|
57
|
-
"spec/search/page_has_results_examples.rb",
|
58
|
-
"spec/search/query_spec.rb",
|
59
|
-
"spec/search/web_query_spec.rb",
|
60
|
-
"spec/spec_helper.rb"
|
61
|
-
]
|
62
|
-
s.has_rdoc = %q{yard}
|
63
|
-
s.homepage = %q{http://github.com/postmodern/gscraper}
|
64
|
-
s.licenses = ["GPL-2"]
|
65
|
-
s.require_paths = ["lib"]
|
66
|
-
s.rubygems_version = %q{1.3.7}
|
67
|
-
s.summary = %q{GScraper is a web-scraping interface to various Google Services.}
|
68
|
-
s.test_files = [
|
69
|
-
"spec/extensions/uri/http_spec.rb",
|
70
|
-
"spec/extensions/uri/query_params_spec.rb",
|
71
|
-
"spec/gscraper_spec.rb",
|
72
|
-
"spec/has_pages_examples.rb",
|
73
|
-
"spec/has_sponsored_links_examples.rb",
|
74
|
-
"spec/helpers/query.rb",
|
75
|
-
"spec/helpers/uri.rb",
|
76
|
-
"spec/page_has_results_examples.rb",
|
77
|
-
"spec/search/ajax_query_spec.rb",
|
78
|
-
"spec/search/page_has_results_examples.rb",
|
79
|
-
"spec/search/query_spec.rb",
|
80
|
-
"spec/search/web_query_spec.rb",
|
81
|
-
"spec/spec_helper.rb"
|
82
|
-
]
|
83
|
-
|
84
|
-
if s.respond_to? :specification_version then
|
85
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
86
|
-
s.specification_version = 3
|
87
|
-
|
88
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
89
|
-
s.add_runtime_dependency(%q<json_pure>, ["~> 1.4.0"])
|
90
|
-
s.add_runtime_dependency(%q<mechanize>, ["~> 1.0.0"])
|
91
|
-
s.add_development_dependency(%q<bundler>, ["~> 0.9.19"])
|
92
|
-
s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
|
93
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
|
94
|
-
s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
|
95
|
-
else
|
96
|
-
s.add_dependency(%q<json_pure>, ["~> 1.4.0"])
|
97
|
-
s.add_dependency(%q<mechanize>, ["~> 1.0.0"])
|
98
|
-
s.add_dependency(%q<bundler>, ["~> 0.9.19"])
|
99
|
-
s.add_dependency(%q<rake>, ["~> 0.8.7"])
|
100
|
-
s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
|
101
|
-
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gemspec|
|
6
|
+
files = if File.directory?('.git')
|
7
|
+
`git ls-files`.split($/)
|
8
|
+
elsif File.directory?('.hg')
|
9
|
+
`hg manifest`.split($/)
|
10
|
+
elsif File.directory?('.svn')
|
11
|
+
`svn ls -R`.split($/).select { |path| File.file?(path) }
|
12
|
+
else
|
13
|
+
Dir['{**/}{.*,*}'].select { |path| File.file?(path) }
|
14
|
+
end
|
15
|
+
|
16
|
+
filter_files = lambda { |paths|
|
17
|
+
case paths
|
18
|
+
when Array
|
19
|
+
(files & paths)
|
20
|
+
when String
|
21
|
+
(files & Dir[paths])
|
102
22
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
23
|
+
}
|
24
|
+
|
25
|
+
version = {
|
26
|
+
:file => 'lib/gscraper/version.rb',
|
27
|
+
:constant => 'GScraper::VERSION'
|
28
|
+
}
|
29
|
+
|
30
|
+
defaults = {
|
31
|
+
'name' => File.basename(File.dirname(__FILE__)),
|
32
|
+
'files' => files,
|
33
|
+
'executables' => filter_files['bin/*'].map { |path| File.basename(path) },
|
34
|
+
'test_files' => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
|
35
|
+
'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
|
36
|
+
}
|
37
|
+
|
38
|
+
metadata = defaults.merge(YAML.load_file('gemspec.yml'))
|
39
|
+
|
40
|
+
gemspec.name = metadata.fetch('name',defaults['name']) # defaults uses String keys; a Symbol lookup is always nil
|
41
|
+
gemspec.version = if metadata['version']
|
42
|
+
metadata['version']
|
43
|
+
elsif File.file?(version[:file])
|
44
|
+
require File.join('.',version[:file])
|
45
|
+
eval(version[:constant])
|
46
|
+
end
|
47
|
+
|
48
|
+
gemspec.summary = metadata.fetch('summary',metadata['description'])
|
49
|
+
gemspec.description = metadata.fetch('description',metadata['summary'])
|
50
|
+
|
51
|
+
case metadata['license']
|
52
|
+
when Array
|
53
|
+
gemspec.licenses = metadata['license']
|
54
|
+
when String
|
55
|
+
gemspec.license = metadata['license']
|
56
|
+
end
|
57
|
+
|
58
|
+
case metadata['authors']
|
59
|
+
when Array
|
60
|
+
gemspec.authors = metadata['authors']
|
61
|
+
when String
|
62
|
+
gemspec.author = metadata['authors']
|
63
|
+
end
|
64
|
+
|
65
|
+
gemspec.email = metadata['email']
|
66
|
+
gemspec.homepage = metadata['homepage']
|
67
|
+
|
68
|
+
case metadata['require_paths']
|
69
|
+
when Array
|
70
|
+
gemspec.require_paths = metadata['require_paths']
|
71
|
+
when String
|
72
|
+
gemspec.require_path = metadata['require_paths']
|
73
|
+
end
|
74
|
+
|
75
|
+
gemspec.files = filter_files[metadata['files']]
|
76
|
+
|
77
|
+
gemspec.executables = metadata['executables']
|
78
|
+
gemspec.extensions = metadata['extensions']
|
79
|
+
|
80
|
+
if Gem::VERSION < '1.7.'
|
81
|
+
gemspec.default_executable = gemspec.executables.first
|
110
82
|
end
|
111
|
-
end
|
112
83
|
|
84
|
+
gemspec.test_files = filter_files[metadata['test_files']]
|
85
|
+
|
86
|
+
unless gemspec.files.include?('.document')
|
87
|
+
gemspec.extra_rdoc_files = metadata['extra_doc_files']
|
88
|
+
end
|
89
|
+
|
90
|
+
gemspec.post_install_message = metadata['post_install_message']
|
91
|
+
gemspec.requirements = metadata['requirements']
|
92
|
+
|
93
|
+
if gemspec.respond_to?(:required_ruby_version=)
|
94
|
+
gemspec.required_ruby_version = metadata['required_ruby_version']
|
95
|
+
end
|
96
|
+
|
97
|
+
if gemspec.respond_to?(:required_rubygems_version=)
|
98
|
+
gemspec.required_rubygems_version = metadata['required_rubygems_version'] # read the RubyGems constraint, not the Ruby one
|
99
|
+
end
|
100
|
+
|
101
|
+
parse_versions = lambda { |versions|
|
102
|
+
case versions
|
103
|
+
when Array
|
104
|
+
versions.map { |v| v.to_s }
|
105
|
+
when String
|
106
|
+
versions.split(/,\s*/)
|
107
|
+
end
|
108
|
+
}
|
109
|
+
|
110
|
+
if metadata['dependencies']
|
111
|
+
metadata['dependencies'].each do |name,versions|
|
112
|
+
gemspec.add_dependency(name,parse_versions[versions])
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
if metadata['runtime_dependencies']
|
117
|
+
metadata['runtime_dependencies'].each do |name,versions|
|
118
|
+
gemspec.add_runtime_dependency(name,parse_versions[versions])
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
if metadata['development_dependencies']
|
123
|
+
metadata['development_dependencies'].each do |name,versions|
|
124
|
+
gemspec.add_development_dependency(name,parse_versions[versions])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
data/lib/gscraper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# GScraper - A web-scraping interface to various Google Services.
|
3
3
|
#
|
4
|
-
# Copyright (c) 2007-
|
4
|
+
# Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
5
|
#
|
6
6
|
# This program is free software; you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/gscraper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# GScraper - A web-scraping interface to various Google Services.
|
3
3
|
#
|
4
|
-
# Copyright (c) 2007-
|
4
|
+
# Copyright (c) 2007-2012 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
5
|
#
|
6
6
|
# This program is free software; you can redistribute it and/or modify
|
7
7
|
# it under the terms of the GNU General Public License as published by
|
@@ -21,7 +21,6 @@
|
|
21
21
|
require 'uri/http'
|
22
22
|
require 'mechanize'
|
23
23
|
require 'nokogiri'
|
24
|
-
require 'open-uri'
|
25
24
|
|
26
25
|
module GScraper
|
27
26
|
# Common proxy port.
|
@@ -32,8 +31,13 @@ module GScraper
|
|
32
31
|
#
|
33
32
|
# @return [Hash]
|
34
33
|
#
|
35
|
-
def
|
36
|
-
@@gscraper_proxy ||= {
|
34
|
+
def self.proxy
|
35
|
+
@@gscraper_proxy ||= {
|
36
|
+
:host => nil,
|
37
|
+
:port => COMMON_PROXY_PORT,
|
38
|
+
:user => nil,
|
39
|
+
:password => nil
|
40
|
+
}
|
37
41
|
end
|
38
42
|
|
39
43
|
#
|
@@ -54,13 +58,13 @@ module GScraper
|
|
54
58
|
# @option proxy_info [String] :password
|
55
59
|
# The password to login with.
|
56
60
|
#
|
57
|
-
def
|
58
|
-
if
|
61
|
+
def self.proxy_uri(proxy=self.proxy)
|
62
|
+
if proxy[:host]
|
59
63
|
return URI::HTTP.build(
|
60
|
-
:host
|
61
|
-
:port
|
62
|
-
:userinfo => "#{
|
63
|
-
:path
|
64
|
+
:host => proxy[:host],
|
65
|
+
:port => proxy[:port],
|
66
|
+
:userinfo => "#{proxy[:user]}:#{proxy[:password]}",
|
67
|
+
:path => '/'
|
64
68
|
)
|
65
69
|
end
|
66
70
|
end
|
@@ -70,7 +74,7 @@ module GScraper
|
|
70
74
|
#
|
71
75
|
# @return [Array<String>]
|
72
76
|
#
|
73
|
-
def
|
77
|
+
def self.user_agent_aliases
|
74
78
|
Mechanize::AGENT_ALIASES
|
75
79
|
end
|
76
80
|
|
@@ -79,8 +83,8 @@ module GScraper
|
|
79
83
|
#
|
80
84
|
# @return [String]
|
81
85
|
#
|
82
|
-
def
|
83
|
-
@@gscraper_user_agent ||=
|
86
|
+
def self.user_agent
|
87
|
+
@@gscraper_user_agent ||= self.user_agent_aliases['Windows IE 6']
|
84
88
|
end
|
85
89
|
|
86
90
|
#
|
@@ -92,7 +96,7 @@ module GScraper
|
|
92
96
|
# @return [String]
|
93
97
|
# The new User-Agent string.
|
94
98
|
#
|
95
|
-
def
|
99
|
+
def self.user_agent=(agent)
|
96
100
|
@@gscraper_user_agent = agent
|
97
101
|
end
|
98
102
|
|
@@ -105,8 +109,8 @@ module GScraper
|
|
105
109
|
# @return [String]
|
106
110
|
# The new User-Agent string.
|
107
111
|
#
|
108
|
-
def
|
109
|
-
@@gscraper_user_agent =
|
112
|
+
def self.user_agent_alias=(name)
|
113
|
+
@@gscraper_user_agent = self.user_agent_aliases[name.to_s]
|
110
114
|
end
|
111
115
|
|
112
116
|
#
|
@@ -143,18 +147,18 @@ module GScraper
|
|
143
147
|
# GScraper.web_agent(:user_agent_alias => 'Linux Mozilla')
|
144
148
|
# GScraper.web_agent(:user_agent => 'Google Bot')
|
145
149
|
#
|
146
|
-
def
|
150
|
+
def self.web_agent(options={})
|
147
151
|
agent = Mechanize.new
|
148
152
|
|
149
153
|
if options[:user_agent_alias]
|
150
154
|
agent.user_agent_alias = options[:user_agent_alias]
|
151
155
|
elsif options[:user_agent]
|
152
156
|
agent.user_agent = options[:user_agent]
|
153
|
-
elsif
|
154
|
-
agent.user_agent =
|
157
|
+
elsif user_agent
|
158
|
+
agent.user_agent = self.user_agent
|
155
159
|
end
|
156
160
|
|
157
|
-
proxy = (options[:proxy] ||
|
161
|
+
proxy = (options[:proxy] || self.proxy)
|
158
162
|
if proxy[:host]
|
159
163
|
agent.set_proxy(proxy[:host],proxy[:port],proxy[:user],proxy[:password])
|
160
164
|
end
|