metainspector 1.14.0 → 1.15.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -5
- data/lib/meta_inspector/scraper.rb +7 -11
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -1
- data/spec/redirections_spec.rb +14 -14
- metadata +8 -8
data/README.md
CHANGED
@@ -93,13 +93,15 @@ You can set a different timeout with a second parameter, like this:
|
|
93
93
|
|
94
94
|
### Redirections
|
95
95
|
|
96
|
-
|
96
|
+
By default, redirections from HTTP to HTTPS and from HTTPS to HTTP are disallowed.
|
97
97
|
|
98
|
-
|
98
|
+
However, you can tell MetaInspector to allow these redirections with the option `:allow_redirections`, like this:
|
99
99
|
|
100
|
-
|
101
|
-
|
102
|
-
|
100
|
+
# This will allow HTTP => HTTPS redirections
|
101
|
+
page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
|
102
|
+
|
103
|
+
# And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
|
104
|
+
page = MetaInspector.new('facebook.com', :allow_redirections => :all)
|
103
105
|
|
104
106
|
### HTML Content Only
|
105
107
|
|
@@ -10,14 +10,13 @@ require 'timeout'
|
|
10
10
|
module MetaInspector
|
11
11
|
class Scraper
|
12
12
|
attr_reader :url, :scheme, :host, :root_url, :errors, :content_type, :timeout, :html_content_only
|
13
|
-
attr_reader :
|
13
|
+
attr_reader :allow_redirections, :verbose
|
14
14
|
|
15
15
|
# Initializes a new instance of MetaInspector, setting the URL to the one given
|
16
16
|
# Options:
|
17
17
|
# => timeout: defaults to 20 seconds
|
18
18
|
# => html_content_only: whether an exception should be raised when the request content-type is not text/html. Defaults to false
|
19
|
-
# =>
|
20
|
-
# => allow_unsafe_redirections: if redirects from https to http sites on the same domain should be allowed or not
|
19
|
+
# => allow_redirections: when :safe, allows HTTP => HTTPS redirections. When :all, it also allows HTTPS => HTTP
|
21
20
|
# => document: the html of the url as a string
|
22
21
|
# => verbose: if the errors should be logged to the screen
|
23
22
|
def initialize(url, options = {})
|
@@ -30,11 +29,10 @@ module MetaInspector
|
|
30
29
|
@timeout = options[:timeout]
|
31
30
|
@data = Hashie::Rash.new
|
32
31
|
@errors = []
|
33
|
-
@html_content_only
|
34
|
-
@
|
35
|
-
@
|
36
|
-
@
|
37
|
-
@document = options[:document]
|
32
|
+
@html_content_only = options[:html_content_only]
|
33
|
+
@allow_redirections = options[:allow_redirections]
|
34
|
+
@verbose = options[:verbose]
|
35
|
+
@document = options[:document]
|
38
36
|
end
|
39
37
|
|
40
38
|
# Returns the parsed document title, from the content of the <title> tag.
|
@@ -139,8 +137,6 @@ module MetaInspector
|
|
139
137
|
{
|
140
138
|
:timeout => 20,
|
141
139
|
:html_content_only => false,
|
142
|
-
:allow_safe_redirections => true,
|
143
|
-
:allow_unsafe_redirections => false,
|
144
140
|
:verbose => false
|
145
141
|
}
|
146
142
|
end
|
@@ -167,7 +163,7 @@ module MetaInspector
|
|
167
163
|
|
168
164
|
# Makes the request to the server
|
169
165
|
def request
|
170
|
-
Timeout::timeout(timeout) { @request ||= open(url, {:
|
166
|
+
Timeout::timeout(timeout) { @request ||= open(url, {:allow_redirections => allow_redirections}) }
|
171
167
|
|
172
168
|
rescue TimeoutError
|
173
169
|
add_fatal_error 'Timeout!!!'
|
data/meta_inspector.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
|
17
17
|
gem.add_dependency 'nokogiri', '~> 1.5'
|
18
18
|
gem.add_dependency 'rash', '0.3.2'
|
19
|
-
gem.add_dependency 'open_uri_redirections', '0.0
|
19
|
+
gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
|
20
20
|
|
21
21
|
gem.add_development_dependency 'rspec', '2.12.0'
|
22
22
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
data/spec/redirections_spec.rb
CHANGED
@@ -5,23 +5,23 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
|
|
5
5
|
describe MetaInspector do
|
6
6
|
describe "redirections" do
|
7
7
|
describe "safe redirections (HTTP to HTTPS)" do
|
8
|
-
it "
|
8
|
+
it "disallows safe redirections by default" do
|
9
9
|
m = MetaInspector.new("http://facebook.com")
|
10
|
-
m.title.should
|
11
|
-
m.
|
10
|
+
m.title.should be_nil
|
11
|
+
m.should_not be_ok
|
12
|
+
m.errors.first.should == "Scraping exception: redirection forbidden: http://facebook.com -> https://www.facebook.com/"
|
12
13
|
end
|
13
14
|
|
14
|
-
it "allows safe redirections when
|
15
|
-
m = MetaInspector.new("http://facebook.com", :
|
15
|
+
it "allows safe redirections when :allow_redirections => :safe" do
|
16
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
|
16
17
|
m.title.should == "Hello From Facebook"
|
17
18
|
m.should be_ok
|
18
19
|
end
|
19
20
|
|
20
|
-
it "
|
21
|
-
m = MetaInspector.new("http://facebook.com", :
|
22
|
-
m.title.should
|
23
|
-
m.
|
24
|
-
m.errors.first.should == "Scraping exception: redirection forbidden: http://facebook.com -> https://www.facebook.com/"
|
21
|
+
it "allows safe redirections when :allow_redirections => :all" do
|
22
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => :all)
|
23
|
+
m.title.should == "Hello From Facebook"
|
24
|
+
m.should be_ok
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
@@ -33,15 +33,15 @@ describe MetaInspector do
|
|
33
33
|
m.errors.first.should == "Scraping exception: redirection forbidden: https://unsafe-facebook.com -> http://unsafe-facebook.com/"
|
34
34
|
end
|
35
35
|
|
36
|
-
it "disallows unsafe redirections when
|
37
|
-
m = MetaInspector.new("https://unsafe-facebook.com", :
|
36
|
+
it "disallows unsafe redirections when :allow_redirections => :safe" do
|
37
|
+
m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :safe)
|
38
38
|
m.title.should be_nil
|
39
39
|
m.should_not be_ok
|
40
40
|
m.errors.first.should == "Scraping exception: redirection forbidden: https://unsafe-facebook.com -> http://unsafe-facebook.com/"
|
41
41
|
end
|
42
42
|
|
43
|
-
it "allows unsafe redirections
|
44
|
-
m = MetaInspector.new("https://unsafe-facebook.com", :
|
43
|
+
it "allows unsafe redirections when :allow_redirections => :all" do
|
44
|
+
m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :all)
|
45
45
|
m.title.should == "Hello From Unsafe Facebook"
|
46
46
|
m.should be_ok
|
47
47
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 43
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
8
|
+
- 15
|
9
9
|
- 0
|
10
|
-
version: 1.
|
10
|
+
version: 1.15.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-01-
|
18
|
+
date: 2013-01-19 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: nokogiri
|
@@ -54,14 +54,14 @@ dependencies:
|
|
54
54
|
requirement: &id003 !ruby/object:Gem::Requirement
|
55
55
|
none: false
|
56
56
|
requirements:
|
57
|
-
- -
|
57
|
+
- - ~>
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
hash:
|
59
|
+
hash: 27
|
60
60
|
segments:
|
61
61
|
- 0
|
62
|
-
- 0
|
63
62
|
- 1
|
64
|
-
|
63
|
+
- 0
|
64
|
+
version: 0.1.0
|
65
65
|
type: :runtime
|
66
66
|
version_requirements: *id003
|
67
67
|
- !ruby/object:Gem::Dependency
|