metainspector 1.14.0 → 1.15.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -93,13 +93,15 @@ You can set a different timeout with a second parameter, like this:
93
93
 
94
94
  ### Redirections
95
95
 
96
- MetaInspector allows safe redirects from http to https (for example, [http://github.com](http://github.com) => [https://github.com](https://github.com)) by default. With the option `:allow_safe_redirections => false`, it will throw exceptions on such redirects.
96
+ By default, redirections from HTTP to HTTPS and from HTTPS to HTTP are disallowed.
97
97
 
98
- page = MetaInspector.new('facebook.com', :allow_safe_redirections => false)
98
+ However, you can tell MetaInspector to allow these redirections with the option `:allow_redirections`, like this:
99
99
 
100
- To enable unsafe redirects from https to http (like, [https://example.com](https://example.com) => [http://example.com](http://example.com)) you can pass the option `:allow_unsafe_redirections => true`. If this option is not specified or is false an exception is thrown on such redirects.
101
-
102
- page = MetaInspector.new('facebook.com', :allow_unsafe_redirections => true)
100
+ # This will allow HTTP => HTTPS redirections
101
+ page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
102
+
103
+ # And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
104
+ page = MetaInspector.new('facebook.com', :allow_redirections => :all)
103
105
 
104
106
  ### HTML Content Only
105
107
 
@@ -10,14 +10,13 @@ require 'timeout'
10
10
  module MetaInspector
11
11
  class Scraper
12
12
  attr_reader :url, :scheme, :host, :root_url, :errors, :content_type, :timeout, :html_content_only
13
- attr_reader :allow_safe_redirections, :allow_unsafe_redirections, :verbose
13
+ attr_reader :allow_redirections, :verbose
14
14
 
15
15
  # Initializes a new instance of MetaInspector, setting the URL to the one given
16
16
  # Options:
17
17
  # => timeout: defaults to 20 seconds
18
18
  # => html_content_only: if an exception should be raised if request content-type is not text/html. Defaults to false
19
- # => allow_safe_redirections: if redirects from http to https sites on the same domain should be allowed or not
20
- # => allow_unsafe_redirections: if redirects from https to http sites on the same domain should be allowed or not
19
+ # => allow_redirections: when :safe, allows HTTP => HTTPS redirections. When :all, it also allows HTTPS => HTTP
21
20
  # => document: the html of the url as a string
22
21
  # => verbose: if the errors should be logged to the screen
23
22
  def initialize(url, options = {})
@@ -30,11 +29,10 @@ module MetaInspector
30
29
  @timeout = options[:timeout]
31
30
  @data = Hashie::Rash.new
32
31
  @errors = []
33
- @html_content_only = options[:html_content_only]
34
- @allow_safe_redirections = options[:allow_safe_redirections]
35
- @allow_unsafe_redirections = options[:allow_unsafe_redirections]
36
- @verbose = options[:verbose]
37
- @document = options[:document]
32
+ @html_content_only = options[:html_content_only]
33
+ @allow_redirections = options[:allow_redirections]
34
+ @verbose = options[:verbose]
35
+ @document = options[:document]
38
36
  end
39
37
 
40
38
  # Returns the parsed document title, from the content of the <title> tag.
@@ -139,8 +137,6 @@ module MetaInspector
139
137
  {
140
138
  :timeout => 20,
141
139
  :html_content_only => false,
142
- :allow_safe_redirections => true,
143
- :allow_unsafe_redirections => false,
144
140
  :verbose => false
145
141
  }
146
142
  end
@@ -167,7 +163,7 @@ module MetaInspector
167
163
 
168
164
  # Makes the request to the server
169
165
  def request
170
- Timeout::timeout(timeout) { @request ||= open(url, {:allow_safe_redirections => allow_safe_redirections, :allow_unsafe_redirections => allow_unsafe_redirections}) }
166
+ Timeout::timeout(timeout) { @request ||= open(url, {:allow_redirections => allow_redirections}) }
171
167
 
172
168
  rescue TimeoutError
173
169
  add_fatal_error 'Timeout!!!'
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.14.0"
4
+ VERSION = "1.15.0"
5
5
  end
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
16
16
 
17
17
  gem.add_dependency 'nokogiri', '~> 1.5'
18
18
  gem.add_dependency 'rash', '0.3.2'
19
- gem.add_dependency 'open_uri_redirections', '0.0.1'
19
+ gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
20
20
 
21
21
  gem.add_development_dependency 'rspec', '2.12.0'
22
22
  gem.add_development_dependency 'fakeweb', '1.3.0'
@@ -5,23 +5,23 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
5
5
  describe MetaInspector do
6
6
  describe "redirections" do
7
7
  describe "safe redirections (HTTP to HTTPS)" do
8
- it "allows safe redirections by default" do
8
+ it "disallows safe redirections by default" do
9
9
  m = MetaInspector.new("http://facebook.com")
10
- m.title.should == "Hello From Facebook"
11
- m.should be_ok
10
+ m.title.should be_nil
11
+ m.should_not be_ok
12
+ m.errors.first.should == "Scraping exception: redirection forbidden: http://facebook.com -> https://www.facebook.com/"
12
13
  end
13
14
 
14
- it "allows safe redirections when specifically set to true" do
15
- m = MetaInspector.new("http://facebook.com", :allow_safe_redirections => true)
15
+ it "allows safe redirections when :allow_redirections => :safe" do
16
+ m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
16
17
  m.title.should == "Hello From Facebook"
17
18
  m.should be_ok
18
19
  end
19
20
 
20
- it "disallows safe redirections if set to false" do
21
- m = MetaInspector.new("http://facebook.com", :allow_safe_redirections => false)
22
- m.title.should be_nil
23
- m.should_not be_ok
24
- m.errors.first.should == "Scraping exception: redirection forbidden: http://facebook.com -> https://www.facebook.com/"
21
+ it "allows safe redirections when :allow_redirections => :all" do
22
+ m = MetaInspector.new("http://facebook.com", :allow_redirections => :all)
23
+ m.title.should == "Hello From Facebook"
24
+ m.should be_ok
25
25
  end
26
26
  end
27
27
 
@@ -33,15 +33,15 @@ describe MetaInspector do
33
33
  m.errors.first.should == "Scraping exception: redirection forbidden: https://unsafe-facebook.com -> http://unsafe-facebook.com/"
34
34
  end
35
35
 
36
- it "disallows unsafe redirections when specifically set to false" do
37
- m = MetaInspector.new("https://unsafe-facebook.com", :allow_unsafe_redirections => false)
36
+ it "disallows unsafe redirections when :allow_redirections => :safe" do
37
+ m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :safe)
38
38
  m.title.should be_nil
39
39
  m.should_not be_ok
40
40
  m.errors.first.should == "Scraping exception: redirection forbidden: https://unsafe-facebook.com -> http://unsafe-facebook.com/"
41
41
  end
42
42
 
43
- it "allows unsafe redirections if set to true" do
44
- m = MetaInspector.new("https://unsafe-facebook.com", :allow_unsafe_redirections => true)
43
+ it "allows unsafe redirections when :allow_redirections => :all" do
44
+ m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :all)
45
45
  m.title.should == "Hello From Unsafe Facebook"
46
46
  m.should be_ok
47
47
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 47
4
+ hash: 43
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 14
8
+ - 15
9
9
  - 0
10
- version: 1.14.0
10
+ version: 1.15.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-01-14 00:00:00 Z
18
+ date: 2013-01-19 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
@@ -54,14 +54,14 @@ dependencies:
54
54
  requirement: &id003 !ruby/object:Gem::Requirement
55
55
  none: false
56
56
  requirements:
57
- - - "="
57
+ - - ~>
58
58
  - !ruby/object:Gem::Version
59
- hash: 29
59
+ hash: 27
60
60
  segments:
61
61
  - 0
62
- - 0
63
62
  - 1
64
- version: 0.0.1
63
+ - 0
64
+ version: 0.1.0
65
65
  type: :runtime
66
66
  version_requirements: *id003
67
67
  - !ruby/object:Gem::Dependency