metainspector 1.14.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -93,13 +93,15 @@ You can set a different timeout with a second parameter, like this:
93
93
 
94
94
  ### Redirections
95
95
 
96
- MetaInspector allows safe redirects from http to https (for example, [http://github.com](http://github.com) => [https://github.com](https://github.com)) by default. With the option `:allow_safe_redirections => false`, it will throw exceptions on such redirects.
96
+ By default, redirections from HTTP to HTTPS and from HTTPS to HTTP are disallowed.
97
97
 
98
- page = MetaInspector.new('facebook.com', :allow_safe_redirections => false)
98
+ However, you can tell MetaInspector to allow these redirections with the option `:allow_redirections`, like this:
99
99
 
100
- To enable unsafe redirects from https to http (like, [https://example.com](https://example.com) => [http://example.com](http://example.com)) you can pass the option `:allow_unsafe_redirections => true`. If this option is not specified or is false an exception is thrown on such redirects.
101
-
102
- page = MetaInspector.new('facebook.com', :allow_unsafe_redirections => true)
100
+ # This will allow HTTP => HTTPS redirections
101
+ page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
102
+
103
+ # And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
104
+ page = MetaInspector.new('facebook.com', :allow_redirections => :all)
103
105
 
104
106
  ### HTML Content Only
105
107
 
@@ -10,14 +10,13 @@ require 'timeout'
10
10
  module MetaInspector
11
11
  class Scraper
12
12
  attr_reader :url, :scheme, :host, :root_url, :errors, :content_type, :timeout, :html_content_only
13
- attr_reader :allow_safe_redirections, :allow_unsafe_redirections, :verbose
13
+ attr_reader :allow_redirections, :verbose
14
14
 
15
15
  # Initializes a new instance of MetaInspector, setting the URL to the one given
16
16
  # Options:
17
17
  # => timeout: defaults to 20 seconds
18
18
  # => html_content_only: whether an exception should be raised when the request content-type is not text/html. Defaults to false
19
- # => allow_safe_redirections: if redirects from http to https sites on the same domain should be allowed or not
20
- # => allow_unsafe_redirections: if redirects from https to http sites on the same domain should be allowed or not
19
+ # => allow_redirections: when :safe, allows HTTP => HTTPS redirections. When :all, it also allows HTTPS => HTTP
21
20
  # => document: the html of the url as a string
22
21
  # => verbose: if the errors should be logged to the screen
23
22
  def initialize(url, options = {})
@@ -30,11 +29,10 @@ module MetaInspector
30
29
  @timeout = options[:timeout]
31
30
  @data = Hashie::Rash.new
32
31
  @errors = []
33
- @html_content_only = options[:html_content_only]
34
- @allow_safe_redirections = options[:allow_safe_redirections]
35
- @allow_unsafe_redirections = options[:allow_unsafe_redirections]
36
- @verbose = options[:verbose]
37
- @document = options[:document]
32
+ @html_content_only = options[:html_content_only]
33
+ @allow_redirections = options[:allow_redirections]
34
+ @verbose = options[:verbose]
35
+ @document = options[:document]
38
36
  end
39
37
 
40
38
  # Returns the parsed document title, from the content of the <title> tag.
@@ -139,8 +137,6 @@ module MetaInspector
139
137
  {
140
138
  :timeout => 20,
141
139
  :html_content_only => false,
142
- :allow_safe_redirections => true,
143
- :allow_unsafe_redirections => false,
144
140
  :verbose => false
145
141
  }
146
142
  end
@@ -167,7 +163,7 @@ module MetaInspector
167
163
 
168
164
  # Makes the request to the server
169
165
  def request
170
- Timeout::timeout(timeout) { @request ||= open(url, {:allow_safe_redirections => allow_safe_redirections, :allow_unsafe_redirections => allow_unsafe_redirections}) }
166
+ Timeout::timeout(timeout) { @request ||= open(url, {:allow_redirections => allow_redirections}) }
171
167
 
172
168
  rescue TimeoutError
173
169
  add_fatal_error 'Timeout!!!'
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.14.0"
4
+ VERSION = "1.15.0"
5
5
  end
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
16
16
 
17
17
  gem.add_dependency 'nokogiri', '~> 1.5'
18
18
  gem.add_dependency 'rash', '0.3.2'
19
- gem.add_dependency 'open_uri_redirections', '0.0.1'
19
+ gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
20
20
 
21
21
  gem.add_development_dependency 'rspec', '2.12.0'
22
22
  gem.add_development_dependency 'fakeweb', '1.3.0'
@@ -5,23 +5,23 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
5
5
  describe MetaInspector do
6
6
  describe "redirections" do
7
7
  describe "safe redirections (HTTP to HTTPS)" do
8
- it "allows safe redirections by default" do
8
+ it "disallows safe redirections by default" do
9
9
  m = MetaInspector.new("http://facebook.com")
10
- m.title.should == "Hello From Facebook"
11
- m.should be_ok
10
+ m.title.should be_nil
11
+ m.should_not be_ok
12
+ m.errors.first.should == "Scraping exception: redirection forbidden: http://facebook.com -> https://www.facebook.com/"
12
13
  end
13
14
 
14
- it "allows safe redirections when specifically set to true" do
15
- m = MetaInspector.new("http://facebook.com", :allow_safe_redirections => true)
15
+ it "allows safe redirections when :allow_redirections => :safe" do
16
+ m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
16
17
  m.title.should == "Hello From Facebook"
17
18
  m.should be_ok
18
19
  end
19
20
 
20
- it "disallows safe redirections if set to false" do
21
- m = MetaInspector.new("http://facebook.com", :allow_safe_redirections => false)
22
- m.title.should be_nil
23
- m.should_not be_ok
24
- m.errors.first.should == "Scraping exception: redirection forbidden: http://facebook.com -> https://www.facebook.com/"
21
+ it "allows safe redirections when :allow_redirections => :all" do
22
+ m = MetaInspector.new("http://facebook.com", :allow_redirections => :all)
23
+ m.title.should == "Hello From Facebook"
24
+ m.should be_ok
25
25
  end
26
26
  end
27
27
 
@@ -33,15 +33,15 @@ describe MetaInspector do
33
33
  m.errors.first.should == "Scraping exception: redirection forbidden: https://unsafe-facebook.com -> http://unsafe-facebook.com/"
34
34
  end
35
35
 
36
- it "disallows unsafe redirections when specifically set to false" do
37
- m = MetaInspector.new("https://unsafe-facebook.com", :allow_unsafe_redirections => false)
36
+ it "disallows unsafe redirections when :allow_redirections => :safe" do
37
+ m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :safe)
38
38
  m.title.should be_nil
39
39
  m.should_not be_ok
40
40
  m.errors.first.should == "Scraping exception: redirection forbidden: https://unsafe-facebook.com -> http://unsafe-facebook.com/"
41
41
  end
42
42
 
43
- it "allows unsafe redirections if set to true" do
44
- m = MetaInspector.new("https://unsafe-facebook.com", :allow_unsafe_redirections => true)
43
+ it "allows unsafe redirections when :allow_redirections => :all" do
44
+ m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :all)
45
45
  m.title.should == "Hello From Unsafe Facebook"
46
46
  m.should be_ok
47
47
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 47
4
+ hash: 43
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 14
8
+ - 15
9
9
  - 0
10
- version: 1.14.0
10
+ version: 1.15.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-01-14 00:00:00 Z
18
+ date: 2013-01-19 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
@@ -54,14 +54,14 @@ dependencies:
54
54
  requirement: &id003 !ruby/object:Gem::Requirement
55
55
  none: false
56
56
  requirements:
57
- - - "="
57
+ - - ~>
58
58
  - !ruby/object:Gem::Version
59
- hash: 29
59
+ hash: 27
60
60
  segments:
61
61
  - 0
62
- - 0
63
62
  - 1
64
- version: 0.0.1
63
+ - 0
64
+ version: 0.1.0
65
65
  type: :runtime
66
66
  version_requirements: *id003
67
67
  - !ruby/object:Gem::Dependency