metainspector 2.3.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/README.md +15 -8
- data/lib/meta_inspector/document.rb +3 -2
- data/lib/meta_inspector/request.rb +21 -15
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +4 -1
- data/spec/document_spec.rb +9 -13
- data/spec/redirections_spec.rb +24 -46
- data/spec/request_spec.rb +1 -1
- data/spec/spec_helper.rb +2 -0
- metadata +49 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19cf1cb2c804ede2e36407d38a150840c5f19de6
|
4
|
+
data.tar.gz: 6e3cc4349b842ee52e5d4f9b993603662a573037
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f77d946f82fd31d6683c512cef527370e99b7edef0a84c355d25e0ce2c73c2c12de642438c9f4c28a80b8e82b73d9974e2f52a8b0eab2e0cfc1ba900aa7148d2
|
7
|
+
data.tar.gz: cfa3f9e830fb56a8e3202c26827158d8504ca0ef0b1787953599b282b9d5005aa47275979e7d212a6f8dbed5a56aafd84ca0fd9025c700e819da8fc3bbbd183f
|
data/.travis.yml
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
rvm:
|
2
|
-
|
3
|
-
|
2
|
+
- 2.0.0
|
3
|
+
- 2.1.3
|
data/README.md
CHANGED
@@ -8,6 +8,17 @@ You give it an URL, and it lets you easily get its title, links, images, charset
|
|
8
8
|
|
9
9
|
You can try MetaInspector live at this little demo: [https://metainspectordemo.herokuapp.com](https://metainspectordemo.herokuapp.com)
|
10
10
|
|
11
|
+
## Changes in 3.0
|
12
|
+
|
13
|
+
This latest release introduces some backwards-incompatible changes, so we've decided to do a major version upgrade:
|
14
|
+
|
15
|
+
* The redirect API has changed: the `:allow_redirections` option now expects only a boolean, which defaults to `true`. That is, you can no longer specify `:safe`, `:unsafe` or `:all`.
|
16
|
+
* We've dropped support for Ruby < 2.
|
17
|
+
|
18
|
+
Also, we've introduced a new feature:
|
19
|
+
|
20
|
+
* Persist cookies across redirects. MetaInspector now includes the received cookies when following redirects. This fixes some cases where following a redirect would fail, sometimes getting caught in a redirection loop.
|
21
|
+
|
11
22
|
## Installation
|
12
23
|
|
13
24
|
Install the gem from RubyGems:
|
@@ -18,7 +29,7 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
18
29
|
|
19
30
|
gem 'metainspector'
|
20
31
|
|
21
|
-
This gem is tested on Ruby versions
|
32
|
+
This gem is tested on Ruby versions 2.0.0 and 2.1.3.
|
22
33
|
|
23
34
|
## Usage
|
24
35
|
|
@@ -182,15 +193,11 @@ You can set a different timeout with a second parameter, like this:
|
|
182
193
|
|
183
194
|
### Redirections
|
184
195
|
|
185
|
-
By default,
|
186
|
-
|
187
|
-
However, you can tell MetaInspector to allow these redirections with the option `:allow_redirections`, like this:
|
196
|
+
By default, MetaInspector will follow redirects (up to a limit of 10).
|
188
197
|
|
189
|
-
|
190
|
-
page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
|
198
|
+
If you want to disallow redirects, you can do it like this:
|
191
199
|
|
192
|
-
|
193
|
-
page = MetaInspector.new('facebook.com', :allow_redirections => :all)
|
200
|
+
page = MetaInspector.new('facebook.com', :allow_redirections => false)
|
194
201
|
|
195
202
|
### Headers
|
196
203
|
|
@@ -11,7 +11,7 @@ module MetaInspector
|
|
11
11
|
# Options:
|
12
12
|
# => timeout: defaults to 20 seconds
|
13
13
|
# => html_content_only: if an exception should be raised if request content-type is not text/html. Defaults to false
|
14
|
-
# => allow_redirections: when
|
14
|
+
# => allow_redirections: when true, follow HTTP redirects. Defaults to true
|
15
15
|
# => document: the html of the url as a string
|
16
16
|
# => warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil
|
17
17
|
# => headers: object containing custom headers for the request
|
@@ -66,7 +66,8 @@ module MetaInspector
|
|
66
66
|
{ :timeout => 20,
|
67
67
|
:html_content_only => false,
|
68
68
|
:warn_level => :raise,
|
69
|
-
:headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
69
|
+
:headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
|
70
|
+
:allow_redirections => true
|
70
71
|
}
|
71
72
|
end
|
72
73
|
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
|
-
require '
|
4
|
-
require '
|
3
|
+
require 'faraday'
|
4
|
+
require 'faraday_middleware'
|
5
|
+
require 'faraday-cookie_jar'
|
5
6
|
require 'timeout'
|
6
7
|
|
7
8
|
module MetaInspector
|
@@ -14,6 +15,7 @@ module MetaInspector
|
|
14
15
|
options = defaults.merge(options)
|
15
16
|
|
16
17
|
@url = initial_url
|
18
|
+
|
17
19
|
@allow_redirections = options[:allow_redirections]
|
18
20
|
@timeout = options[:timeout]
|
19
21
|
@exception_log = options[:exception_log]
|
@@ -26,36 +28,40 @@ module MetaInspector
|
|
26
28
|
def_delegators :@url, :url
|
27
29
|
|
28
30
|
def read
|
29
|
-
response.
|
31
|
+
response.body if response
|
30
32
|
end
|
31
33
|
|
32
34
|
def content_type
|
33
|
-
response.
|
35
|
+
response.headers["content-type"].split(";")[0] if response
|
34
36
|
end
|
35
37
|
|
36
38
|
private
|
37
39
|
|
38
40
|
def response
|
39
41
|
Timeout::timeout(@timeout) { @response ||= fetch }
|
40
|
-
rescue TimeoutError,
|
42
|
+
rescue TimeoutError, Faraday::ConnectionFailed, RuntimeError => e
|
41
43
|
@exception_log << e
|
42
44
|
nil
|
43
45
|
end
|
44
46
|
|
45
47
|
def fetch
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
48
|
+
session = Faraday.new(:url => url) do |faraday|
|
49
|
+
if @allow_redirections
|
50
|
+
faraday.use FaradayMiddleware::FollowRedirects, limit: 10
|
51
|
+
faraday.use :cookie_jar
|
52
|
+
end
|
53
|
+
faraday.headers.merge!(@headers || {})
|
54
|
+
faraday.adapter :net_http
|
55
|
+
end
|
56
|
+
response = session.get
|
57
|
+
|
58
|
+
@url.url = response.env.url.to_s
|
59
|
+
|
60
|
+
response
|
55
61
|
end
|
56
62
|
|
57
63
|
def defaults
|
58
|
-
{ timeout: 20, exception_log: MetaInspector::ExceptionLog.new }
|
64
|
+
{ timeout: 20, exception_log: MetaInspector::ExceptionLog.new, allow_redirections: true }
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
data/meta_inspector.gemspec
CHANGED
@@ -16,11 +16,14 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = MetaInspector::VERSION
|
17
17
|
|
18
18
|
gem.add_dependency 'nokogiri', '~> 1.6'
|
19
|
-
gem.add_dependency '
|
19
|
+
gem.add_dependency 'faraday'
|
20
|
+
gem.add_dependency 'faraday_middleware'
|
21
|
+
gem.add_dependency 'faraday-cookie_jar'
|
20
22
|
gem.add_dependency 'addressable', '~> 2.3.5'
|
21
23
|
|
22
24
|
gem.add_development_dependency 'rspec', '2.14.1'
|
23
25
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
26
|
+
gem.add_development_dependency 'webmock'
|
24
27
|
gem.add_development_dependency 'awesome_print', '~> 1.2.0'
|
25
28
|
gem.add_development_dependency 'rake', '~> 10.1.0'
|
26
29
|
gem.add_development_dependency 'pry'
|
data/spec/document_spec.rb
CHANGED
@@ -93,26 +93,22 @@ describe MetaInspector::Document do
|
|
93
93
|
|
94
94
|
describe 'headers' do
|
95
95
|
it "should include default headers" do
|
96
|
-
url
|
97
|
-
request = double('Request', base_uri: url)
|
96
|
+
url = "http://pagerankalert.com/"
|
98
97
|
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
98
|
+
|
99
|
+
headers = {}
|
100
|
+
headers.should_receive(:merge!).with(expected_headers)
|
101
|
+
Faraday::Connection.any_instance.stub(:headers){headers}
|
104
102
|
MetaInspector::Document.new(url)
|
105
103
|
end
|
106
104
|
|
107
105
|
it "should include passed headers on the request" do
|
108
|
-
url
|
106
|
+
url = "http://pagerankalert.com/"
|
109
107
|
headers = {'User-Agent' => 'Mozilla', 'Referer' => 'https://github.com/'}
|
110
|
-
request = double('Request', base_uri: url)
|
111
|
-
|
112
|
-
MetaInspector::Request.any_instance.should_receive(:open)
|
113
|
-
.with(url, headers)
|
114
|
-
.and_return(request)
|
115
108
|
|
109
|
+
headers = {}
|
110
|
+
headers.should_receive(:merge!).with(headers)
|
111
|
+
Faraday::Connection.any_instance.stub(:headers){headers}
|
116
112
|
MetaInspector::Document.new(url, headers: headers)
|
117
113
|
end
|
118
114
|
end
|
data/spec/redirections_spec.rb
CHANGED
@@ -6,64 +6,42 @@ describe MetaInspector do
|
|
6
6
|
describe "redirections" do
|
7
7
|
let(:logger) { MetaInspector::ExceptionLog.new }
|
8
8
|
|
9
|
-
|
10
|
-
it "disallows
|
11
|
-
|
12
|
-
|
13
|
-
MetaInspector.new("http://facebook.com", exception_log: logger)
|
14
|
-
end
|
15
|
-
|
16
|
-
it "allows safe redirections when :allow_redirections => :safe" do
|
17
|
-
logger.should_not receive(:<<)
|
18
|
-
|
19
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe, exception_log: logger)
|
20
|
-
|
21
|
-
m.url.should == "https://www.facebook.com/"
|
9
|
+
context "when redirections are turned off" do
|
10
|
+
it "disallows redirections" do
|
11
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => false, exception_log: logger)
|
12
|
+
m.url.should == "http://facebook.com/"
|
22
13
|
end
|
14
|
+
end
|
23
15
|
|
24
|
-
|
16
|
+
context "when redirections are on (default)" do
|
17
|
+
it "allows follows redirections" do
|
25
18
|
logger.should_not receive(:<<)
|
26
19
|
|
27
|
-
m = MetaInspector.new("http://facebook.com",
|
20
|
+
m = MetaInspector.new("http://facebook.com", exception_log: logger)
|
28
21
|
|
29
22
|
m.url.should == "https://www.facebook.com/"
|
30
23
|
end
|
31
24
|
end
|
32
25
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
it "allows unsafe redirections when :allow_redirections => :all" do
|
26
|
+
context "when there are cookies required for proper redirection" do
|
27
|
+
before(:all){WebMock.enable!}
|
28
|
+
after(:all){WebMock.disable!}
|
29
|
+
|
30
|
+
it "allows follows redirections while sending the cookies" do
|
31
|
+
stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/").to_return(
|
32
|
+
:status => 302,
|
33
|
+
:headers => {
|
34
|
+
"Location" => "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1",
|
35
|
+
"Set-Cookie" => "EMETA_COOKIE_CHECK=1; path=/; domain=clarionledger.com"
|
36
|
+
})
|
37
|
+
stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
38
|
+
.with(:headers => {"Cookie" => "EMETA_COOKIE_CHECK=1"})
|
47
39
|
logger.should_not receive(:<<)
|
48
40
|
|
49
|
-
m = MetaInspector.new("
|
50
|
-
|
51
|
-
m.url.should == "http://unsafe-facebook.com/"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
describe "Redirections should update the base_uri" do
|
56
|
-
it "updates the base_uri on safe redirections" do
|
57
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
|
41
|
+
m = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/", exception_log: logger)
|
58
42
|
|
59
|
-
m.url.should == "
|
60
|
-
end
|
61
|
-
|
62
|
-
it "updates the base_uri on all redirections" do
|
63
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :all)
|
64
|
-
|
65
|
-
m.url.should == "https://www.facebook.com/"
|
66
|
-
end
|
43
|
+
m.url.should == "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1"
|
44
|
+
end
|
67
45
|
end
|
68
46
|
end
|
69
47
|
end
|
data/spec/request_spec.rb
CHANGED
@@ -45,7 +45,7 @@ describe MetaInspector::Request do
|
|
45
45
|
|
46
46
|
it "should handle socket errors" do
|
47
47
|
TCPSocket.stub(:open).and_raise(SocketError)
|
48
|
-
logger.should receive(:<<).with(an_instance_of(
|
48
|
+
logger.should receive(:<<).with(an_instance_of(Faraday::ConnectionFailed))
|
49
49
|
|
50
50
|
MetaInspector::Request.new(url('http://caca232dsdsaer3sdsd-asd343.org'), exception_log: logger)
|
51
51
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), "/../lib")
|
4
4
|
require 'meta_inspector'
|
5
5
|
require 'fakeweb'
|
6
|
+
require "webmock/rspec"
|
6
7
|
require "pry"
|
7
8
|
|
8
9
|
FakeWeb.allow_net_connect = false
|
10
|
+
WebMock.disable!
|
9
11
|
|
10
12
|
def fixture_file(filename)
|
11
13
|
return '' if filename == ''
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -25,19 +25,47 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: faraday
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0
|
33
|
+
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday_middleware
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: faraday-cookie_jar
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: addressable
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +108,20 @@ dependencies:
|
|
80
108
|
- - '='
|
81
109
|
- !ruby/object:Gem::Version
|
82
110
|
version: 1.3.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: webmock
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
83
125
|
- !ruby/object:Gem::Dependency
|
84
126
|
name: awesome_print
|
85
127
|
requirement: !ruby/object:Gem::Requirement
|