metainspector 2.3.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/README.md +15 -8
- data/lib/meta_inspector/document.rb +3 -2
- data/lib/meta_inspector/request.rb +21 -15
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +4 -1
- data/spec/document_spec.rb +9 -13
- data/spec/redirections_spec.rb +24 -46
- data/spec/request_spec.rb +1 -1
- data/spec/spec_helper.rb +2 -0
- metadata +49 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19cf1cb2c804ede2e36407d38a150840c5f19de6
|
4
|
+
data.tar.gz: 6e3cc4349b842ee52e5d4f9b993603662a573037
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f77d946f82fd31d6683c512cef527370e99b7edef0a84c355d25e0ce2c73c2c12de642438c9f4c28a80b8e82b73d9974e2f52a8b0eab2e0cfc1ba900aa7148d2
|
7
|
+
data.tar.gz: cfa3f9e830fb56a8e3202c26827158d8504ca0ef0b1787953599b282b9d5005aa47275979e7d212a6f8dbed5a56aafd84ca0fd9025c700e819da8fc3bbbd183f
|
data/.travis.yml
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
rvm:
|
2
|
-
|
3
|
-
|
2
|
+
- 2.0.0
|
3
|
+
- 2.1.3
|
data/README.md
CHANGED
@@ -8,6 +8,17 @@ You give it an URL, and it lets you easily get its title, links, images, charset
|
|
8
8
|
|
9
9
|
You can try MetaInspector live at this little demo: [https://metainspectordemo.herokuapp.com](https://metainspectordemo.herokuapp.com)
|
10
10
|
|
11
|
+
## Changes in 3.0
|
12
|
+
|
13
|
+
This latest release introduces some backwards-incompatible changes, so we've decided to do a major version upgrade:
|
14
|
+
|
15
|
+
* The redirect API has changed: the `:allow_redirections` option now expects only a boolean, which is `true` by default. That is, you can no longer specify `:safe`, `:unsafe` or `:all`.
|
16
|
+
* We've dropped support for Ruby < 2.
|
17
|
+
|
18
|
+
Also, we've introduced a new feature:
|
19
|
+
|
20
|
+
* Persist cookies across redirects. MetaInspector now includes the received cookies when following redirects. This fixes some cases where a redirect would fail, sometimes by getting caught in a redirection loop.
|
21
|
+
|
11
22
|
## Installation
|
12
23
|
|
13
24
|
Install the gem from RubyGems:
|
@@ -18,7 +29,7 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
18
29
|
|
19
30
|
gem 'metainspector'
|
20
31
|
|
21
|
-
This gem is tested on Ruby versions
|
32
|
+
This gem is tested on Ruby versions 2.0.0 and 2.1.3.
|
22
33
|
|
23
34
|
## Usage
|
24
35
|
|
@@ -182,15 +193,11 @@ You can set a different timeout with a second parameter, like this:
|
|
182
193
|
|
183
194
|
### Redirections
|
184
195
|
|
185
|
-
By default,
|
186
|
-
|
187
|
-
However, you can tell MetaInspector to allow these redirections with the option `:allow_redirections`, like this:
|
196
|
+
By default, MetaInspector will follow redirects (up to a limit of 10).
|
188
197
|
|
189
|
-
|
190
|
-
page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
|
198
|
+
If you want to disallow redirects, you can do it like this:
|
191
199
|
|
192
|
-
|
193
|
-
page = MetaInspector.new('facebook.com', :allow_redirections => :all)
|
200
|
+
page = MetaInspector.new('facebook.com', :allow_redirections => false)
|
194
201
|
|
195
202
|
### Headers
|
196
203
|
|
@@ -11,7 +11,7 @@ module MetaInspector
|
|
11
11
|
# Options:
|
12
12
|
# => timeout: defaults to 20 seconds
|
13
13
|
# => html_content_only: whether an exception should be raised when the request content-type is not text/html. Defaults to false
|
14
|
-
# => allow_redirections: when
|
14
|
+
# => allow_redirections: when true, follow HTTP redirects. Defaults to true
|
15
15
|
# => document: the html of the url as a string
|
16
16
|
# => warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil
|
17
17
|
# => headers: object containing custom headers for the request
|
@@ -66,7 +66,8 @@ module MetaInspector
|
|
66
66
|
{ :timeout => 20,
|
67
67
|
:html_content_only => false,
|
68
68
|
:warn_level => :raise,
|
69
|
-
:headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
69
|
+
:headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
|
70
|
+
:allow_redirections => true
|
70
71
|
}
|
71
72
|
end
|
72
73
|
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
|
-
require '
|
4
|
-
require '
|
3
|
+
require 'faraday'
|
4
|
+
require 'faraday_middleware'
|
5
|
+
require 'faraday-cookie_jar'
|
5
6
|
require 'timeout'
|
6
7
|
|
7
8
|
module MetaInspector
|
@@ -14,6 +15,7 @@ module MetaInspector
|
|
14
15
|
options = defaults.merge(options)
|
15
16
|
|
16
17
|
@url = initial_url
|
18
|
+
|
17
19
|
@allow_redirections = options[:allow_redirections]
|
18
20
|
@timeout = options[:timeout]
|
19
21
|
@exception_log = options[:exception_log]
|
@@ -26,36 +28,40 @@ module MetaInspector
|
|
26
28
|
def_delegators :@url, :url
|
27
29
|
|
28
30
|
def read
|
29
|
-
response.
|
31
|
+
response.body if response
|
30
32
|
end
|
31
33
|
|
32
34
|
def content_type
|
33
|
-
response.
|
35
|
+
response.headers["content-type"].split(";")[0] if response
|
34
36
|
end
|
35
37
|
|
36
38
|
private
|
37
39
|
|
38
40
|
def response
|
39
41
|
Timeout::timeout(@timeout) { @response ||= fetch }
|
40
|
-
rescue TimeoutError,
|
42
|
+
rescue TimeoutError, Faraday::ConnectionFailed, RuntimeError => e
|
41
43
|
@exception_log << e
|
42
44
|
nil
|
43
45
|
end
|
44
46
|
|
45
47
|
def fetch
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
48
|
+
session = Faraday.new(:url => url) do |faraday|
|
49
|
+
if @allow_redirections
|
50
|
+
faraday.use FaradayMiddleware::FollowRedirects, limit: 10
|
51
|
+
faraday.use :cookie_jar
|
52
|
+
end
|
53
|
+
faraday.headers.merge!(@headers || {})
|
54
|
+
faraday.adapter :net_http
|
55
|
+
end
|
56
|
+
response = session.get
|
57
|
+
|
58
|
+
@url.url = response.env.url.to_s
|
59
|
+
|
60
|
+
response
|
55
61
|
end
|
56
62
|
|
57
63
|
def defaults
|
58
|
-
{ timeout: 20, exception_log: MetaInspector::ExceptionLog.new }
|
64
|
+
{ timeout: 20, exception_log: MetaInspector::ExceptionLog.new, allow_redirections: true }
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
data/meta_inspector.gemspec
CHANGED
@@ -16,11 +16,14 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = MetaInspector::VERSION
|
17
17
|
|
18
18
|
gem.add_dependency 'nokogiri', '~> 1.6'
|
19
|
-
gem.add_dependency '
|
19
|
+
gem.add_dependency 'faraday'
|
20
|
+
gem.add_dependency 'faraday_middleware'
|
21
|
+
gem.add_dependency 'faraday-cookie_jar'
|
20
22
|
gem.add_dependency 'addressable', '~> 2.3.5'
|
21
23
|
|
22
24
|
gem.add_development_dependency 'rspec', '2.14.1'
|
23
25
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
26
|
+
gem.add_development_dependency 'webmock'
|
24
27
|
gem.add_development_dependency 'awesome_print', '~> 1.2.0'
|
25
28
|
gem.add_development_dependency 'rake', '~> 10.1.0'
|
26
29
|
gem.add_development_dependency 'pry'
|
data/spec/document_spec.rb
CHANGED
@@ -93,26 +93,22 @@ describe MetaInspector::Document do
|
|
93
93
|
|
94
94
|
describe 'headers' do
|
95
95
|
it "should include default headers" do
|
96
|
-
url
|
97
|
-
request = double('Request', base_uri: url)
|
96
|
+
url = "http://pagerankalert.com/"
|
98
97
|
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
98
|
+
|
99
|
+
headers = {}
|
100
|
+
headers.should_receive(:merge!).with(expected_headers)
|
101
|
+
Faraday::Connection.any_instance.stub(:headers){headers}
|
104
102
|
MetaInspector::Document.new(url)
|
105
103
|
end
|
106
104
|
|
107
105
|
it "should include passed headers on the request" do
|
108
|
-
url
|
106
|
+
url = "http://pagerankalert.com/"
|
109
107
|
headers = {'User-Agent' => 'Mozilla', 'Referer' => 'https://github.com/'}
|
110
|
-
request = double('Request', base_uri: url)
|
111
|
-
|
112
|
-
MetaInspector::Request.any_instance.should_receive(:open)
|
113
|
-
.with(url, headers)
|
114
|
-
.and_return(request)
|
115
108
|
|
109
|
+
headers = {}
|
110
|
+
headers.should_receive(:merge!).with(headers)
|
111
|
+
Faraday::Connection.any_instance.stub(:headers){headers}
|
116
112
|
MetaInspector::Document.new(url, headers: headers)
|
117
113
|
end
|
118
114
|
end
|
data/spec/redirections_spec.rb
CHANGED
@@ -6,64 +6,42 @@ describe MetaInspector do
|
|
6
6
|
describe "redirections" do
|
7
7
|
let(:logger) { MetaInspector::ExceptionLog.new }
|
8
8
|
|
9
|
-
|
10
|
-
it "disallows
|
11
|
-
|
12
|
-
|
13
|
-
MetaInspector.new("http://facebook.com", exception_log: logger)
|
14
|
-
end
|
15
|
-
|
16
|
-
it "allows safe redirections when :allow_redirections => :safe" do
|
17
|
-
logger.should_not receive(:<<)
|
18
|
-
|
19
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe, exception_log: logger)
|
20
|
-
|
21
|
-
m.url.should == "https://www.facebook.com/"
|
9
|
+
context "when redirections are turned off" do
|
10
|
+
it "disallows redirections" do
|
11
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => false, exception_log: logger)
|
12
|
+
m.url.should == "http://facebook.com/"
|
22
13
|
end
|
14
|
+
end
|
23
15
|
|
24
|
-
|
16
|
+
context "when redirections are on (default)" do
|
17
|
+
it "allows follows redirections" do
|
25
18
|
logger.should_not receive(:<<)
|
26
19
|
|
27
|
-
m = MetaInspector.new("http://facebook.com",
|
20
|
+
m = MetaInspector.new("http://facebook.com", exception_log: logger)
|
28
21
|
|
29
22
|
m.url.should == "https://www.facebook.com/"
|
30
23
|
end
|
31
24
|
end
|
32
25
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
it "allows unsafe redirections when :allow_redirections => :all" do
|
26
|
+
context "when there are cookies required for proper redirection" do
|
27
|
+
before(:all){WebMock.enable!}
|
28
|
+
after(:all){WebMock.disable!}
|
29
|
+
|
30
|
+
it "allows follows redirections while sending the cookies" do
|
31
|
+
stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/").to_return(
|
32
|
+
:status => 302,
|
33
|
+
:headers => {
|
34
|
+
"Location" => "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1",
|
35
|
+
"Set-Cookie" => "EMETA_COOKIE_CHECK=1; path=/; domain=clarionledger.com"
|
36
|
+
})
|
37
|
+
stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
38
|
+
.with(:headers => {"Cookie" => "EMETA_COOKIE_CHECK=1"})
|
47
39
|
logger.should_not receive(:<<)
|
48
40
|
|
49
|
-
m = MetaInspector.new("
|
50
|
-
|
51
|
-
m.url.should == "http://unsafe-facebook.com/"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
describe "Redirections should update the base_uri" do
|
56
|
-
it "updates the base_uri on safe redirections" do
|
57
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
|
41
|
+
m = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/", exception_log: logger)
|
58
42
|
|
59
|
-
m.url.should == "
|
60
|
-
end
|
61
|
-
|
62
|
-
it "updates the base_uri on all redirections" do
|
63
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :all)
|
64
|
-
|
65
|
-
m.url.should == "https://www.facebook.com/"
|
66
|
-
end
|
43
|
+
m.url.should == "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1"
|
44
|
+
end
|
67
45
|
end
|
68
46
|
end
|
69
47
|
end
|
data/spec/request_spec.rb
CHANGED
@@ -45,7 +45,7 @@ describe MetaInspector::Request do
|
|
45
45
|
|
46
46
|
it "should handle socket errors" do
|
47
47
|
TCPSocket.stub(:open).and_raise(SocketError)
|
48
|
-
logger.should receive(:<<).with(an_instance_of(
|
48
|
+
logger.should receive(:<<).with(an_instance_of(Faraday::ConnectionFailed))
|
49
49
|
|
50
50
|
MetaInspector::Request.new(url('http://caca232dsdsaer3sdsd-asd343.org'), exception_log: logger)
|
51
51
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), "/../lib")
|
4
4
|
require 'meta_inspector'
|
5
5
|
require 'fakeweb'
|
6
|
+
require "webmock/rspec"
|
6
7
|
require "pry"
|
7
8
|
|
8
9
|
FakeWeb.allow_net_connect = false
|
10
|
+
WebMock.disable!
|
9
11
|
|
10
12
|
def fixture_file(filename)
|
11
13
|
return '' if filename == ''
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -25,19 +25,47 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: faraday
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0
|
33
|
+
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday_middleware
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: faraday-cookie_jar
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: addressable
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +108,20 @@ dependencies:
|
|
80
108
|
- - '='
|
81
109
|
- !ruby/object:Gem::Version
|
82
110
|
version: 1.3.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: webmock
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
83
125
|
- !ruby/object:Gem::Dependency
|
84
126
|
name: awesome_print
|
85
127
|
requirement: !ruby/object:Gem::Requirement
|