metainspector 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/README.md +7 -20
- data/lib/meta_inspector/document.rb +4 -5
- data/lib/meta_inspector/exception_log.rb +17 -6
- data/lib/meta_inspector/request.rb +2 -0
- data/lib/meta_inspector/url.rb +3 -4
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/document_spec.rb +14 -16
- data/spec/exception_log_spec.rb +52 -30
- data/spec/parser_spec.rb +6 -10
- data/spec/redirections_spec.rb +28 -24
- data/spec/request_spec.rb +6 -12
- data/spec/spec_helper.rb +1 -0
- data/spec/url_spec.rb +0 -18
- metadata +18 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcb1f8adaeb835639f198ee9ede5f54da02211fd
|
4
|
+
data.tar.gz: 40bb733a1e3f2b48011c535951a2a2ac2bb097dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f502ee4f24fb04b8d0afb8e369a5da2ac7c7c4a4b4c8a743cfbfd24f0f15125d139c5f35bd65c2fe9bce59d4015305c2fdf05f4fe6e696e691e90bb60e445b2
|
7
|
+
data.tar.gz: 2f9252d0283f815b15bc7e4c43b33e588975b63a935354e3e0c34639a0bad1c96d52b63308f9902120829b3747c796fc43f36d95d5ee78237c9dbc4fb64baa80
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -18,7 +18,7 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
18
18
|
|
19
19
|
gem 'metainspector'
|
20
20
|
|
21
|
-
This gem is tested on Ruby versions 1.9.2, 1.9.3 and 2.
|
21
|
+
This gem is tested on Ruby versions 1.9.2, 1.9.3, 2.0.0 and 2.1.0.
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
@@ -200,35 +200,22 @@ MetaInspector will try to parse all URLs by default. If you want to raise an exc
|
|
200
200
|
This is useful when using MetaInspector on web spidering. Although on the initial URL you'll probably have an HTML URL, following links you may find yourself trying to parse non-html URLs.
|
201
201
|
|
202
202
|
page = MetaInspector.new('http://example.com/image.png')
|
203
|
-
page.title # returns ""
|
204
203
|
page.content_type # "image/png"
|
205
|
-
page.
|
204
|
+
page.description # will return a garbled string
|
206
205
|
|
207
206
|
page = MetaInspector.new('http://example.com/image.png', :html_content_only => true)
|
208
|
-
page.title # returns nil
|
209
207
|
page.content_type # "image/png"
|
210
|
-
page.
|
211
|
-
page.exceptions.first.message # "The url provided contains image/png content instead of text/html content"
|
208
|
+
page.description # raises an exception
|
212
209
|
|
213
210
|
## Exception handling
|
214
211
|
|
215
|
-
|
212
|
+
By default, MetaInspector will raise the exceptions found. We think that this is the safest default: in case the URL you're trying to scrape is unreachable, you should clearly be notified, and treat the exception as needed in your app.
|
216
213
|
|
217
|
-
|
214
|
+
However, if you prefer, you can also set the `warn_level: :warn` option, so that any exceptions found are just printed as warnings (via `warn`, on standard error) instead of being raised.
|
218
215
|
|
219
|
-
|
216
|
+
You can also set the `warn_level: :store` option so that exceptions found will be silenced, and left for you to inspect on `page.exceptions`. You can also ask for `page.ok?`, which will return `true` if no exceptions are stored.
|
220
217
|
|
221
|
-
|
222
|
-
|
223
|
-
You can also specify what to do when encountering an exception. By default it
|
224
|
-
will store it, but you can also tell MetaInspector to warn about it on the log
|
225
|
-
console, or to raise the exceptions, like this:
|
226
|
-
|
227
|
-
# This will warn about the exception on console
|
228
|
-
page = MetaInspector.new('http://example.com', warn_level: :warn)
|
229
|
-
|
230
|
-
# This will raise the exception
|
231
|
-
page = MetaInspector.new('http://example.com', warn_level: :raise)
|
218
|
+
You should avoid using the `:store` option, or use it wisely, as silencing errors can be problematic; it's always better to face the errors and treat them accordingly.
|
232
219
|
|
233
220
|
## Examples
|
234
221
|
|
@@ -26,13 +26,12 @@ module MetaInspector
|
|
26
26
|
options[:warn_level] = :warn
|
27
27
|
end
|
28
28
|
|
29
|
-
@warn_level
|
30
|
-
|
31
|
-
@exception_log = MetaInspector::ExceptionLog.new(warn_level: warn_level)
|
29
|
+
@warn_level = options[:warn_level]
|
30
|
+
@exception_log = options[:exception_log] || MetaInspector::ExceptionLog.new(warn_level: warn_level)
|
32
31
|
@url = MetaInspector::URL.new(initial_url, exception_log: @exception_log)
|
33
32
|
@request = MetaInspector::Request.new(@url, allow_redirections: @allow_redirections,
|
34
33
|
timeout: @timeout,
|
35
|
-
exception_log: @exception_log)
|
34
|
+
exception_log: @exception_log) unless @document
|
36
35
|
@parser = MetaInspector::Parser.new(self, exception_log: @exception_log)
|
37
36
|
end
|
38
37
|
|
@@ -66,7 +65,7 @@ module MetaInspector
|
|
66
65
|
private
|
67
66
|
|
68
67
|
def defaults
|
69
|
-
{ :timeout => 20, :html_content_only => false }
|
68
|
+
{ :timeout => 20, :html_content_only => false, :warn_level => :raise }
|
70
69
|
end
|
71
70
|
|
72
71
|
def document
|
@@ -7,23 +7,34 @@ module MetaInspector
|
|
7
7
|
attr_reader :exceptions, :warn_level
|
8
8
|
|
9
9
|
def initialize(options = {})
|
10
|
-
|
10
|
+
options = defaults.merge(options)
|
11
11
|
@warn_level = options[:warn_level]
|
12
|
+
@exceptions = []
|
12
13
|
end
|
13
14
|
|
14
15
|
def <<(exception)
|
15
16
|
case warn_level
|
16
|
-
when :warn
|
17
|
-
warn exception
|
18
17
|
when :raise
|
19
18
|
raise exception
|
19
|
+
when :warn
|
20
|
+
warn exception
|
21
|
+
when :store
|
22
|
+
@exceptions << exception
|
20
23
|
end
|
21
|
-
|
22
|
-
@exceptions << exception
|
23
24
|
end
|
24
25
|
|
25
26
|
def ok?
|
26
|
-
|
27
|
+
if warn_level == :store
|
28
|
+
exceptions.empty?
|
29
|
+
else
|
30
|
+
warn "ExceptionLog#ok? should only be used when warn_level is :store"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def defaults
|
37
|
+
{ warn_level: :raise }
|
27
38
|
end
|
28
39
|
end
|
29
40
|
end
|
@@ -17,6 +17,8 @@ module MetaInspector
|
|
17
17
|
@allow_redirections = options[:allow_redirections]
|
18
18
|
@timeout = options[:timeout]
|
19
19
|
@exception_log = options[:exception_log]
|
20
|
+
|
21
|
+
response # as soon as it is set up, we make the request so we can fail early
|
20
22
|
end
|
21
23
|
|
22
24
|
extend Forwardable
|
data/lib/meta_inspector/url.rb
CHANGED
@@ -44,8 +44,7 @@ module MetaInspector
|
|
44
44
|
else
|
45
45
|
Addressable::URI.join(base_url, url).normalize.to_s
|
46
46
|
end
|
47
|
-
rescue
|
48
|
-
@exception_log << e
|
47
|
+
rescue Addressable::URI::InvalidURIError => e
|
49
48
|
nil
|
50
49
|
end
|
51
50
|
|
@@ -66,9 +65,9 @@ module MetaInspector
|
|
66
65
|
end
|
67
66
|
|
68
67
|
def parsed(url)
|
69
|
-
URI.parse(url)
|
68
|
+
Addressable::URI.parse(url)
|
70
69
|
|
71
|
-
rescue URI::InvalidURIError
|
70
|
+
rescue Addressable::URI::InvalidURIError => e
|
72
71
|
@exception_log << e
|
73
72
|
nil
|
74
73
|
end
|
data/spec/document_spec.rb
CHANGED
@@ -57,38 +57,36 @@ describe MetaInspector::Document do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
describe 'exception handling' do
|
60
|
+
let(:logger) { MetaInspector::ExceptionLog.new }
|
61
|
+
|
60
62
|
it "should parse images when parse_html_content_type_only is not specified" do
|
61
|
-
|
62
|
-
desc = image_url.description
|
63
|
+
logger.should_not receive(:<<)
|
63
64
|
|
64
|
-
image_url.
|
65
|
+
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', exception_log: logger)
|
66
|
+
image_url.title
|
65
67
|
end
|
66
68
|
|
67
69
|
it "should parse images when parse_html_content_type_only is false" do
|
68
|
-
|
69
|
-
desc = image_url.description
|
70
|
+
logger.should_not receive(:<<)
|
70
71
|
|
71
|
-
image_url.
|
72
|
+
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: false, exception_log: logger)
|
73
|
+
image_url.title
|
72
74
|
end
|
73
75
|
|
74
76
|
it "should handle errors when content is image/jpeg and html_content_type_only is true" do
|
75
|
-
|
77
|
+
logger.should_receive(:<<).with(an_instance_of(RuntimeError))
|
76
78
|
|
77
|
-
|
78
|
-
title = image_url.title
|
79
|
-
}.to change { image_url.exceptions.size }
|
79
|
+
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, exception_log: logger)
|
80
80
|
|
81
|
-
image_url.
|
81
|
+
image_url.title
|
82
82
|
end
|
83
83
|
|
84
84
|
it "should handle errors when content is not text/html and html_content_type_only is true" do
|
85
|
-
|
85
|
+
logger.should_receive(:<<).with(an_instance_of(RuntimeError))
|
86
86
|
|
87
|
-
|
88
|
-
title = tar_url.title
|
89
|
-
}.to change { tar_url.exceptions.size }
|
87
|
+
tar_url = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', html_content_only: true, exception_log: logger)
|
90
88
|
|
91
|
-
tar_url.
|
89
|
+
tar_url.title
|
92
90
|
end
|
93
91
|
end
|
94
92
|
end
|
data/spec/exception_log_spec.rb
CHANGED
@@ -3,9 +3,41 @@
|
|
3
3
|
require File.join(File.dirname(__FILE__), "/spec_helper")
|
4
4
|
|
5
5
|
describe MetaInspector::ExceptionLog do
|
6
|
-
|
6
|
+
|
7
|
+
describe "warn_level" do
|
8
|
+
it "should be :raise by default" do
|
9
|
+
MetaInspector::ExceptionLog.new.warn_level.should == :raise
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should raise exceptions when warn_level is :raise" do
|
13
|
+
logger = MetaInspector::ExceptionLog.new(warn_level: :raise)
|
14
|
+
exception = StandardError.new("this should be raised")
|
15
|
+
|
16
|
+
expect {
|
17
|
+
logger << exception
|
18
|
+
}.to raise_exception(StandardError, "this should be raised")
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should warn about the error if warn_level is :warn" do
|
22
|
+
logger = MetaInspector::ExceptionLog.new(warn_level: :warn)
|
23
|
+
exception = StandardError.new("an error message")
|
24
|
+
|
25
|
+
logger.should_receive(:warn).with(exception)
|
26
|
+
logger << exception
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should store the error if warn_level is :store" do
|
30
|
+
logger = MetaInspector::ExceptionLog.new(warn_level: :store)
|
31
|
+
exception = StandardError.new("an error message")
|
32
|
+
expect {
|
33
|
+
logger << exception
|
34
|
+
}.to change { logger.exceptions.size }.by(1)
|
35
|
+
end
|
36
|
+
end
|
7
37
|
|
8
38
|
describe "storing exceptions" do
|
39
|
+
let(:logger) { MetaInspector::ExceptionLog.new(warn_level: :store) }
|
40
|
+
|
9
41
|
it "should store exceptions" do
|
10
42
|
expect {
|
11
43
|
logger << StandardError.new("an error message")
|
@@ -21,39 +53,29 @@ describe MetaInspector::ExceptionLog do
|
|
21
53
|
|
22
54
|
logger.exceptions.should == [first, second]
|
23
55
|
end
|
24
|
-
end
|
25
|
-
|
26
|
-
describe "ok?" do
|
27
|
-
it "should be true if no exceptions stored" do
|
28
|
-
logger.should be_ok
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should be false if some exception stored" do
|
32
|
-
logger << StandardError.new("some message")
|
33
|
-
logger.should_not be_ok
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
describe "warn_level" do
|
38
|
-
it "should be quiet by default" do
|
39
|
-
MetaInspector::ExceptionLog.new.warn_level.should be_nil
|
40
|
-
end
|
41
56
|
|
42
|
-
|
43
|
-
|
44
|
-
|
57
|
+
describe "ok?" do
|
58
|
+
it "should be true if no exceptions stored" do
|
59
|
+
logger.should be_ok
|
60
|
+
end
|
45
61
|
|
46
|
-
|
47
|
-
|
48
|
-
|
62
|
+
it "should be false if some exception stored" do
|
63
|
+
logger << StandardError.new("some message")
|
64
|
+
logger.should_not be_ok
|
65
|
+
end
|
49
66
|
|
50
|
-
|
51
|
-
|
52
|
-
|
67
|
+
it "should warn about misuse if warn_level is :raise" do
|
68
|
+
logger = MetaInspector::ExceptionLog.new(warn_level: :raise)
|
69
|
+
logger.should_receive(:warn).with("ExceptionLog#ok? should only be used when warn_level is :store")
|
70
|
+
logger.ok?
|
71
|
+
end
|
53
72
|
|
54
|
-
|
55
|
-
|
56
|
-
|
73
|
+
it "should warn about misuse if warn_level is :warn" do
|
74
|
+
logger = MetaInspector::ExceptionLog.new(warn_level: :warn)
|
75
|
+
logger.should_receive(:warn).with("ExceptionLog#ok? should only be used when warn_level is :store")
|
76
|
+
logger.ok?
|
77
|
+
end
|
57
78
|
end
|
58
79
|
end
|
80
|
+
|
59
81
|
end
|
data/spec/parser_spec.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
require File.join(File.dirname(__FILE__), "/spec_helper")
|
4
4
|
|
5
5
|
describe MetaInspector::Parser do
|
6
|
+
let(:logger) { MetaInspector::ExceptionLog.new }
|
7
|
+
|
6
8
|
describe 'Doing a basic scrape' do
|
7
9
|
|
8
10
|
before(:each) do
|
@@ -155,10 +157,7 @@ describe MetaInspector::Parser do
|
|
155
157
|
|
156
158
|
it "should not crash when processing malformed hrefs" do
|
157
159
|
m = MetaInspector::Parser.new(doc 'http://example.com/malformed_href')
|
158
|
-
|
159
|
-
m.internal_links.should == [ "http://example.com/faqs" ]
|
160
|
-
m.should be_ok
|
161
|
-
}.to_not raise_error
|
160
|
+
m.internal_links.should == [ "http://example.com/faqs" ]
|
162
161
|
end
|
163
162
|
end
|
164
163
|
|
@@ -174,11 +173,8 @@ describe MetaInspector::Parser do
|
|
174
173
|
|
175
174
|
it "should not crash when processing malformed hrefs" do
|
176
175
|
m = MetaInspector::Parser.new(doc 'http://example.com/malformed_href')
|
177
|
-
|
178
|
-
|
179
|
-
"javascript:alert('ok');", "javascript://", "mailto:email(at)example.com"]
|
180
|
-
m.should be_ok
|
181
|
-
}.to_not raise_error
|
176
|
+
m.external_links.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com", "javascript:alert('ok');",
|
177
|
+
"javascript://", "mailto:email(at)example.com"]
|
182
178
|
end
|
183
179
|
end
|
184
180
|
end
|
@@ -369,7 +365,7 @@ describe MetaInspector::Parser do
|
|
369
365
|
|
370
366
|
private
|
371
367
|
|
372
|
-
def doc(url, options = {})
|
368
|
+
def doc(url, options = { exception_log: logger })
|
373
369
|
MetaInspector::Document.new(url, options)
|
374
370
|
end
|
375
371
|
end
|
data/spec/redirections_spec.rb
CHANGED
@@ -4,59 +4,63 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
|
|
4
4
|
|
5
5
|
describe MetaInspector do
|
6
6
|
describe "redirections" do
|
7
|
+
let(:logger) { MetaInspector::ExceptionLog.new }
|
8
|
+
|
7
9
|
describe "safe redirections (HTTP to HTTPS)" do
|
8
10
|
it "disallows safe redirections by default" do
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
m.exceptions.first.message.should == "redirection forbidden: http://facebook.com/ -> https://www.facebook.com/"
|
11
|
+
logger.should receive(:<<).with(an_instance_of(RuntimeError))
|
12
|
+
|
13
|
+
MetaInspector.new("http://facebook.com", exception_log: logger)
|
13
14
|
end
|
14
15
|
|
15
16
|
it "allows safe redirections when :allow_redirections => :safe" do
|
16
|
-
|
17
|
-
|
18
|
-
m.
|
17
|
+
logger.should_not receive(:<<)
|
18
|
+
|
19
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe, exception_log: logger)
|
20
|
+
|
21
|
+
m.url.should == "https://www.facebook.com/"
|
19
22
|
end
|
20
23
|
|
21
24
|
it "allows safe redirections when :allow_redirections => :all" do
|
22
|
-
|
23
|
-
|
24
|
-
m.
|
25
|
+
logger.should_not receive(:<<)
|
26
|
+
|
27
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => :all, exception_log: logger)
|
28
|
+
|
29
|
+
m.url.should == "https://www.facebook.com/"
|
25
30
|
end
|
26
31
|
end
|
27
32
|
|
28
33
|
describe "unsafe redirections (HTTPS to HTTP)" do
|
29
34
|
it "disallows unsafe redirections by default" do
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
m.exceptions.first.message.should == "redirection forbidden: https://unsafe-facebook.com/ -> http://unsafe-facebook.com/"
|
35
|
+
logger.should receive(:<<).with(an_instance_of(RuntimeError))
|
36
|
+
|
37
|
+
MetaInspector.new("https://unsafe-facebook.com", exception_log: logger)
|
34
38
|
end
|
35
39
|
|
36
40
|
it "disallows unsafe redirections when :allow_redirections => :safe" do
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
m.exceptions.first.message.should == "redirection forbidden: https://unsafe-facebook.com/ -> http://unsafe-facebook.com/"
|
41
|
+
logger.should receive(:<<).with(an_instance_of(RuntimeError))
|
42
|
+
|
43
|
+
MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :safe, exception_log: logger)
|
41
44
|
end
|
42
45
|
|
43
46
|
it "allows unsafe redirections when :allow_redirections => :all" do
|
44
|
-
|
45
|
-
|
46
|
-
m.
|
47
|
+
logger.should_not receive(:<<)
|
48
|
+
|
49
|
+
m = MetaInspector.new("https://unsafe-facebook.com", :allow_redirections => :all, exception_log: logger)
|
50
|
+
|
51
|
+
m.url.should == "http://unsafe-facebook.com/"
|
47
52
|
end
|
48
53
|
end
|
49
54
|
|
50
55
|
describe "Redirections should update the base_uri" do
|
51
56
|
it "updates the base_uri on safe redirections" do
|
52
|
-
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
|
53
|
-
|
57
|
+
m = MetaInspector.new("http://facebook.com", :allow_redirections => :safe)
|
58
|
+
|
54
59
|
m.url.should == "https://www.facebook.com/"
|
55
60
|
end
|
56
61
|
|
57
62
|
it "updates the base_uri on all redirections" do
|
58
63
|
m = MetaInspector.new("http://facebook.com", :allow_redirections => :all)
|
59
|
-
m.to_hash
|
60
64
|
|
61
65
|
m.url.should == "https://www.facebook.com/"
|
62
66
|
end
|
data/spec/request_spec.rb
CHANGED
@@ -27,6 +27,8 @@ describe MetaInspector::Request do
|
|
27
27
|
end
|
28
28
|
|
29
29
|
describe 'exception handling' do
|
30
|
+
let(:logger) { MetaInspector::ExceptionLog.new }
|
31
|
+
|
30
32
|
before(:each) do
|
31
33
|
FakeWeb.allow_net_connect = true
|
32
34
|
end
|
@@ -36,23 +38,15 @@ describe MetaInspector::Request do
|
|
36
38
|
end
|
37
39
|
|
38
40
|
it "should handle timeouts" do
|
39
|
-
|
40
|
-
|
41
|
-
expect {
|
42
|
-
impatient.read.should be_nil
|
43
|
-
}.to change { impatient.exceptions.size }
|
41
|
+
logger.should receive(:<<).with(an_instance_of(Timeout::Error))
|
44
42
|
|
45
|
-
|
43
|
+
MetaInspector::Request.new(url('http://example.com/timeout'), timeout: 0.0000000000000000001, exception_log: logger)
|
46
44
|
end
|
47
45
|
|
48
46
|
it "should handle socket errors" do
|
49
|
-
|
50
|
-
|
51
|
-
expect {
|
52
|
-
nowhere.read.should be_nil
|
53
|
-
}.to change { nowhere.exceptions.size }
|
47
|
+
logger.should receive(:<<).with(an_instance_of(SocketError))
|
54
48
|
|
55
|
-
|
49
|
+
MetaInspector::Request.new(url('http://caca232dsdsaer3sdsd-asd343.org'), exception_log: logger)
|
56
50
|
end
|
57
51
|
end
|
58
52
|
|
data/spec/spec_helper.rb
CHANGED
@@ -16,6 +16,7 @@ end
|
|
16
16
|
# Faked web responses #
|
17
17
|
#######################
|
18
18
|
|
19
|
+
FakeWeb.register_uri(:get, "http://example.com/", :response => fixture_file("empty_page.response"))
|
19
20
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
20
21
|
FakeWeb.register_uri(:get, "pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
21
22
|
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
data/spec/url_spec.rb
CHANGED
@@ -53,22 +53,4 @@ describe MetaInspector::URL do
|
|
53
53
|
url.url.should == 'http://second.com/'
|
54
54
|
end
|
55
55
|
end
|
56
|
-
|
57
|
-
describe "exception handling" do
|
58
|
-
it "should handle URI::InvalidURIError" do
|
59
|
-
expect {
|
60
|
-
@malformed = MetaInspector::URL.new('javascript://')
|
61
|
-
}.to_not raise_error
|
62
|
-
|
63
|
-
@malformed.exceptions.first.class.should == URI::InvalidURIError
|
64
|
-
end
|
65
|
-
|
66
|
-
it "should handle URI::InvalidComponentError" do
|
67
|
-
expect {
|
68
|
-
@malformed = MetaInspector::URL.new('mailto:email(at)example.com')
|
69
|
-
}.to_not raise_error
|
70
|
-
|
71
|
-
@malformed.exceptions.first.class.should == URI::InvalidComponentError
|
72
|
-
end
|
73
|
-
end
|
74
56
|
end
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: open_uri_redirections
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 0.1.4
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.1.4
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ~>
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: 2.3.5
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 2.3.5
|
55
55
|
- !ruby/object:Gem::Dependency
|
@@ -84,28 +84,28 @@ dependencies:
|
|
84
84
|
name: awesome_print
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - ~>
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: 1.2.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - ~>
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 1.2.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: rake
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - ~>
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: 10.1.0
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - ~>
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 10.1.0
|
111
111
|
description: MetaInspector lets you scrape a web page and get its title, charset,
|
@@ -116,9 +116,9 @@ executables: []
|
|
116
116
|
extensions: []
|
117
117
|
extra_rdoc_files: []
|
118
118
|
files:
|
119
|
-
- .gitignore
|
120
|
-
- .rspec.example
|
121
|
-
- .travis.yml
|
119
|
+
- ".gitignore"
|
120
|
+
- ".rspec.example"
|
121
|
+
- ".travis.yml"
|
122
122
|
- Gemfile
|
123
123
|
- MIT-LICENSE
|
124
124
|
- README.md
|
@@ -182,17 +182,17 @@ require_paths:
|
|
182
182
|
- lib
|
183
183
|
required_ruby_version: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
|
-
- -
|
185
|
+
- - ">="
|
186
186
|
- !ruby/object:Gem::Version
|
187
187
|
version: '0'
|
188
188
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
189
189
|
requirements:
|
190
|
-
- -
|
190
|
+
- - ">="
|
191
191
|
- !ruby/object:Gem::Version
|
192
192
|
version: '0'
|
193
193
|
requirements: []
|
194
194
|
rubyforge_project:
|
195
|
-
rubygems_version: 2.
|
195
|
+
rubygems_version: 2.1.11
|
196
196
|
signing_key:
|
197
197
|
specification_version: 4
|
198
198
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|