metainspector 4.7.2 → 5.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -21
- data/examples/basic_scraping.rb +2 -1
- data/examples/link_checker.rb +6 -5
- data/examples/spider.rb +3 -2
- data/lib/meta_inspector.rb +1 -2
- data/lib/meta_inspector/document.rb +4 -16
- data/lib/meta_inspector/errors.rb +13 -0
- data/lib/meta_inspector/parser.rb +0 -5
- data/lib/meta_inspector/request.rb +4 -7
- data/lib/meta_inspector/url.rb +4 -9
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -0
- data/spec/document_spec.rb +16 -65
- data/spec/meta_inspector/meta_inspector_spec.rb +1 -1
- data/spec/meta_inspector/redirections_spec.rb +2 -8
- data/spec/request_spec.rb +3 -4
- data/spec/url_spec.rb +8 -9
- metadata +19 -7
- data/lib/meta_inspector/exception_log.rb +0 -31
- data/lib/meta_inspector/exceptionable.rb +0 -9
- data/spec/exception_log_spec.rb +0 -79
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb39fe43b5a67a67f0543043ded76d44001eb677
|
4
|
+
data.tar.gz: 3183a8d0f564b3acda845436470b07b466345050
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0d5f4b1dda14174e082956f354c4995b34525f0da33c81c002f422dc12d06c3af9eed5b919c39030daedc3305957ed54dc7462d596e564d51cb9d3b38f069c7
|
7
|
+
data.tar.gz: 478ef7aef64b6310106733dd32bdd111e9342a84831d93369d35d275149196613acea262e02d3c058ce0b2daeb55aed4b1729a886031e7a7697df1e3f72b6017
|
data/README.md
CHANGED
@@ -385,28 +385,11 @@ page = MetaInspector.new('http://example.com', faraday_http_cache: { store: cach
|
|
385
385
|
|
386
386
|
## Exception Handling
|
387
387
|
|
388
|
-
|
388
|
+
Web page scraping is tricky, you can expect to find different exceptions during the request of the page or the parsing of its contents. MetaInspector will encapsulate these exceptions on these main errors:
|
389
389
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
You should avoid using the `:store` option, or use it wisely, as silencing errors can be problematic, it's always better to face the errors and treat them accordingly.
|
395
|
-
|
396
|
-
If you're using this exception store, you're advised to first initialize the document, check if it seems OK, and then proceed with the extractions, like this:
|
397
|
-
|
398
|
-
```ruby
|
399
|
-
# This will fail because the URL will return a text/xml document
|
400
|
-
page = MetaInspector.new("http://example.com/rss",
|
401
|
-
html_content_only: true,
|
402
|
-
warn_level: :store )
|
403
|
-
|
404
|
-
if page.ok?
|
405
|
-
puts "TITLE: #{page.title}"
|
406
|
-
else
|
407
|
-
puts "There were some exceptions: #{page.exceptions}"
|
408
|
-
end
|
409
|
-
```
|
390
|
+
* `MetaInspector::TimeoutError`. When fetching a web page has taken too long.
|
391
|
+
* `MetaInspector::RequestError`. When there has been an error on the request phase. Examples: page not found, SSL failure, invalid URI.
|
392
|
+
* `MetaInspector::ParserError`. When there has been an error parsing the contents of the page. Example: trying to parse an image file.
|
410
393
|
|
411
394
|
## Examples
|
412
395
|
|
data/examples/basic_scraping.rb
CHANGED
data/examples/link_checker.rb
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
#
|
3
3
|
# Usage example:
|
4
4
|
#
|
5
|
-
# ruby link_checker.rb
|
5
|
+
# ruby link_checker.rb example.com
|
6
6
|
|
7
|
-
require 'metainspector'
|
7
|
+
require '../lib/metainspector'
|
8
|
+
puts "Using MetaInspector #{MetaInspector::VERSION}"
|
8
9
|
|
9
10
|
class BrokenLinkChecker
|
10
11
|
|
@@ -39,11 +40,11 @@ class BrokenLinkChecker
|
|
39
40
|
end
|
40
41
|
|
41
42
|
def process_next_on_queue
|
42
|
-
page = MetaInspector.new(@queue.pop
|
43
|
+
page = MetaInspector.new(@queue.pop)
|
43
44
|
|
44
45
|
page.links.all.select {|l| l =~ /^http(s)?:\/\//i}.each do |link|
|
45
46
|
check_status(link, page.url)
|
46
|
-
end
|
47
|
+
end
|
47
48
|
|
48
49
|
@visited.push(page.url)
|
49
50
|
|
@@ -90,7 +91,7 @@ class BrokenLinkChecker
|
|
90
91
|
else
|
91
92
|
false
|
92
93
|
end
|
93
|
-
rescue
|
94
|
+
rescue
|
94
95
|
false
|
95
96
|
end
|
96
97
|
end
|
data/examples/spider.rb
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
#
|
3
3
|
# Usage example:
|
4
4
|
#
|
5
|
-
# ruby spider.rb
|
5
|
+
# ruby spider.rb example.com
|
6
6
|
|
7
|
-
require 'metainspector'
|
7
|
+
require '../lib/metainspector'
|
8
|
+
puts "Using MetaInspector #{MetaInspector::VERSION}"
|
8
9
|
|
9
10
|
# Two arrays, one for the scraping queue and one for the visited links
|
10
11
|
queue = []
|
data/lib/meta_inspector.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'forwardable'
|
2
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/exceptionable'))
|
3
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/exception_log'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/errors'))
|
4
3
|
require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/request'))
|
5
4
|
require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/url'))
|
6
5
|
require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/parser'))
|
@@ -1,9 +1,7 @@
|
|
1
1
|
module MetaInspector
|
2
2
|
# A MetaInspector::Document knows about its URL and its contents
|
3
3
|
class Document
|
4
|
-
attr_reader :html_content_only, :allow_redirections, :
|
5
|
-
|
6
|
-
include MetaInspector::Exceptionable
|
4
|
+
attr_reader :html_content_only, :allow_redirections, :headers
|
7
5
|
|
8
6
|
# Initializes a new instance of MetaInspector::Document, setting the URL
|
9
7
|
# Options:
|
@@ -14,8 +12,6 @@ module MetaInspector
|
|
14
12
|
# content-type is not text/html. Defaults to false.
|
15
13
|
# * allow_redirections: when true, follow HTTP redirects. Defaults to true
|
16
14
|
# * document: the html of the url as a string
|
17
|
-
# * warn_level: what to do when encountering exceptions.
|
18
|
-
# Can be :warn, :raise or nil
|
19
15
|
# * headers: object containing custom headers for the request
|
20
16
|
# * normalize_url: true by default
|
21
17
|
# * faraday_options: an optional hash of options to pass to Faraday on the request
|
@@ -29,23 +25,18 @@ module MetaInspector
|
|
29
25
|
@document = options[:document]
|
30
26
|
@download_images = options[:download_images]
|
31
27
|
@headers = options[:headers]
|
32
|
-
@warn_level = options[:warn_level]
|
33
|
-
@exception_log = options[:exception_log] || MetaInspector::ExceptionLog.new(warn_level: warn_level)
|
34
28
|
@normalize_url = options[:normalize_url]
|
35
29
|
@faraday_options = options[:faraday_options]
|
36
30
|
@faraday_http_cache = options[:faraday_http_cache]
|
37
|
-
@url = MetaInspector::URL.new(initial_url,
|
38
|
-
normalize: @normalize_url)
|
31
|
+
@url = MetaInspector::URL.new(initial_url, normalize: @normalize_url)
|
39
32
|
@request = MetaInspector::Request.new(@url, allow_redirections: @allow_redirections,
|
40
33
|
connection_timeout: @connection_timeout,
|
41
34
|
read_timeout: @read_timeout,
|
42
35
|
retries: @retries,
|
43
|
-
exception_log: @exception_log,
|
44
36
|
headers: @headers,
|
45
37
|
faraday_options: @faraday_options,
|
46
38
|
faraday_http_cache: @faraday_http_cache) unless @document
|
47
|
-
@parser = MetaInspector::Parser.new(self,
|
48
|
-
download_images: @download_images)
|
39
|
+
@parser = MetaInspector::Parser.new(self, download_images: @download_images)
|
49
40
|
end
|
50
41
|
|
51
42
|
extend Forwardable
|
@@ -93,7 +84,6 @@ module MetaInspector
|
|
93
84
|
{ :timeout => 20,
|
94
85
|
:retries => 3,
|
95
86
|
:html_content_only => false,
|
96
|
-
:warn_level => :raise,
|
97
87
|
:headers => {
|
98
88
|
'User-Agent' => default_user_agent,
|
99
89
|
'Accept-Encoding' => 'identity'
|
@@ -109,12 +99,10 @@ module MetaInspector
|
|
109
99
|
|
110
100
|
def document
|
111
101
|
@document ||= if html_content_only && content_type != 'text/html'
|
112
|
-
fail "The url provided contains #{content_type} content instead of text/html content"
|
102
|
+
fail MetaInspector::ParserError.new "The url provided contains #{content_type} content instead of text/html content"
|
113
103
|
else
|
114
104
|
@request.read
|
115
105
|
end
|
116
|
-
rescue Exception => e
|
117
|
-
@exception_log << e
|
118
106
|
end
|
119
107
|
end
|
120
108
|
end
|
@@ -8,11 +8,8 @@ module MetaInspector
|
|
8
8
|
# passing itself as a reference for coordination purposes
|
9
9
|
#
|
10
10
|
class Parser
|
11
|
-
include MetaInspector::Exceptionable
|
12
|
-
|
13
11
|
def initialize(document, options = {})
|
14
12
|
@document = document
|
15
|
-
@exception_log = options[:exception_log]
|
16
13
|
@head_links_parser = MetaInspector::Parsers::HeadLinksParser.new(self)
|
17
14
|
@meta_tag_parser = MetaInspector::Parsers::MetaTagsParser.new(self)
|
18
15
|
@links_parser = MetaInspector::Parsers::LinksParser.new(self)
|
@@ -34,8 +31,6 @@ module MetaInspector
|
|
34
31
|
# Returns the whole parsed document
|
35
32
|
def parsed
|
36
33
|
@parsed ||= Nokogiri::HTML(@document.to_s)
|
37
|
-
rescue Exception => e
|
38
|
-
@exception_log << e
|
39
34
|
end
|
40
35
|
end
|
41
36
|
end
|
@@ -7,8 +7,6 @@ module MetaInspector
|
|
7
7
|
|
8
8
|
# Makes the request to the server
|
9
9
|
class Request
|
10
|
-
include MetaInspector::Exceptionable
|
11
|
-
|
12
10
|
def initialize(initial_url, options = {})
|
13
11
|
@url = initial_url
|
14
12
|
|
@@ -16,7 +14,6 @@ module MetaInspector
|
|
16
14
|
@connection_timeout = options[:connection_timeout]
|
17
15
|
@read_timeout = options[:read_timeout]
|
18
16
|
@retries = options[:retries]
|
19
|
-
@exception_log = options[:exception_log]
|
20
17
|
@headers = options[:headers]
|
21
18
|
@faraday_options = options[:faraday_options] || {}
|
22
19
|
@faraday_http_cache = options[:faraday_http_cache]
|
@@ -38,10 +35,10 @@ module MetaInspector
|
|
38
35
|
|
39
36
|
def response
|
40
37
|
@response ||= fetch
|
41
|
-
rescue Faraday::TimeoutError
|
42
|
-
|
43
|
-
|
44
|
-
|
38
|
+
rescue Faraday::TimeoutError => e
|
39
|
+
raise MetaInspector::TimeoutError.new(e)
|
40
|
+
rescue Faraday::Error::ConnectionFailed, Faraday::SSLError, URI::InvalidURIError => e
|
41
|
+
raise MetaInspector::RequestError.new(e)
|
45
42
|
end
|
46
43
|
|
47
44
|
private
|
data/lib/meta_inspector/url.rb
CHANGED
@@ -4,12 +4,9 @@ module MetaInspector
|
|
4
4
|
class URL
|
5
5
|
attr_reader :url
|
6
6
|
|
7
|
-
include MetaInspector::Exceptionable
|
8
|
-
|
9
7
|
def initialize(initial_url, options = {})
|
10
8
|
options = defaults.merge(options)
|
11
9
|
|
12
|
-
@exception_log = options[:exception_log]
|
13
10
|
@normalize = options[:normalize]
|
14
11
|
|
15
12
|
self.url = initial_url
|
@@ -61,11 +58,11 @@ module MetaInspector
|
|
61
58
|
# schema as the base_url
|
62
59
|
def self.absolutify(url, base_url)
|
63
60
|
if url =~ /^\w*\:/i
|
64
|
-
MetaInspector::URL.new(url
|
61
|
+
MetaInspector::URL.new(url).url
|
65
62
|
else
|
66
63
|
Addressable::URI.join(base_url, url).normalize.to_s
|
67
64
|
end
|
68
|
-
rescue
|
65
|
+
rescue MetaInspector::ParserError
|
69
66
|
nil
|
70
67
|
end
|
71
68
|
|
@@ -85,15 +82,13 @@ module MetaInspector
|
|
85
82
|
def normalized(url)
|
86
83
|
Addressable::URI.parse(url).normalize.to_s
|
87
84
|
rescue Addressable::URI::InvalidURIError => e
|
88
|
-
|
89
|
-
nil
|
85
|
+
raise MetaInspector::ParserError.new(e)
|
90
86
|
end
|
91
87
|
|
92
88
|
def parsed(url)
|
93
89
|
Addressable::URI.parse(url)
|
94
90
|
rescue Addressable::URI::InvalidURIError => e
|
95
|
-
|
96
|
-
nil
|
91
|
+
raise MetaInspector::ParserError.new(e)
|
97
92
|
end
|
98
93
|
end
|
99
94
|
end
|
data/meta_inspector.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_dependency 'faraday-http-cache', '~> 1.2.2'
|
22
22
|
gem.add_dependency 'addressable', '~> 2.3.5'
|
23
23
|
gem.add_dependency 'fastimage'
|
24
|
+
gem.add_dependency 'nesty', '~> 1.0'
|
24
25
|
|
25
26
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
26
27
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
data/spec/document_spec.rb
CHANGED
@@ -75,83 +75,34 @@ describe MetaInspector::Document do
|
|
75
75
|
end
|
76
76
|
|
77
77
|
describe 'exception handling' do
|
78
|
-
let(:logger) { MetaInspector::ExceptionLog.new }
|
79
|
-
|
80
78
|
it "should parse images when parse_html_content_type_only is not specified" do
|
81
|
-
expect
|
82
|
-
|
83
|
-
|
84
|
-
|
79
|
+
expect do
|
80
|
+
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png')
|
81
|
+
image_url.title
|
82
|
+
end.to_not raise_error
|
85
83
|
end
|
86
84
|
|
87
85
|
it "should parse images when parse_html_content_type_only is false" do
|
88
|
-
expect
|
89
|
-
|
90
|
-
|
91
|
-
|
86
|
+
expect do
|
87
|
+
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: false)
|
88
|
+
image_url.title
|
89
|
+
end.to_not raise_error
|
92
90
|
end
|
93
91
|
|
94
92
|
it "should handle errors when content is image/jpeg and html_content_type_only is true" do
|
95
|
-
expect
|
93
|
+
expect do
|
94
|
+
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true)
|
96
95
|
|
97
|
-
|
98
|
-
|
99
|
-
image_url.title
|
96
|
+
image_url.title
|
97
|
+
end.to raise_error(MetaInspector::ParserError)
|
100
98
|
end
|
101
99
|
|
102
100
|
it "should handle errors when content is not text/html and html_content_type_only is true" do
|
103
|
-
expect
|
104
|
-
|
105
|
-
tar_url = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', html_content_only: true, exception_log: logger)
|
106
|
-
|
107
|
-
tar_url.title
|
108
|
-
end
|
109
|
-
|
110
|
-
context 'when a warn_level of :store is passed in' do
|
111
|
-
before do
|
112
|
-
@bad_request = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, warn_level: :store)
|
113
|
-
@bad_request.title
|
114
|
-
end
|
115
|
-
|
116
|
-
it 'stores the exceptions' do
|
117
|
-
expect(@bad_request.exceptions).not_to be_empty
|
118
|
-
end
|
119
|
-
|
120
|
-
it 'makes ok? to return false' do
|
121
|
-
expect(@bad_request).not_to be_ok
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
context 'when a warn_level of :warn is passed in' do
|
126
|
-
before do
|
127
|
-
$stderr = StringIO.new
|
128
|
-
end
|
129
|
-
|
130
|
-
after do
|
131
|
-
$stderr = STDERR
|
132
|
-
end
|
133
|
-
|
134
|
-
it 'warns on STDERR' do
|
135
|
-
bad_request = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, warn_level: :warn)
|
136
|
-
bad_request.title
|
137
|
-
|
138
|
-
$stderr.rewind
|
139
|
-
expect($stderr.string.chomp).to eq("The url provided contains image/png content instead of text/html content")
|
140
|
-
end
|
141
|
-
|
142
|
-
it 'does not raise an exception' do
|
143
|
-
expect {
|
144
|
-
bad_request = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, warn_level: :warn)
|
145
|
-
bad_request.title
|
146
|
-
}.to_not raise_exception
|
147
|
-
end
|
148
|
-
|
149
|
-
it 'does not store exceptions' do
|
150
|
-
bad_request = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, warn_level: :warn)
|
151
|
-
bad_request.title
|
101
|
+
expect do
|
102
|
+
tar_url = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', html_content_only: true)
|
152
103
|
|
153
|
-
|
154
|
-
end
|
104
|
+
tar_url.title
|
105
|
+
end.to raise_error(MetaInspector::ParserError)
|
155
106
|
end
|
156
107
|
end
|
157
108
|
|
@@ -11,7 +11,7 @@ describe MetaInspector do
|
|
11
11
|
def cache.read(k) self[k]; end
|
12
12
|
def cache.write(k, v) self[k] = v; end
|
13
13
|
|
14
|
-
|
14
|
+
MetaInspector.new('http://example.com', faraday_http_cache: { store: cache })
|
15
15
|
|
16
16
|
expect(cache.keys).not_to be_empty
|
17
17
|
end
|
@@ -2,8 +2,6 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe MetaInspector do
|
4
4
|
describe "redirections" do
|
5
|
-
let(:logger) { MetaInspector::ExceptionLog.new }
|
6
|
-
|
7
5
|
context "when redirections are turned off" do
|
8
6
|
it "disallows redirections" do
|
9
7
|
page = MetaInspector.new("http://facebook.com", :allow_redirections => false)
|
@@ -14,9 +12,7 @@ describe MetaInspector do
|
|
14
12
|
|
15
13
|
context "when redirections are on (default)" do
|
16
14
|
it "allows follows redirections" do
|
17
|
-
|
18
|
-
|
19
|
-
page = MetaInspector.new("http://facebook.com", exception_log: logger)
|
15
|
+
page = MetaInspector.new("http://facebook.com")
|
20
16
|
|
21
17
|
expect(page.url).to eq("https://www.facebook.com/")
|
22
18
|
end
|
@@ -37,9 +33,7 @@ describe MetaInspector do
|
|
37
33
|
stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
38
34
|
.with(:headers => {"Cookie" => "EMETA_COOKIE_CHECK=1"})
|
39
35
|
|
40
|
-
|
41
|
-
|
42
|
-
page = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/", exception_log: logger)
|
36
|
+
page = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/")
|
43
37
|
|
44
38
|
expect(page.url).to eq("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
45
39
|
end
|
data/spec/request_spec.rb
CHANGED
@@ -48,8 +48,6 @@ describe MetaInspector::Request do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
describe 'exception handling' do
|
51
|
-
let(:logger) { MetaInspector::ExceptionLog.new }
|
52
|
-
|
53
51
|
before(:each) do
|
54
52
|
FakeWeb.allow_net_connect = true
|
55
53
|
end
|
@@ -60,9 +58,10 @@ describe MetaInspector::Request do
|
|
60
58
|
|
61
59
|
it "should handle socket errors" do
|
62
60
|
allow(TCPSocket).to receive(:open).and_raise(SocketError)
|
63
|
-
expect(logger).to receive(:<<).with(an_instance_of(Faraday::Error::ConnectionFailed))
|
64
61
|
|
65
|
-
|
62
|
+
expect do
|
63
|
+
MetaInspector::Request.new(url('http://example.com/fail'))
|
64
|
+
end.to raise_error(MetaInspector::RequestError)
|
66
65
|
end
|
67
66
|
end
|
68
67
|
|
data/spec/url_spec.rb
CHANGED
@@ -110,17 +110,16 @@ describe MetaInspector::URL do
|
|
110
110
|
end
|
111
111
|
|
112
112
|
describe "handling malformed URLs" do
|
113
|
-
|
114
|
-
|
115
|
-
|
113
|
+
it "detects empty URLs" do
|
114
|
+
expect do
|
115
|
+
MetaInspector::URL.new('')
|
116
|
+
end.to raise_error(MetaInspector::ParserError)
|
116
117
|
end
|
117
118
|
|
118
|
-
it "
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
it "handles incomplete URLs" do
|
123
|
-
MetaInspector::URL.new('', exception_log: @logger)
|
119
|
+
it "detects incomplete URLs" do
|
120
|
+
expect do
|
121
|
+
MetaInspector::URL.new('http:')
|
122
|
+
end.to raise_error(MetaInspector::ParserError)
|
124
123
|
end
|
125
124
|
end
|
126
125
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.7.2
|
4
|
+
version: 5.0.0.rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nesty
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.0'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: rspec
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -258,8 +272,7 @@ files:
|
|
258
272
|
- examples/spider.rb
|
259
273
|
- lib/meta_inspector.rb
|
260
274
|
- lib/meta_inspector/document.rb
|
261
|
-
- lib/meta_inspector/exception_log.rb
|
262
|
-
- lib/meta_inspector/exceptionable.rb
|
275
|
+
- lib/meta_inspector/errors.rb
|
263
276
|
- lib/meta_inspector/parser.rb
|
264
277
|
- lib/meta_inspector/parsers/base.rb
|
265
278
|
- lib/meta_inspector/parsers/head_links.rb
|
@@ -273,7 +286,6 @@ files:
|
|
273
286
|
- lib/metainspector.rb
|
274
287
|
- meta_inspector.gemspec
|
275
288
|
- spec/document_spec.rb
|
276
|
-
- spec/exception_log_spec.rb
|
277
289
|
- spec/fixtures/100x100.jpg.response
|
278
290
|
- spec/fixtures/10x10.jpg.response
|
279
291
|
- spec/fixtures/404.response
|
@@ -348,9 +360,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
348
360
|
version: '0'
|
349
361
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
350
362
|
requirements:
|
351
|
-
- - ">="
|
363
|
+
- - ">"
|
352
364
|
- !ruby/object:Gem::Version
|
353
|
-
version: '0'
|
365
|
+
version: 1.3.1
|
354
366
|
requirements: []
|
355
367
|
rubyforge_project:
|
356
368
|
rubygems_version: 2.4.8
|
@@ -1,31 +0,0 @@
|
|
1
|
-
module MetaInspector
|
2
|
-
|
3
|
-
# Stores the exceptions passed to it, warning about them if required
|
4
|
-
class ExceptionLog
|
5
|
-
attr_reader :exceptions, :warn_level
|
6
|
-
|
7
|
-
def initialize(options = {})
|
8
|
-
@warn_level = options[:warn_level] || :raise
|
9
|
-
@exceptions = []
|
10
|
-
end
|
11
|
-
|
12
|
-
def <<(exception)
|
13
|
-
case warn_level
|
14
|
-
when :raise
|
15
|
-
fail exception
|
16
|
-
when :warn
|
17
|
-
warn exception
|
18
|
-
when :store
|
19
|
-
@exceptions << exception
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def ok?
|
24
|
-
if warn_level == :store
|
25
|
-
exceptions.empty?
|
26
|
-
else
|
27
|
-
warn 'ExceptionLog#ok? should only be used when warn_level is :store'
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
data/spec/exception_log_spec.rb
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe MetaInspector::ExceptionLog do
|
4
|
-
|
5
|
-
describe "warn_level" do
|
6
|
-
it "should be :raise by default" do
|
7
|
-
expect(MetaInspector::ExceptionLog.new.warn_level).to eq(:raise)
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should raise exceptions when warn_level is :raise" do
|
11
|
-
logger = MetaInspector::ExceptionLog.new(warn_level: :raise)
|
12
|
-
exception = StandardError.new("this should be raised")
|
13
|
-
|
14
|
-
expect {
|
15
|
-
logger << exception
|
16
|
-
}.to raise_exception(StandardError, "this should be raised")
|
17
|
-
end
|
18
|
-
|
19
|
-
it "should warn about the error if warn_level is :warn" do
|
20
|
-
logger = MetaInspector::ExceptionLog.new(warn_level: :warn)
|
21
|
-
exception = StandardError.new("an error message")
|
22
|
-
|
23
|
-
expect(logger).to receive(:warn).with(exception)
|
24
|
-
logger << exception
|
25
|
-
end
|
26
|
-
|
27
|
-
it "should store the error if warn_level is :store" do
|
28
|
-
logger = MetaInspector::ExceptionLog.new(warn_level: :store)
|
29
|
-
exception = StandardError.new("an error message")
|
30
|
-
expect {
|
31
|
-
logger << exception
|
32
|
-
}.to change { logger.exceptions.size }.by(1)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
describe "storing exceptions" do
|
37
|
-
let(:logger) { MetaInspector::ExceptionLog.new(warn_level: :store) }
|
38
|
-
|
39
|
-
it "should store exceptions" do
|
40
|
-
expect {
|
41
|
-
logger << StandardError.new("an error message")
|
42
|
-
}.to change { logger.exceptions.length }.from(0).to(1)
|
43
|
-
end
|
44
|
-
|
45
|
-
it "should return stored exceptions" do
|
46
|
-
first = StandardError.new("first message")
|
47
|
-
second = StandardError.new("second message")
|
48
|
-
|
49
|
-
logger << first
|
50
|
-
logger << second
|
51
|
-
|
52
|
-
expect(logger.exceptions).to eq([first, second])
|
53
|
-
end
|
54
|
-
|
55
|
-
describe "ok?" do
|
56
|
-
it "should be true if no exceptions stored" do
|
57
|
-
expect(logger).to be_ok
|
58
|
-
end
|
59
|
-
|
60
|
-
it "should be false if some exception stored" do
|
61
|
-
logger << StandardError.new("some message")
|
62
|
-
expect(logger).not_to be_ok
|
63
|
-
end
|
64
|
-
|
65
|
-
it "should warn about misuse if warn_level is :raise" do
|
66
|
-
logger = MetaInspector::ExceptionLog.new(warn_level: :raise)
|
67
|
-
expect(logger).to receive(:warn).with("ExceptionLog#ok? should only be used when warn_level is :store")
|
68
|
-
logger.ok?
|
69
|
-
end
|
70
|
-
|
71
|
-
it "should warn about misuse if warn_level is :warn" do
|
72
|
-
logger = MetaInspector::ExceptionLog.new(warn_level: :warn)
|
73
|
-
expect(logger).to receive(:warn).with("ExceptionLog#ok? should only be used when warn_level is :store")
|
74
|
-
logger.ok?
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
end
|