metainspector 1.10.1 → 1.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,7 +41,7 @@ module MetaInspector
|
|
41
41
|
|
42
42
|
# Links found on the page, as absolute URLs
|
43
43
|
def links
|
44
|
-
@data.links ||= parsed_links.map
|
44
|
+
@data.links ||= parsed_links.map{ |l| absolutify_url(unrelativize_url(l)) }.compact
|
45
45
|
end
|
46
46
|
|
47
47
|
# Internal links found on the page, as absolute URLs
|
@@ -190,6 +190,8 @@ module MetaInspector
|
|
190
190
|
else
|
191
191
|
URI.parse(@root_url).merge(encode_url(url)).to_s
|
192
192
|
end
|
193
|
+
rescue URI::InvalidURIError => e
|
194
|
+
add_fatal_error "Link parsing exception: #{e.message}" and nil
|
193
195
|
end
|
194
196
|
|
195
197
|
# Convert a protocol-relative url to its full form, depending on the scheme of the page that contains it
|
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
<title>Sample file non-http links</title>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<a href="<p>ftp://ftp.cdrom.com">an FTP link</a>
|
18
|
+
<a href="skype:joeuser?call">a skype link</a>
|
19
|
+
<a href="telnet://telnet.cdrom.com">a telnet link</a>
|
20
|
+
</body>
|
21
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
@@ -15,6 +15,7 @@ describe MetaInspector do
|
|
15
15
|
FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
|
16
16
|
FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
|
17
17
|
FakeWeb.register_uri(:get, "http://example.com/nonhttp", :response => fixture_file("nonhttp.response"))
|
18
|
+
FakeWeb.register_uri(:get, "http://example.com/invalid_href", :response => fixture_file("invalid_href.response"))
|
18
19
|
FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
|
19
20
|
FakeWeb.register_uri(:get, "http://markupvalidator.com/faqs", :response => fixture_file("markupvalidator_faqs.response"))
|
20
21
|
FakeWeb.register_uri(:get, "https://twitter.com/markupvalidator", :response => fixture_file("twitter_markupvalidator.response"))
|
@@ -212,6 +213,22 @@ describe MetaInspector do
|
|
212
213
|
"http://example.com/search?q=espa%C3%B1a#top"]
|
213
214
|
end
|
214
215
|
end
|
216
|
+
|
217
|
+
it "should avoid links that contain invalid links as href value" do
|
218
|
+
m = MetaInspector.new('http://example.com/invalid_href')
|
219
|
+
m.links.should == [ "skype:joeuser?call",
|
220
|
+
"telnet://telnet.cdrom.com"]
|
221
|
+
end
|
222
|
+
|
223
|
+
it "should store errors when links contain invalid href values" do
|
224
|
+
m = MetaInspector.new('http://example.com/invalid_href')
|
225
|
+
|
226
|
+
expect {
|
227
|
+
links = m.links
|
228
|
+
}.to change { m.errors.size }.from(0).to(1)
|
229
|
+
|
230
|
+
m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
|
231
|
+
end
|
215
232
|
end
|
216
233
|
|
217
234
|
describe 'Non-HTTP links' do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 59
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 10
|
9
|
-
- 1
|
10
|
-
version: 1.10.1
|
9
|
+
- 2
|
10
|
+
version: 1.10.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-11-
|
18
|
+
date: 2012-11-17 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -145,6 +145,7 @@ files:
|
|
145
145
|
- spec/fixtures/empty_page.response
|
146
146
|
- spec/fixtures/guardian.co.uk.response
|
147
147
|
- spec/fixtures/international.response
|
148
|
+
- spec/fixtures/invalid_href.response
|
148
149
|
- spec/fixtures/iteh.at.response
|
149
150
|
- spec/fixtures/markupvalidator_faqs.response
|
150
151
|
- spec/fixtures/nonhttp.response
|