metainspector 1.10.1 → 1.10.2
Sign up to get free protection for your applications and to get access to all the features.
@@ -41,7 +41,7 @@ module MetaInspector
|
|
41
41
|
|
42
42
|
# Links found on the page, as absolute URLs
|
43
43
|
def links
|
44
|
-
@data.links ||= parsed_links.map
|
44
|
+
@data.links ||= parsed_links.map{ |l| absolutify_url(unrelativize_url(l)) }.compact
|
45
45
|
end
|
46
46
|
|
47
47
|
# Internal links found on the page, as absolute URLs
|
@@ -190,6 +190,8 @@ module MetaInspector
|
|
190
190
|
else
|
191
191
|
URI.parse(@root_url).merge(encode_url(url)).to_s
|
192
192
|
end
|
193
|
+
rescue URI::InvalidURIError => e
|
194
|
+
add_fatal_error "Link parsing exception: #{e.message}" and nil
|
193
195
|
end
|
194
196
|
|
195
197
|
# Convert a protocol-relative url to its full form, depending on the scheme of the page that contains it
|
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
<title>Sample file non-http links</title>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<a href="<p>ftp://ftp.cdrom.com">an FTP link</a>
|
18
|
+
<a href="skype:joeuser?call">a skype link</a>
|
19
|
+
<a href="telnet://telnet.cdrom.com">a telnet link</a>
|
20
|
+
</body>
|
21
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
@@ -15,6 +15,7 @@ describe MetaInspector do
|
|
15
15
|
FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
|
16
16
|
FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
|
17
17
|
FakeWeb.register_uri(:get, "http://example.com/nonhttp", :response => fixture_file("nonhttp.response"))
|
18
|
+
FakeWeb.register_uri(:get, "http://example.com/invalid_href", :response => fixture_file("invalid_href.response"))
|
18
19
|
FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
|
19
20
|
FakeWeb.register_uri(:get, "http://markupvalidator.com/faqs", :response => fixture_file("markupvalidator_faqs.response"))
|
20
21
|
FakeWeb.register_uri(:get, "https://twitter.com/markupvalidator", :response => fixture_file("twitter_markupvalidator.response"))
|
@@ -212,6 +213,22 @@ describe MetaInspector do
|
|
212
213
|
"http://example.com/search?q=espa%C3%B1a#top"]
|
213
214
|
end
|
214
215
|
end
|
216
|
+
|
217
|
+
it "should avoid links that contain invalid links as href value" do
|
218
|
+
m = MetaInspector.new('http://example.com/invalid_href')
|
219
|
+
m.links.should == [ "skype:joeuser?call",
|
220
|
+
"telnet://telnet.cdrom.com"]
|
221
|
+
end
|
222
|
+
|
223
|
+
it "should store errors when links contain invalid href values" do
|
224
|
+
m = MetaInspector.new('http://example.com/invalid_href')
|
225
|
+
|
226
|
+
expect {
|
227
|
+
links = m.links
|
228
|
+
}.to change { m.errors.size }.from(0).to(1)
|
229
|
+
|
230
|
+
m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
|
231
|
+
end
|
215
232
|
end
|
216
233
|
|
217
234
|
describe 'Non-HTTP links' do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 59
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 10
|
9
|
-
-
|
10
|
-
version: 1.10.
|
9
|
+
- 2
|
10
|
+
version: 1.10.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-11-
|
18
|
+
date: 2012-11-17 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -145,6 +145,7 @@ files:
|
|
145
145
|
- spec/fixtures/empty_page.response
|
146
146
|
- spec/fixtures/guardian.co.uk.response
|
147
147
|
- spec/fixtures/international.response
|
148
|
+
- spec/fixtures/invalid_href.response
|
148
149
|
- spec/fixtures/iteh.at.response
|
149
150
|
- spec/fixtures/markupvalidator_faqs.response
|
150
151
|
- spec/fixtures/nonhttp.response
|