metainspector 1.10.1 → 1.10.2

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -41,7 +41,7 @@ module MetaInspector
41
41
 
42
42
  # Links found on the page, as absolute URLs
43
43
  def links
44
- @data.links ||= parsed_links.map { |l| absolutify_url(unrelativize_url(l)) }
44
+ @data.links ||= parsed_links.map{ |l| absolutify_url(unrelativize_url(l)) }.compact
45
45
  end
46
46
 
47
47
  # Internal links found on the page, as absolute URLs
@@ -190,6 +190,8 @@ module MetaInspector
190
190
  else
191
191
  URI.parse(@root_url).merge(encode_url(url)).to_s
192
192
  end
193
+ rescue URI::InvalidURIError => e
194
+ add_fatal_error "Link parsing exception: #{e.message}" and nil
193
195
  end
194
196
 
195
197
  # Convert a protocol-relative url to its full form, depending on the scheme of the page that contains it
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.10.1"
4
+ VERSION = "1.10.2"
5
5
  end
@@ -0,0 +1,21 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ <title>Sample file non-http links</title>
15
+ </head>
16
+ <body>
17
+ <a href="<p>ftp://ftp.cdrom.com">an FTP link</a>
18
+ <a href="skype:joeuser?call">a skype link</a>
19
+ <a href="telnet://telnet.cdrom.com">a telnet link</a>
20
+ </body>
21
+ </html>
@@ -15,6 +15,7 @@ describe MetaInspector do
15
15
  FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
16
16
  FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
17
17
  FakeWeb.register_uri(:get, "http://example.com/nonhttp", :response => fixture_file("nonhttp.response"))
18
+ FakeWeb.register_uri(:get, "http://example.com/invalid_href", :response => fixture_file("invalid_href.response"))
18
19
  FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
19
20
  FakeWeb.register_uri(:get, "http://markupvalidator.com/faqs", :response => fixture_file("markupvalidator_faqs.response"))
20
21
  FakeWeb.register_uri(:get, "https://twitter.com/markupvalidator", :response => fixture_file("twitter_markupvalidator.response"))
@@ -212,6 +213,22 @@ describe MetaInspector do
212
213
  "http://example.com/search?q=espa%C3%B1a#top"]
213
214
  end
214
215
  end
216
+
217
+ it "should avoid links that contain invalid links as href value" do
218
+ m = MetaInspector.new('http://example.com/invalid_href')
219
+ m.links.should == [ "skype:joeuser?call",
220
+ "telnet://telnet.cdrom.com"]
221
+ end
222
+
223
+ it "should store errors when links contain invalid href values" do
224
+ m = MetaInspector.new('http://example.com/invalid_href')
225
+
226
+ expect {
227
+ links = m.links
228
+ }.to change { m.errors.size }.from(0).to(1)
229
+
230
+ m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
231
+ end
215
232
  end
216
233
 
217
234
  describe 'Non-HTTP links' do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 61
4
+ hash: 59
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 10
9
- - 1
10
- version: 1.10.1
9
+ - 2
10
+ version: 1.10.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-11-16 00:00:00 Z
18
+ date: 2012-11-17 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -145,6 +145,7 @@ files:
145
145
  - spec/fixtures/empty_page.response
146
146
  - spec/fixtures/guardian.co.uk.response
147
147
  - spec/fixtures/international.response
148
+ - spec/fixtures/invalid_href.response
148
149
  - spec/fixtures/iteh.at.response
149
150
  - spec/fixtures/markupvalidator_faqs.response
150
151
  - spec/fixtures/nonhttp.response