metainspector 1.13.0 → 1.13.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,12 +53,12 @@ module MetaInspector
53
53
 
54
54
  # Internal links found on the page, as absolute URLs
55
55
  def internal_links
56
- @internal_links ||= links.select {|link| URI.parse(link).host == host }
56
+ @internal_links ||= links.select {|link| host_from_url(link) == host }
57
57
  end
58
58
 
59
59
  # External links found on the page, as absolute URLs
60
60
  def external_links
61
- @external_links ||= links.select {|link| URI.parse(link).host != host }
61
+ @external_links ||= links.select {|link| host_from_url(link) != host }
62
62
  end
63
63
 
64
64
  # Images found on the page, as absolute URLs
@@ -245,6 +245,13 @@ module MetaInspector
245
245
  url =~ /^\/\// ? "#{scheme}://#{url[2..-1]}" : url
246
246
  end
247
247
 
248
+ # Extracts the host from a given URL
249
+ def host_from_url(url)
250
+ URI.parse(url).host
251
+ rescue URI::InvalidURIError, URI::InvalidComponentError => e
252
+ add_fatal_error "Link parsing exception: #{e.message}" and nil
253
+ end
254
+
248
255
  # Look for the first <p> block with 120 characters or more
249
256
  def secondary_description
250
257
  first_long_paragraph = parsed_document.search('//p[string-length() >= 120]').first
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.13.0"
4
+ VERSION = "1.13.1"
5
5
  end
@@ -0,0 +1,27 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ <title>Malformed hrefs</title>
15
+ </head>
16
+ <body>
17
+ <h1>Good links</h1>
18
+ <a href="/faqs">FAQs</a>
19
+ <a href="skype:joeuser?call">a skype link</a>
20
+ <a href="telnet://telnet.cdrom.com">a telnet link</a>
21
+ <a href="javascript:alert('ok');">ok</a>
22
+
23
+ <h1>Bad links due to malformed href</h1>
24
+ <a href="javascript://">oops</a>
25
+ <a href="mailto:email(at)example.com">
26
+ </body>
27
+ </html>
@@ -177,22 +177,44 @@ describe MetaInspector do
177
177
  "http://example.com/search?q=espa%C3%B1a#top"]
178
178
  end
179
179
 
180
- it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
181
- m = MetaInspector.new('http://international.com')
182
- m.internal_links.should == [ "http://international.com/espa%C3%B1a.asp",
183
- "http://international.com/roman%C3%A9e",
184
- "http://international.com/faqs#cami%C3%B3n",
185
- "http://international.com/search?q=cami%C3%B3n",
186
- "http://international.com/search?q=espa%C3%B1a#top"]
180
+ describe "internal links" do
181
+ it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
182
+ m = MetaInspector.new('http://international.com')
183
+ m.internal_links.should == [ "http://international.com/espa%C3%B1a.asp",
184
+ "http://international.com/roman%C3%A9e",
185
+ "http://international.com/faqs#cami%C3%B3n",
186
+ "http://international.com/search?q=cami%C3%B3n",
187
+ "http://international.com/search?q=espa%C3%B1a#top"]
188
+ end
189
+
190
+ it "should not crash when processing malformed hrefs" do
191
+ m = MetaInspector.new('http://example.com/malformed_href')
192
+ expect {
193
+ m.internal_links.should == [ "http://example.com/faqs" ]
194
+ m.should_not be_ok
195
+ }.to_not raise_error
196
+ end
187
197
  end
188
198
 
189
- it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
190
- m = MetaInspector.new('http://international.com')
191
- m.external_links.should == [ "http://example.com/espa%C3%B1a.asp",
192
- "http://example.com/roman%C3%A9e",
193
- "http://example.com/faqs#cami%C3%B3n",
194
- "http://example.com/search?q=cami%C3%B3n",
195
- "http://example.com/search?q=espa%C3%B1a#top"]
199
+ describe "external links" do
200
+ it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
201
+ m = MetaInspector.new('http://international.com')
202
+ m.external_links.should == [ "http://example.com/espa%C3%B1a.asp",
203
+ "http://example.com/roman%C3%A9e",
204
+ "http://example.com/faqs#cami%C3%B3n",
205
+ "http://example.com/search?q=cami%C3%B3n",
206
+ "http://example.com/search?q=espa%C3%B1a#top"]
207
+ end
208
+
209
+ it "should not crash when processing malformed hrefs" do
210
+ m = MetaInspector.new('http://example.com/malformed_href')
211
+ expect {
212
+ m.external_links.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com",
213
+ "javascript:alert('ok');", "javascript://",
214
+ "mailto:email(at)example.com"]
215
+ m.should_not be_ok
216
+ }.to_not raise_error
217
+ end
196
218
  end
197
219
  end
198
220
 
data/spec/spec_helper.rb CHANGED
@@ -29,6 +29,7 @@ FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => fixture_
29
29
  FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
30
30
  FakeWeb.register_uri(:get, "http://example.com/nonhttp", :response => fixture_file("nonhttp.response"))
31
31
  FakeWeb.register_uri(:get, "http://example.com/invalid_href", :response => fixture_file("invalid_href.response"))
32
+ FakeWeb.register_uri(:get, "http://example.com/malformed_href", :response => fixture_file("malformed_href.response"))
32
33
  FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
33
34
  FakeWeb.register_uri(:get, "http://markupvalidator.com/faqs", :response => fixture_file("markupvalidator_faqs.response"))
34
35
  FakeWeb.register_uri(:get, "https://twitter.com/markupvalidator", :response => fixture_file("twitter_markupvalidator.response"))
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 35
4
+ hash: 33
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 13
9
- - 0
10
- version: 1.13.0
9
+ - 1
10
+ version: 1.13.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-12-03 00:00:00 Z
18
+ date: 2012-12-13 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
@@ -164,6 +164,7 @@ files:
164
164
  - spec/fixtures/international.response
165
165
  - spec/fixtures/invalid_href.response
166
166
  - spec/fixtures/iteh.at.response
167
+ - spec/fixtures/malformed_href.response
167
168
  - spec/fixtures/markupvalidator_faqs.response
168
169
  - spec/fixtures/nonhttp.response
169
170
  - spec/fixtures/pagerankalert.com.response