metainspector 1.13.0 → 1.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,12 +53,12 @@ module MetaInspector
53
53
 
54
54
  # Internal links found on the page, as absolute URLs
55
55
  def internal_links
56
- @internal_links ||= links.select {|link| URI.parse(link).host == host }
56
+ @internal_links ||= links.select {|link| host_from_url(link) == host }
57
57
  end
58
58
 
59
59
  # External links found on the page, as absolute URLs
60
60
  def external_links
61
- @external_links ||= links.select {|link| URI.parse(link).host != host }
61
+ @external_links ||= links.select {|link| host_from_url(link) != host }
62
62
  end
63
63
 
64
64
  # Images found on the page, as absolute URLs
@@ -245,6 +245,13 @@ module MetaInspector
245
245
  url =~ /^\/\// ? "#{scheme}://#{url[2..-1]}" : url
246
246
  end
247
247
 
248
+ # Extracts the host from a given URL; if URI.parse rejects the URL, records a fatal error and gives back a falsy result (normally nil) instead of raising
249
+ def host_from_url(url)
250
+ URI.parse(url).host
251
+ rescue URI::InvalidURIError, URI::InvalidComponentError => e
252
+ add_fatal_error "Link parsing exception: #{e.message}" and nil
253
+ end
254
+
248
255
  # Look for the first <p> block with 120 characters or more
249
256
  def secondary_description
250
257
  first_long_paragraph = parsed_document.search('//p[string-length() >= 120]').first
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.13.0"
4
+ VERSION = "1.13.1"
5
5
  end
@@ -0,0 +1,27 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ <title>Malformed hrefs</title>
15
+ </head>
16
+ <body>
17
+ <h1>Good links</h1>
18
+ <a href="/faqs">FAQs</a>
19
+ <a href="skype:joeuser?call">a skype link</a>
20
+ <a href="telnet://telnet.cdrom.com">a telnet link</a>
21
+ <a href="javascript:alert('ok');">ok</a>
22
+
23
+ <h1>Bad links due to malformed href</h1>
24
+ <a href="javascript://">oops</a>
25
+ <a href="mailto:email(at)example.com">
26
+ </body>
27
+ </html>
@@ -177,22 +177,44 @@ describe MetaInspector do
177
177
  "http://example.com/search?q=espa%C3%B1a#top"]
178
178
  end
179
179
 
180
- it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
181
- m = MetaInspector.new('http://international.com')
182
- m.internal_links.should == [ "http://international.com/espa%C3%B1a.asp",
183
- "http://international.com/roman%C3%A9e",
184
- "http://international.com/faqs#cami%C3%B3n",
185
- "http://international.com/search?q=cami%C3%B3n",
186
- "http://international.com/search?q=espa%C3%B1a#top"]
180
+ describe "internal links" do
181
+ it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
182
+ m = MetaInspector.new('http://international.com')
183
+ m.internal_links.should == [ "http://international.com/espa%C3%B1a.asp",
184
+ "http://international.com/roman%C3%A9e",
185
+ "http://international.com/faqs#cami%C3%B3n",
186
+ "http://international.com/search?q=cami%C3%B3n",
187
+ "http://international.com/search?q=espa%C3%B1a#top"]
188
+ end
189
+
190
+ it "should not crash when processing malformed hrefs" do
191
+ m = MetaInspector.new('http://example.com/malformed_href')
192
+ expect {
193
+ m.internal_links.should == [ "http://example.com/faqs" ]
194
+ m.should_not be_ok
195
+ }.to_not raise_error
196
+ end
187
197
  end
188
198
 
189
- it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
190
- m = MetaInspector.new('http://international.com')
191
- m.external_links.should == [ "http://example.com/espa%C3%B1a.asp",
192
- "http://example.com/roman%C3%A9e",
193
- "http://example.com/faqs#cami%C3%B3n",
194
- "http://example.com/search?q=cami%C3%B3n",
195
- "http://example.com/search?q=espa%C3%B1a#top"]
199
+ describe "external links" do
200
+ it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
201
+ m = MetaInspector.new('http://international.com')
202
+ m.external_links.should == [ "http://example.com/espa%C3%B1a.asp",
203
+ "http://example.com/roman%C3%A9e",
204
+ "http://example.com/faqs#cami%C3%B3n",
205
+ "http://example.com/search?q=cami%C3%B3n",
206
+ "http://example.com/search?q=espa%C3%B1a#top"]
207
+ end
208
+
209
+ it "should not crash when processing malformed hrefs" do
210
+ m = MetaInspector.new('http://example.com/malformed_href')
211
+ expect {
212
+ m.external_links.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com",
213
+ "javascript:alert('ok');", "javascript://",
214
+ "mailto:email(at)example.com"]
215
+ m.should_not be_ok
216
+ }.to_not raise_error
217
+ end
196
218
  end
197
219
  end
198
220
 
data/spec/spec_helper.rb CHANGED
@@ -29,6 +29,7 @@ FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => fixture_
29
29
  FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
30
30
  FakeWeb.register_uri(:get, "http://example.com/nonhttp", :response => fixture_file("nonhttp.response"))
31
31
  FakeWeb.register_uri(:get, "http://example.com/invalid_href", :response => fixture_file("invalid_href.response"))
32
+ FakeWeb.register_uri(:get, "http://example.com/malformed_href", :response => fixture_file("malformed_href.response"))
32
33
  FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
33
34
  FakeWeb.register_uri(:get, "http://markupvalidator.com/faqs", :response => fixture_file("markupvalidator_faqs.response"))
34
35
  FakeWeb.register_uri(:get, "https://twitter.com/markupvalidator", :response => fixture_file("twitter_markupvalidator.response"))
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 35
4
+ hash: 33
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 13
9
- - 0
10
- version: 1.13.0
9
+ - 1
10
+ version: 1.13.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-12-03 00:00:00 Z
18
+ date: 2012-12-13 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
@@ -164,6 +164,7 @@ files:
164
164
  - spec/fixtures/international.response
165
165
  - spec/fixtures/invalid_href.response
166
166
  - spec/fixtures/iteh.at.response
167
+ - spec/fixtures/malformed_href.response
167
168
  - spec/fixtures/markupvalidator_faqs.response
168
169
  - spec/fixtures/nonhttp.response
169
170
  - spec/fixtures/pagerankalert.com.response