site_validator 1.3.9 → 1.3.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f20f9890302ebb31b971de1d281aca3ba3f38fe
4
- data.tar.gz: 3ebb6a272c290fa4da99bd4d145723241e6d04f0
3
+ metadata.gz: 2e82c301e6d3b77cb88a25f7df84bfe0b6f3c19d
4
+ data.tar.gz: 87c4dbe4deb6ac9efedd88a34f6fb64439dad5fd
5
5
  SHA512:
6
- metadata.gz: d4e66dc049fd60da2f2edd47a8ff208f1570d1cff353aa39c35c87bdc8ed118166c5f3b438c9445836afb9fb9a3fe1fa1a92350e15dc762d0d35f788df824e1d
7
- data.tar.gz: 2efdcf3b0ce55e62d6e4ceae0fa85ab5455e2463841db439ee3429002679012e83cca5c6d63d3823928b0994b7f9bafadc16d51489f6f3e096c81d09079ca8cc
6
+ metadata.gz: 5dd31f40cd774140c9ed3799019ccd226208920b6f8df9c0bed66739c51777653fb139178efd14783e25f1bb6a590678016083eefa47c3d74ade6938267c3d4a
7
+ data.tar.gz: c41869c1796ea3d194098202fd360c9962090b78f6e31b3f4a3cb30d31ddf78ed6d61bad2f6980b5a5d3ab20e1305bddcc0e3b1024d9511e37a74d0777aa15e5
@@ -52,7 +52,8 @@ module SiteValidator
52
52
  # to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
53
53
  # seem to point to HTML (like images, multimedia, text, javascript...)
54
54
  def pages_in_sitemap
55
- pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| SiteValidator::Page.new(loc.text)}
55
+ pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
56
+
56
57
  if pages.empty?
57
58
  m = MetaInspector.new(url, :timeout => timeout, :allow_redirections => :all)
58
59
  links = [m.url]
@@ -78,6 +79,8 @@ module SiteValidator
78
79
  extension = u.path.split(".").last if u.path
79
80
 
80
81
  (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|mobi|epub|doc|rtf|xml|xls|csv|wav|mp3|ogg|zip|rar|tar|gz/i)
82
+ rescue URI::InvalidURIError
83
+ false
81
84
  end
82
85
 
83
86
  def xml_locations
@@ -88,4 +91,4 @@ module SiteValidator
88
91
  @doc ||= open(url, :allow_redirections => :all)
89
92
  end
90
93
  end
91
- end
94
+ end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module SiteValidator
4
- VERSION = "1.3.9"
4
+ VERSION = "1.3.10"
5
5
  end
@@ -0,0 +1,24 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ <title>Sample file with links that have spaces</title>
15
+ </head>
16
+ <body>
17
+ <a href=" http://example.com">index</a>
18
+ <a href="http://example.com/faqs ">FAQs</a>
19
+ <a href="
20
+ http://example.com/contact
21
+
22
+ ">contact</a>
23
+ </body>
24
+ </html>
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Date: Thu, 29 Dec 2011 23:08:54 GMT
3
+ Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
4
+ Transfer-Encoding: chunked
5
+ Content-Type: text/xml
6
+
7
+ <?xml version="1.0" encoding="UTF-8"?>
8
+ <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
9
+ <url>
10
+ <loc> http://www.ryanair.com/es/ </loc>
11
+ </url>
12
+ <url>
13
+ <loc>
14
+ http://www.ryanair.com/es/careers/job
15
+ </loc>
16
+ </url>
17
+ <url>
18
+ <loc>mailto:email@example.com</loc>
19
+ </url>
20
+ <url>
21
+ <loc> </loc>
22
+ </url>
23
+ </urlset>
@@ -5,6 +5,8 @@ require_relative 'spec_helper'
5
5
  describe SiteValidator::Sitemap do
6
6
  before(:each) do
7
7
  @sitemap = SiteValidator::Sitemap.new('http://ryanair.com/sitemap.xml')
8
+ @sitemap_dirty = SiteValidator::Sitemap.new('http://example.com/sitemap_dirty.xml')
9
+ @sitemap_html_dirty = SiteValidator::Sitemap.new('http://example.com/dirty')
8
10
  @sitemap_html = SiteValidator::Sitemap.new('http://guides.rubyonrails.org')
9
11
  @sitemap_no_links = SiteValidator::Sitemap.new('http://zigotica.com')
10
12
  @sitemap_with_trailing_slash = SiteValidator::Sitemap.new('http://eparreno.com')
@@ -32,6 +34,26 @@ describe SiteValidator::Sitemap do
32
34
  @sitemap.pages[2].url.should == 'http://www.ryanair.com/es/about'
33
35
  end
34
36
 
37
+ it "should not crash when encountering invalid locs on an xml sitemap" do
38
+ expect {
39
+ @sitemap_dirty.pages.length.should == 2
40
+ }.to_not raise_error
41
+
42
+ @sitemap_dirty.pages[0].url.should == 'http://www.ryanair.com/es/'
43
+ @sitemap_dirty.pages[1].url.should == 'http://www.ryanair.com/es/careers/job'
44
+ end
45
+
46
+ it "should not crash when encountering invalid hrefs on an html page" do
47
+ expect {
48
+ @sitemap_html_dirty.pages.length.should == 4
49
+ }.to_not raise_error
50
+
51
+ @sitemap_html_dirty.pages[0].url.should == 'http://example.com/dirty'
52
+ @sitemap_html_dirty.pages[1].url.should == 'http://example.com/'
53
+ @sitemap_html_dirty.pages[2].url.should == 'http://example.com/faqs'
54
+ @sitemap_html_dirty.pages[3].url.should == 'http://example.com/contact'
55
+ end
56
+
35
57
  it "should get pages from the sample guides.rubyonrails.org site" do
36
58
  @sitemap_html.pages.map {|p| p.url}
37
59
  .should == ["http://guides.rubyonrails.org/",
@@ -7,6 +7,8 @@ require 'fakeweb'
7
7
 
8
8
  $samples_dir = File.dirname(__FILE__) + '/samples'
9
9
  FakeWeb.register_uri(:get, "http://ryanair.com/sitemap.xml", :response => open("#{$samples_dir}/sitemap.xml").read)
10
+ FakeWeb.register_uri(:get, "http://example.com/sitemap_dirty.xml", :response => open("#{$samples_dir}/sitemap_dirty.xml").read)
11
+ FakeWeb.register_uri(:get, "http://example.com/dirty", :response => open("#{$samples_dir}/dirty.html").read)
10
12
  FakeWeb.register_uri(:get, "http://guides.rubyonrails.org", :response => open("#{$samples_dir}/guides.rubyonrails.org.html").read)
11
13
  FakeWeb.register_uri(:get, "http://eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
12
14
  FakeWeb.register_uri(:get, "http://www.eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_validator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.9
4
+ version: 1.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-21 00:00:00.000000000 Z
11
+ date: 2014-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: w3c_validators
@@ -140,6 +140,7 @@ files:
140
140
  - spec/mocks/mocked_validator.rb
141
141
  - spec/page_spec.rb
142
142
  - spec/samples/absolute_links.html
143
+ - spec/samples/dirty.html
143
144
  - spec/samples/eparreno.com.html
144
145
  - spec/samples/exclusions.html
145
146
  - spec/samples/exclusions.xml
@@ -152,6 +153,7 @@ files:
152
153
  - spec/samples/markup_validator_faqs.response
153
154
  - spec/samples/protocol_relative.html
154
155
  - spec/samples/sitemap.xml
156
+ - spec/samples/sitemap_dirty.xml
155
157
  - spec/samples/zigotica.com.html
156
158
  - spec/sitemap_spec.rb
157
159
  - spec/spec_helper.rb
@@ -175,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
177
  version: '0'
176
178
  requirements: []
177
179
  rubyforge_project:
178
- rubygems_version: 2.0.5
180
+ rubygems_version: 2.2.2
179
181
  signing_key:
180
182
  specification_version: 4
181
183
  summary: command-line tool to validate the markup of a whole site against the W3C