site_validator 1.3.9 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f20f9890302ebb31b971de1d281aca3ba3f38fe
4
- data.tar.gz: 3ebb6a272c290fa4da99bd4d145723241e6d04f0
3
+ metadata.gz: 2e82c301e6d3b77cb88a25f7df84bfe0b6f3c19d
4
+ data.tar.gz: 87c4dbe4deb6ac9efedd88a34f6fb64439dad5fd
5
5
  SHA512:
6
- metadata.gz: d4e66dc049fd60da2f2edd47a8ff208f1570d1cff353aa39c35c87bdc8ed118166c5f3b438c9445836afb9fb9a3fe1fa1a92350e15dc762d0d35f788df824e1d
7
- data.tar.gz: 2efdcf3b0ce55e62d6e4ceae0fa85ab5455e2463841db439ee3429002679012e83cca5c6d63d3823928b0994b7f9bafadc16d51489f6f3e096c81d09079ca8cc
6
+ metadata.gz: 5dd31f40cd774140c9ed3799019ccd226208920b6f8df9c0bed66739c51777653fb139178efd14783e25f1bb6a590678016083eefa47c3d74ade6938267c3d4a
7
+ data.tar.gz: c41869c1796ea3d194098202fd360c9962090b78f6e31b3f4a3cb30d31ddf78ed6d61bad2f6980b5a5d3ab20e1305bddcc0e3b1024d9511e37a74d0777aa15e5
@@ -52,7 +52,8 @@ module SiteValidator
52
52
  # to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
53
53
  # seem to point to HTML (like images, multimedia, text, javascript...)
54
54
  def pages_in_sitemap
55
- pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| SiteValidator::Page.new(loc.text)}
55
+ pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
56
+
56
57
  if pages.empty?
57
58
  m = MetaInspector.new(url, :timeout => timeout, :allow_redirections => :all)
58
59
  links = [m.url]
@@ -78,6 +79,8 @@ module SiteValidator
78
79
  extension = u.path.split(".").last if u.path
79
80
 
80
81
  (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|mobi|epub|doc|rtf|xml|xls|csv|wav|mp3|ogg|zip|rar|tar|gz/i)
82
+ rescue URI::InvalidURIError
83
+ false
81
84
  end
82
85
 
83
86
  def xml_locations
@@ -88,4 +91,4 @@ module SiteValidator
88
91
  @doc ||= open(url, :allow_redirections => :all)
89
92
  end
90
93
  end
91
- end
94
+ end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module SiteValidator
4
- VERSION = "1.3.9"
4
+ VERSION = "1.3.10"
5
5
  end
@@ -0,0 +1,24 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ <title>Sample file with links that have spaces</title>
15
+ </head>
16
+ <body>
17
+ <a href=" http://example.com">index</a>
18
+ <a href="http://example.com/faqs ">FAQs</a>
19
+ <a href="
20
+ http://example.com/contact
21
+
22
+ ">contact</a>
23
+ </body>
24
+ </html>
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Date: Thu, 29 Dec 2011 23:08:54 GMT
3
+ Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
4
+ Transfer-Encoding: chunked
5
+ Content-Type: text/xml
6
+
7
+ <?xml version="1.0" encoding="UTF-8"?>
8
+ <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
9
+ <url>
10
+ <loc> http://www.ryanair.com/es/ </loc>
11
+ </url>
12
+ <url>
13
+ <loc>
14
+ http://www.ryanair.com/es/careers/job
15
+ </loc>
16
+ </url>
17
+ <url>
18
+ <loc>mailto:email@example.com</loc>
19
+ </url>
20
+ <url>
21
+ <loc> </loc>
22
+ </url>
23
+ </urlset>
@@ -5,6 +5,8 @@ require_relative 'spec_helper'
5
5
  describe SiteValidator::Sitemap do
6
6
  before(:each) do
7
7
  @sitemap = SiteValidator::Sitemap.new('http://ryanair.com/sitemap.xml')
8
+ @sitemap_dirty = SiteValidator::Sitemap.new('http://example.com/sitemap_dirty.xml')
9
+ @sitemap_html_dirty = SiteValidator::Sitemap.new('http://example.com/dirty')
8
10
  @sitemap_html = SiteValidator::Sitemap.new('http://guides.rubyonrails.org')
9
11
  @sitemap_no_links = SiteValidator::Sitemap.new('http://zigotica.com')
10
12
  @sitemap_with_trailing_slash = SiteValidator::Sitemap.new('http://eparreno.com')
@@ -32,6 +34,26 @@ describe SiteValidator::Sitemap do
32
34
  @sitemap.pages[2].url.should == 'http://www.ryanair.com/es/about'
33
35
  end
34
36
 
37
+ it "should not crash when encountering invalid locs on an xml sitemap" do
38
+ expect {
39
+ @sitemap_dirty.pages.length.should == 2
40
+ }.to_not raise_error
41
+
42
+ @sitemap_dirty.pages[0].url.should == 'http://www.ryanair.com/es/'
43
+ @sitemap_dirty.pages[1].url.should == 'http://www.ryanair.com/es/careers/job'
44
+ end
45
+
46
+ it "should not crash when encountering invalid hrefs on an html page" do
47
+ expect {
48
+ @sitemap_html_dirty.pages.length.should == 4
49
+ }.to_not raise_error
50
+
51
+ @sitemap_html_dirty.pages[0].url.should == 'http://example.com/dirty'
52
+ @sitemap_html_dirty.pages[1].url.should == 'http://example.com/'
53
+ @sitemap_html_dirty.pages[2].url.should == 'http://example.com/faqs'
54
+ @sitemap_html_dirty.pages[3].url.should == 'http://example.com/contact'
55
+ end
56
+
35
57
  it "should get pages from the sample guides.rubyonrails.org site" do
36
58
  @sitemap_html.pages.map {|p| p.url}
37
59
  .should == ["http://guides.rubyonrails.org/",
@@ -7,6 +7,8 @@ require 'fakeweb'
7
7
 
8
8
  $samples_dir = File.dirname(__FILE__) + '/samples'
9
9
  FakeWeb.register_uri(:get, "http://ryanair.com/sitemap.xml", :response => open("#{$samples_dir}/sitemap.xml").read)
10
+ FakeWeb.register_uri(:get, "http://example.com/sitemap_dirty.xml", :response => open("#{$samples_dir}/sitemap_dirty.xml").read)
11
+ FakeWeb.register_uri(:get, "http://example.com/dirty", :response => open("#{$samples_dir}/dirty.html").read)
10
12
  FakeWeb.register_uri(:get, "http://guides.rubyonrails.org", :response => open("#{$samples_dir}/guides.rubyonrails.org.html").read)
11
13
  FakeWeb.register_uri(:get, "http://eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
12
14
  FakeWeb.register_uri(:get, "http://www.eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_validator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.9
4
+ version: 1.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-21 00:00:00.000000000 Z
11
+ date: 2014-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: w3c_validators
@@ -140,6 +140,7 @@ files:
140
140
  - spec/mocks/mocked_validator.rb
141
141
  - spec/page_spec.rb
142
142
  - spec/samples/absolute_links.html
143
+ - spec/samples/dirty.html
143
144
  - spec/samples/eparreno.com.html
144
145
  - spec/samples/exclusions.html
145
146
  - spec/samples/exclusions.xml
@@ -152,6 +153,7 @@ files:
152
153
  - spec/samples/markup_validator_faqs.response
153
154
  - spec/samples/protocol_relative.html
154
155
  - spec/samples/sitemap.xml
156
+ - spec/samples/sitemap_dirty.xml
155
157
  - spec/samples/zigotica.com.html
156
158
  - spec/sitemap_spec.rb
157
159
  - spec/spec_helper.rb
@@ -175,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
177
  version: '0'
176
178
  requirements: []
177
179
  rubyforge_project:
178
- rubygems_version: 2.0.5
180
+ rubygems_version: 2.2.2
179
181
  signing_key:
180
182
  specification_version: 4
181
183
  summary: command-line tool to validate the markup of a whole site against the W3C