site_validator 1.3.9 → 1.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/site_validator/sitemap.rb +5 -2
- data/lib/site_validator/version.rb +1 -1
- data/spec/samples/dirty.html +24 -0
- data/spec/samples/sitemap_dirty.xml +23 -0
- data/spec/sitemap_spec.rb +22 -0
- data/spec/spec_helper.rb +2 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e82c301e6d3b77cb88a25f7df84bfe0b6f3c19d
|
4
|
+
data.tar.gz: 87c4dbe4deb6ac9efedd88a34f6fb64439dad5fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dd31f40cd774140c9ed3799019ccd226208920b6f8df9c0bed66739c51777653fb139178efd14783e25f1bb6a590678016083eefa47c3d74ade6938267c3d4a
|
7
|
+
data.tar.gz: c41869c1796ea3d194098202fd360c9962090b78f6e31b3f4a3cb30d31ddf78ed6d61bad2f6980b5a5d3ab20e1305bddcc0e3b1024d9511e37a74d0777aa15e5
|
data/lib/site_validator/sitemap.rb
CHANGED
@@ -52,7 +52,8 @@ module SiteValidator
|
|
52
52
|
# to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
|
53
53
|
# seem to point to HTML (like images, multimedia, text, javascript...)
|
54
54
|
def pages_in_sitemap
|
55
|
-
pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| SiteValidator::Page.new(loc.text)}
|
55
|
+
pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
|
56
|
+
|
56
57
|
if pages.empty?
|
57
58
|
m = MetaInspector.new(url, :timeout => timeout, :allow_redirections => :all)
|
58
59
|
links = [m.url]
|
@@ -78,6 +79,8 @@ module SiteValidator
|
|
78
79
|
extension = u.path.split(".").last if u.path
|
79
80
|
|
80
81
|
(scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|mobi|epub|doc|rtf|xml|xls|csv|wav|mp3|ogg|zip|rar|tar|gz/i)
|
82
|
+
rescue URI::InvalidURIError
|
83
|
+
false
|
81
84
|
end
|
82
85
|
|
83
86
|
def xml_locations
|
@@ -88,4 +91,4 @@ module SiteValidator
|
|
88
91
|
@doc ||= open(url, :allow_redirections => :all)
|
89
92
|
end
|
90
93
|
end
|
91
|
-
end
|
94
|
+
end
|
data/spec/samples/dirty.html
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
<title>Sample file with links that have spaces</title>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<a href=" http://example.com">index</a>
|
18
|
+
<a href="http://example.com/faqs ">FAQs</a>
|
19
|
+
<a href="
|
20
|
+
http://example.com/contact
|
21
|
+
|
22
|
+
">contact</a>
|
23
|
+
</body>
|
24
|
+
</html>
|
data/spec/samples/sitemap_dirty.xml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Thu, 29 Dec 2011 23:08:54 GMT
|
3
|
+
Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
|
4
|
+
Transfer-Encoding: chunked
|
5
|
+
Content-Type: text/xml
|
6
|
+
|
7
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
8
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
9
|
+
<url>
|
10
|
+
<loc> http://www.ryanair.com/es/ </loc>
|
11
|
+
</url>
|
12
|
+
<url>
|
13
|
+
<loc>
|
14
|
+
http://www.ryanair.com/es/careers/job
|
15
|
+
</loc>
|
16
|
+
</url>
|
17
|
+
<url>
|
18
|
+
<loc>mailto:email@example.com</loc>
|
19
|
+
</url>
|
20
|
+
<url>
|
21
|
+
<loc> </loc>
|
22
|
+
</url>
|
23
|
+
</urlset>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -5,6 +5,8 @@ require_relative 'spec_helper'
|
|
5
5
|
describe SiteValidator::Sitemap do
|
6
6
|
before(:each) do
|
7
7
|
@sitemap = SiteValidator::Sitemap.new('http://ryanair.com/sitemap.xml')
|
8
|
+
@sitemap_dirty = SiteValidator::Sitemap.new('http://example.com/sitemap_dirty.xml')
|
9
|
+
@sitemap_html_dirty = SiteValidator::Sitemap.new('http://example.com/dirty')
|
8
10
|
@sitemap_html = SiteValidator::Sitemap.new('http://guides.rubyonrails.org')
|
9
11
|
@sitemap_no_links = SiteValidator::Sitemap.new('http://zigotica.com')
|
10
12
|
@sitemap_with_trailing_slash = SiteValidator::Sitemap.new('http://eparreno.com')
|
@@ -32,6 +34,26 @@ describe SiteValidator::Sitemap do
|
|
32
34
|
@sitemap.pages[2].url.should == 'http://www.ryanair.com/es/about'
|
33
35
|
end
|
34
36
|
|
37
|
+
it "should not crash when encountering invalid locs on an xml sitemap" do
|
38
|
+
expect {
|
39
|
+
@sitemap_dirty.pages.length.should == 2
|
40
|
+
}.to_not raise_error
|
41
|
+
|
42
|
+
@sitemap_dirty.pages[0].url.should == 'http://www.ryanair.com/es/'
|
43
|
+
@sitemap_dirty.pages[1].url.should == 'http://www.ryanair.com/es/careers/job'
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should not crash when encountering invalid hrefs on an html page" do
|
47
|
+
expect {
|
48
|
+
@sitemap_html_dirty.pages.length.should == 4
|
49
|
+
}.to_not raise_error
|
50
|
+
|
51
|
+
@sitemap_html_dirty.pages[0].url.should == 'http://example.com/dirty'
|
52
|
+
@sitemap_html_dirty.pages[1].url.should == 'http://example.com/'
|
53
|
+
@sitemap_html_dirty.pages[2].url.should == 'http://example.com/faqs'
|
54
|
+
@sitemap_html_dirty.pages[3].url.should == 'http://example.com/contact'
|
55
|
+
end
|
56
|
+
|
35
57
|
it "should get pages from the sample guides.rubyonrails.org site" do
|
36
58
|
@sitemap_html.pages.map {|p| p.url}
|
37
59
|
.should == ["http://guides.rubyonrails.org/",
|
data/spec/spec_helper.rb
CHANGED
@@ -7,6 +7,8 @@ require 'fakeweb'
|
|
7
7
|
|
8
8
|
$samples_dir = File.dirname(__FILE__) + '/samples'
|
9
9
|
FakeWeb.register_uri(:get, "http://ryanair.com/sitemap.xml", :response => open("#{$samples_dir}/sitemap.xml").read)
|
10
|
+
FakeWeb.register_uri(:get, "http://example.com/sitemap_dirty.xml", :response => open("#{$samples_dir}/sitemap_dirty.xml").read)
|
11
|
+
FakeWeb.register_uri(:get, "http://example.com/dirty", :response => open("#{$samples_dir}/dirty.html").read)
|
10
12
|
FakeWeb.register_uri(:get, "http://guides.rubyonrails.org", :response => open("#{$samples_dir}/guides.rubyonrails.org.html").read)
|
11
13
|
FakeWeb.register_uri(:get, "http://eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
|
12
14
|
FakeWeb.register_uri(:get, "http://www.eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site_validator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.9
|
4
|
+
version: 1.3.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: w3c_validators
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- spec/mocks/mocked_validator.rb
|
141
141
|
- spec/page_spec.rb
|
142
142
|
- spec/samples/absolute_links.html
|
143
|
+
- spec/samples/dirty.html
|
143
144
|
- spec/samples/eparreno.com.html
|
144
145
|
- spec/samples/exclusions.html
|
145
146
|
- spec/samples/exclusions.xml
|
@@ -152,6 +153,7 @@ files:
|
|
152
153
|
- spec/samples/markup_validator_faqs.response
|
153
154
|
- spec/samples/protocol_relative.html
|
154
155
|
- spec/samples/sitemap.xml
|
156
|
+
- spec/samples/sitemap_dirty.xml
|
155
157
|
- spec/samples/zigotica.com.html
|
156
158
|
- spec/sitemap_spec.rb
|
157
159
|
- spec/spec_helper.rb
|
@@ -175,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
177
|
version: '0'
|
176
178
|
requirements: []
|
177
179
|
rubyforge_project:
|
178
|
-
rubygems_version: 2.
|
180
|
+
rubygems_version: 2.2.2
|
179
181
|
signing_key:
|
180
182
|
specification_version: 4
|
181
183
|
summary: command-line tool to validate the markup of a whole site against the W3C
|