site_validator 1.3.9 → 1.3.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/site_validator/sitemap.rb +5 -2
- data/lib/site_validator/version.rb +1 -1
- data/spec/samples/dirty.html +24 -0
- data/spec/samples/sitemap_dirty.xml +23 -0
- data/spec/sitemap_spec.rb +22 -0
- data/spec/spec_helper.rb +2 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e82c301e6d3b77cb88a25f7df84bfe0b6f3c19d
|
4
|
+
data.tar.gz: 87c4dbe4deb6ac9efedd88a34f6fb64439dad5fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dd31f40cd774140c9ed3799019ccd226208920b6f8df9c0bed66739c51777653fb139178efd14783e25f1bb6a590678016083eefa47c3d74ade6938267c3d4a
|
7
|
+
data.tar.gz: c41869c1796ea3d194098202fd360c9962090b78f6e31b3f4a3cb30d31ddf78ed6d61bad2f6980b5a5d3ab20e1305bddcc0e3b1024d9511e37a74d0777aa15e5
|
data/lib/site_validator/sitemap.rb
CHANGED
@@ -52,7 +52,8 @@ module SiteValidator
|
|
52
52
|
# to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
|
53
53
|
# seem to point to HTML (like images, multimedia, text, javascript...)
|
54
54
|
def pages_in_sitemap
|
55
|
-
pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| SiteValidator::Page.new(loc.text)}
|
55
|
+
pages = xml_locations.select {|loc| looks_like_html?(loc.text.strip)}.map {|loc| SiteValidator::Page.new(loc.text.strip)}
|
56
|
+
|
56
57
|
if pages.empty?
|
57
58
|
m = MetaInspector.new(url, :timeout => timeout, :allow_redirections => :all)
|
58
59
|
links = [m.url]
|
@@ -78,6 +79,8 @@ module SiteValidator
|
|
78
79
|
extension = u.path.split(".").last if u.path
|
79
80
|
|
80
81
|
(scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|mobi|epub|doc|rtf|xml|xls|csv|wav|mp3|ogg|zip|rar|tar|gz/i)
|
82
|
+
rescue URI::InvalidURIError
|
83
|
+
false
|
81
84
|
end
|
82
85
|
|
83
86
|
def xml_locations
|
@@ -88,4 +91,4 @@ module SiteValidator
|
|
88
91
|
@doc ||= open(url, :allow_redirections => :all)
|
89
92
|
end
|
90
93
|
end
|
91
|
-
end
|
94
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
<title>Sample file with links that have spaces</title>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<a href=" http://example.com">index</a>
|
18
|
+
<a href="http://example.com/faqs ">FAQs</a>
|
19
|
+
<a href="
|
20
|
+
http://example.com/contact
|
21
|
+
|
22
|
+
">contact</a>
|
23
|
+
</body>
|
24
|
+
</html>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Thu, 29 Dec 2011 23:08:54 GMT
|
3
|
+
Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
|
4
|
+
Transfer-Encoding: chunked
|
5
|
+
Content-Type: text/xml
|
6
|
+
|
7
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
8
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
9
|
+
<url>
|
10
|
+
<loc> http://www.ryanair.com/es/ </loc>
|
11
|
+
</url>
|
12
|
+
<url>
|
13
|
+
<loc>
|
14
|
+
http://www.ryanair.com/es/careers/job
|
15
|
+
</loc>
|
16
|
+
</url>
|
17
|
+
<url>
|
18
|
+
<loc>mailto:email@example.com</loc>
|
19
|
+
</url>
|
20
|
+
<url>
|
21
|
+
<loc> </loc>
|
22
|
+
</url>
|
23
|
+
</urlset>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -5,6 +5,8 @@ require_relative 'spec_helper'
|
|
5
5
|
describe SiteValidator::Sitemap do
|
6
6
|
before(:each) do
|
7
7
|
@sitemap = SiteValidator::Sitemap.new('http://ryanair.com/sitemap.xml')
|
8
|
+
@sitemap_dirty = SiteValidator::Sitemap.new('http://example.com/sitemap_dirty.xml')
|
9
|
+
@sitemap_html_dirty = SiteValidator::Sitemap.new('http://example.com/dirty')
|
8
10
|
@sitemap_html = SiteValidator::Sitemap.new('http://guides.rubyonrails.org')
|
9
11
|
@sitemap_no_links = SiteValidator::Sitemap.new('http://zigotica.com')
|
10
12
|
@sitemap_with_trailing_slash = SiteValidator::Sitemap.new('http://eparreno.com')
|
@@ -32,6 +34,26 @@ describe SiteValidator::Sitemap do
|
|
32
34
|
@sitemap.pages[2].url.should == 'http://www.ryanair.com/es/about'
|
33
35
|
end
|
34
36
|
|
37
|
+
it "should not crash when encountering invalid locs on an xml sitemap" do
|
38
|
+
expect {
|
39
|
+
@sitemap_dirty.pages.length.should == 2
|
40
|
+
}.to_not raise_error
|
41
|
+
|
42
|
+
@sitemap_dirty.pages[0].url.should == 'http://www.ryanair.com/es/'
|
43
|
+
@sitemap_dirty.pages[1].url.should == 'http://www.ryanair.com/es/careers/job'
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should not crash when encountering invalid hrefs on an html page" do
|
47
|
+
expect {
|
48
|
+
@sitemap_html_dirty.pages.length.should == 4
|
49
|
+
}.to_not raise_error
|
50
|
+
|
51
|
+
@sitemap_html_dirty.pages[0].url.should == 'http://example.com/dirty'
|
52
|
+
@sitemap_html_dirty.pages[1].url.should == 'http://example.com/'
|
53
|
+
@sitemap_html_dirty.pages[2].url.should == 'http://example.com/faqs'
|
54
|
+
@sitemap_html_dirty.pages[3].url.should == 'http://example.com/contact'
|
55
|
+
end
|
56
|
+
|
35
57
|
it "should get pages from the sample guides.rubyonrails.org site" do
|
36
58
|
@sitemap_html.pages.map {|p| p.url}
|
37
59
|
.should == ["http://guides.rubyonrails.org/",
|
data/spec/spec_helper.rb
CHANGED
@@ -7,6 +7,8 @@ require 'fakeweb'
|
|
7
7
|
|
8
8
|
$samples_dir = File.dirname(__FILE__) + '/samples'
|
9
9
|
FakeWeb.register_uri(:get, "http://ryanair.com/sitemap.xml", :response => open("#{$samples_dir}/sitemap.xml").read)
|
10
|
+
FakeWeb.register_uri(:get, "http://example.com/sitemap_dirty.xml", :response => open("#{$samples_dir}/sitemap_dirty.xml").read)
|
11
|
+
FakeWeb.register_uri(:get, "http://example.com/dirty", :response => open("#{$samples_dir}/dirty.html").read)
|
10
12
|
FakeWeb.register_uri(:get, "http://guides.rubyonrails.org", :response => open("#{$samples_dir}/guides.rubyonrails.org.html").read)
|
11
13
|
FakeWeb.register_uri(:get, "http://eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
|
12
14
|
FakeWeb.register_uri(:get, "http://www.eparreno.com", :response => open("#{$samples_dir}/eparreno.com.html").read)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site_validator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.9
|
4
|
+
version: 1.3.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: w3c_validators
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- spec/mocks/mocked_validator.rb
|
141
141
|
- spec/page_spec.rb
|
142
142
|
- spec/samples/absolute_links.html
|
143
|
+
- spec/samples/dirty.html
|
143
144
|
- spec/samples/eparreno.com.html
|
144
145
|
- spec/samples/exclusions.html
|
145
146
|
- spec/samples/exclusions.xml
|
@@ -152,6 +153,7 @@ files:
|
|
152
153
|
- spec/samples/markup_validator_faqs.response
|
153
154
|
- spec/samples/protocol_relative.html
|
154
155
|
- spec/samples/sitemap.xml
|
156
|
+
- spec/samples/sitemap_dirty.xml
|
155
157
|
- spec/samples/zigotica.com.html
|
156
158
|
- spec/sitemap_spec.rb
|
157
159
|
- spec/spec_helper.rb
|
@@ -175,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
177
|
version: '0'
|
176
178
|
requirements: []
|
177
179
|
rubyforge_project:
|
178
|
-
rubygems_version: 2.
|
180
|
+
rubygems_version: 2.2.2
|
179
181
|
signing_key:
|
180
182
|
specification_version: 4
|
181
183
|
summary: command-line tool to validate the markup of a whole site against the W3C
|