w3clove 0.7.9 → 0.7.10
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/lib/w3clove/sitemap.rb +4 -4
- data/lib/w3clove/version.rb +1 -1
- data/spec/samples/exclusions.html +3 -0
- data/spec/samples/exclusions.xml +81 -0
- data/spec/sitemap_spec.rb +8 -1
- data/spec/spec_helper.rb +1 -0
- metadata +5 -4
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove]
|
1
|
+
= w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/jaimeiniesta/w3clove]
|
2
2
|
|
3
3
|
Site-wide markup validator. Validate a whole web site on the W3C Markup Validator, from the command line, and generate a comprehensive report of all errors found.
|
4
4
|
|
data/lib/w3clove/sitemap.rb
CHANGED
@@ -53,7 +53,7 @@ module W3Clove
|
|
53
53
|
# to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
|
54
54
|
# seem to point to HTML (like images, multimedia, text, javascript...)
|
55
55
|
def pages_in_sitemap
|
56
|
-
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
56
|
+
pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| W3Clove::Page.new(loc.text)}
|
57
57
|
if pages.empty?
|
58
58
|
m = MetaInspector.new(url, timeout)
|
59
59
|
links = [m.url]
|
@@ -75,10 +75,10 @@ module W3Clove
|
|
75
75
|
# That is, it does not look like javascript, image, pdf...
|
76
76
|
def looks_like_html?(url)
|
77
77
|
u = URI.parse(URI.encode(url))
|
78
|
-
scheme = u.scheme
|
79
|
-
extension = u.path.split(".").last
|
78
|
+
scheme = u.scheme if u.scheme
|
79
|
+
extension = u.path.split(".").last if u.path
|
80
80
|
|
81
|
-
(scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|xls|wav|mp3|ogg/i)
|
81
|
+
(scheme && extension) && (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|rtf|xml|xls|csv|wav|mp3|ogg/i)
|
82
82
|
end
|
83
83
|
|
84
84
|
def xml_locations
|
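data/lib/w3clove/version.rb
CHANGED
(hunk not captured in this extract; the summary above lists +1 -1 for this file)
data/spec/samples/exclusions.html
CHANGED
@@ -30,6 +30,9 @@ Via: 1.1 varnish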
|
|
30
30
|
<a href="/file.txt">link</a><br />
|
31
31
|
<a href="/file.doc">link</a><br />
|
32
32
|
<a href="/file.xls">link</a><br />
|
33
|
+
<a href="/file.rtf">link</a><br />
|
34
|
+
<a href="/file.xml">link</a><br />
|
35
|
+
<a href="/file.csv">link</a><br />
|
33
36
|
|
34
37
|
<a href="/file.wav">link</a><br />
|
35
38
|
<a href="/file.mp3">link</a><br />
|
data/spec/samples/exclusions.xml
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Thu, 29 Dec 2011 23:08:54 GMT
|
3
|
+
Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
|
4
|
+
Transfer-Encoding: chunked
|
5
|
+
Content-Type: text/xml
|
6
|
+
|
7
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
8
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
9
|
+
<url>
|
10
|
+
<loc>http://example.com/</loc>
|
11
|
+
</url>
|
12
|
+
<url>
|
13
|
+
<loc>http://example.com/faqs</loc>
|
14
|
+
</url>
|
15
|
+
<url>
|
16
|
+
<loc>http://example.com/file.gif</loc>
|
17
|
+
</url>
|
18
|
+
<url>
|
19
|
+
<loc>http://example.com/file.GIF</loc>
|
20
|
+
</url>
|
21
|
+
<url>
|
22
|
+
<loc>http://example.com/file.gif?size=thumb</loc>
|
23
|
+
</url>
|
24
|
+
<url>
|
25
|
+
<loc>http://example.com/file.jpg</loc>
|
26
|
+
</url>
|
27
|
+
<url>
|
28
|
+
<loc>http://example.com/file.jpeg</loc>
|
29
|
+
</url>
|
30
|
+
<url>
|
31
|
+
<loc>http://example.com/file.png</loc>
|
32
|
+
</url>
|
33
|
+
<url>
|
34
|
+
<loc>http://example.com/file.tiff</loc>
|
35
|
+
</url>
|
36
|
+
<url>
|
37
|
+
<loc>http://example.com/file.bmp</loc>
|
38
|
+
</url>
|
39
|
+
<url>
|
40
|
+
<loc>http://example.com/file.pdf</loc>
|
41
|
+
</url>
|
42
|
+
<url>
|
43
|
+
<loc>http://example.com/file.txt</loc>
|
44
|
+
</url>
|
45
|
+
<url>
|
46
|
+
<loc>http://example.com/file.doc</loc>
|
47
|
+
</url>
|
48
|
+
<url>
|
49
|
+
<loc>http://example.com/file.xls</loc>
|
50
|
+
</url>
|
51
|
+
<url>
|
52
|
+
<loc>http://example.com/file.rtf</loc>
|
53
|
+
</url>
|
54
|
+
<url>
|
55
|
+
<loc>http://example.com/file.xml</loc>
|
56
|
+
</url>
|
57
|
+
<url>
|
58
|
+
<loc>http://example.com/file.csv</loc>
|
59
|
+
</url>
|
60
|
+
<url>
|
61
|
+
<loc>http://example.com/file.wav</loc>
|
62
|
+
</url>
|
63
|
+
<url>
|
64
|
+
<loc>http://example.com/file.mp3</loc>
|
65
|
+
</url>
|
66
|
+
<url>
|
67
|
+
<loc>http://example.com/file.ogg</loc>
|
68
|
+
</url>
|
69
|
+
<url>
|
70
|
+
<loc>mailto:user@example.com</loc>
|
71
|
+
</url>
|
72
|
+
<url>
|
73
|
+
<loc>ftp://user:password@example.com</loc>
|
74
|
+
</url>
|
75
|
+
<url>
|
76
|
+
<loc>telnet://user:password@example.com</loc>
|
77
|
+
</url>
|
78
|
+
<url>
|
79
|
+
<loc>javascript:alert('hey');</loc>
|
80
|
+
</url>
|
81
|
+
</urlset>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -11,6 +11,7 @@ describe W3Clove::Sitemap do
|
|
11
11
|
@sitemap_with_protocol_relative = W3Clove::Sitemap.new('http://protocol-relative.com')
|
12
12
|
@sitemap_with_protocol_relative_https = W3Clove::Sitemap.new('https://protocol-relative.com')
|
13
13
|
@sitemap_for_exclusions = W3Clove::Sitemap.new('http://example.com/exclusions')
|
14
|
+
@sitemap_for_exclusions_xml = W3Clove::Sitemap.new('http://example.com/exclusions.xml')
|
14
15
|
@sitemap_for_absolute_urls = W3Clove::Sitemap.new('http://w3clove.com/faqs')
|
15
16
|
@sitemap_international = W3Clove::Sitemap.new('http://example.com/international')
|
16
17
|
|
@@ -98,13 +99,19 @@ describe W3Clove::Sitemap do
|
|
98
99
|
urls.should_not include 'http://w3clove.com/faqs/'
|
99
100
|
end
|
100
101
|
|
101
|
-
it "should exclude non-html pages" do
|
102
|
+
it "should exclude non-html pages from HTML sitemaps" do
|
102
103
|
@sitemap_for_exclusions.pages.length.should == 3
|
103
104
|
@sitemap_for_exclusions.pages[0].url.should == 'http://example.com/exclusions'
|
104
105
|
@sitemap_for_exclusions.pages[1].url.should == 'http://example.com/'
|
105
106
|
@sitemap_for_exclusions.pages[2].url.should == 'http://example.com/faqs'
|
106
107
|
end
|
107
108
|
|
109
|
+
it "should exclude non-html pages from XML sitemaps" do
|
110
|
+
@sitemap_for_exclusions_xml.pages.length.should == 2
|
111
|
+
@sitemap_for_exclusions_xml.pages[0].url.should == 'http://example.com/'
|
112
|
+
@sitemap_for_exclusions_xml.pages[1].url.should == 'http://example.com/faqs'
|
113
|
+
end
|
114
|
+
|
108
115
|
it "should not crash when URLs have international characters" do
|
109
116
|
@sitemap_international.pages.length.should == 9
|
110
117
|
@sitemap_international.pages[0].url.should == 'http://example.com/international'
|
data/spec/spec_helper.rb
CHANGED
@@ -14,6 +14,7 @@ FakeWeb.register_uri(:get, "http://zigotica.com", :response => open("#{$samples_
|
|
14
14
|
FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
|
15
15
|
FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
|
16
16
|
FakeWeb.register_uri(:get, "http://example.com/exclusions", :response => open("#{$samples_dir}/exclusions.html").read)
|
17
|
+
FakeWeb.register_uri(:get, "http://example.com/exclusions.xml", :response => open("#{$samples_dir}/exclusions.xml").read)
|
17
18
|
FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => open("#{$samples_dir}/w3clove_faqs.response").read)
|
18
19
|
FakeWeb.register_uri(:get, "http://example.com/international", :response => open("#{$samples_dir}/international.response").read)
|
19
20
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 7
|
9
|
-
- 9
|
10
|
-
version: 0.7.9
|
9
|
+
- 10
|
10
|
+
version: 0.7.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-11-02 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: w3c_validators
|
@@ -162,6 +162,7 @@ files:
|
|
162
162
|
- spec/samples/absolute_links.html
|
163
163
|
- spec/samples/eparreno.com.html
|
164
164
|
- spec/samples/exclusions.html
|
165
|
+
- spec/samples/exclusions.xml
|
165
166
|
- spec/samples/guides.rubyonrails.org.html
|
166
167
|
- spec/samples/international.response
|
167
168
|
- spec/samples/protocol_relative.html
|