w3clove 0.7.9 → 0.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/lib/w3clove/sitemap.rb +4 -4
- data/lib/w3clove/version.rb +1 -1
- data/spec/samples/exclusions.html +3 -0
- data/spec/samples/exclusions.xml +81 -0
- data/spec/sitemap_spec.rb +8 -1
- data/spec/spec_helper.rb +1 -0
- metadata +5 -4
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove]
|
1
|
+
= w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/jaimeiniesta/w3clove]
|
2
2
|
|
3
3
|
Site-wide markup validator. Validate a whole web site on the W3C Markup Validator, from the command line, and generate a comprehensive report of all errors found.
|
4
4
|
|
data/lib/w3clove/sitemap.rb
CHANGED
@@ -53,7 +53,7 @@ module W3Clove
|
|
53
53
|
# to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
|
54
54
|
# seem to point to HTML (like images, multimedia, text, javascript...)
|
55
55
|
def pages_in_sitemap
|
56
|
-
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
56
|
+
pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| W3Clove::Page.new(loc.text)}
|
57
57
|
if pages.empty?
|
58
58
|
m = MetaInspector.new(url, timeout)
|
59
59
|
links = [m.url]
|
@@ -75,10 +75,10 @@ module W3Clove
|
|
75
75
|
# That is, it does not look like javascript, image, pdf...
|
76
76
|
def looks_like_html?(url)
|
77
77
|
u = URI.parse(URI.encode(url))
|
78
|
-
scheme = u.scheme
|
79
|
-
extension = u.path.split(".").last
|
78
|
+
scheme = u.scheme if u.scheme
|
79
|
+
extension = u.path.split(".").last if u.path
|
80
80
|
|
81
|
-
(scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|xls|wav|mp3|ogg/i)
|
81
|
+
(scheme && extension) && (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|rtf|xml|xls|csv|wav|mp3|ogg/i)
|
82
82
|
end
|
83
83
|
|
84
84
|
def xml_locations
|
data/lib/w3clove/version.rb
CHANGED
data/spec/samples/exclusions.html
CHANGED
@@ -30,6 +30,9 @@ Via: 1.1 varnish
|
|
30
30
|
<a href="/file.txt">link</a><br />
|
31
31
|
<a href="/file.doc">link</a><br />
|
32
32
|
<a href="/file.xls">link</a><br />
|
33
|
+
<a href="/file.rtf">link</a><br />
|
34
|
+
<a href="/file.xml">link</a><br />
|
35
|
+
<a href="/file.csv">link</a><br />
|
33
36
|
|
34
37
|
<a href="/file.wav">link</a><br />
|
35
38
|
<a href="/file.mp3">link</a><br />
|
data/spec/samples/exclusions.xml
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Thu, 29 Dec 2011 23:08:54 GMT
|
3
|
+
Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
|
4
|
+
Transfer-Encoding: chunked
|
5
|
+
Content-Type: text/xml
|
6
|
+
|
7
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
8
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
9
|
+
<url>
|
10
|
+
<loc>http://example.com/</loc>
|
11
|
+
</url>
|
12
|
+
<url>
|
13
|
+
<loc>http://example.com/faqs</loc>
|
14
|
+
</url>
|
15
|
+
<url>
|
16
|
+
<loc>http://example.com/file.gif</loc>
|
17
|
+
</url>
|
18
|
+
<url>
|
19
|
+
<loc>http://example.com/file.GIF</loc>
|
20
|
+
</url>
|
21
|
+
<url>
|
22
|
+
<loc>http://example.com/file.gif?size=thumb</loc>
|
23
|
+
</url>
|
24
|
+
<url>
|
25
|
+
<loc>http://example.com/file.jpg</loc>
|
26
|
+
</url>
|
27
|
+
<url>
|
28
|
+
<loc>http://example.com/file.jpeg</loc>
|
29
|
+
</url>
|
30
|
+
<url>
|
31
|
+
<loc>http://example.com/file.png</loc>
|
32
|
+
</url>
|
33
|
+
<url>
|
34
|
+
<loc>http://example.com/file.tiff</loc>
|
35
|
+
</url>
|
36
|
+
<url>
|
37
|
+
<loc>http://example.com/file.bmp</loc>
|
38
|
+
</url>
|
39
|
+
<url>
|
40
|
+
<loc>http://example.com/file.pdf</loc>
|
41
|
+
</url>
|
42
|
+
<url>
|
43
|
+
<loc>http://example.com/file.txt</loc>
|
44
|
+
</url>
|
45
|
+
<url>
|
46
|
+
<loc>http://example.com/file.doc</loc>
|
47
|
+
</url>
|
48
|
+
<url>
|
49
|
+
<loc>http://example.com/file.xls</loc>
|
50
|
+
</url>
|
51
|
+
<url>
|
52
|
+
<loc>http://example.com/file.rtf</loc>
|
53
|
+
</url>
|
54
|
+
<url>
|
55
|
+
<loc>http://example.com/file.xml</loc>
|
56
|
+
</url>
|
57
|
+
<url>
|
58
|
+
<loc>http://example.com/file.csv</loc>
|
59
|
+
</url>
|
60
|
+
<url>
|
61
|
+
<loc>http://example.com/file.wav</loc>
|
62
|
+
</url>
|
63
|
+
<url>
|
64
|
+
<loc>http://example.com/file.mp3</loc>
|
65
|
+
</url>
|
66
|
+
<url>
|
67
|
+
<loc>http://example.com/file.ogg</loc>
|
68
|
+
</url>
|
69
|
+
<url>
|
70
|
+
<loc>mailto:user@example.com</loc>
|
71
|
+
</url>
|
72
|
+
<url>
|
73
|
+
<loc>ftp://user:password@example.com</loc>
|
74
|
+
</url>
|
75
|
+
<url>
|
76
|
+
<loc>telnet://user:password@example.com</loc>
|
77
|
+
</url>
|
78
|
+
<url>
|
79
|
+
<loc>javascript:alert('hey');</loc>
|
80
|
+
</url>
|
81
|
+
</urlset>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -11,6 +11,7 @@ describe W3Clove::Sitemap do
|
|
11
11
|
@sitemap_with_protocol_relative = W3Clove::Sitemap.new('http://protocol-relative.com')
|
12
12
|
@sitemap_with_protocol_relative_https = W3Clove::Sitemap.new('https://protocol-relative.com')
|
13
13
|
@sitemap_for_exclusions = W3Clove::Sitemap.new('http://example.com/exclusions')
|
14
|
+
@sitemap_for_exclusions_xml = W3Clove::Sitemap.new('http://example.com/exclusions.xml')
|
14
15
|
@sitemap_for_absolute_urls = W3Clove::Sitemap.new('http://w3clove.com/faqs')
|
15
16
|
@sitemap_international = W3Clove::Sitemap.new('http://example.com/international')
|
16
17
|
|
@@ -98,13 +99,19 @@ describe W3Clove::Sitemap do
|
|
98
99
|
urls.should_not include 'http://w3clove.com/faqs/'
|
99
100
|
end
|
100
101
|
|
101
|
-
it "should exclude non-html pages" do
|
102
|
+
it "should exclude non-html pages from HTML sitemaps" do
|
102
103
|
@sitemap_for_exclusions.pages.length.should == 3
|
103
104
|
@sitemap_for_exclusions.pages[0].url.should == 'http://example.com/exclusions'
|
104
105
|
@sitemap_for_exclusions.pages[1].url.should == 'http://example.com/'
|
105
106
|
@sitemap_for_exclusions.pages[2].url.should == 'http://example.com/faqs'
|
106
107
|
end
|
107
108
|
|
109
|
+
it "should exclude non-html pages from XML sitemaps" do
|
110
|
+
@sitemap_for_exclusions_xml.pages.length.should == 2
|
111
|
+
@sitemap_for_exclusions_xml.pages[0].url.should == 'http://example.com/'
|
112
|
+
@sitemap_for_exclusions_xml.pages[1].url.should == 'http://example.com/faqs'
|
113
|
+
end
|
114
|
+
|
108
115
|
it "should not crash when URLs have international characters" do
|
109
116
|
@sitemap_international.pages.length.should == 9
|
110
117
|
@sitemap_international.pages[0].url.should == 'http://example.com/international'
|
data/spec/spec_helper.rb
CHANGED
@@ -14,6 +14,7 @@ FakeWeb.register_uri(:get, "http://zigotica.com", :response => open("#{$samples_
|
|
14
14
|
FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
|
15
15
|
FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
|
16
16
|
FakeWeb.register_uri(:get, "http://example.com/exclusions", :response => open("#{$samples_dir}/exclusions.html").read)
|
17
|
+
FakeWeb.register_uri(:get, "http://example.com/exclusions.xml", :response => open("#{$samples_dir}/exclusions.xml").read)
|
17
18
|
FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => open("#{$samples_dir}/w3clove_faqs.response").read)
|
18
19
|
FakeWeb.register_uri(:get, "http://example.com/international", :response => open("#{$samples_dir}/international.response").read)
|
19
20
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 7
|
9
|
-
- 9
|
10
|
-
version: 0.7.9
|
9
|
+
- 10
|
10
|
+
version: 0.7.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-11-02 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: w3c_validators
|
@@ -162,6 +162,7 @@ files:
|
|
162
162
|
- spec/samples/absolute_links.html
|
163
163
|
- spec/samples/eparreno.com.html
|
164
164
|
- spec/samples/exclusions.html
|
165
|
+
- spec/samples/exclusions.xml
|
165
166
|
- spec/samples/guides.rubyonrails.org.html
|
166
167
|
- spec/samples/international.response
|
167
168
|
- spec/samples/protocol_relative.html
|