w3clove 0.7.9 → 0.7.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- = w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove]
1
+ = w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/jaimeiniesta/w3clove]
2
2
 
3
3
  Site-wide markup validator. Validate a whole web site on the W3C Markup Validator, from the command line, and generate a comprehensive report of all errors found.
4
4
 
@@ -53,7 +53,7 @@ module W3Clove
53
53
  # to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
54
54
  # seem to point to HTML (like images, multimedia, text, javascript...)
55
55
  def pages_in_sitemap
56
- pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
56
+ pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| W3Clove::Page.new(loc.text)}
57
57
  if pages.empty?
58
58
  m = MetaInspector.new(url, timeout)
59
59
  links = [m.url]
@@ -75,10 +75,10 @@ module W3Clove
75
75
  # That is, it does not look like javascript, image, pdf...
76
76
  def looks_like_html?(url)
77
77
  u = URI.parse(URI.encode(url))
78
- scheme = u.scheme
79
- extension = u.path.split(".").last
78
+ scheme = u.scheme if u.scheme
79
+ extension = u.path.split(".").last if u.path
80
80
 
81
- (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|xls|wav|mp3|ogg/i)
81
+ (scheme && extension) && (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|rtf|xml|xls|csv|wav|mp3|ogg/i)
82
82
  end
83
83
 
84
84
  def xml_locations
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module W3Clove
4
- VERSION = "0.7.9"
4
+ VERSION = "0.7.10"
5
5
  end
@@ -30,6 +30,9 @@ Via: 1.1 varnish
30
30
  <a href="/file.txt">link</a><br />
31
31
  <a href="/file.doc">link</a><br />
32
32
  <a href="/file.xls">link</a><br />
33
+ <a href="/file.rtf">link</a><br />
34
+ <a href="/file.xml">link</a><br />
35
+ <a href="/file.csv">link</a><br />
33
36
 
34
37
  <a href="/file.wav">link</a><br />
35
38
  <a href="/file.mp3">link</a><br />
@@ -0,0 +1,81 @@
1
+ HTTP/1.1 200 OK
2
+ Date: Thu, 29 Dec 2011 23:08:54 GMT
3
+ Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
4
+ Transfer-Encoding: chunked
5
+ Content-Type: text/xml
6
+
7
+ <?xml version="1.0" encoding="UTF-8"?>
8
+ <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
9
+ <url>
10
+ <loc>http://example.com/</loc>
11
+ </url>
12
+ <url>
13
+ <loc>http://example.com/faqs</loc>
14
+ </url>
15
+ <url>
16
+ <loc>http://example.com/file.gif</loc>
17
+ </url>
18
+ <url>
19
+ <loc>http://example.com/file.GIF</loc>
20
+ </url>
21
+ <url>
22
+ <loc>http://example.com/file.gif?size=thumb</loc>
23
+ </url>
24
+ <url>
25
+ <loc>http://example.com/file.jpg</loc>
26
+ </url>
27
+ <url>
28
+ <loc>http://example.com/file.jpeg</loc>
29
+ </url>
30
+ <url>
31
+ <loc>http://example.com/file.png</loc>
32
+ </url>
33
+ <url>
34
+ <loc>http://example.com/file.tiff</loc>
35
+ </url>
36
+ <url>
37
+ <loc>http://example.com/file.bmp</loc>
38
+ </url>
39
+ <url>
40
+ <loc>http://example.com/file.pdf</loc>
41
+ </url>
42
+ <url>
43
+ <loc>http://example.com/file.txt</loc>
44
+ </url>
45
+ <url>
46
+ <loc>http://example.com/file.doc</loc>
47
+ </url>
48
+ <url>
49
+ <loc>http://example.com/file.xls</loc>
50
+ </url>
51
+ <url>
52
+ <loc>http://example.com/file.rtf</loc>
53
+ </url>
54
+ <url>
55
+ <loc>http://example.com/file.xml</loc>
56
+ </url>
57
+ <url>
58
+ <loc>http://example.com/file.csv</loc>
59
+ </url>
60
+ <url>
61
+ <loc>http://example.com/file.wav</loc>
62
+ </url>
63
+ <url>
64
+ <loc>http://example.com/file.mp3</loc>
65
+ </url>
66
+ <url>
67
+ <loc>http://example.com/file.ogg</loc>
68
+ </url>
69
+ <url>
70
+ <loc>mailto:user@example.com</loc>
71
+ </url>
72
+ <url>
73
+ <loc>ftp://user:password@example.com</loc>
74
+ </url>
75
+ <url>
76
+ <loc>telnet://user:password@example.com</loc>
77
+ </url>
78
+ <url>
79
+ <loc>javascript:alert('hey');</loc>
80
+ </url>
81
+ </urlset>
@@ -11,6 +11,7 @@ describe W3Clove::Sitemap do
11
11
  @sitemap_with_protocol_relative = W3Clove::Sitemap.new('http://protocol-relative.com')
12
12
  @sitemap_with_protocol_relative_https = W3Clove::Sitemap.new('https://protocol-relative.com')
13
13
  @sitemap_for_exclusions = W3Clove::Sitemap.new('http://example.com/exclusions')
14
+ @sitemap_for_exclusions_xml = W3Clove::Sitemap.new('http://example.com/exclusions.xml')
14
15
  @sitemap_for_absolute_urls = W3Clove::Sitemap.new('http://w3clove.com/faqs')
15
16
  @sitemap_international = W3Clove::Sitemap.new('http://example.com/international')
16
17
 
@@ -98,13 +99,19 @@ describe W3Clove::Sitemap do
98
99
  urls.should_not include 'http://w3clove.com/faqs/'
99
100
  end
100
101
 
101
- it "should exclude non-html pages" do
102
+ it "should exclude non-html pages from HTML sitemaps" do
102
103
  @sitemap_for_exclusions.pages.length.should == 3
103
104
  @sitemap_for_exclusions.pages[0].url.should == 'http://example.com/exclusions'
104
105
  @sitemap_for_exclusions.pages[1].url.should == 'http://example.com/'
105
106
  @sitemap_for_exclusions.pages[2].url.should == 'http://example.com/faqs'
106
107
  end
107
108
 
109
+ it "should exclude non-html pages from XML sitemaps" do
110
+ @sitemap_for_exclusions_xml.pages.length.should == 2
111
+ @sitemap_for_exclusions_xml.pages[0].url.should == 'http://example.com/'
112
+ @sitemap_for_exclusions_xml.pages[1].url.should == 'http://example.com/faqs'
113
+ end
114
+
108
115
  it "should not crash when URLs have international characters" do
109
116
  @sitemap_international.pages.length.should == 9
110
117
  @sitemap_international.pages[0].url.should == 'http://example.com/international'
@@ -14,6 +14,7 @@ FakeWeb.register_uri(:get, "http://zigotica.com", :response => open("#{$samples_
14
14
  FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
15
15
  FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
16
16
  FakeWeb.register_uri(:get, "http://example.com/exclusions", :response => open("#{$samples_dir}/exclusions.html").read)
17
+ FakeWeb.register_uri(:get, "http://example.com/exclusions.xml", :response => open("#{$samples_dir}/exclusions.xml").read)
17
18
  FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => open("#{$samples_dir}/w3clove_faqs.response").read)
18
19
  FakeWeb.register_uri(:get, "http://example.com/international", :response => open("#{$samples_dir}/international.response").read)
19
20
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: w3clove
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 7
9
- - 9
10
- version: 0.7.9
9
+ - 10
10
+ version: 0.7.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-09-12 00:00:00 Z
18
+ date: 2012-11-02 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: w3c_validators
@@ -162,6 +162,7 @@ files:
162
162
  - spec/samples/absolute_links.html
163
163
  - spec/samples/eparreno.com.html
164
164
  - spec/samples/exclusions.html
165
+ - spec/samples/exclusions.xml
165
166
  - spec/samples/guides.rubyonrails.org.html
166
167
  - spec/samples/international.response
167
168
  - spec/samples/protocol_relative.html