w3clove 0.7.9 → 0.7.10

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- = w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove]
1
+ = w3clove {<img src="http://travis-ci.org/jaimeiniesta/w3clove.png" />}[http://travis-ci.org/jaimeiniesta/w3clove] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/jaimeiniesta/w3clove]
2
2
 
3
3
  Site-wide markup validator. Validate a whole web site on the W3C Markup Validator, from the command line, and generate a comprehensive report of all errors found.
4
4
 
@@ -53,7 +53,7 @@ module W3Clove
53
53
  # to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
54
54
  # seem to point to HTML (like images, multimedia, text, javascript...)
55
55
  def pages_in_sitemap
56
- pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
56
+ pages = xml_locations.select {|loc| looks_like_html?(loc.text)}.map {|loc| W3Clove::Page.new(loc.text)}
57
57
  if pages.empty?
58
58
  m = MetaInspector.new(url, timeout)
59
59
  links = [m.url]
@@ -75,10 +75,10 @@ module W3Clove
75
75
  # That is, it does not look like javascript, image, pdf...
76
76
  def looks_like_html?(url)
77
77
  u = URI.parse(URI.encode(url))
78
- scheme = u.scheme
79
- extension = u.path.split(".").last
78
+ scheme = u.scheme if u.scheme
79
+ extension = u.path.split(".").last if u.path
80
80
 
81
- (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|xls|wav|mp3|ogg/i)
81
+ (scheme && extension) && (scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|rtf|xml|xls|csv|wav|mp3|ogg/i)
82
82
  end
83
83
 
84
84
  def xml_locations
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module W3Clove
4
- VERSION = "0.7.9"
4
+ VERSION = "0.7.10"
5
5
  end
@@ -30,6 +30,9 @@ Via: 1.1 varnish
30
30
  <a href="/file.txt">link</a><br />
31
31
  <a href="/file.doc">link</a><br />
32
32
  <a href="/file.xls">link</a><br />
33
+ <a href="/file.rtf">link</a><br />
34
+ <a href="/file.xml">link</a><br />
35
+ <a href="/file.csv">link</a><br />
33
36
 
34
37
  <a href="/file.wav">link</a><br />
35
38
  <a href="/file.mp3">link</a><br />
@@ -0,0 +1,81 @@
1
+ HTTP/1.1 200 OK
2
+ Date: Thu, 29 Dec 2011 23:08:54 GMT
3
+ Server: Apache/2.2.21 (Unix) mod_ssl/2.2.3 OpenSSL/0.9.8e-fips-rhel5
4
+ Transfer-Encoding: chunked
5
+ Content-Type: text/xml
6
+
7
+ <?xml version="1.0" encoding="UTF-8"?>
8
+ <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
9
+ <url>
10
+ <loc>http://example.com/</loc>
11
+ </url>
12
+ <url>
13
+ <loc>http://example.com/faqs</loc>
14
+ </url>
15
+ <url>
16
+ <loc>http://example.com/file.gif</loc>
17
+ </url>
18
+ <url>
19
+ <loc>http://example.com/file.GIF</loc>
20
+ </url>
21
+ <url>
22
+ <loc>http://example.com/file.gif?size=thumb</loc>
23
+ </url>
24
+ <url>
25
+ <loc>http://example.com/file.jpg</loc>
26
+ </url>
27
+ <url>
28
+ <loc>http://example.com/file.jpeg</loc>
29
+ </url>
30
+ <url>
31
+ <loc>http://example.com/file.png</loc>
32
+ </url>
33
+ <url>
34
+ <loc>http://example.com/file.tiff</loc>
35
+ </url>
36
+ <url>
37
+ <loc>http://example.com/file.bmp</loc>
38
+ </url>
39
+ <url>
40
+ <loc>http://example.com/file.pdf</loc>
41
+ </url>
42
+ <url>
43
+ <loc>http://example.com/file.txt</loc>
44
+ </url>
45
+ <url>
46
+ <loc>http://example.com/file.doc</loc>
47
+ </url>
48
+ <url>
49
+ <loc>http://example.com/file.xls</loc>
50
+ </url>
51
+ <url>
52
+ <loc>http://example.com/file.rtf</loc>
53
+ </url>
54
+ <url>
55
+ <loc>http://example.com/file.xml</loc>
56
+ </url>
57
+ <url>
58
+ <loc>http://example.com/file.csv</loc>
59
+ </url>
60
+ <url>
61
+ <loc>http://example.com/file.wav</loc>
62
+ </url>
63
+ <url>
64
+ <loc>http://example.com/file.mp3</loc>
65
+ </url>
66
+ <url>
67
+ <loc>http://example.com/file.ogg</loc>
68
+ </url>
69
+ <url>
70
+ <loc>mailto:user@example.com</loc>
71
+ </url>
72
+ <url>
73
+ <loc>ftp://user:password@example.com</loc>
74
+ </url>
75
+ <url>
76
+ <loc>telnet://user:password@example.com</loc>
77
+ </url>
78
+ <url>
79
+ <loc>javascript:alert('hey');</loc>
80
+ </url>
81
+ </urlset>
@@ -11,6 +11,7 @@ describe W3Clove::Sitemap do
11
11
  @sitemap_with_protocol_relative = W3Clove::Sitemap.new('http://protocol-relative.com')
12
12
  @sitemap_with_protocol_relative_https = W3Clove::Sitemap.new('https://protocol-relative.com')
13
13
  @sitemap_for_exclusions = W3Clove::Sitemap.new('http://example.com/exclusions')
14
+ @sitemap_for_exclusions_xml = W3Clove::Sitemap.new('http://example.com/exclusions.xml')
14
15
  @sitemap_for_absolute_urls = W3Clove::Sitemap.new('http://w3clove.com/faqs')
15
16
  @sitemap_international = W3Clove::Sitemap.new('http://example.com/international')
16
17
 
@@ -98,13 +99,19 @@ describe W3Clove::Sitemap do
98
99
  urls.should_not include 'http://w3clove.com/faqs/'
99
100
  end
100
101
 
101
- it "should exclude non-html pages" do
102
+ it "should exclude non-html pages from HTML sitemaps" do
102
103
  @sitemap_for_exclusions.pages.length.should == 3
103
104
  @sitemap_for_exclusions.pages[0].url.should == 'http://example.com/exclusions'
104
105
  @sitemap_for_exclusions.pages[1].url.should == 'http://example.com/'
105
106
  @sitemap_for_exclusions.pages[2].url.should == 'http://example.com/faqs'
106
107
  end
107
108
 
109
+ it "should exclude non-html pages from XML sitemaps" do
110
+ @sitemap_for_exclusions_xml.pages.length.should == 2
111
+ @sitemap_for_exclusions_xml.pages[0].url.should == 'http://example.com/'
112
+ @sitemap_for_exclusions_xml.pages[1].url.should == 'http://example.com/faqs'
113
+ end
114
+
108
115
  it "should not crash when URLs have international characters" do
109
116
  @sitemap_international.pages.length.should == 9
110
117
  @sitemap_international.pages[0].url.should == 'http://example.com/international'
@@ -14,6 +14,7 @@ FakeWeb.register_uri(:get, "http://zigotica.com", :response => open("#{$samples_
14
14
  FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
15
15
  FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
16
16
  FakeWeb.register_uri(:get, "http://example.com/exclusions", :response => open("#{$samples_dir}/exclusions.html").read)
17
+ FakeWeb.register_uri(:get, "http://example.com/exclusions.xml", :response => open("#{$samples_dir}/exclusions.xml").read)
17
18
  FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => open("#{$samples_dir}/w3clove_faqs.response").read)
18
19
  FakeWeb.register_uri(:get, "http://example.com/international", :response => open("#{$samples_dir}/international.response").read)
19
20
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: w3clove
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 7
9
- - 9
10
- version: 0.7.9
9
+ - 10
10
+ version: 0.7.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-09-12 00:00:00 Z
18
+ date: 2012-11-02 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: w3c_validators
@@ -162,6 +162,7 @@ files:
162
162
  - spec/samples/absolute_links.html
163
163
  - spec/samples/eparreno.com.html
164
164
  - spec/samples/exclusions.html
165
+ - spec/samples/exclusions.xml
165
166
  - spec/samples/guides.rubyonrails.org.html
166
167
  - spec/samples/international.response
167
168
  - spec/samples/protocol_relative.html