w3clove 0.6.2 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/w3clove/sitemap.rb +13 -2
- data/lib/w3clove/version.rb +1 -1
- data/spec/samples/exclusions.html +43 -0
- data/spec/sitemap_spec.rb +7 -0
- data/spec/spec_helper.rb +1 -0
- data/w3clove.gemspec +4 -4
- metadata +19 -20
data/lib/w3clove/sitemap.rb
CHANGED
@@ -48,18 +48,29 @@ module W3Clove
|
|
48
48
|
# scrape the links from HTML.
|
49
49
|
#
|
50
50
|
# For HTML sources, it will only get the links that start with the sitemap url, convert relative links
|
51
|
-
# to absolute links, remove anchors from links,
|
51
|
+
# to absolute links, remove anchors from links, include the sitemap url, and exclude links that don't
|
52
|
+
# seem to point to HTML (like images, multimedia, text, javascript...)
|
52
53
|
def pages_in_sitemap
|
53
54
|
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
54
55
|
if pages.empty?
|
55
56
|
m = MetaInspector.new(url)
|
56
|
-
links = m.absolute_links.select {|l| l.start_with?(m.url)}.map {|l| l.split('#')[0]}.uniq
|
57
|
+
links = m.absolute_links.select {|l| l.start_with?(m.url) && looks_like_html?(l)}.map {|l| l.split('#')[0]}.uniq
|
57
58
|
links << m.url unless (links.include?(m.url) || links.include?("#{m.url}/"))
|
58
59
|
pages = links.map {|link| W3Clove::Page.new(link)}
|
59
60
|
end
|
60
61
|
pages
|
61
62
|
end
|
62
63
|
|
64
|
+
# Tells if the given url looks like an HTML page.
|
65
|
+
# That is, it does not look like javascript, image, pdf...
|
66
|
+
def looks_like_html?(url)
|
67
|
+
u = URI.parse(url)
|
68
|
+
scheme = u.scheme
|
69
|
+
extension = u.path.split(".").last
|
70
|
+
|
71
|
+
(scheme =~ /http[s]?/i) && (extension !~ /gif|jpg|jpeg|png|tiff|bmp|txt|pdf|doc|xls|wav|mp3|ogg/i)
|
72
|
+
end
|
73
|
+
|
63
74
|
def xml_locations
|
64
75
|
Nokogiri::XML(doc).css('loc')
|
65
76
|
end
|
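data/lib/w3clove/version.rb
CHANGED
[hunk missing from this extract; the file list above records it as +1 -1, the version bump from 0.6.2 to 0.7]
data/spec/samples/exclusions.html
ADDED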
@@ -0,0 +1,43 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
<title>Examples of exclusions</title>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<a href="/">Front page</a><br />
|
18
|
+
<a href="/faqs">FAQs</a><br />
|
19
|
+
|
20
|
+
<a href="/file.gif">link</a><br />
|
21
|
+
<a href="/file.GIF">link</a><br />
|
22
|
+
<a href="/file.gif?size=thumb">link</a><br />
|
23
|
+
<a href="/file.jpg">link</a><br />
|
24
|
+
<a href="/file.jpeg">link</a><br />
|
25
|
+
<a href="/file.png">link</a><br />
|
26
|
+
<a href="/file.tiff">link</a><br />
|
27
|
+
<a href="/file.bmp">link</a><br />
|
28
|
+
|
29
|
+
<a href="/file.pdf">link</a><br />
|
30
|
+
<a href="/file.txt">link</a><br />
|
31
|
+
<a href="/file.doc">link</a><br />
|
32
|
+
<a href="/file.xls">link</a><br />
|
33
|
+
|
34
|
+
<a href="/file.wav">link</a><br />
|
35
|
+
<a href="/file.mp3">link</a><br />
|
36
|
+
<a href="/file.ogg">link</a><br />
|
37
|
+
|
38
|
+
<a href="mailto:user@example.com">link</a><br />
|
39
|
+
<a href="ftp://user:password@example.com">link</a><br />
|
40
|
+
<a href="telnet://user:password@example.com">link</a><br />
|
41
|
+
<a href="javascript:alert('hey');">link</a><br />
|
42
|
+
</body>
|
43
|
+
</html>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -10,6 +10,7 @@ describe W3Clove::Sitemap do
|
|
10
10
|
@sitemap_with_trailing_slash = W3Clove::Sitemap.new('http://eparreno.com')
|
11
11
|
@sitemap_with_protocol_relative = W3Clove::Sitemap.new('http://protocol-relative.com')
|
12
12
|
@sitemap_with_protocol_relative_https = W3Clove::Sitemap.new('https://protocol-relative.com')
|
13
|
+
@sitemap_for_exclusions = W3Clove::Sitemap.new('http://example.com/exclusions')
|
13
14
|
|
14
15
|
MarkupValidator.any_instance.stubs(:validate_uri).returns(stubbed_validator_results)
|
15
16
|
end
|
@@ -75,6 +76,12 @@ describe W3Clove::Sitemap do
|
|
75
76
|
urls.should_not include 'http://eparreno.com'
|
76
77
|
end
|
77
78
|
|
79
|
+
it "should exclude non-html pages" do
|
80
|
+
@sitemap_for_exclusions.pages.length.should == 2
|
81
|
+
@sitemap_for_exclusions.pages.first.url.should == 'http://example.com/exclusions/'
|
82
|
+
@sitemap_for_exclusions.pages.last.url.should == 'http://example.com/exclusions/faqs'
|
83
|
+
end
|
84
|
+
|
78
85
|
context "protocol-relative links" do
|
79
86
|
it "should include only internal links" do
|
80
87
|
@sitemap_with_protocol_relative.pages.size.should == 3
|
data/spec/spec_helper.rb
CHANGED
@@ -13,6 +13,7 @@ FakeWeb.register_uri(:get, "http://www.eparreno.com", :response => open("#{$samp
|
|
13
13
|
FakeWeb.register_uri(:get, "http://zigotica.com", :response => open("#{$samples_dir}/zigotica.com.html").read)
|
14
14
|
FakeWeb.register_uri(:get, "http://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
|
15
15
|
FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => open("#{$samples_dir}/protocol_relative.html").read)
|
16
|
+
FakeWeb.register_uri(:get, "http://example.com/exclusions", :response => open("#{$samples_dir}/exclusions.html").read)
|
16
17
|
|
17
18
|
def message_text(message_id)
|
18
19
|
message_texts = {
|
data/w3clove.gemspec
CHANGED
@@ -16,12 +16,12 @@ and outputs a detailed report with all errors and warnings}
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
gem.version = W3Clove::VERSION
|
18
18
|
|
19
|
-
gem.add_dependency 'w3c_validators', '~> 1.0.2'
|
20
|
-
gem.add_dependency 'nokogiri', '~> 1.5.
|
21
|
-
gem.add_dependency 'metainspector', '~> 1.
|
19
|
+
gem.add_dependency 'w3c_validators', '~> 1.2'
|
20
|
+
gem.add_dependency 'nokogiri', '~> 1.5.3'
|
21
|
+
gem.add_dependency 'metainspector', '~> 1.9.0'
|
22
22
|
|
23
23
|
gem.add_development_dependency 'rspec', '~> 2.5.0'
|
24
|
-
gem.add_development_dependency 'mocha', '~> 0.
|
24
|
+
gem.add_development_dependency 'mocha', '~> 0.11.4'
|
25
25
|
gem.add_development_dependency 'rake', '~> 0.9.2'
|
26
26
|
gem.add_development_dependency 'fakeweb', '~> 1.3.0'
|
27
27
|
end
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 5
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 6
|
9
|
-
- 2
|
10
|
-
version: 0.6.2
|
8
|
+
- 7
|
9
|
+
version: "0.7"
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Jaime Iniesta
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date:
|
17
|
+
date: 2012-06-03 00:00:00 Z
|
19
18
|
dependencies:
|
20
19
|
- !ruby/object:Gem::Dependency
|
21
20
|
name: w3c_validators
|
@@ -25,12 +24,11 @@ dependencies:
|
|
25
24
|
requirements:
|
26
25
|
- - ~>
|
27
26
|
- !ruby/object:Gem::Version
|
28
|
-
hash:
|
27
|
+
hash: 11
|
29
28
|
segments:
|
30
29
|
- 1
|
31
|
-
- 0
|
32
30
|
- 2
|
33
|
-
version: 1.0.2
|
31
|
+
version: "1.2"
|
34
32
|
type: :runtime
|
35
33
|
version_requirements: *id001
|
36
34
|
- !ruby/object:Gem::Dependency
|
@@ -41,12 +39,12 @@ dependencies:
|
|
41
39
|
requirements:
|
42
40
|
- - ~>
|
43
41
|
- !ruby/object:Gem::Version
|
44
|
-
hash:
|
42
|
+
hash: 5
|
45
43
|
segments:
|
46
44
|
- 1
|
47
45
|
- 5
|
48
|
-
-
|
49
|
-
version: 1.5.
|
46
|
+
- 3
|
47
|
+
version: 1.5.3
|
50
48
|
type: :runtime
|
51
49
|
version_requirements: *id002
|
52
50
|
- !ruby/object:Gem::Dependency
|
@@ -57,12 +55,12 @@ dependencies:
|
|
57
55
|
requirements:
|
58
56
|
- - ~>
|
59
57
|
- !ruby/object:Gem::Version
|
60
|
-
hash:
|
58
|
+
hash: 51
|
61
59
|
segments:
|
62
60
|
- 1
|
63
|
-
-
|
64
|
-
-
|
65
|
-
version: 1.
|
61
|
+
- 9
|
62
|
+
- 0
|
63
|
+
version: 1.9.0
|
66
64
|
type: :runtime
|
67
65
|
version_requirements: *id003
|
68
66
|
- !ruby/object:Gem::Dependency
|
@@ -89,12 +87,12 @@ dependencies:
|
|
89
87
|
requirements:
|
90
88
|
- - ~>
|
91
89
|
- !ruby/object:Gem::Version
|
92
|
-
hash:
|
90
|
+
hash: 59
|
93
91
|
segments:
|
94
92
|
- 0
|
95
|
-
-
|
96
|
-
-
|
97
|
-
version: 0.
|
93
|
+
- 11
|
94
|
+
- 4
|
95
|
+
version: 0.11.4
|
98
96
|
type: :development
|
99
97
|
version_requirements: *id005
|
100
98
|
- !ruby/object:Gem::Dependency
|
@@ -162,6 +160,7 @@ files:
|
|
162
160
|
- spec/page_spec.rb
|
163
161
|
- spec/samples/absolute_links.html
|
164
162
|
- spec/samples/eparreno.com.html
|
163
|
+
- spec/samples/exclusions.html
|
165
164
|
- spec/samples/guides.rubyonrails.org.html
|
166
165
|
- spec/samples/protocol_relative.html
|
167
166
|
- spec/samples/sitemap.xml
|
@@ -198,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
198
197
|
requirements: []
|
199
198
|
|
200
199
|
rubyforge_project:
|
201
|
-
rubygems_version: 1.8.
|
200
|
+
rubygems_version: 1.8.15
|
202
201
|
signing_key:
|
203
202
|
specification_version: 3
|
204
203
|
summary: command-line tool to validate the markup of a whole site against the W3C validator
|