w3clove 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README.rdoc +19 -0
- data/Rakefile +2 -0
- data/bin/w3clove +11 -0
- data/heatup/01_single_validation.rb +16 -0
- data/heatup/02_multiple_validation.rb +25 -0
- data/heatup/03_multiple_validation_from_text_file.rb +23 -0
- data/heatup/04_multiple_validations_from_xml_file.rb +24 -0
- data/heatup/05_multiple_validations_from_remote_xml_sitemap.rb +25 -0
- data/heatup/sitemap.xml +12 -0
- data/heatup/urls.txt +3 -0
- data/lib/w3clove/version.rb +3 -0
- data/lib/w3clove.rb +27 -0
- data/w3clove.gemspec +25 -0
- metadata +105 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= w3clove
|
2
|
+
|
3
|
+
This is my {Ruby Mendicant University}[http://university.rubymendicant.com/] personal project, and is currently in alpha status.
|
4
|
+
|
5
|
+
I want to build a site-wide markup validator, a Ruby gem that lets you validate a whole web site against the W3C Markup Validator, from the command line, and generate a comprehensive report of all errors found.
|
6
|
+
|
7
|
+
Currently, the official {W3C Validator site}[http://validator.w3.org/] only lets you validate one URL at a time, so when you want to validate all the pages on a web site, it can be a tedious process. There is a {related tool}[http://www.htmlhelp.com/tools/validator/batch.html.en] that lets you use a batch mode for this and submit a list of URLs to be checked, but it is still a semi-manual process, and the output is not very useful.
|
8
|
+
|
9
|
+
My plan, then, is to build a command line utility that accepts as input an XML sitemap file, or its URL, expecting it to be in the {Google Sitemap format}[http://en.wikipedia.org/wiki/Google_Sitemaps]. This utility will then check the markup validation of each URL in this sitemap by querying the W3C Validator, and store all detected errors and warnings. After checking all the URLs, it will generate as output an HTML file, with a style similar to what RCov[https://github.com/relevance/rcov] produces, showing all these errors in an easy-to-read format, grouping common errors together, sorting them by popularity, and linking to the URLs and to the explanations on how to correct them.
|
10
|
+
|
11
|
+
Internally, it would use the {w3c_validators gem}[http://rubygems.org/gems/w3c_validators] to do the individual checks, so my gem would be concerned only with the XML sitemap parsing, building the queue, storing the errors, grouping and sorting them, and producing the HTML output.
|
12
|
+
|
13
|
+
I've already done something similar to this: I sent {a little contribution to docrails}[https://github.com/lifo/docrails/blob/master/railties/guides/w3c_validator.rb] that checks the generated guides using this gem.
|
14
|
+
|
15
|
+
= Bonus points:
|
16
|
+
|
17
|
+
* in addition to an XML file, accept as input the URL of a site and crawl the site to find all internal links
|
18
|
+
* validate the markup locally, without querying the W3C site, for more speed and to not saturate the W3C site
|
19
|
+
* store the results in a local database, so that on subsequent checks only the pages that had errors are re-checked (unless a --checkall force flag is passed). This way developers can check the whole site, get the errors, deploy the corrections, and recheck the site.
|
data/Rakefile
ADDED
data/bin/w3clove
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Heat-up script: checks one URL against the W3C Markup Validator and
# prints how many validation errors were reported for it.

require 'w3c_validators'
include W3CValidators

validator = MarkupValidator.new
url = 'http://university.rubymendicant.com/'

puts "Validating markup of #{url}"
error_count = validator.validate_uri(url).errors.length

# An empty error list is reported as a valid page.
if error_count.zero?
  puts 'Valid!'
else
  puts "There are #{error_count} validation errors"
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Heat-up script: validates a fixed list of URLs one by one and prints the
# error/warning counts for each page, followed by the overall totals.

require 'w3c_validators'
include W3CValidators

validator = MarkupValidator.new

urls = [
  'http://university.rubymendicant.com/',
  'http://university.rubymendicant.com/changelog.html',
  'http://university.rubymendicant.com/alumni.html'
]

# Running totals across every validated page.
error_total = 0
warning_total = 0

urls.each do |url|
  puts "\nValidating markup of #{url}"
  results = validator.validate_uri(url)

  errors = results.errors.count
  warnings = results.warnings.count
  puts "#{errors} errors, #{warnings} warnings"

  error_total += errors
  warning_total += warnings
end

puts "\nTOTAL:#{error_total} errors, #{warning_total} warnings"
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Example of validation of a list of URLs from a text file.
#
# Reads "urls.txt" (resolved against the current working directory), one URL
# per line, validates each one and prints per-page and total counts.

require 'w3c_validators'
include W3CValidators

validator = MarkupValidator.new
totals = {:errors => 0, :warnings => 0}

File.open("urls.txt", "r") do |file|
  file.each_line do |line|
    # each_line yields the line terminator too; strip it so it ends up neither
    # in the URI handed to the validator nor in the middle of the log line.
    url = line.strip
    next if url.empty? # blank lines are not URLs; skip them

    puts "\nValidating markup of #{url}"
    results = validator.validate_uri(url)
    puts "#{results.errors.count} errors, #{results.warnings.count} warnings"
    totals[:errors] += results.errors.count
    totals[:warnings] += results.warnings.count
  end
end

puts "\nTOTAL:#{totals[:errors]} errors, #{totals[:warnings]} warnings"
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Example of validation of a list of URLs from a local XML sitemap file.
#
# Reads "sitemap.xml" (resolved against the current working directory),
# validates the URL in every <loc> element and prints per-page and total
# counts.

require 'nokogiri'
require 'w3c_validators'
include W3CValidators

validator = MarkupValidator.new

totals = {:errors => 0, :warnings => 0}

# Block form so the file handle is closed as soon as the document is parsed.
doc = File.open("sitemap.xml") { |file| Nokogiri::XML(file) }

# Strip each <loc> value: whitespace around the URL is not part of it.
doc.css('loc').map { |loc| loc.text.strip }.each do |url|
  puts "\nValidating markup of #{url}"
  results = validator.validate_uri(url)
  puts "#{results.errors.count} errors, #{results.warnings.count} warnings"
  totals[:errors] += results.errors.count
  totals[:warnings] += results.warnings.count
end

puts "\nTOTAL:#{totals[:errors]} errors, #{totals[:warnings]} warnings"
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Example of validation of a list of URLs from a remote XML sitemap.
#
# Downloads the sitemap over HTTP(S), validates the URL in every <loc>
# element and prints per-page and total counts.

require 'open-uri'
require 'nokogiri'
require 'w3c_validators'
include W3CValidators

validator = MarkupValidator.new

totals = {:errors => 0, :warnings => 0}

sitemap_url = 'https://github.com/jaimeiniesta/w3clove/raw/master/heatup/sitemap.xml'

# Open through the URI object rather than a bare `open(url)`: the Kernel#open
# patch from open-uri is deprecated and no longer opens URLs on Ruby 3.0+.
# The block form closes the downloaded stream once the document is parsed.
doc = URI.parse(sitemap_url).open { |io| Nokogiri::XML(io) }

# Strip each <loc> value: whitespace around the URL is not part of it.
doc.css('loc').map { |loc| loc.text.strip }.each do |url|
  puts "\nValidating markup of #{url}"
  results = validator.validate_uri(url)
  puts "#{results.errors.count} errors, #{results.warnings.count} warnings"
  totals[:errors] += results.errors.count
  totals[:warnings] += results.warnings.count
end

puts "\nTOTAL:#{totals[:errors]} errors, #{totals[:warnings]} warnings"
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
|
data/heatup/sitemap.xml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
3
|
+
<url>
|
4
|
+
<loc>http://university.rubymendicant.com/</loc>
|
5
|
+
</url>
|
6
|
+
<url>
|
7
|
+
<loc>http://university.rubymendicant.com/changelog.html</loc>
|
8
|
+
</url>
|
9
|
+
<url>
|
10
|
+
<loc>http://university.rubymendicant.com/alumni.html</loc>
|
11
|
+
</url>
|
12
|
+
</urlset>
|
data/heatup/urls.txt
ADDED
data/lib/w3clove.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module W3clove
  require 'open-uri'
  require 'nokogiri'
  require 'w3c_validators'
  include W3CValidators

  extend self

  ##
  # Parses an XML sitemap and checks markup validation for each url within.
  #
  # +sitemap_url+ is the location of the sitemap. Strings starting with
  # http://, https:// or ftp:// are fetched remotely; anything else is
  # opened as a local file path.
  #
  # Prints the error and warning counts of every URL, then the totals, to
  # standard output. Returns nil.
  def check_sitemap(sitemap_url)
    validator = MarkupValidator.new

    totals = {:errors => 0, :warnings => 0}

    sitemap_locations(sitemap_url).each do |url|
      puts "\nValidating markup of #{url}"
      results = validator.validate_uri(url)
      puts "#{results.errors.count} errors, #{results.warnings.count} warnings"
      totals[:errors] += results.errors.count
      totals[:warnings] += results.warnings.count
    end

    puts "\nTOTAL:#{totals[:errors]} errors, #{totals[:warnings]} warnings"
  end

  private

  ##
  # Returns the text of every <loc> element of the sitemap, with surrounding
  # whitespace removed.
  def sitemap_locations(sitemap_url)
    doc = open_sitemap(sitemap_url) { |io| Nokogiri::XML(io) }
    doc.css('loc').map { |loc| loc.text.strip }
  end

  ##
  # Yields an IO on the sitemap and closes it when the block returns.
  #
  # Deliberately avoids a bare Kernel#open on the caller-supplied string:
  # that call spawns a subprocess when the string starts with "|", and the
  # open-uri patch that let it fetch URLs is gone on Ruby 3.0+.
  def open_sitemap(source, &block)
    location = source.to_s
    if location =~ %r{\A(?:https?|ftp)://}i
      URI.parse(location).open(&block)
    else
      File.open(location, &block)
    end
  end
end
|
data/w3clove.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for w3clove; the version comes from lib/w3clove/version.
$:.push File.expand_path("../lib", __FILE__)
require "w3clove/version"

Gem::Specification.new do |s|
  s.name        = "w3clove"
  s.version     = W3clove::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jaime Iniesta"]
  s.email       = ["jaimeiniesta@gmail.com"]
  s.homepage    = "https://github.com/jaimeiniesta/w3clove/"
  # No padding or line breaks inside these literals: the strings are copied
  # verbatim into the built gem's metadata.
  s.summary     = "command-line tool to validate the markup of a whole site against the W3C validator"
  s.description = "this tool allows you to check the markup validation of a whole site passing an XML sitemap, " \
                  "and outputs a detailed report with all errors and warnings"

  s.rubyforge_project = "w3clove"

  s.add_dependency 'w3c_validators'
  s.add_dependency 'nokogiri'

  # File lists are taken from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: w3clove
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Jaime Iniesta
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-03-15 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: w3c_validators
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
46
|
+
description: " this tool allows you to check the markup validation of a whole site passing an XML sitemap,\n\
|
47
|
+
and outputs a detailed report with all errors and warnings "
|
48
|
+
email:
|
49
|
+
- jaimeiniesta@gmail.com
|
50
|
+
executables:
|
51
|
+
- w3clove
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files: []
|
55
|
+
|
56
|
+
files:
|
57
|
+
- .gitignore
|
58
|
+
- Gemfile
|
59
|
+
- README.rdoc
|
60
|
+
- Rakefile
|
61
|
+
- bin/w3clove
|
62
|
+
- heatup/01_single_validation.rb
|
63
|
+
- heatup/02_multiple_validation.rb
|
64
|
+
- heatup/03_multiple_validation_from_text_file.rb
|
65
|
+
- heatup/04_multiple_validations_from_xml_file.rb
|
66
|
+
- heatup/05_multiple_validations_from_remote_xml_sitemap.rb
|
67
|
+
- heatup/sitemap.xml
|
68
|
+
- heatup/urls.txt
|
69
|
+
- lib/w3clove.rb
|
70
|
+
- lib/w3clove/version.rb
|
71
|
+
- w3clove.gemspec
|
72
|
+
has_rdoc: true
|
73
|
+
homepage: https://github.com/jaimeiniesta/w3clove/
|
74
|
+
licenses: []
|
75
|
+
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
version: "0"
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: w3clove
|
100
|
+
rubygems_version: 1.3.7
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: command-line tool to validate the markup of a whole site against the W3C validator
|
104
|
+
test_files: []
|
105
|
+
|