validate-website 0.5.7 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109) hide show
  1. data/README.rdoc +5 -4
  2. data/Rakefile +3 -3
  3. data/bin/validate-website +9 -11
  4. data/bin/validate-website-static +7 -18
  5. data/lib/validate_website.rb +1 -210
  6. data/lib/validate_website/colorful_messages.rb +28 -0
  7. data/lib/validate_website/core.rb +141 -0
  8. data/lib/validate_website/option_parser.rb +133 -0
  9. data/lib/validate_website/runner.rb +35 -0
  10. data/lib/validate_website/validator.rb +69 -0
  11. data/man/man1/validate-website-static.1 +82 -0
  12. data/{lib/xhtml → share/schemas}/frameset.dtd +0 -0
  13. data/{lib/xhtml → share/schemas}/loose.dtd +0 -0
  14. data/{lib/xhtml → share/schemas}/strict.dtd +0 -0
  15. data/{lib/xhtml → share/schemas}/xframes-1.xsd +0 -0
  16. data/{lib/xhtml → share/schemas}/xhtml-access-1.xsd +0 -0
  17. data/{lib/xhtml → share/schemas}/xhtml-applet-1.xsd +0 -0
  18. data/{lib/xhtml → share/schemas}/xhtml-attribs-1.xsd +0 -0
  19. data/{lib/xhtml → share/schemas}/xhtml-base-1.xsd +0 -0
  20. data/{lib/xhtml → share/schemas}/xhtml-basic-form-1.xsd +0 -0
  21. data/{lib/xhtml → share/schemas}/xhtml-basic-table-1.xsd +0 -0
  22. data/{lib/xhtml → share/schemas}/xhtml-basic10-model-1.xsd +0 -0
  23. data/{lib/xhtml → share/schemas}/xhtml-basic10-module-redefines-1.xsd +0 -0
  24. data/{lib/xhtml → share/schemas}/xhtml-basic10-modules-1.xsd +0 -0
  25. data/{lib/xhtml → share/schemas}/xhtml-basic10.xsd +0 -0
  26. data/{lib/xhtml → share/schemas}/xhtml-basic11-model-1.xsd +0 -0
  27. data/{lib/xhtml → share/schemas}/xhtml-basic11-modules-1.xsd +0 -0
  28. data/{lib/xhtml → share/schemas}/xhtml-basic11.dtd +0 -0
  29. data/{lib/xhtml → share/schemas}/xhtml-basic11.xsd +0 -0
  30. data/{lib/xhtml → share/schemas}/xhtml-bdo-1.xsd +0 -0
  31. data/{lib/xhtml → share/schemas}/xhtml-blkphras-1.xsd +0 -0
  32. data/{lib/xhtml → share/schemas}/xhtml-blkpres-1.xsd +0 -0
  33. data/{lib/xhtml → share/schemas}/xhtml-blkstruct-1.xsd +0 -0
  34. data/{lib/xhtml → share/schemas}/xhtml-charent-1.xsd +0 -0
  35. data/{lib/xhtml → share/schemas}/xhtml-copyright-1.xsd +0 -0
  36. data/{lib/xhtml → share/schemas}/xhtml-csismap-1.xsd +0 -0
  37. data/{lib/xhtml → share/schemas}/xhtml-datatypes-1.xsd +0 -0
  38. data/{lib/xhtml → share/schemas}/xhtml-edit-1.xsd +0 -0
  39. data/{lib/xhtml → share/schemas}/xhtml-events-1.xsd +0 -0
  40. data/{lib/xhtml → share/schemas}/xhtml-form-1.xsd +0 -0
  41. data/{lib/xhtml → share/schemas}/xhtml-frames-1.xsd +0 -0
  42. data/{lib/xhtml → share/schemas}/xhtml-framework-1.xsd +0 -0
  43. data/{lib/xhtml → share/schemas}/xhtml-hypertext-1.xsd +0 -0
  44. data/{lib/xhtml → share/schemas}/xhtml-iframe-1.xsd +0 -0
  45. data/{lib/xhtml → share/schemas}/xhtml-image-1.xsd +0 -0
  46. data/{lib/xhtml → share/schemas}/xhtml-inlphras-1.xsd +0 -0
  47. data/{lib/xhtml → share/schemas}/xhtml-inlpres-1.xsd +0 -0
  48. data/{lib/xhtml → share/schemas}/xhtml-inlstruct-1.xsd +0 -0
  49. data/{lib/xhtml → share/schemas}/xhtml-inlstyle-1.xsd +0 -0
  50. data/{lib/xhtml → share/schemas}/xhtml-inputmode-1.xsd +0 -0
  51. data/{lib/xhtml → share/schemas}/xhtml-lat1.ent +0 -0
  52. data/{lib/xhtml → share/schemas}/xhtml-legacy-1.xsd +0 -0
  53. data/{lib/xhtml → share/schemas}/xhtml-link-1.xsd +0 -0
  54. data/{lib/xhtml → share/schemas}/xhtml-list-1.xsd +0 -0
  55. data/{lib/xhtml → share/schemas}/xhtml-meta-1.xsd +0 -0
  56. data/{lib/xhtml → share/schemas}/xhtml-metaAttributes-1.xsd +0 -0
  57. data/{lib/xhtml → share/schemas}/xhtml-misc-1.xsd +0 -0
  58. data/{lib/xhtml → share/schemas}/xhtml-nameident-1.xsd +0 -0
  59. data/{lib/xhtml → share/schemas}/xhtml-notations-1.xsd +0 -0
  60. data/{lib/xhtml → share/schemas}/xhtml-object-1.xsd +0 -0
  61. data/{lib/xhtml → share/schemas}/xhtml-param-1.xsd +0 -0
  62. data/{lib/xhtml → share/schemas}/xhtml-pres-1.xsd +0 -0
  63. data/{lib/xhtml → share/schemas}/xhtml-print-1.xsd +0 -0
  64. data/{lib/xhtml → share/schemas}/xhtml-print-model-1.xsd +0 -0
  65. data/{lib/xhtml → share/schemas}/xhtml-print-modules-1.xsd +0 -0
  66. data/{lib/xhtml → share/schemas}/xhtml-rdfa-1.dtd +0 -0
  67. data/{lib/xhtml → share/schemas}/xhtml-rdfa-1.xsd +0 -0
  68. data/{lib/xhtml → share/schemas}/xhtml-rdfa-model-1.xsd +0 -0
  69. data/{lib/xhtml → share/schemas}/xhtml-rdfa-modules-1.xsd +0 -0
  70. data/{lib/xhtml → share/schemas}/xhtml-ruby-1.xsd +0 -0
  71. data/{lib/xhtml → share/schemas}/xhtml-ruby-basic-1.xsd +0 -0
  72. data/{lib/xhtml → share/schemas}/xhtml-script-1.xsd +0 -0
  73. data/{lib/xhtml → share/schemas}/xhtml-special.ent +0 -0
  74. data/{lib/xhtml → share/schemas}/xhtml-ssismap-1.xsd +0 -0
  75. data/{lib/xhtml → share/schemas}/xhtml-struct-1.xsd +0 -0
  76. data/{lib/xhtml → share/schemas}/xhtml-style-1.xsd +0 -0
  77. data/{lib/xhtml → share/schemas}/xhtml-symbol.ent +0 -0
  78. data/{lib/xhtml → share/schemas}/xhtml-table-1.xsd +0 -0
  79. data/{lib/xhtml → share/schemas}/xhtml-target-1.xsd +0 -0
  80. data/{lib/xhtml → share/schemas}/xhtml-text-1.xsd +0 -0
  81. data/{lib/xhtml → share/schemas}/xhtml1-frameset.dtd +0 -0
  82. data/{lib/xhtml → share/schemas}/xhtml1-frameset.xsd +0 -0
  83. data/{lib/xhtml → share/schemas}/xhtml1-strict.dtd +0 -0
  84. data/{lib/xhtml → share/schemas}/xhtml1-strict.xsd +0 -0
  85. data/{lib/xhtml → share/schemas}/xhtml1-transitional.dtd +0 -0
  86. data/{lib/xhtml → share/schemas}/xhtml1-transitional.xsd +0 -0
  87. data/{lib/xhtml → share/schemas}/xhtml11-model-1.xsd +0 -0
  88. data/{lib/xhtml → share/schemas}/xhtml11-module-redefines-1.xsd +0 -0
  89. data/{lib/xhtml → share/schemas}/xhtml11-modules-1.xsd +0 -0
  90. data/{lib/xhtml → share/schemas}/xhtml11.xsd +0 -0
  91. data/{lib/xhtml → share/schemas}/xhtml2.xsd +0 -0
  92. data/{lib/xhtml → share/schemas}/xml-events-1.xsd +0 -0
  93. data/{lib/xhtml → share/schemas}/xml-events-2.xsd +0 -0
  94. data/{lib/xhtml → share/schemas}/xml-events-attribs-1.xsd +0 -0
  95. data/{lib/xhtml → share/schemas}/xml-events-attribs-2.xsd +0 -0
  96. data/{lib/xhtml → share/schemas}/xml-events-copyright-1.xsd +0 -0
  97. data/{lib/xhtml → share/schemas}/xml-events-copyright-2.xsd +0 -0
  98. data/{lib/xhtml → share/schemas}/xml-handlers-1.xsd +0 -0
  99. data/{lib/xhtml → share/schemas}/xml-handlers-2.xsd +0 -0
  100. data/{lib/xhtml → share/schemas}/xml-script-1.xsd +0 -0
  101. data/{lib/xhtml → share/schemas}/xml.xsd +0 -0
  102. data/spec/core_spec.rb +56 -0
  103. data/spec/spec_helper.rb +1 -1
  104. data/spec/validator_spec.rb +3 -1
  105. metadata +102 -99
  106. data/lib/colorful_messages.rb +0 -28
  107. data/lib/validator.rb +0 -67
  108. data/spec/data/index.cs.html +0 -243
  109. data/spec/validate_website_spec.rb +0 -54
@@ -0,0 +1,133 @@
1
+ # encoding: utf-8
2
+ require 'optparse'
3
+
4
module ValidateWebsite
  # Builds the options Hash for the two front ends visible in this file's
  # defaults: the crawler (:crawl) and the static file validator (:static).
  #
  # Options can come from a command line (an Array of arguments) or from a
  # Hash of overrides; in both cases the result is the matching
  # DEFAULT_OPTS_* Hash with the supplied values merged on top.
  class Parser
    DEFAULT_OPTS_CRAWL = {
      :site => 'http://localhost:3000/',
      :markup_validation => true,
      :exclude => nil,
      :file => nil,
      # log not found url (404 status code)
      :not_found => false,
      # internal verbose for ValidateWebsite
      :validate_verbose => false,
      :quiet => false,

      # Anemone options see anemone/lib/anemone/core.rb
      :verbose => false,
      :cookies => nil,
      :accept_cookies => true,
      :redirect_limit => 0,
    }

    DEFAULT_OPTS_STATIC = {
      :pattern => '**/*.html',
      :file => nil,
      :validate_verbose => false,
      :quiet => false,
    }

    # Returns the effective options for +type+.
    #
    # options:: an Array of command line arguments (parsed, and consumed, by
    #           the matching command_line_parse_* method), a Hash of
    #           overrides, or nil for "no overrides".
    # type::    :crawl or :static (anything whose upcased to_s names a
    #           DEFAULT_OPTS_* constant).
    #
    # Raises ArgumentError when +type+ has no DEFAULT_OPTS_* constant.
    def self.parse(options, type)
      defaults = defaults_for(type)
      if Array === options
        send("command_line_parse_#{type}", options)
      else
        # nil used to blow up inside Hash#merge; treat it as an empty override.
        defaults.merge(options || {})
      end
    end

    # Parses crawler command line arguments.
    #
    # args:: Array of Strings; recognised switches are removed from it
    #        (OptionParser#parse! is destructive).
    #
    # Returns DEFAULT_OPTS_CRAWL merged with the values given on the command
    # line. -h/--help prints the usage text and exits the process.
    def self.command_line_parse_crawl(args)
      # Read the defaults from the constant rather than from state left behind
      # by .parse, so this method also works when called on its own.
      defaults = DEFAULT_OPTS_CRAWL
      options = {}
      opts = OptionParser.new do |o|
        o.set_summary_indent(' ')
        o.banner = 'Usage: validate-website [OPTIONS]'
        o.define_head 'validate-website - Web crawler for checking the ' +
          'validity of your documents'
        o.separator ''

        o.on("-s", "--site 'SITE'", String,
             "Website to crawl (Default: #{defaults[:site]})") { |v|
          options[:site] = v
        }
        o.on("-u", "--user-agent 'USERAGENT'", String,
             "Change user agent") { |v|
          options[:user_agent] = v
        }
        o.on("-e", "--exclude 'EXCLUDE'", String,
             "Url to exclude (ex: 'redirect|news')") { |v|
          options[:exclude] = v
        }
        o.on("-f", "--file 'FILE'", String,
             "Save not well formed or not found urls") { |v|
          options[:file] = v
        }

        o.on("-c", "--cookies 'COOKIES'", String,
             "Set defaults cookies") { |v|
          options[:cookies] = v
        }

        o.on("-m", "--[no-]markup-validation",
             "Markup validation (Default: #{defaults[:markup_validation]})") { |v|
          options[:markup_validation] = v
        }
        o.on("-n", "--not-found",
             "Log not found url (Default: #{defaults[:not_found]})") { |v|
          options[:not_found] = v
        }
        o.on("-v", "--verbose",
             "Show validator errors (Default: #{defaults[:validate_verbose]})") { |v|
          options[:validate_verbose] = v
        }
        o.on("-q", "--quiet",
             "Only report errors (Default: #{defaults[:quiet]})") { |v|
          options[:quiet] = v
        }
        # -d maps to :verbose, which the defaults list among the Anemone options.
        o.on("-d", "--debug",
             "Show anemone log (Default: #{defaults[:verbose]})") { |v|
          options[:verbose] = v
        }

        o.separator ""
        o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
      end
      opts.parse!(args)
      defaults.merge(options)
    end

    # Parses static validator command line arguments.
    #
    # args:: Array of Strings; recognised switches are removed from it.
    #
    # Returns DEFAULT_OPTS_STATIC merged with the values given on the command
    # line. The pattern value is stripped of surrounding whitespace.
    def self.command_line_parse_static(args)
      # Same reasoning as in command_line_parse_crawl: no hidden dependency on
      # .parse having run first.
      defaults = DEFAULT_OPTS_STATIC
      options = {}
      opts = OptionParser.new do |o|
        o.set_summary_indent(' ')
        o.banner = 'Usage: validate-website-static [OPTIONS]'
        o.define_head 'validate-website-static - check the validity of ' +
          'your documents'
        o.separator ''

        o.on("-p", "--pattern 'PATTERN'", String,
             "Change filenames pattern (Default: #{defaults[:pattern]})") { |v|
          options[:pattern] = v.strip
        }
        o.on("-f", "--file 'FILE'", String,
             "Save not well formed urls") { |v|
          options[:file] = v
        }
        o.on("-v", "--verbose",
             "Show validator errors (Default: #{defaults[:validate_verbose]})") { |v|
          options[:validate_verbose] = v
        }
        o.on("-q", "--quiet",
             "Only report errors (Default: #{defaults[:quiet]})") { |v|
          options[:quiet] = v
        }
      end
      opts.parse!(args)
      defaults.merge(options)
    end

    # Looks up the DEFAULT_OPTS_* Hash for +type+.
    # Raises ArgumentError for an unknown type, including one whose name
    # cannot form a valid constant (const_defined? raises NameError for those).
    def self.defaults_for(type)
      const_name = "DEFAULT_OPTS_#{type.to_s.upcase}"
      known = begin
        const_defined?(const_name)
      rescue NameError
        false
      end
      raise ArgumentError, "Unknown options type : #{type}" unless known
      const_get(const_name)
    end
    private_class_method :defaults_for
  end
end
@@ -0,0 +1,35 @@
1
+ require 'validate_website/core'
2
+
3
+ module ValidateWebsite
4
+ class Runner
5
+ def self.trap_interrupt
6
+ trap('INT') do
7
+ STDERR.puts "\nExiting..."
8
+ exit!(1)
9
+ end
10
+ end
11
+
12
+ def self.run_crawl(args)
13
+ trap_interrupt
14
+ validate_website = ValidateWebsite::Core.new(args, :crawl)
15
+ validate_website.crawl
16
+ validate_website.exit_status
17
+ end
18
+
19
+ def self.run_static(args)
20
+ trap_interrupt
21
+ validate_website = ValidateWebsite::Core.new(args, :static)
22
+
23
+ files = Dir.glob(validate_website.options[:pattern])
24
+ files.each do |f|
25
+ next unless File.file?(f)
26
+
27
+ body = open(f).read
28
+ doc = Nokogiri::HTML(body)
29
+
30
+ validate_website.validate(doc, body, f)
31
+ end
32
+ validate_website.exit_status
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,69 @@
1
+ # encoding: utf-8
2
+
3
module ValidateWebsite
  # Validates one document against the schema named by its DOCTYPE.
  #
  # The schema name ("namespace") is the basename of the DOCTYPE system
  # identifier without ".dtd". When an XSD of that name exists under
  # XHTML_PATH it is used for validation; otherwise the document is re-parsed
  # as HTML and the parser's own errors are reported.
  class Validator
    # Directory holding the bundled DTD/XSD files.
    XHTML_PATH = File.join(File.dirname(__FILE__), '..', '..', 'share', 'schemas')

    attr_reader :original_doc, :body, :dtd, :doc, :namespace, :xsd, :errors

    # original_doc:: parsed document; only #internal_subset is used here.
    # body::         raw markup (String) of the same document.
    #
    # Never raises Nokogiri::XML::SyntaxError: such an error is appended to
    # #errors instead.
    def initialize(original_doc, body)
      # Set first so the rescue clause below can always append to it.
      @errors = []
      @original_doc = original_doc
      @body = body
      @dtd = @original_doc.internal_subset
      init_namespace(@dtd)
      @errors << 'Unknown document' if @namespace.nil?

      validate_markup if @errors.empty?
    rescue Nokogiri::XML::SyntaxError => e
      # http://nokogiri.org/tutorials/ensuring_well_formed_markup.html
      @errors << e
    end

    # True when no error was collected.
    def valid?
      @errors.empty?
    end

    private

    # Parses the body and fills @doc, @xsd and @errors.
    def validate_markup
      document = document_with_local_dtd
      xsd_file = @namespace + '.xsd'

      # The DTD reference and the XSD are given as relative paths, so both the
      # parse and the schema load run with XHTML_PATH as working directory.
      # NOTE(review): Dir.chdir changes the directory process-wide for the
      # duration of the block.
      Dir.chdir(XHTML_PATH) do
        @doc = Nokogiri::XML(document) { |cfg|
          cfg.noent.dtdload.dtdvalid
        }

        # http://www.w3.org/TR/xhtml1-schema/
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        @xsd = File.exist?(xsd_file) ? Nokogiri::XML::Schema(File.read(xsd_file)) : nil
      end

      if @xsd
        # have the xsd so use it
        @errors = @xsd.validate(@doc)
      else
        # dont have xsd fall back to dtd
        @doc = Dir.chdir(XHTML_PATH) do
          Nokogiri::HTML.parse(document)
        end
        @errors = @doc.errors
      end
    end

    # Returns the body with the first occurrence of the DOCTYPE system
    # identifier replaced by "<namespace>.dtd", or the body itself when the
    # identifier does not occur in it.
    def document_with_local_dtd
      return @body unless @dtd_uri

      remote = @dtd_uri.to_s
      # Literal search: the URI is not a regular expression ("." or "?" in it
      # must not be interpreted as pattern syntax).
      return @body unless @body.include?(remote)

      # Block form keeps the replacement literal as well.
      @body.sub(remote) { @namespace + '.dtd' }
    end

    # Derives @dtd_uri and @namespace from the DOCTYPE system identifier.
    # Both stay nil when there is no DOCTYPE, no system identifier, no path in
    # it, or when the identifier is not a parseable URI; the caller reports
    # that as 'Unknown document'.
    def init_namespace(dtd)
      system_id = dtd && dtd.system_id
      return unless system_id

      dtd_uri = URI.parse(system_id)
      return unless dtd_uri.path

      @dtd_uri = dtd_uri
      # http://www.w3.org/TR/xhtml1/#dtds
      @namespace = File.basename(@dtd_uri.path, '.dtd')
    rescue URI::InvalidURIError
      # Malformed system identifier: leave @namespace unset.
      nil
    end
  end
end
@@ -0,0 +1,82 @@
1
+ '\" t
2
+ .\" Title: validate-website-static
3
+ .\" Author: [see the "AUTHOR" section]
4
+ .\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
5
+ .\" Date: 12/24/2010
6
+ .\" Manual: \ \&
7
+ .\" Source: \ \&
8
+ .\" Language: English
9
+ .\"
10
+ .TH "VALIDATE\-WEBSITE\-S" "1" "12/24/2010" "\ \&" "\ \&"
11
+ .\" -----------------------------------------------------------------
12
+ .\" * Define some portability stuff
13
+ .\" -----------------------------------------------------------------
14
+ .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15
+ .\" http://bugs.debian.org/507673
16
+ .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
17
+ .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18
+ .ie \n(.g .ds Aq \(aq
19
+ .el .ds Aq '
20
+ .\" -----------------------------------------------------------------
21
+ .\" * set default formatting
22
+ .\" -----------------------------------------------------------------
23
+ .\" disable hyphenation
24
+ .nh
25
+ .\" disable justification (adjust text to left margin only)
26
+ .ad l
27
+ .\" -----------------------------------------------------------------
28
+ .\" * MAIN CONTENT STARTS HERE *
29
+ .\" -----------------------------------------------------------------
30
+ .SH "NAME"
31
+ validate-website-static \- check the validity of your documents
32
+ .SH "SYNOPSIS"
33
+ .sp
34
+ \fBvalidate\-website\-static\fR [\fIOPTIONS\fR]
35
+ .SH "DESCRIPTION"
36
+ .sp
37
+ validate\-website\-static checks the markup validity of your local documents\&.
38
+ .SH "OPTIONS"
39
+ .PP
40
+ \fB\-p\fR, \fB\-\-pattern\fR \fIPATTERN\fR
41
+ .RS 4
42
+ Change filenames pattern (Default: **/*\&.html)
43
+ .RE
44
+ .PP
45
+ \fB\-f\fR, \fB\-\-file\fR \fIFILE\fR
46
+ .RS 4
47
+ Save the names of documents that are not well formed to FILE
48
+ .RE
49
+ .PP
50
+ \fB\-v\fR, \fB\-\-verbose\fR
51
+ .RS 4
52
+ Show detail of validator errors (Default: false)\&.
53
+ .RE
54
+ .PP
55
+ \fB\-q\fR, \fB\-\-quiet\fR
56
+ .RS 4
57
+ Only report errors (Default: false)\&.
58
+ .RE
59
+ .PP
60
+ \fB\-h\fR, \fB\-\-help\fR
61
+ .RS 4
62
+ Show help message and exit\&.
63
+ .RE
64
+ .SH "EXIT STATUS"
65
+ .PP
66
+ 0
67
+ .RS 4
68
+ Markup is valid\&.
69
+ .RE
70
+ .PP
71
+ 64
72
+ .RS 4
73
+ Invalid markup found\&.
74
+ .RE
75
+ .SH "AUTHOR"
76
+ .sp
77
+ Laurent Arnoud <laurent@spkdev\&.net>
78
+ .SH "LICENSE"
79
+ .sp
80
+ The MIT License
81
+ .sp
82
+ Copyright (c) 2009\-2010 Laurent Arnoud <laurent@spkdev\&.net>
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes