validate-website 0.6.5 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -41,6 +41,8 @@ HTML5 support with Validator.nu Web Service.
41
41
  Markup validation (Default: true)
42
42
  -n, --not-found
43
43
  Log not found url (Default: false)
44
+ --[no-]color
45
+ Show colored output (Default: true)
44
46
  -v, --verbose
45
47
  Show detail of validator errors (Default: false).
46
48
  -q, --quiet
@@ -3,26 +3,31 @@ require 'rainbow'
3
3
 
4
4
  module ValidateWebsite
5
5
  module ColorfulMessages
6
+ def color(type, message, colored=true)
7
+ return message unless colored
8
+ send(type, message)
9
+ end
10
+
6
11
  def error(message)
7
- message.to_s.foreground(:red)
12
+ message.foreground(:red)
8
13
  end
9
14
 
10
15
  def warning(message)
11
- message.to_s.foreground(:yellow)
16
+ message.foreground(:yellow)
12
17
  end
13
18
 
14
19
  def success(message)
15
- message.to_s.foreground(:green)
20
+ message.foreground(:green)
16
21
  end
17
22
 
18
23
  alias_method :message, :success
19
24
 
20
25
  def note(message)
21
- message.to_s.foreground(:magenta)
26
+ message.foreground(:magenta)
22
27
  end
23
28
 
24
29
  def info(message)
25
- message.to_s.foreground(:blue)
30
+ message.foreground(:blue)
26
31
  end
27
32
  end
28
33
  end
@@ -37,53 +37,24 @@ module ValidateWebsite
37
37
  @site = @options[:site]
38
38
  end
39
39
 
40
- def validate(doc, body, url, opts={})
41
- opts = @options.merge(opts)
42
- validator = Validator.new(doc, body)
43
- msg = " well formed? %s" % validator.valid?
44
- if validator.valid?
45
- unless opts[:quiet]
46
- print info(url)
47
- puts success(msg)
48
- end
49
- else
50
- @markup_error = true
51
- print info(url)
52
- puts error(msg)
53
- puts error(validator.errors.join(", ")) if opts[:validate_verbose]
54
- to_file(url)
55
- end
56
- end
57
-
58
40
  def crawl(opts={})
59
41
  opts = @options.merge(opts)
60
- puts note("Validating #{@site}") if opts[:validate_verbose]
42
+ puts color(:note, "validating #{@site}", opts[:color])
61
43
 
62
44
  @anemone = Anemone.crawl(@site, opts) do |anemone|
63
45
  anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
64
46
 
65
47
  # select the links on each page to follow (iframe, link, css url)
66
- anemone.focus_crawl { |p|
48
+ anemone.focus_crawl { |page|
67
49
  links = []
68
- if p.html?
69
- p.doc.css('img, script, iframe').each do |elem|
70
- url = get_url(p, elem, "src")
71
- links << url unless url.nil?
72
- end
73
- p.doc.css('link').each do |link|
74
- url = get_url(p, link, "href")
75
- links << url unless url.nil?
76
- end
50
+ if page.html?
51
+ links.concat extract_urls_from_img_script_iframe_link(page)
77
52
  end
78
- if p.content_type == 'text/css'
79
- p.body.scan(/url\((['".\/\w-]+)\)/).each do |url|
80
- url = url.first.gsub("'", "").gsub('"', '')
81
- abs = p.to_absolute(URI(url))
82
- links << abs
83
- end
53
+ if page.content_type == 'text/css'
54
+ links.concat extract_urls_from_css(page)
84
55
  end
85
56
  links.uniq!
86
- p.links.concat(links)
57
+ page.links.concat(links)
87
58
  }
88
59
 
89
60
  anemone.on_every_page { |page|
@@ -98,7 +69,7 @@ module ValidateWebsite
98
69
 
99
70
  if opts[:not_found] && page.not_found?
100
71
  @not_found_error = true
101
- puts error("%s linked in %s but not exist" % [url, page.referer])
72
+ puts color(:error, "%s linked in %s but not exist" % [url, page.referer], opts[:color])
102
73
  to_file(url)
103
74
  end
104
75
 
@@ -108,17 +79,24 @@ module ValidateWebsite
108
79
  end
109
80
  end
110
81
 
111
- # check files linked on static document
112
- # see lib/validate_website/runner.rb
113
- def check_static_not_found(links, opts={})
82
+ def crawl_static(opts={})
114
83
  opts = @options.merge(opts)
115
- if opts[:not_found]
116
- links.each do |l|
117
- unless File.exists?(l.path)
118
- @not_found_error = true
119
- puts error("%s linked but not exist" % l)
120
- to_file(l)
121
- end
84
+ puts color(:note, "validating #{@site}", opts[:color])
85
+
86
+ files = Dir.glob(opts[:pattern])
87
+ files.each do |f|
88
+ next unless File.file?(f)
89
+
90
+ page = Anemone::Page.new(URI.parse(opts[:site] + f), :body => open(f).read,
91
+ :headers => {'content-type' => ['text/html', 'application/xhtml+xml']})
92
+
93
+ if opts[:markup_validation]
94
+ validate(page.doc, page.body, f)
95
+ end
96
+ if opts[:not_found]
97
+ links = page.links
98
+ links.concat extract_urls_from_img_script_iframe_link(page)
99
+ check_static_not_found(links.uniq)
122
100
  end
123
101
  end
124
102
  end
@@ -136,6 +114,7 @@ module ValidateWebsite
136
114
  end
137
115
 
138
116
  private
117
+
139
118
  def to_file(msg)
140
119
  if @file && File.exist?(@file)
141
120
  open(@file, 'a').write("#{msg}\n")
@@ -143,14 +122,81 @@ module ValidateWebsite
143
122
  end
144
123
 
145
124
  def get_url(page, elem, attrname)
146
- u = elem.attributes[attrname] if elem.attributes[attrname]
147
- return if u.nil?
148
- begin
149
- abs = page.to_absolute(URI(u))
150
- rescue
151
- abs = nil
125
+ u = elem.attributes[attrname].to_s
126
+ return if u.nil? || u.empty?
127
+ abs = page.to_absolute(u) rescue nil
128
+ abs if abs && page.in_domain?(abs)
129
+ end
130
+
131
+ # check files linked on static document
132
+ # see lib/validate_website/runner.rb
133
+ def check_static_not_found(links, opts={})
134
+ opts = @options.merge(opts)
135
+ links.each do |l|
136
+ file_location = URI.parse(File.join(Dir.getwd, l.path)).path
137
+ # Check CSS url()
138
+ if File.exists?(file_location) && File.extname(file_location) == '.css'
139
+ css_page = Anemone::Page.new(l, :body => File.read(file_location),
140
+ :headers => {'content-type' => ['text/css']})
141
+ links.concat extract_urls_from_css(css_page)
142
+ links.uniq!
143
+ end
144
+ unless File.exists?(file_location)
145
+ @not_found_error = true
146
+ puts color(:error, "%s linked but not exist" % file_location, opts[:color])
147
+ to_file(file_location)
148
+ end
149
+ end
150
+ end
151
+
152
+ # Extract urls from img script iframe and link element
153
+ #
154
+ # @param [Anemone::Page] an Anemone::Page object
155
+ # @return [Array] Lists of urls
156
+ #
157
+ def extract_urls_from_img_script_iframe_link(page)
158
+ links = []
159
+ page.doc.css('img, script, iframe').each do |elem|
160
+ url = get_url(page, elem, "src")
161
+ links << url unless url.nil? || url.to_s.empty?
162
+ end
163
+ page.doc.css('link').each do |link|
164
+ url = get_url(page, link, "href")
165
+ links << url unless url.nil? || url.to_s.empty?
166
+ end
167
+ links
168
+ end
169
+
170
+ # Extract urls from CSS page
171
+ #
172
+ # @param [Anemone::Page] an Anemone::Page object
173
+ # @return [Array] Lists of urls
174
+ #
175
+ def extract_urls_from_css(page)
176
+ page.body.scan(/url\((['".\/\w-]+)\)/).inject([]) do |result, url|
177
+ url = url.first.gsub("'", "").gsub('"', '')
178
+ abs = page.to_absolute(URI.parse(url))
179
+ result << abs
152
180
  end
153
- return abs if abs && page.in_domain?(abs)
154
181
  end
182
+
183
+ def validate(doc, body, url, opts={})
184
+ opts = @options.merge(opts)
185
+ validator = Validator.new(doc, body)
186
+ msg = " well formed? %s" % validator.valid?
187
+ if validator.valid?
188
+ unless opts[:quiet]
189
+ print color(:info, url, opts[:color])
190
+ puts color(:success, msg, opts[:color])
191
+ end
192
+ else
193
+ @markup_error = true
194
+ print color(:info, url, opts[:color])
195
+ puts color(:error, msg, opts[:color])
196
+ puts color(:error, validator.errors.join(', '), opts[:color]) if opts[:validate_verbose]
197
+ to_file(url)
198
+ end
199
+ end
200
+
155
201
  end
156
202
  end
@@ -3,7 +3,6 @@ require 'optparse'
3
3
 
4
4
  module ValidateWebsite
5
5
  class Parser
6
- # TODO: no color
7
6
  DEFAULT_OPTS_CRAWL = {
8
7
  :site => 'http://localhost:3000/',
9
8
  :markup_validation => true,
@@ -20,9 +19,11 @@ module ValidateWebsite
20
19
  :cookies => nil,
21
20
  :accept_cookies => true,
22
21
  :redirect_limit => 0,
22
+ :color => true,
23
23
  }
24
24
 
25
25
  DEFAULT_OPTS_STATIC = {
26
+ :site => 'http://www.example.com/',
26
27
  :pattern => '**/*.html',
27
28
  :file => nil,
28
29
  :validate_verbose => false,
@@ -30,6 +31,7 @@ module ValidateWebsite
30
31
  :markup_validation => true,
31
32
  # log not found url (not on filesystem, pwd considered as root « / »)
32
33
  :not_found => false,
34
+ :color => true,
33
35
  }
34
36
 
35
37
  def self.parse(options, type)
@@ -84,6 +86,10 @@ module ValidateWebsite
84
86
  "Log not found url (Default: #{@@default_opts[:not_found]})") { |v|
85
87
  options[:not_found] = v
86
88
  }
89
+ o.on("--[no-]color",
90
+ "Show colored output (Default: #{@@default_opts[:color]})") { |v|
91
+ options[:color] = v
92
+ }
87
93
  o.on("-v", "--verbose",
88
94
  "Show validator errors (Default: #{@@default_opts[:validate_verbose]})") { |v|
89
95
  options[:validate_verbose] = v
@@ -100,8 +106,7 @@ module ValidateWebsite
100
106
  o.separator ""
101
107
  o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
102
108
  end
103
- opts.parse!(args)
104
- @@default_opts.merge(options)
109
+ command_line_parse!(opts, args, options)
105
110
  end
106
111
 
107
112
  def self.command_line_parse_static(args)
@@ -113,6 +118,10 @@ module ValidateWebsite
113
118
  'your documents'
114
119
  o.separator ''
115
120
 
121
+ o.on("-s", "--site 'SITE'", String,
122
+ "Where static files will be hosted (Default: #{@@default_opts[:site]})") { |v|
123
+ options[:site] = v
124
+ }
116
125
  o.on("-p", "--pattern 'PATTERN'", String,
117
126
  "Change filenames pattern (Default: #{@@default_opts[:pattern]})") { |v|
118
127
  options[:pattern] = v.strip
@@ -138,7 +147,17 @@ module ValidateWebsite
138
147
  options[:quiet] = v
139
148
  }
140
149
  end
141
- opts.parse!(args)
150
+ command_line_parse!(opts, args, options)
151
+ end
152
+
153
+ def self.command_line_parse!(opts, args, options)
154
+ begin
155
+ opts.parse!(args)
156
+ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
157
+ puts $!.to_s
158
+ puts opts
159
+ exit 128
160
+ end
142
161
  @@default_opts.merge(options)
143
162
  end
144
163
  end
@@ -19,32 +19,7 @@ module ValidateWebsite
19
19
  def self.run_static(args)
20
20
  trap_interrupt
21
21
  validate_website = ValidateWebsite::Core.new(args, :static)
22
- opts = validate_website.options
23
- links = []
24
- files = Dir.glob(opts[:pattern])
25
- files.each do |f|
26
- next unless File.file?(f)
27
-
28
- body = open(f).read
29
- doc = Nokogiri::HTML(body)
30
-
31
- # TODO: check css url for static files
32
- if opts[:not_found]
33
- doc.search("//a[@href]").each do |a|
34
- u = a['href']
35
- next if u.nil? || u.empty?
36
- next if u.match(/^https?:\/\//)
37
- abs = URI(File.join(Dir.getwd, u)) rescue next
38
- links << abs if abs.host.nil?
39
- end
40
- links.uniq!
41
- end
42
-
43
- if opts[:markup_validation]
44
- validate_website.validate(doc, body, f)
45
- end
46
- end
47
- validate_website.check_static_not_found(links)
22
+ validate_website.crawl_static
48
23
  validate_website.exit_status
49
24
  end
50
25
  end
@@ -37,6 +37,12 @@ validate-website-static \- check the validity of your documents
37
37
  validate\-website\-static check the markup validity of your local documents with XML Schema / DTD\&. HTML5 support with Validator\&.nu Web Service\&.
38
38
  .SH "OPTIONS"
39
39
  .PP
40
+ \fB\-s\fR, \fB\-\-site\fR \fISITE\fR
41
+ .RS 4
42
+ Where static files will be hosted (Default:
43
+ http://www\&.example\&.com/)
44
+ .RE
45
+ .PP
40
46
  \fB\-p\fR, \fB\-\-pattern\fR \fIPATTERN\fR
41
47
  .RS 4
42
48
  Change filenames pattern (Default: **/*\&.html)
@@ -57,6 +63,11 @@ Markup validation (Default: true)
57
63
  Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
58
64
  .RE
59
65
  .PP
66
+ \fB\-\-[no\-]color\fR
67
+ .RS 4
68
+ Show colored output (Default: true)
69
+ .RE
70
+ .PP
60
71
  \fB\-v\fR, \fB\-\-verbose\fR
61
72
  .RS 4
62
73
  Show detail of validator errors (Default: false)\&.
@@ -74,6 +74,11 @@ Markup validation (Default: true)
74
74
  Log not found url (Default: false)
75
75
  .RE
76
76
  .PP
77
+ \fB\-\-[no\-]color\fR
78
+ .RS 4
79
+ Show colored output (Default: true)
80
+ .RE
81
+ .PP
77
82
  \fB\-v\fR, \fB\-\-verbose\fR
78
83
  .RS 4
79
84
  Show detail of validator errors (Default: false)\&.
data/spec/core_spec.rb CHANGED
@@ -5,7 +5,7 @@ module ValidateWebsite
5
5
 
6
6
  before(:each) do
7
7
  FakeWeb.clean_registry
8
- @validate_website = ValidateWebsite::Core.new
8
+ @validate_website = ValidateWebsite::Core.new(:color => false)
9
9
  end
10
10
 
11
11
  context('html') do
@@ -17,7 +17,7 @@ module ValidateWebsite
17
17
  :content_type => 'text/html')
18
18
  @validate_website.site = page.url
19
19
  @validate_website.crawl(:quiet => true)
20
- @validate_website.anemone.should have(3).pages
20
+ @validate_website.anemone.should have(5).pages
21
21
  end
22
22
  end
23
23
 
@@ -15,5 +15,8 @@
15
15
  <li><a href="/my-url2" title="title">my url</a></li>
16
16
  <li><a href="/my-url1" title="title">my url</a></li>
17
17
  </ul>
18
+ <p><img src="http://test.com/img.png" alt="non local img" /></p>
19
+ <p><img src="http://www.example.com/img1.png" alt="local img with absolute uri" /></p>
20
+ <p><img src="/img2.png" alt="local img with non absolute uri" /></p>
18
21
  </body>
19
22
  </html>
metadata CHANGED
@@ -1,91 +1,128 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
- version: !ruby/object:Gem::Version
4
- version: 0.6.5
3
+ version: !ruby/object:Gem::Version
4
+ hash: 3
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 7
9
+ - 0
10
+ version: 0.7.0
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Laurent Arnoud
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2011-06-04 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
17
+
18
+ date: 2011-06-06 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
15
21
  name: anemone
16
- requirement: &22543340 !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
17
24
  none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 5
29
+ segments:
30
+ - 0
31
+ - 6
32
+ - 1
21
33
  version: 0.6.1
22
34
  type: :runtime
23
- prerelease: false
24
- version_requirements: *22543340
25
- - !ruby/object:Gem::Dependency
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
26
37
  name: rainbow
27
- requirement: &22542860 !ruby/object:Gem::Requirement
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
28
40
  none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 17
45
+ segments:
46
+ - 1
47
+ - 1
48
+ - 1
32
49
  version: 1.1.1
33
50
  type: :runtime
34
- prerelease: false
35
- version_requirements: *22542860
36
- - !ruby/object:Gem::Dependency
51
+ version_requirements: *id002
52
+ - !ruby/object:Gem::Dependency
37
53
  name: multipart_body
38
- requirement: &22542400 !ruby/object:Gem::Requirement
54
+ prerelease: false
55
+ requirement: &id003 !ruby/object:Gem::Requirement
39
56
  none: false
40
- requirements:
41
- - - ! '>='
42
- - !ruby/object:Gem::Version
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 21
61
+ segments:
62
+ - 0
63
+ - 2
64
+ - 1
43
65
  version: 0.2.1
44
66
  type: :runtime
45
- prerelease: false
46
- version_requirements: *22542400
47
- - !ruby/object:Gem::Dependency
67
+ version_requirements: *id003
68
+ - !ruby/object:Gem::Dependency
48
69
  name: rake
49
- requirement: &22541940 !ruby/object:Gem::Requirement
70
+ prerelease: false
71
+ requirement: &id004 !ruby/object:Gem::Requirement
50
72
  none: false
51
- requirements:
52
- - - ! '>='
53
- - !ruby/object:Gem::Version
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 49
77
+ segments:
78
+ - 0
79
+ - 8
80
+ - 7
54
81
  version: 0.8.7
55
82
  type: :development
56
- prerelease: false
57
- version_requirements: *22541940
58
- - !ruby/object:Gem::Dependency
83
+ version_requirements: *id004
84
+ - !ruby/object:Gem::Dependency
59
85
  name: rspec
60
- requirement: &22541480 !ruby/object:Gem::Requirement
86
+ prerelease: false
87
+ requirement: &id005 !ruby/object:Gem::Requirement
61
88
  none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ hash: 23
93
+ segments:
94
+ - 2
95
+ - 6
96
+ - 0
65
97
  version: 2.6.0
66
98
  type: :development
67
- prerelease: false
68
- version_requirements: *22541480
69
- - !ruby/object:Gem::Dependency
99
+ version_requirements: *id005
100
+ - !ruby/object:Gem::Dependency
70
101
  name: fakeweb
71
- requirement: &22541020 !ruby/object:Gem::Requirement
102
+ prerelease: false
103
+ requirement: &id006 !ruby/object:Gem::Requirement
72
104
  none: false
73
- requirements:
74
- - - ! '>='
75
- - !ruby/object:Gem::Version
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ hash: 27
109
+ segments:
110
+ - 1
111
+ - 3
112
+ - 0
76
113
  version: 1.3.0
77
114
  type: :development
78
- prerelease: false
79
- version_requirements: *22541020
80
- description: validate-website is a web crawler for checking the markup validity with
81
- XML Schema / DTD and not found urls.
115
+ version_requirements: *id006
116
+ description: validate-website is a web crawler for checking the markup validity with XML Schema / DTD and not found urls.
82
117
  email: laurent@spkdev.net
83
- executables:
118
+ executables:
84
119
  - validate-website
85
120
  - validate-website-static
86
121
  extensions: []
122
+
87
123
  extra_rdoc_files: []
88
- files:
124
+
125
+ files:
89
126
  - README.rdoc
90
127
  - Rakefile
91
128
  - LICENSE
@@ -199,25 +236,32 @@ files:
199
236
  - bin/validate-website
200
237
  - bin/validate-website-static
201
238
  homepage: http://github.com/spk/validate-website
202
- licenses:
239
+ licenses:
203
240
  - MIT
204
241
  post_install_message:
205
242
  rdoc_options: []
206
- require_paths:
243
+
244
+ require_paths:
207
245
  - lib
208
- required_ruby_version: !ruby/object:Gem::Requirement
246
+ required_ruby_version: !ruby/object:Gem::Requirement
209
247
  none: false
210
- requirements:
211
- - - ! '>='
212
- - !ruby/object:Gem::Version
213
- version: '0'
214
- required_rubygems_version: !ruby/object:Gem::Requirement
248
+ requirements:
249
+ - - ">="
250
+ - !ruby/object:Gem::Version
251
+ hash: 3
252
+ segments:
253
+ - 0
254
+ version: "0"
255
+ required_rubygems_version: !ruby/object:Gem::Requirement
215
256
  none: false
216
- requirements:
217
- - - ! '>='
218
- - !ruby/object:Gem::Version
219
- version: '0'
220
- requirements:
257
+ requirements:
258
+ - - ">="
259
+ - !ruby/object:Gem::Version
260
+ hash: 3
261
+ segments:
262
+ - 0
263
+ version: "0"
264
+ requirements:
221
265
  - anemone
222
266
  - rainbow
223
267
  - multipart_body
@@ -226,7 +270,7 @@ rubygems_version: 1.7.2
226
270
  signing_key:
227
271
  specification_version: 3
228
272
  summary: Web crawler for checking the validity of your documents
229
- test_files:
273
+ test_files:
230
274
  - spec/spec_helper.rb
231
275
  - spec/data/html5.html
232
276
  - spec/data/html4-strict.html
@@ -236,3 +280,4 @@ test_files:
236
280
  - spec/core_spec.rb
237
281
  - spec/fakeweb_helper.rb
238
282
  - spec/validator_spec.rb
283
+ has_rdoc: