validate-website 0.6.5 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -41,6 +41,8 @@ HTML5 support with Validator.nu Web Service.
41
41
  Markup validation (Default: true)
42
42
  -n, --not-found
43
43
  Log not found url (Default: false)
44
+ --[no-]color
45
+ Show colored output (Default: true)
44
46
  -v, --verbose
45
47
  Show detail of validator errors (Default: false).
46
48
  -q, --quiet
@@ -3,26 +3,31 @@ require 'rainbow'
3
3
 
4
4
  module ValidateWebsite
5
5
  module ColorfulMessages
6
+ def color(type, message, colored=true)
7
+ return message unless colored
8
+ send(type, message)
9
+ end
10
+
6
11
  def error(message)
7
- message.to_s.foreground(:red)
12
+ message.foreground(:red)
8
13
  end
9
14
 
10
15
  def warning(message)
11
- message.to_s.foreground(:yellow)
16
+ message.foreground(:yellow)
12
17
  end
13
18
 
14
19
  def success(message)
15
- message.to_s.foreground(:green)
20
+ message.foreground(:green)
16
21
  end
17
22
 
18
23
  alias_method :message, :success
19
24
 
20
25
  def note(message)
21
- message.to_s.foreground(:magenta)
26
+ message.foreground(:magenta)
22
27
  end
23
28
 
24
29
  def info(message)
25
- message.to_s.foreground(:blue)
30
+ message.foreground(:blue)
26
31
  end
27
32
  end
28
33
  end
@@ -37,53 +37,24 @@ module ValidateWebsite
37
37
  @site = @options[:site]
38
38
  end
39
39
 
40
- def validate(doc, body, url, opts={})
41
- opts = @options.merge(opts)
42
- validator = Validator.new(doc, body)
43
- msg = " well formed? %s" % validator.valid?
44
- if validator.valid?
45
- unless opts[:quiet]
46
- print info(url)
47
- puts success(msg)
48
- end
49
- else
50
- @markup_error = true
51
- print info(url)
52
- puts error(msg)
53
- puts error(validator.errors.join(", ")) if opts[:validate_verbose]
54
- to_file(url)
55
- end
56
- end
57
-
58
40
  def crawl(opts={})
59
41
  opts = @options.merge(opts)
60
- puts note("Validating #{@site}") if opts[:validate_verbose]
42
+ puts color(:note, "validating #{@site}", opts[:color])
61
43
 
62
44
  @anemone = Anemone.crawl(@site, opts) do |anemone|
63
45
  anemone.skip_links_like Regexp.new(opts[:exclude]) if opts[:exclude]
64
46
 
65
47
  # select the links on each page to follow (iframe, link, css url)
66
- anemone.focus_crawl { |p|
48
+ anemone.focus_crawl { |page|
67
49
  links = []
68
- if p.html?
69
- p.doc.css('img, script, iframe').each do |elem|
70
- url = get_url(p, elem, "src")
71
- links << url unless url.nil?
72
- end
73
- p.doc.css('link').each do |link|
74
- url = get_url(p, link, "href")
75
- links << url unless url.nil?
76
- end
50
+ if page.html?
51
+ links.concat extract_urls_from_img_script_iframe_link(page)
77
52
  end
78
- if p.content_type == 'text/css'
79
- p.body.scan(/url\((['".\/\w-]+)\)/).each do |url|
80
- url = url.first.gsub("'", "").gsub('"', '')
81
- abs = p.to_absolute(URI(url))
82
- links << abs
83
- end
53
+ if page.content_type == 'text/css'
54
+ links.concat extract_urls_from_css(page)
84
55
  end
85
56
  links.uniq!
86
- p.links.concat(links)
57
+ page.links.concat(links)
87
58
  }
88
59
 
89
60
  anemone.on_every_page { |page|
@@ -98,7 +69,7 @@ module ValidateWebsite
98
69
 
99
70
  if opts[:not_found] && page.not_found?
100
71
  @not_found_error = true
101
- puts error("%s linked in %s but not exist" % [url, page.referer])
72
+ puts color(:error, "%s linked in %s but not exist" % [url, page.referer], opts[:color])
102
73
  to_file(url)
103
74
  end
104
75
 
@@ -108,17 +79,24 @@ module ValidateWebsite
108
79
  end
109
80
  end
110
81
 
111
- # check files linked on static document
112
- # see lib/validate_website/runner.rb
113
- def check_static_not_found(links, opts={})
82
+ def crawl_static(opts={})
114
83
  opts = @options.merge(opts)
115
- if opts[:not_found]
116
- links.each do |l|
117
- unless File.exists?(l.path)
118
- @not_found_error = true
119
- puts error("%s linked but not exist" % l)
120
- to_file(l)
121
- end
84
+ puts color(:note, "validating #{@site}", opts[:color])
85
+
86
+ files = Dir.glob(opts[:pattern])
87
+ files.each do |f|
88
+ next unless File.file?(f)
89
+
90
+ page = Anemone::Page.new(URI.parse(opts[:site] + f), :body => open(f).read,
91
+ :headers => {'content-type' => ['text/html', 'application/xhtml+xml']})
92
+
93
+ if opts[:markup_validation]
94
+ validate(page.doc, page.body, f)
95
+ end
96
+ if opts[:not_found]
97
+ links = page.links
98
+ links.concat extract_urls_from_img_script_iframe_link(page)
99
+ check_static_not_found(links.uniq)
122
100
  end
123
101
  end
124
102
  end
@@ -136,6 +114,7 @@ module ValidateWebsite
136
114
  end
137
115
 
138
116
  private
117
+
139
118
  def to_file(msg)
140
119
  if @file && File.exist?(@file)
141
120
  open(@file, 'a').write("#{msg}\n")
@@ -143,14 +122,81 @@ module ValidateWebsite
143
122
  end
144
123
 
145
124
  def get_url(page, elem, attrname)
146
- u = elem.attributes[attrname] if elem.attributes[attrname]
147
- return if u.nil?
148
- begin
149
- abs = page.to_absolute(URI(u))
150
- rescue
151
- abs = nil
125
+ u = elem.attributes[attrname].to_s
126
+ return if u.nil? || u.empty?
127
+ abs = page.to_absolute(u) rescue nil
128
+ abs if abs && page.in_domain?(abs)
129
+ end
130
+
131
+ # check files linked on static document
132
+ # see lib/validate_website/runner.rb
133
+ def check_static_not_found(links, opts={})
134
+ opts = @options.merge(opts)
135
+ links.each do |l|
136
+ file_location = URI.parse(File.join(Dir.getwd, l.path)).path
137
+ # Check CSS url()
138
+ if File.exists?(file_location) && File.extname(file_location) == '.css'
139
+ css_page = Anemone::Page.new(l, :body => File.read(file_location),
140
+ :headers => {'content-type' => ['text/css']})
141
+ links.concat extract_urls_from_css(css_page)
142
+ links.uniq!
143
+ end
144
+ unless File.exists?(file_location)
145
+ @not_found_error = true
146
+ puts color(:error, "%s linked but not exist" % file_location, opts[:color])
147
+ to_file(file_location)
148
+ end
149
+ end
150
+ end
151
+
152
+ # Extract urls from img script iframe and link element
153
+ #
154
+ # @param [Anemone::Page] an Anemone::Page object
155
+ # @return [Array] Lists of urls
156
+ #
157
+ def extract_urls_from_img_script_iframe_link(page)
158
+ links = []
159
+ page.doc.css('img, script, iframe').each do |elem|
160
+ url = get_url(page, elem, "src")
161
+ links << url unless url.nil? || url.to_s.empty?
162
+ end
163
+ page.doc.css('link').each do |link|
164
+ url = get_url(page, link, "href")
165
+ links << url unless url.nil? || url.to_s.empty?
166
+ end
167
+ links
168
+ end
169
+
170
+ # Extract urls from CSS page
171
+ #
172
+ # @param [Anemone::Page] an Anemone::Page object
173
+ # @return [Array] Lists of urls
174
+ #
175
+ def extract_urls_from_css(page)
176
+ page.body.scan(/url\((['".\/\w-]+)\)/).inject([]) do |result, url|
177
+ url = url.first.gsub("'", "").gsub('"', '')
178
+ abs = page.to_absolute(URI.parse(url))
179
+ result << abs
152
180
  end
153
- return abs if abs && page.in_domain?(abs)
154
181
  end
182
+
183
+ def validate(doc, body, url, opts={})
184
+ opts = @options.merge(opts)
185
+ validator = Validator.new(doc, body)
186
+ msg = " well formed? %s" % validator.valid?
187
+ if validator.valid?
188
+ unless opts[:quiet]
189
+ print color(:info, url, opts[:color])
190
+ puts color(:success, msg, opts[:color])
191
+ end
192
+ else
193
+ @markup_error = true
194
+ print color(:info, url, opts[:color])
195
+ puts color(:error, msg, opts[:color])
196
+ puts color(:error, validator.errors.join(', '), opts[:color]) if opts[:validate_verbose]
197
+ to_file(url)
198
+ end
199
+ end
200
+
155
201
  end
156
202
  end
@@ -3,7 +3,6 @@ require 'optparse'
3
3
 
4
4
  module ValidateWebsite
5
5
  class Parser
6
- # TODO: no color
7
6
  DEFAULT_OPTS_CRAWL = {
8
7
  :site => 'http://localhost:3000/',
9
8
  :markup_validation => true,
@@ -20,9 +19,11 @@ module ValidateWebsite
20
19
  :cookies => nil,
21
20
  :accept_cookies => true,
22
21
  :redirect_limit => 0,
22
+ :color => true,
23
23
  }
24
24
 
25
25
  DEFAULT_OPTS_STATIC = {
26
+ :site => 'http://www.example.com/',
26
27
  :pattern => '**/*.html',
27
28
  :file => nil,
28
29
  :validate_verbose => false,
@@ -30,6 +31,7 @@ module ValidateWebsite
30
31
  :markup_validation => true,
31
32
  # log not found url (not on filesystem, pwd considered as root « / »)
32
33
  :not_found => false,
34
+ :color => true,
33
35
  }
34
36
 
35
37
  def self.parse(options, type)
@@ -84,6 +86,10 @@ module ValidateWebsite
84
86
  "Log not found url (Default: #{@@default_opts[:not_found]})") { |v|
85
87
  options[:not_found] = v
86
88
  }
89
+ o.on("--[no-]color",
90
+ "Show colored output (Default: #{@@default_opts[:color]})") { |v|
91
+ options[:color] = v
92
+ }
87
93
  o.on("-v", "--verbose",
88
94
  "Show validator errors (Default: #{@@default_opts[:validate_verbose]})") { |v|
89
95
  options[:validate_verbose] = v
@@ -100,8 +106,7 @@ module ValidateWebsite
100
106
  o.separator ""
101
107
  o.on_tail("-h", "--help", "Show this help message.") { puts o; exit }
102
108
  end
103
- opts.parse!(args)
104
- @@default_opts.merge(options)
109
+ command_line_parse!(opts, args, options)
105
110
  end
106
111
 
107
112
  def self.command_line_parse_static(args)
@@ -113,6 +118,10 @@ module ValidateWebsite
113
118
  'your documents'
114
119
  o.separator ''
115
120
 
121
+ o.on("-s", "--site 'SITE'", String,
122
+ "Where static files will be hosted (Default: #{@@default_opts[:site]})") { |v|
123
+ options[:site] = v
124
+ }
116
125
  o.on("-p", "--pattern 'PATTERN'", String,
117
126
  "Change filenames pattern (Default: #{@@default_opts[:pattern]})") { |v|
118
127
  options[:pattern] = v.strip
@@ -138,7 +147,17 @@ module ValidateWebsite
138
147
  options[:quiet] = v
139
148
  }
140
149
  end
141
- opts.parse!(args)
150
+ command_line_parse!(opts, args, options)
151
+ end
152
+
153
+ def self.command_line_parse!(opts, args, options)
154
+ begin
155
+ opts.parse!(args)
156
+ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
157
+ puts $!.to_s
158
+ puts opts
159
+ exit 128
160
+ end
142
161
  @@default_opts.merge(options)
143
162
  end
144
163
  end
@@ -19,32 +19,7 @@ module ValidateWebsite
19
19
  def self.run_static(args)
20
20
  trap_interrupt
21
21
  validate_website = ValidateWebsite::Core.new(args, :static)
22
- opts = validate_website.options
23
- links = []
24
- files = Dir.glob(opts[:pattern])
25
- files.each do |f|
26
- next unless File.file?(f)
27
-
28
- body = open(f).read
29
- doc = Nokogiri::HTML(body)
30
-
31
- # TODO: check css url for static files
32
- if opts[:not_found]
33
- doc.search("//a[@href]").each do |a|
34
- u = a['href']
35
- next if u.nil? || u.empty?
36
- next if u.match(/^https?:\/\//)
37
- abs = URI(File.join(Dir.getwd, u)) rescue next
38
- links << abs if abs.host.nil?
39
- end
40
- links.uniq!
41
- end
42
-
43
- if opts[:markup_validation]
44
- validate_website.validate(doc, body, f)
45
- end
46
- end
47
- validate_website.check_static_not_found(links)
22
+ validate_website.crawl_static
48
23
  validate_website.exit_status
49
24
  end
50
25
  end
@@ -37,6 +37,12 @@ validate-website-static \- check the validity of your documents
37
37
  validate\-website\-static check the markup validity of your local documents with XML Schema / DTD\&. HTML5 support with Validator\&.nu Web Service\&.
38
38
  .SH "OPTIONS"
39
39
  .PP
40
+ \fB\-s\fR, \fB\-\-site\fR \fISITE\fR
41
+ .RS 4
42
+ Where static files will be hosted (Default:
43
+ http://www\&.example\&.com/)
44
+ .RE
45
+ .PP
40
46
  \fB\-p\fR, \fB\-\-pattern\fR \fIPATTERN\fR
41
47
  .RS 4
42
48
  Change filenames pattern (Default: **/*\&.html)
@@ -57,6 +63,11 @@ Markup validation (Default: true)
57
63
  Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
58
64
  .RE
59
65
  .PP
66
+ \fB\-\-[no\-]color\fR
67
+ .RS 4
68
+ Show colored output (Default: true)
69
+ .RE
70
+ .PP
60
71
  \fB\-v\fR, \fB\-\-verbose\fR
61
72
  .RS 4
62
73
  Show detail of validator errors (Default: false)\&.
@@ -74,6 +74,11 @@ Markup validation (Default: true)
74
74
  Log not found url (Default: false)
75
75
  .RE
76
76
  .PP
77
+ \fB\-\-[no\-]color\fR
78
+ .RS 4
79
+ Show colored output (Default: true)
80
+ .RE
81
+ .PP
77
82
  \fB\-v\fR, \fB\-\-verbose\fR
78
83
  .RS 4
79
84
  Show detail of validator errors (Default: false)\&.
data/spec/core_spec.rb CHANGED
@@ -5,7 +5,7 @@ module ValidateWebsite
5
5
 
6
6
  before(:each) do
7
7
  FakeWeb.clean_registry
8
- @validate_website = ValidateWebsite::Core.new
8
+ @validate_website = ValidateWebsite::Core.new(:color => false)
9
9
  end
10
10
 
11
11
  context('html') do
@@ -17,7 +17,7 @@ module ValidateWebsite
17
17
  :content_type => 'text/html')
18
18
  @validate_website.site = page.url
19
19
  @validate_website.crawl(:quiet => true)
20
- @validate_website.anemone.should have(3).pages
20
+ @validate_website.anemone.should have(5).pages
21
21
  end
22
22
  end
23
23
 
@@ -15,5 +15,8 @@
15
15
  <li><a href="/my-url2" title="title">my url</a></li>
16
16
  <li><a href="/my-url1" title="title">my url</a></li>
17
17
  </ul>
18
+ <p><img src="http://test.com/img.png" alt="non local img" /></p>
19
+ <p><img src="http://www.example.com/img1.png" alt="local img with absolute uri" /></p>
20
+ <p><img src="/img2.png" alt="local img with non absolute uri" /></p>
18
21
  </body>
19
22
  </html>
metadata CHANGED
@@ -1,91 +1,128 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
- version: !ruby/object:Gem::Version
4
- version: 0.6.5
3
+ version: !ruby/object:Gem::Version
4
+ hash: 3
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 7
9
+ - 0
10
+ version: 0.7.0
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Laurent Arnoud
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2011-06-04 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
17
+
18
+ date: 2011-06-06 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
15
21
  name: anemone
16
- requirement: &22543340 !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
17
24
  none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 5
29
+ segments:
30
+ - 0
31
+ - 6
32
+ - 1
21
33
  version: 0.6.1
22
34
  type: :runtime
23
- prerelease: false
24
- version_requirements: *22543340
25
- - !ruby/object:Gem::Dependency
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
26
37
  name: rainbow
27
- requirement: &22542860 !ruby/object:Gem::Requirement
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
28
40
  none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 17
45
+ segments:
46
+ - 1
47
+ - 1
48
+ - 1
32
49
  version: 1.1.1
33
50
  type: :runtime
34
- prerelease: false
35
- version_requirements: *22542860
36
- - !ruby/object:Gem::Dependency
51
+ version_requirements: *id002
52
+ - !ruby/object:Gem::Dependency
37
53
  name: multipart_body
38
- requirement: &22542400 !ruby/object:Gem::Requirement
54
+ prerelease: false
55
+ requirement: &id003 !ruby/object:Gem::Requirement
39
56
  none: false
40
- requirements:
41
- - - ! '>='
42
- - !ruby/object:Gem::Version
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 21
61
+ segments:
62
+ - 0
63
+ - 2
64
+ - 1
43
65
  version: 0.2.1
44
66
  type: :runtime
45
- prerelease: false
46
- version_requirements: *22542400
47
- - !ruby/object:Gem::Dependency
67
+ version_requirements: *id003
68
+ - !ruby/object:Gem::Dependency
48
69
  name: rake
49
- requirement: &22541940 !ruby/object:Gem::Requirement
70
+ prerelease: false
71
+ requirement: &id004 !ruby/object:Gem::Requirement
50
72
  none: false
51
- requirements:
52
- - - ! '>='
53
- - !ruby/object:Gem::Version
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 49
77
+ segments:
78
+ - 0
79
+ - 8
80
+ - 7
54
81
  version: 0.8.7
55
82
  type: :development
56
- prerelease: false
57
- version_requirements: *22541940
58
- - !ruby/object:Gem::Dependency
83
+ version_requirements: *id004
84
+ - !ruby/object:Gem::Dependency
59
85
  name: rspec
60
- requirement: &22541480 !ruby/object:Gem::Requirement
86
+ prerelease: false
87
+ requirement: &id005 !ruby/object:Gem::Requirement
61
88
  none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ hash: 23
93
+ segments:
94
+ - 2
95
+ - 6
96
+ - 0
65
97
  version: 2.6.0
66
98
  type: :development
67
- prerelease: false
68
- version_requirements: *22541480
69
- - !ruby/object:Gem::Dependency
99
+ version_requirements: *id005
100
+ - !ruby/object:Gem::Dependency
70
101
  name: fakeweb
71
- requirement: &22541020 !ruby/object:Gem::Requirement
102
+ prerelease: false
103
+ requirement: &id006 !ruby/object:Gem::Requirement
72
104
  none: false
73
- requirements:
74
- - - ! '>='
75
- - !ruby/object:Gem::Version
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ hash: 27
109
+ segments:
110
+ - 1
111
+ - 3
112
+ - 0
76
113
  version: 1.3.0
77
114
  type: :development
78
- prerelease: false
79
- version_requirements: *22541020
80
- description: validate-website is a web crawler for checking the markup validity with
81
- XML Schema / DTD and not found urls.
115
+ version_requirements: *id006
116
+ description: validate-website is a web crawler for checking the markup validity with XML Schema / DTD and not found urls.
82
117
  email: laurent@spkdev.net
83
- executables:
118
+ executables:
84
119
  - validate-website
85
120
  - validate-website-static
86
121
  extensions: []
122
+
87
123
  extra_rdoc_files: []
88
- files:
124
+
125
+ files:
89
126
  - README.rdoc
90
127
  - Rakefile
91
128
  - LICENSE
@@ -199,25 +236,32 @@ files:
199
236
  - bin/validate-website
200
237
  - bin/validate-website-static
201
238
  homepage: http://github.com/spk/validate-website
202
- licenses:
239
+ licenses:
203
240
  - MIT
204
241
  post_install_message:
205
242
  rdoc_options: []
206
- require_paths:
243
+
244
+ require_paths:
207
245
  - lib
208
- required_ruby_version: !ruby/object:Gem::Requirement
246
+ required_ruby_version: !ruby/object:Gem::Requirement
209
247
  none: false
210
- requirements:
211
- - - ! '>='
212
- - !ruby/object:Gem::Version
213
- version: '0'
214
- required_rubygems_version: !ruby/object:Gem::Requirement
248
+ requirements:
249
+ - - ">="
250
+ - !ruby/object:Gem::Version
251
+ hash: 3
252
+ segments:
253
+ - 0
254
+ version: "0"
255
+ required_rubygems_version: !ruby/object:Gem::Requirement
215
256
  none: false
216
- requirements:
217
- - - ! '>='
218
- - !ruby/object:Gem::Version
219
- version: '0'
220
- requirements:
257
+ requirements:
258
+ - - ">="
259
+ - !ruby/object:Gem::Version
260
+ hash: 3
261
+ segments:
262
+ - 0
263
+ version: "0"
264
+ requirements:
221
265
  - anemone
222
266
  - rainbow
223
267
  - multipart_body
@@ -226,7 +270,7 @@ rubygems_version: 1.7.2
226
270
  signing_key:
227
271
  specification_version: 3
228
272
  summary: Web crawler for checking the validity of your documents
229
- test_files:
273
+ test_files:
230
274
  - spec/spec_helper.rb
231
275
  - spec/data/html5.html
232
276
  - spec/data/html4-strict.html
@@ -236,3 +280,4 @@ test_files:
236
280
  - spec/core_spec.rb
237
281
  - spec/fakeweb_helper.rb
238
282
  - spec/validator_spec.rb
283
+ has_rdoc: