validate-website 1.1.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 134cbdd3da2da6847c525ffe5a2ee68f1a380ae2
4
- data.tar.gz: 678ff514b9f6f368bbb78e93a8dc42f38a35a803
3
+ metadata.gz: fd93ea8bbe188ac4cb33752c93a971f75106971e
4
+ data.tar.gz: 9322f70f9cd7433c35d201f0d042117ff2974147
5
5
  SHA512:
6
- metadata.gz: dd90c5dec7d0c80ea9b94abcb1a38a425ee59be32c73fe9fc97620a5a00ee4c9cf2dae52aa099509c8573f2dfca117377ed15373bce2d93ed7c25a43e6ed067e
7
- data.tar.gz: 4d24ac9b1dccd744a3b7bd7ebbb18b946c05c4043a0c76898eafc543892e886715e2d99670ea91f3b9d0c3203425cf0de420445d041360c44fb67a934c6165c2
6
+ metadata.gz: e1d587ebf07f0f3f93ee4f7ebfd96112919c435eeddda17874c76423ee0e1500865a0afe0f5112c459cd900023445e818d347380800c306c0d398ed9369de710
7
+ data.tar.gz: 818c8b7f437a4afb0482914fd799e0e0701013ee9b772db56abe3e3aa2259f08cba9444fff65df278b10a8e4144aa08021e38515419ccfebe832c717d22fa63b
data/Rakefile CHANGED
@@ -1,13 +1,8 @@
1
- require 'rdoc/task'
2
1
  require 'rake/testtask'
2
+ require 'rubocop/rake_task'
3
3
 
4
4
  task default: [:test]
5
5
 
6
- RDoc::Task.new do |rd|
7
- rd.main = 'README.rdoc'
8
- rd.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
9
- end
10
-
11
6
  # install asciidoc libxml2-utils xmlto docbook-xsl docbook-xml
12
7
  desc 'Update manpage from asciidoc file'
13
8
  task :manpage do
@@ -15,6 +10,11 @@ task :manpage do
15
10
  end
16
11
 
17
12
  Rake::TestTask.new do |t|
18
- t.pattern = 'spec/*_spec.rb'
13
+ t.pattern = 'test/**/*_test.rb'
19
14
  end
20
15
  task spec: :test
16
+
17
+ desc 'Execute rubocop'
18
+ RuboCop::RakeTask.new(:rubocop) do |t|
19
+ t.options = ['-D'] # display cop name
20
+ end
@@ -7,6 +7,7 @@ require 'validate_website/validator'
7
7
  require 'validate_website/colorful_messages'
8
8
 
9
9
  require 'spidr'
10
+ require 'crass'
10
11
 
11
12
  # Base module ValidateWebsite
12
13
  module ValidateWebsite
@@ -62,20 +63,25 @@ module ValidateWebsite
62
63
  end
63
64
  end
64
65
 
65
- # Extract urls from CSS page
66
- #
67
- # @param [Spidr::Page] an Spidr::Page object
68
- # @return [Array] Lists of urls
69
- #
70
- def self.extract_urls_from_css(page)
71
- page.body.scan(%r{url\((['".\/\w-]+)\)}).reduce(Set[]) do |result, url|
72
- url = url.first.gsub("'", '').gsub('"', '')
73
- abs = page.to_absolute(url)
74
- result << abs.to_s
75
- end
66
+ private
67
+
68
+ def check_css_syntax(page)
69
+ nodes = Crass::Parser.parse_stylesheet(page.body)
70
+ return unless any_css_errors?(nodes)
71
+ handle_validation_error(page.url)
76
72
  end
77
73
 
78
- private
74
+ def any_css_errors?(nodes)
75
+ nodes.any? do |node|
76
+ if node[:children]
77
+ any_css_errors? node.delete(:children)
78
+ elsif node[:tokens]
79
+ any_css_errors? node.delete(:tokens)
80
+ else
81
+ node[:node] == :error || node[:error] == true
82
+ end
83
+ end
84
+ end
79
85
 
80
86
  def print_status_line(total, failures, not_founds, errors)
81
87
  puts "\n\n"
@@ -102,16 +108,20 @@ module ValidateWebsite
102
108
  if validator.valid?
103
109
  print color(:success, '.', options[:color]) # rspec style
104
110
  else
105
- handle_validation_error(validator, url)
111
+ handle_html_validation_error(validator, url)
106
112
  end
107
113
  end
108
114
 
109
- def handle_validation_error(validator, url)
115
+ def handle_html_validation_error(validator, url)
116
+ handle_validation_error(url)
117
+ return unless options[:verbose]
118
+ puts color(:error, validator.errors.join(', '), options[:color])
119
+ end
120
+
121
+ def handle_validation_error(url)
110
122
  @errors_count += 1
111
123
  puts "\n"
112
124
  puts color(:error, "* #{url}", options[:color])
113
- return unless options[:verbose]
114
- puts color(:error, validator.errors.join(', '), options[:color])
115
125
  end
116
126
  end
117
127
  end
@@ -1,4 +1,5 @@
1
1
  require 'validate_website/core'
2
+ require 'validate_website/utils'
2
3
 
3
4
  module ValidateWebsite
4
5
  # Class for http website validation
@@ -44,13 +45,14 @@ module ValidateWebsite
44
45
  crawler.cookies[@host] = default_cookies if options[:cookies]
45
46
  on_every_css_page(crawler)
46
47
  on_every_html_page(crawler)
47
- on_every_failed_url(crawler)
48
+ on_every_failed_url(crawler) if options[:not_found]
48
49
  end
49
50
  end
50
51
 
51
52
  def on_every_css_page(crawler)
52
53
  crawler.every_css_page do |page|
53
- ValidateWebsite::Core.extract_urls_from_css(page).each do |u|
54
+ check_css_syntax(page) if options[:css_syntax]
55
+ ValidateWebsite::Utils.extract_urls_from_css(page).each do |u|
54
56
  crawler.enqueue(u)
55
57
  end
56
58
  end
@@ -69,7 +71,6 @@ module ValidateWebsite
69
71
  end
70
72
 
71
73
  def on_every_failed_url(crawler)
72
- return unless options[:not_found]
73
74
  crawler.every_failed_url do |url|
74
75
  not_found_error(url)
75
76
  end
@@ -12,6 +12,7 @@ module ValidateWebsite
12
12
  exclude: nil,
13
13
  user_agent: nil,
14
14
  markup: true,
15
+ css_syntax: false,
15
16
  # crawler: log not found url (404 status code)
16
17
  # static: log not found url (not on filesystem, `pwd` considered
17
18
  # as root " / ")
@@ -39,6 +40,7 @@ module ValidateWebsite
39
40
  def self.default_args
40
41
  Slop.parse do |o|
41
42
  yield o if block_given?
43
+ markup_syntax(o)
42
44
  boolean_options(o)
43
45
  o.regexp('-i', '--ignore',
44
46
  'Validation errors to ignore (ex: "valign|autocorrect")')
@@ -48,10 +50,16 @@ module ValidateWebsite
48
50
  end
49
51
  end
50
52
 
51
- def self.boolean_options(o)
53
+ def self.markup_syntax(o)
52
54
  o.bool('-m', '--markup',
53
55
  "Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
54
56
  default: DEFAULT_OPTIONS[:markup])
57
+ o.bool('--css-syntax',
58
+ "Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
59
+ default: DEFAULT_OPTIONS[:css_syntax])
60
+ end
61
+
62
+ def self.boolean_options(o)
55
63
  o.bool('-n', '--not-found',
56
64
  "Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
57
65
  default: DEFAULT_OPTIONS[:not_found])
@@ -94,7 +102,7 @@ module ValidateWebsite
94
102
  o.string('-s', '--site',
95
103
  "Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
96
104
  default: DEFAULT_OPTIONS[:site])
97
- o.regexp('-p', '--pattern',
105
+ o.string('-p', '--pattern',
98
106
  "Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
99
107
  default: DEFAULT_OPTIONS[:pattern])
100
108
  end
@@ -1,4 +1,5 @@
1
1
  require 'validate_website/core'
2
+ require 'validate_website/utils'
2
3
 
3
4
  module ValidateWebsite
4
5
  # Class for validating a static website
@@ -25,15 +26,17 @@ module ValidateWebsite
25
26
 
26
27
  private
27
28
 
28
- def generate_static_page(f)
29
- response = self.class.fake_httpresponse(open(f).read)
30
- Spidr::Page.new(URI.join(@site, URI.encode(f)), response)
29
+ def check_static_file(f)
30
+ page = StaticLink.new(f, @site).page
31
+ check_page(f, page)
32
+ check_css_syntax(page) if page.css? && options[:css_syntax]
31
33
  end
32
34
 
33
- def check_static_file(f)
34
- page = generate_static_page(f)
35
- validate(page.doc, page.body, f, @options[:ignore]) if @options[:markup]
36
- check_static_not_found(page.links) if @options[:not_found]
35
+ def check_page(f, page)
36
+ if page.html? && options[:markup]
37
+ validate(page.doc, page.body, f, options[:ignore])
38
+ end
39
+ check_static_not_found(page.links) if options[:not_found]
37
40
  end
38
41
 
39
42
  StaticLink = Struct.new(:link, :site) do
@@ -47,12 +50,34 @@ module ValidateWebsite
47
50
  URI.parse(site).host == link_uri.host
48
51
  end
49
52
 
53
+ def content_types
54
+ if css?
55
+ ['text/css']
56
+ else
57
+ CONTENT_TYPES
58
+ end
59
+ end
60
+
61
+ def body
62
+ if File.exist?(link)
63
+ open(link).read
64
+ else
65
+ open(file_path).read
66
+ end
67
+ end
68
+
69
+ def response
70
+ @response ||= ValidateWebsite::Static.fake_httpresponse(
71
+ body,
72
+ content_types)
73
+ end
74
+
75
+ def page
76
+ @page ||= Spidr::Page.new(link_uri, response)
77
+ end
78
+
50
79
  def extract_urls_from_fake_css_response
51
- response = ValidateWebsite::Static.fake_httpresponse(
52
- open(file_path).read,
53
- ['text/css'])
54
- css_page = Spidr::Page.new(link_uri, response)
55
- ValidateWebsite::Core.extract_urls_from_css(css_page)
80
+ ValidateWebsite::Utils.extract_urls_from_css(page)
56
81
  end
57
82
 
58
83
  def file_path
@@ -65,6 +90,10 @@ module ValidateWebsite
65
90
  @extname ||= File.extname(file_path)
66
91
  end
67
92
 
93
+ def css?
94
+ extname == '.css'
95
+ end
96
+
68
97
  def check?
69
98
  !link.include?('#') && in_static_domain?
70
99
  end
@@ -78,7 +107,7 @@ module ValidateWebsite
78
107
  next unless static_link.check?
79
108
  not_found_error(static_link.file_path) &&
80
109
  next unless File.exist?(static_link.file_path)
81
- next unless static_link.extname == '.css'
110
+ next unless static_link.css?
82
111
  check_static_not_found static_link.extract_urls_from_fake_css_response
83
112
  end
84
113
  end
@@ -0,0 +1,48 @@
1
+ # Base module ValidateWebsite
2
+ module ValidateWebsite
3
+ # Utils class for CSS helpers
4
+ class Utils
5
+ # Extract urls from CSS page
6
+ #
7
+ # @param [Spidr::Page] a Spidr::Page object
8
+ # @return [Set] Lists of urls
9
+ #
10
+ def self.extract_urls_from_css(page)
11
+ return Set[] unless page
12
+ return Set[] if page.body.nil?
13
+ nodes = Crass::Parser.parse_stylesheet(page.body)
14
+ extract_urls_from_nodes nodes, page
15
+ end
16
+
17
+ # Return urls as absolute from Crass nodes
18
+ #
19
+ # @param [Hash] node from Crass
20
+ # @param [Spidr::Page] a Spidr::Page object
21
+ # @return [Set] list of absolute urls
22
+ def self.urls_to_absolute(node, page)
23
+ if node[:node] == :function && node[:name] == 'url' || node[:node] == :url
24
+ Array(node[:value]).map do |v|
25
+ url = v.is_a?(String) ? v : v[:value]
26
+ page.to_absolute(url).to_s
27
+ end
28
+ else
29
+ Set.new
30
+ end
31
+ end
32
+
33
+ # Extract urls from Crass nodes
34
+ # @param [Array] Array of nodes from Crass
35
+ # @param [Spidr::Page] a Spidr::Page object
36
+ # @param [Set] memo for recursion
37
+ # @return [Set] list of urls
38
+ def self.extract_urls_from_nodes(nodes, page, memo = Set[])
39
+ nodes.each_with_object(memo) do |node, result|
40
+ result.merge urls_to_absolute(node, page)
41
+ if node[:children]
42
+ extract_urls_from_nodes node.delete(:children), page, result
43
+ end
44
+ result
45
+ end
46
+ end
47
+ end
48
+ end
@@ -1,3 +1,4 @@
1
+ # Version file for ValidateWebsite
1
2
  module ValidateWebsite
2
- VERSION = '1.1.0'.freeze
3
+ VERSION = '1.5.0'.freeze
3
4
  end
@@ -1,3 +1,2 @@
1
- # encoding: utf-8
2
1
  require 'validate_website/core'
3
2
  require 'validate_website/version'
@@ -2,12 +2,12 @@
2
2
  .\" Title: validate-website-static
3
3
  .\" Author: [see the "AUTHOR" section]
4
4
  .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
5
- .\" Date: 05/17/2015
5
+ .\" Date: 07/21/2015
6
6
  .\" Manual: \ \&
7
7
  .\" Source: \ \&
8
8
  .\" Language: English
9
9
  .\"
10
- .TH "VALIDATE\-WEBSITE\-S" "1" "05/17/2015" "\ \&" "\ \&"
10
+ .TH "VALIDATE\-WEBSITE\-S" "1" "07/21/2015" "\ \&" "\ \&"
11
11
  .\" -----------------------------------------------------------------
12
12
  .\" * Define some portability stuff
13
13
  .\" -----------------------------------------------------------------
@@ -59,6 +59,11 @@ Ignore certain validation errors (ex:
59
59
  Markup validation (Default: true)
60
60
  .RE
61
61
  .PP
62
+ \fB\-\-css\-syntax\fR
63
+ .RS 4
64
+ Css validation (Default: false)
65
+ .RE
66
+ .PP
62
67
  \fB\-n\fR, \fB\-\-not\-found\fR
63
68
  .RS 4
64
69
  Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
@@ -2,12 +2,12 @@
2
2
  .\" Title: validate-website
3
3
  .\" Author: [see the "AUTHOR" section]
4
4
  .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
5
- .\" Date: 05/17/2015
5
+ .\" Date: 07/21/2015
6
6
  .\" Manual: \ \&
7
7
  .\" Source: \ \&
8
8
  .\" Language: English
9
9
  .\"
10
- .TH "VALIDATE\-WEBSITE" "1" "05/17/2015" "\ \&" "\ \&"
10
+ .TH "VALIDATE\-WEBSITE" "1" "07/21/2015" "\ \&" "\ \&"
11
11
  .\" -----------------------------------------------------------------
12
12
  .\" * Define some portability stuff
13
13
  .\" -----------------------------------------------------------------
@@ -70,6 +70,11 @@ Set defaults cookies
70
70
  Markup validation (Default: true)
71
71
  .RE
72
72
  .PP
73
+ \fB\-\-css\-syntax\fR
74
+ .RS 4
75
+ Css validation (Default: false)
76
+ .RE
77
+ .PP
73
78
  \fB\-n\fR, \fB\-\-not\-found\fR
74
79
  .RS 4
75
80
  Log not found url (Default: false)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: validate-website
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Laurent Arnoud
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-07 00:00:00.000000000 Z
11
+ date: 2015-07-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spidr
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: crass
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: paint
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -246,27 +260,11 @@ files:
246
260
  - lib/validate_website/option_parser.rb
247
261
  - lib/validate_website/runner.rb
248
262
  - lib/validate_website/static.rb
263
+ - lib/validate_website/utils.rb
249
264
  - lib/validate_website/validator.rb
250
265
  - lib/validate_website/version.rb
251
266
  - man/man1/validate-website-static.1
252
267
  - man/man1/validate-website.1
253
- - spec/core_spec.rb
254
- - spec/crawler_spec.rb
255
- - spec/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css
256
- - spec/data/html4-strict.html
257
- - spec/data/html5-linuxfr.html
258
- - spec/data/html5.html
259
- - spec/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md
260
- - spec/data/validator.nu-excessive.html
261
- - spec/data/validator.nu-failure.html
262
- - spec/data/validator.nu-success.html
263
- - spec/data/w3.org-xhtml1-strict-errors.html
264
- - spec/data/xhtml1-strict.html
265
- - spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html
266
- - spec/spec_helper.rb
267
- - spec/static_spec.rb
268
- - spec/validator_spec.rb
269
- - spec/webmock_helper.rb
270
268
  homepage: http://github.com/spk/validate-website
271
269
  licenses:
272
270
  - MIT
@@ -294,9 +292,5 @@ rubygems_version: 2.4.5
294
292
  signing_key:
295
293
  specification_version: 4
296
294
  summary: Web crawler for checking the validity of your documents
297
- test_files:
298
- - spec/static_spec.rb
299
- - spec/core_spec.rb
300
- - spec/crawler_spec.rb
301
- - spec/validator_spec.rb
295
+ test_files: []
302
296
  has_rdoc: