validate-website 1.1.0 → 1.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 134cbdd3da2da6847c525ffe5a2ee68f1a380ae2
-  data.tar.gz: 678ff514b9f6f368bbb78e93a8dc42f38a35a803
+  metadata.gz: fd93ea8bbe188ac4cb33752c93a971f75106971e
+  data.tar.gz: 9322f70f9cd7433c35d201f0d042117ff2974147
 SHA512:
-  metadata.gz: dd90c5dec7d0c80ea9b94abcb1a38a425ee59be32c73fe9fc97620a5a00ee4c9cf2dae52aa099509c8573f2dfca117377ed15373bce2d93ed7c25a43e6ed067e
-  data.tar.gz: 4d24ac9b1dccd744a3b7bd7ebbb18b946c05c4043a0c76898eafc543892e886715e2d99670ea91f3b9d0c3203425cf0de420445d041360c44fb67a934c6165c2
+  metadata.gz: e1d587ebf07f0f3f93ee4f7ebfd96112919c435eeddda17874c76423ee0e1500865a0afe0f5112c459cd900023445e818d347380800c306c0d398ed9369de710
+  data.tar.gz: 818c8b7f437a4afb0482914fd799e0e0701013ee9b772db56abe3e3aa2259f08cba9444fff65df278b10a8e4144aa08021e38515419ccfebe832c717d22fa63b
data/Rakefile CHANGED
@@ -1,13 +1,8 @@
-require 'rdoc/task'
 require 'rake/testtask'
+require 'rubocop/rake_task'

 task default: [:test]

-RDoc::Task.new do |rd|
-  rd.main = 'README.rdoc'
-  rd.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
-end
-
 # install asciidoc libxml2-utils xmlto docbook-xsl docbook-xml
 desc 'Update manpage from asciidoc file'
 task :manpage do
@@ -15,6 +10,11 @@ task :manpage do
 end

 Rake::TestTask.new do |t|
-  t.pattern = 'spec/*_spec.rb'
+  t.pattern = 'test/**/*_test.rb'
 end
 task spec: :test
+
+desc 'Execute rubocop'
+RuboCop::RakeTask.new(:rubocop) do |t|
+  t.options = ['-D'] # display cop name
+end
lib/validate_website/core.rb CHANGED
@@ -7,6 +7,7 @@ require 'validate_website/validator'
 require 'validate_website/colorful_messages'

 require 'spidr'
+require 'crass'

 # Base module ValidateWebsite
 module ValidateWebsite
@@ -62,20 +63,25 @@ module ValidateWebsite
       end
     end

-    # Extract urls from CSS page
-    #
-    # @param [Spidr::Page] an Spidr::Page object
-    # @return [Array] Lists of urls
-    #
-    def self.extract_urls_from_css(page)
-      page.body.scan(%r{url\((['".\/\w-]+)\)}).reduce(Set[]) do |result, url|
-        url = url.first.gsub("'", '').gsub('"', '')
-        abs = page.to_absolute(url)
-        result << abs.to_s
-      end
+    private
+
+    def check_css_syntax(page)
+      nodes = Crass::Parser.parse_stylesheet(page.body)
+      return unless any_css_errors?(nodes)
+      handle_validation_error(page.url)
     end

-    private
+    def any_css_errors?(nodes)
+      nodes.any? do |node|
+        if node[:children]
+          any_css_errors? node.delete(:children)
+        elsif node[:tokens]
+          any_css_errors? node.delete(:tokens)
+        else
+          node[:node] == :error || node[:error] == true
+        end
+      end
+    end

     def print_status_line(total, failures, not_founds, errors)
       puts "\n\n"
@@ -102,16 +108,20 @@ module ValidateWebsite
       if validator.valid?
         print color(:success, '.', options[:color]) # rspec style
       else
-        handle_validation_error(validator, url)
+        handle_html_validation_error(validator, url)
       end
     end

-    def handle_validation_error(validator, url)
+    def handle_html_validation_error(validator, url)
+      handle_validation_error(url)
+      return unless options[:verbose]
+      puts color(:error, validator.errors.join(', '), options[:color])
+    end
+
+    def handle_validation_error(url)
       @errors_count += 1
       puts "\n"
       puts color(:error, "* #{url}", options[:color])
-      return unless options[:verbose]
-      puts color(:error, validator.errors.join(', '), options[:color])
     end
   end
 end
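The CSS check above swaps the old regexp scan for Crass, which parses a stylesheet into a tree of node hashes; a page is reported as invalid as soon as any node carries an error marker. A minimal standalone sketch of that walk, run on a raw CSS string rather than a Spidr::Page (the sample stylesheet is an invented value):

    require 'crass'

    # Recurse through Crass nodes the way any_css_errors? does: descend into
    # :children and :tokens, and report true once an error node or flag appears.
    def css_errors?(nodes)
      nodes.any? do |node|
        if node[:children]
          css_errors?(node[:children])
        elsif node[:tokens]
          css_errors?(node[:tokens])
        else
          node[:node] == :error || node[:error] == true
        end
      end
    end

    nodes = Crass::Parser.parse_stylesheet('a { color: red; } @media screen {')
    puts css_errors?(nodes) # prints true only if Crass flagged a node for this input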
lib/validate_website/crawl.rb CHANGED
@@ -1,4 +1,5 @@
 require 'validate_website/core'
+require 'validate_website/utils'

 module ValidateWebsite
   # Class for http website validation
@@ -44,13 +45,14 @@ module ValidateWebsite
         crawler.cookies[@host] = default_cookies if options[:cookies]
         on_every_css_page(crawler)
         on_every_html_page(crawler)
-        on_every_failed_url(crawler)
+        on_every_failed_url(crawler) if options[:not_found]
       end
     end

     def on_every_css_page(crawler)
       crawler.every_css_page do |page|
-        ValidateWebsite::Core.extract_urls_from_css(page).each do |u|
+        check_css_syntax(page) if options[:css_syntax]
+        ValidateWebsite::Utils.extract_urls_from_css(page).each do |u|
          crawler.enqueue(u)
        end
      end
@@ -69,7 +71,6 @@ module ValidateWebsite
     end

     def on_every_failed_url(crawler)
-      return unless options[:not_found]
       crawler.every_failed_url do |url|
         not_found_error(url)
       end
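After this refactoring the crawler only registers the failed-url callback when --not-found is requested, and the CSS callback both syntax-checks a stylesheet (when --css-syntax is set) and enqueues its url() targets so images and fonts get crawled too. A rough sketch of the same Spidr wiring in isolation (the site URL is a made-up value; crass and set are required explicitly because utils.rb relies on its callers to load them):

    require 'set'
    require 'crass'
    require 'spidr'
    require 'validate_website/utils'

    Spidr.site('http://localhost:4567/') do |crawler|
      crawler.every_css_page do |page|
        # enqueue every url() target found in the stylesheet
        ValidateWebsite::Utils.extract_urls_from_css(page).each do |u|
          crawler.enqueue(u)
        end
      end
      crawler.every_failed_url { |url| warn "not found: #{url}" }
    end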
lib/validate_website/option_parser.rb CHANGED
@@ -12,6 +12,7 @@ module ValidateWebsite
       exclude: nil,
       user_agent: nil,
       markup: true,
+      css_syntax: false,
       # crawler: log not found url (404 status code)
       # static: log not found url (not on filesystem, `pwd` considered
       # as root " / ")
@@ -39,6 +40,7 @@ module ValidateWebsite
     def self.default_args
       Slop.parse do |o|
         yield o if block_given?
+        markup_syntax(o)
         boolean_options(o)
         o.regexp('-i', '--ignore',
                  'Validation errors to ignore (ex: "valign|autocorrect")')
@@ -48,10 +50,16 @@
       end
     end

-    def self.boolean_options(o)
+    def self.markup_syntax(o)
       o.bool('-m', '--markup',
              "Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
              default: DEFAULT_OPTIONS[:markup])
+      o.bool('--css-syntax',
+             "Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
+             default: DEFAULT_OPTIONS[:css_syntax])
+    end
+
+    def self.boolean_options(o)
       o.bool('-n', '--not-found',
              "Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
              default: DEFAULT_OPTIONS[:not_found])
@@ -94,7 +102,7 @@
       o.string('-s', '--site',
                "Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
                default: DEFAULT_OPTIONS[:site])
-      o.regexp('-p', '--pattern',
+      o.string('-p', '--pattern',
                "Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
                default: DEFAULT_OPTIONS[:pattern])
     end
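The new --css-syntax flag is declared through Slop next to -m/--markup, and -p/--pattern now parses as a plain string rather than a regexp. A standalone sketch of how options declared this way resolve (the ARGV sample and the defaults shown are placeholders, not necessarily the gem's DEFAULT_OPTIONS):

    require 'slop'

    opts = Slop.parse(%w[--markup -p *.html]) do |o|
      o.bool('-m', '--markup', 'Markup validation', default: true)
      o.string('-p', '--pattern', 'Filename pattern', default: '**/*.html')
    end

    opts[:markup]  # => true
    opts[:pattern] # => "*.html"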
lib/validate_website/static.rb CHANGED
@@ -1,4 +1,5 @@
 require 'validate_website/core'
+require 'validate_website/utils'

 module ValidateWebsite
   # Class for validation Static website
@@ -25,15 +26,17 @@ module ValidateWebsite

     private

-    def generate_static_page(f)
-      response = self.class.fake_httpresponse(open(f).read)
-      Spidr::Page.new(URI.join(@site, URI.encode(f)), response)
+    def check_static_file(f)
+      page = StaticLink.new(f, @site).page
+      check_page(f, page)
+      check_css_syntax(page) if page.css? && options[:css_syntax]
     end

-    def check_static_file(f)
-      page = generate_static_page(f)
-      validate(page.doc, page.body, f, @options[:ignore]) if @options[:markup]
-      check_static_not_found(page.links) if @options[:not_found]
+    def check_page(f, page)
+      if page.html? && options[:markup]
+        validate(page.doc, page.body, f, options[:ignore])
+      end
+      check_static_not_found(page.links) if options[:not_found]
     end

     StaticLink = Struct.new(:link, :site) do
@@ -47,12 +50,34 @@
         URI.parse(site).host == link_uri.host
       end

+      def content_types
+        if css?
+          ['text/css']
+        else
+          CONTENT_TYPES
+        end
+      end
+
+      def body
+        if File.exist?(link)
+          open(link).read
+        else
+          open(file_path).read
+        end
+      end
+
+      def response
+        @response ||= ValidateWebsite::Static.fake_httpresponse(
+          body,
+          content_types)
+      end
+
+      def page
+        @page ||= Spidr::Page.new(link_uri, response)
+      end
+
       def extract_urls_from_fake_css_response
-        response = ValidateWebsite::Static.fake_httpresponse(
-          open(file_path).read,
-          ['text/css'])
-        css_page = Spidr::Page.new(link_uri, response)
-        ValidateWebsite::Core.extract_urls_from_css(css_page)
+        ValidateWebsite::Utils.extract_urls_from_css(page)
       end

       def file_path
@@ -65,6 +90,10 @@
         @extname ||= File.extname(file_path)
       end

+      def css?
+        extname == '.css'
+      end
+
       def check?
         !link.include?('#') && in_static_domain?
       end
@@ -78,7 +107,7 @@
        next unless static_link.check?
        not_found_error(static_link.file_path) &&
          next unless File.exist?(static_link.file_path)
-       next unless static_link.css?
+       next unless static_link.css?
        check_static_not_found static_link.extract_urls_from_fake_css_response
      end
    end
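StaticLink now memoizes a fake HTTP response and a Spidr::Page per file on disk, so the same page object serves markup validation, CSS syntax checking and link extraction. A sketch of the idea behind that helper, wrapping a local stylesheet so Spidr treats it like a fetched page ('public/css/app.css' and the site URL are made-up values, and fake_httpresponse's exact behaviour is not shown in this diff):

    require 'spidr'
    require 'validate_website/static'

    body = File.read('public/css/app.css')
    response = ValidateWebsite::Static.fake_httpresponse(body, ['text/css'])
    page = Spidr::Page.new(URI('http://www.example.com/css/app.css'), response)
    page.css? # expected to be true, since the fake response advertises text/css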
lib/validate_website/utils.rb ADDED
@@ -0,0 +1,48 @@
+# Base module ValidateWebsite
+module ValidateWebsite
+  # Utils class for CSS helpers
+  class Utils
+    # Extract urls from CSS page
+    #
+    # @param [Spidr::Page] a Spidr::Page object
+    # @return [Set] Lists of urls
+    #
+    def self.extract_urls_from_css(page)
+      return Set[] unless page
+      return Set[] if page.body.nil?
+      nodes = Crass::Parser.parse_stylesheet(page.body)
+      extract_urls_from_nodes nodes, page
+    end
+
+    # Return urls as absolute from Crass nodes
+    #
+    # @param [Hash] node from Crass
+    # @param [Spidr::Page] a Spidr::Page object
+    # @return [Set] list of absolute urls
+    def self.urls_to_absolute(node, page)
+      if node[:node] == :function && node[:name] == 'url' || node[:node] == :url
+        Array(node[:value]).map do |v|
+          url = v.is_a?(String) ? v : v[:value]
+          page.to_absolute(url).to_s
+        end
+      else
+        Set.new
+      end
+    end
+
+    # Extract urls from Crass nodes
+    # @param [Array] Array of nodes from Crass
+    # @param [Spidr::Page] a Spidr::Page object
+    # @param [Set] memo for recursivity
+    # @return [Set] list of urls
+    def self.extract_urls_from_nodes(nodes, page, memo = Set[])
+      nodes.each_with_object(memo) do |node, result|
+        result.merge urls_to_absolute(node, page)
+        if node[:children]
+          extract_urls_from_nodes node.delete(:children), page, result
+        end
+        result
+      end
+    end
+  end
+end
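The new Utils class walks the Crass node tree rather than scanning with a regexp, so quoted and unquoted url() values are picked up uniformly and resolved to absolute URLs. A standalone walk in the same spirit, resolving against a plain base URI instead of a Spidr::Page (the base URI and the sample CSS are invented):

    require 'crass'
    require 'set'
    require 'uri'

    # Collect url() targets from Crass nodes and resolve them against a base URI.
    def css_urls(nodes, base, memo = Set[])
      nodes.each_with_object(memo) do |node, result|
        if node[:node] == :url || (node[:node] == :function && node[:name] == 'url')
          Array(node[:value]).each do |v|
            result << URI.join(base, v.is_a?(String) ? v : v[:value]).to_s
          end
        end
        css_urls(node[:children], base, result) if node[:children]
      end
    end

    css = 'body { background: url(img/bg.png); }'
    p css_urls(Crass::Parser.parse_stylesheet(css), 'http://www.example.com/')
    # expected to include "http://www.example.com/img/bg.png"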
lib/validate_website/version.rb CHANGED
@@ -1,3 +1,4 @@
+# Version file for ValidateWebsite
 module ValidateWebsite
-  VERSION = '1.1.0'.freeze
+  VERSION = '1.5.0'.freeze
 end
@@ -1,3 +1,2 @@
-# encoding: utf-8
 require 'validate_website/core'
 require 'validate_website/version'
man/man1/validate-website-static.1 CHANGED
@@ -2,12 +2,12 @@
 .\" Title: validate-website-static
 .\" Author: [see the "AUTHOR" section]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\" Date: 05/17/2015
+.\" Date: 07/21/2015
 .\" Manual: \ \&
 .\" Source: \ \&
 .\" Language: English
 .\"
-.TH "VALIDATE\-WEBSITE\-S" "1" "05/17/2015" "\ \&" "\ \&"
+.TH "VALIDATE\-WEBSITE\-S" "1" "07/21/2015" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -59,6 +59,11 @@ Ignore certain validation errors (ex:
 Markup validation (Default: true)
 .RE
 .PP
+\fB\-\-css\-syntax\fR
+.RS 4
+Css validation (Default: false)
+.RE
+.PP
 \fB\-n\fR, \fB\-\-not\-found\fR
 .RS 4
 Log files not on filesystem, pwd considered as root \(Fo / \(Fc (Default: false)
man/man1/validate-website.1 CHANGED
@@ -2,12 +2,12 @@
 .\" Title: validate-website
 .\" Author: [see the "AUTHOR" section]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\" Date: 05/17/2015
+.\" Date: 07/21/2015
 .\" Manual: \ \&
 .\" Source: \ \&
 .\" Language: English
 .\"
-.TH "VALIDATE\-WEBSITE" "1" "05/17/2015" "\ \&" "\ \&"
+.TH "VALIDATE\-WEBSITE" "1" "07/21/2015" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -70,6 +70,11 @@ Set defaults cookies
 Markup validation (Default: true)
 .RE
 .PP
+\fB\-\-css\-syntax\fR
+.RS 4
+Css validation (Default: false)
+.RE
+.PP
 \fB\-n\fR, \fB\-\-not\-found\fR
 .RS 4
 Log not found url (Default: false)
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: validate-website
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.5.0
 platform: ruby
 authors:
 - Laurent Arnoud
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-07-07 00:00:00.000000000 Z
+date: 2015-07-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: spidr
@@ -24,6 +24,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.4'
+- !ruby/object:Gem::Dependency
+  name: crass
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
 - !ruby/object:Gem::Dependency
   name: paint
   requirement: !ruby/object:Gem::Requirement
@@ -246,27 +260,11 @@ files:
 - lib/validate_website/option_parser.rb
 - lib/validate_website/runner.rb
 - lib/validate_website/static.rb
+- lib/validate_website/utils.rb
 - lib/validate_website/validator.rb
 - lib/validate_website/version.rb
 - man/man1/validate-website-static.1
 - man/man1/validate-website.1
-- spec/core_spec.rb
-- spec/crawler_spec.rb
-- spec/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css
-- spec/data/html4-strict.html
-- spec/data/html5-linuxfr.html
-- spec/data/html5.html
-- spec/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md
-- spec/data/validator.nu-excessive.html
-- spec/data/validator.nu-failure.html
-- spec/data/validator.nu-success.html
-- spec/data/w3.org-xhtml1-strict-errors.html
-- spec/data/xhtml1-strict.html
-- spec/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html
-- spec/spec_helper.rb
-- spec/static_spec.rb
-- spec/validator_spec.rb
-- spec/webmock_helper.rb
 homepage: http://github.com/spk/validate-website
 licenses:
 - MIT
@@ -294,9 +292,5 @@ rubygems_version: 2.4.5
 signing_key:
 specification_version: 4
 summary: Web crawler for checking the validity of your documents
-test_files:
-- spec/static_spec.rb
-- spec/core_spec.rb
-- spec/crawler_spec.rb
-- spec/validator_spec.rb
+test_files: []
 has_rdoc: