validate-website 1.8.1 → 1.10.0

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'validate_website/core'
 
 module ValidateWebsite
@@ -5,7 +7,7 @@ module ValidateWebsite
   class Runner
     def self.trap_interrupt
       trap('INT') do
-        STDERR.puts "\nExiting..."
+        warn "\nExiting..."
         exit!(1)
       end
     end
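
Note on the `STDERR.puts` → `warn` change above: `Kernel#warn` writes through the reassignable `$stderr` global (the behaviour RuboCop's `Style/StderrPuts` rule prefers), whereas the `STDERR` constant always points at the original stream, so `warn` is friendlier to callers and tests that capture stderr. A minimal sketch:

    require 'stringio'

    capture = StringIO.new
    $stderr = capture
    warn "\nExiting..."   # Kernel#warn follows the reassigned $stderr
    $stderr = STDERR      # restore the real stream
    capture.string        # => "\nExiting...\n"
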
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'validate_website/core'
 require 'validate_website/utils'
 
@@ -5,12 +7,14 @@ module ValidateWebsite
   # Class for validation Static website
   class Static < Core
     CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
+    START_MESSAGE_TYPE = 'files'
 
     attr_reader :history_count
 
     def initialize(options = {}, validation_type = :static)
       @history_count = 0
       super
+      start_message("#{START_MESSAGE_TYPE} in #{Dir.pwd} (#{@site} as site)")
     end
 
     # @param [Hash] options
@@ -20,11 +24,12 @@ module ValidateWebsite
       @site = @options[:site]
 
       files = Dir.glob(@options[:pattern])
-      files.each do |f|
-        next unless File.file?(f)
-        next if @options[:exclude].is_a?(Regexp) && @options[:exclude].match(f)
+      files.each do |file|
+        next unless File.file?(file)
+        next if @options[:exclude]&.match(file)
+
         @history_count += 1
-        check_static_file(f)
+        check_static_file(file)
       end
       print_status_line(files.size, 0, @not_founds_count, @errors_count)
     end
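
The exclude guard above now relies on the safe navigation operator: `@options[:exclude]&.match(file)` short-circuits to `nil` when no exclude pattern is set, replacing the explicit `is_a?(Regexp)` check (any non-nil value must now respond to `match`). Behaviour sketch:

    exclude = nil
    exclude&.match('public/index.html') # => nil, file is checked

    exclude = /vendor/
    exclude&.match('vendor/app.css')    # => MatchData, file is skipped
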
@@ -47,15 +52,18 @@ module ValidateWebsite
 
     private
 
-    def check_static_file(f)
-      page = StaticLink.new(f, @site).page
-      check_page(f, page)
+    def check_static_file(file)
+      page = StaticLink.new(file, @site).page
+      check_page(file, page)
       check_css_syntax(page) if page.css? && options[:css_syntax]
     end
 
-    def check_page(f, page)
+    def check_page(file, page)
       if page.html? && options[:markup]
-        validate(page.doc, page.body, f, options[:ignore])
+        keys = %i[ignore html5_validator]
+        # slice does not exists on Ruby <= 2.4
+        slice = Hash[[keys, options.values_at(*keys)].transpose]
+        validate(page.doc, page.body, file, slice)
       end
       check_static_not_found(page.links) if options[:not_found]
     end
@@ -66,11 +74,13 @@ module ValidateWebsite
       static_links = links.map { |l| StaticLink.new(l, @site) }
       static_links.each do |static_link|
         next unless static_link.check?
+
         unless File.exist?(static_link.file_path)
           not_found_error(static_link.file_path)
           next
         end
         next unless static_link.css?
+
         check_static_not_found static_link.extract_urls_from_fake_css_response
       end
     end
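
In the `check_page` rewrite above, `Hash[[keys, options.values_at(*keys)].transpose]` is a backport of `Hash#slice`, which only exists on Ruby 2.5+ (the inline comment means "slice does not exist on Ruby <= 2.4"). Step by step:

    options = { ignore: /Warning/, html5_validator: :nu, markup: true }
    keys = %i[ignore html5_validator]

    options.values_at(*keys)
    # => [/Warning/, :nu]
    [keys, options.values_at(*keys)].transpose
    # => [[:ignore, /Warning/], [:html5_validator, :nu]]
    Hash[[keys, options.values_at(*keys)].transpose]
    # => { ignore: /Warning/, html5_validator: :nu }

    # Equivalent on Ruby >= 2.5:
    options.slice(*keys)
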
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'uri'
 require 'validate_website/utils'
 require 'validate_website/static'
@@ -25,9 +27,9 @@ StaticLink = Struct.new(:link, :site) do
 
   def body
     if File.exist?(link)
-      open(link).read
+      File.open(link).read
     else
-      open(file_path).read
+      File.open(file_path).read
     end
   end
 
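Swapping bare `open` for `File.open` here (and in the tests below) closes a well-known hazard: `Kernel#open` spawns a subprocess when the string starts with `|`, and with open-uri loaded it fetches URL strings, so calling it on crawled paths risks command injection. `File.open` treats its argument strictly as a file name:

    # Kernel#open interprets a leading pipe as a command to run:
    open('| echo pwned').read # => "pwned\n"

    # File.open never interprets the string:
    begin
      File.open('| echo pwned')
    rescue Errno::ENOENT
      # just a nonexistent file name
    end

(`File.read(link)` would go one step further and also close the descriptor that `File.open(link).read` leaves for the GC.)
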
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # Base module ValidateWebsite
 module ValidateWebsite
   # Utils class for CSS helpers
@@ -10,6 +12,7 @@ module ValidateWebsite
     def self.extract_urls_from_css(page)
       return Set[] unless page
       return Set[] if page.body.nil?
+
       nodes = Crass::Parser.parse_stylesheet(page.body)
       extract_urls_from_nodes nodes, page
     end

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'uri'
 
 require 'nokogiri'
@@ -10,27 +12,25 @@ module ValidateWebsite
   class Validator
     extend ValidatorClassMethods
 
-    @html5_validator_service_url = 'https://checker.html5.org/'
+    @html5_validator_service_url = 'https://validator.nu/'
+    XHTML_SCHEMA_PATH = File.expand_path('../../data/schemas', __dir__)
+    @mutex = Mutex.new
+
     class << self
       attr_accessor :html5_validator_service_url
-    end
 
-    XHTML_PATH = File.expand_path('../../../data/schemas', __FILE__)
-
-    @xsd_schemas ||= {}
-
-    # `Dir.chdir` is needed by `Nokogiri::XML::Schema` to validate with local
-    # files and cannot use file absolute path.
-    Dir.glob(File.join(XHTML_PATH, '*.xsd')).each do |schema|
-      Dir.chdir(XHTML_PATH) do
-        schema_name = File.basename(schema, '.xsd')
-        schema_content = File.read(File.basename(schema))
-        begin
-          @xsd_schemas[schema_name] = Nokogiri::XML::Schema(schema_content)
-        rescue Nokogiri::XML::SyntaxError
-          STDERR.puts "XSD SCHEMA: #{schema} cannot be loaded"
+      # http://www.w3.org/TR/xhtml1-schema/
+      def schema(namespace)
+        @mutex.synchronize do
+          Dir.chdir(XHTML_SCHEMA_PATH) do
+            if File.exist?("#{namespace}.xsd")
+              Nokogiri::XML::Schema(File.read("#{namespace}.xsd"))
+            end
+          end
         end
       end
+
+      alias xsd schema
     end
 
     attr_reader :original_doc, :body, :dtd, :doc, :namespace, :html5_validator
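
The hunk above replaces eager, load-time parsing of every bundled XSD with an on-demand `schema` class method. `Nokogiri::XML::Schema` resolves a schema's relative includes against the current working directory (hence the `Dir.chdir`), and since the working directory is process-global, the new `Mutex` serializes lookups. Unlike the old `@xsd_schemas` hash, the schema is re-parsed on every call; a memoized variant of the same pattern could look like this (a sketch with hypothetical names, not the gem's code):

    require 'nokogiri'

    SCHEMA_DIR = File.expand_path('../../data/schemas', __dir__) # assumed layout
    SCHEMA_LOCK = Mutex.new
    SCHEMA_CACHE = {}

    def schema(namespace)
      SCHEMA_LOCK.synchronize do
        return SCHEMA_CACHE[namespace] if SCHEMA_CACHE.key?(namespace)

        # chdir so Nokogiri can resolve the XSD's relative xs:include paths
        SCHEMA_CACHE[namespace] = Dir.chdir(SCHEMA_DIR) do
          file = "#{namespace}.xsd"
          Nokogiri::XML::Schema(File.read(file)) if File.exist?(file)
        end
      end
    end
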
@@ -65,25 +65,22 @@ module ValidateWebsite
       @ignore ? @errors.reject { |e| @ignore =~ e } : @errors
     end
 
-    # http://www.w3.org/TR/xhtml1-schema/
-    def self.xsd(namespace)
-      return unless namespace
-      @xsd_schemas[namespace] if @xsd_schemas.key? namespace
-    end
-
     private
 
     # http://www.w3.org/TR/xhtml1/#dtds
     def find_namespace(dtd)
       return unless dtd.system_id
+
       dtd_uri = URI.parse(dtd.system_id)
       return unless dtd_uri.path
+
       @dtd_uri = dtd_uri
       File.basename(@dtd_uri.path, '.dtd')
     end
 
     def document
       return @document if @document
+
       @document = if @dtd_uri && @body.match(@dtd_uri.to_s)
                     @body.sub(@dtd_uri.to_s, @namespace + '.dtd')
                   else
@@ -95,11 +92,11 @@ module ValidateWebsite
     def validate
       if document =~ /^\<!DOCTYPE html\>/i
         html5_validate
-      elsif self.class.xsd(@namespace)
-        self.class.xsd(@namespace).validate(xhtml_doc)
+      elsif self.class.schema(@namespace)
+        self.class.schema(@namespace).validate(xhtml_doc)
       else
         # dont have xsd fall back to dtd
-        Dir.chdir(XHTML_PATH) do
+        Dir.chdir(XHTML_SCHEMA_PATH) do
           Nokogiri::HTML.parse(document)
         end.errors
       end
@@ -138,8 +135,8 @@ module ValidateWebsite
     end
 
     def xhtml_doc
-      Dir.chdir(XHTML_PATH) do
-        Nokogiri::XML(document) { |cfg| cfg.noent.dtdload.dtdvalid }
+      Dir.chdir(XHTML_SCHEMA_PATH) do
+        Nokogiri::XML(document) { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
       end
     end
   end
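
The `xhtml_doc` change above is a hardening fix: `noent` told libxml2 to substitute entities, including external ones (the classic XXE vector), while the new flags keep `dtdload`/`dtdvalid` for DTD validation but disable entity substitution (`nonoent`) and network access (`nonet`). Illustration of the difference:

    require 'nokogiri'

    xml = <<~XML
      <?xml version="1.0"?>
      <!DOCTYPE root [<!ENTITY xxe SYSTEM "file:///etc/hostname">]>
      <root>&xxe;</root>
    XML

    # With cfg.noent the external entity would be read and inlined (XXE).
    # With nonoent + nonet the reference is left unexpanded:
    doc = Nokogiri::XML(xml) { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
    doc.at('root').to_s # the &xxe; reference is not replaced by file contents
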
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'tidy_ffi'
 
 # Validator Class Methods
@@ -9,6 +11,7 @@ module ValidatorClassMethods
 
   def tidy
     return @tidy if defined?(@tidy)
+
     @lib_tidy = TidyFFI::LibTidy
     @tidy = TidyFFI::Tidy
   rescue TidyFFI::LibTidyNotInstalled
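
The hunk above only adds a blank line, but `tidy` is a good place to note why the memoization uses `defined?` rather than `||=`: it caches even a `nil` result (libtidy not installed), so the probe runs only once. Compare:

    class LibProbe
      # `||=` re-runs the probe on every call when the result is nil:
      def with_or_equals
        @cached ||= probe
      end

      # `defined?` memoizes the fact that we already tried, even for nil:
      def with_defined
        return @result if defined?(@result)

        @result = probe
      end

      def probe
        warn 'probing for the library...'
        nil # pretend it is not installed
      end
    end

    p = LibProbe.new
    p.with_defined # warns once
    p.with_defined # silent: the nil result was memoized
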
@@ -2,5 +2,5 @@
 
 # Version file for ValidateWebsite
 module ValidateWebsite
-  VERSION = '1.8.1'.freeze
+  VERSION = '1.10.0'
 end
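
Dropping `.freeze` is safe because the `# frozen_string_literal: true` magic comment, added across all files in this release, freezes every string literal at parse time:

    # frozen_string_literal: true

    VERSION = '1.10.0'
    VERSION.frozen? # => true, no explicit .freeze needed
    # VERSION << '-pre' would raise FrozenError
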
@@ -1,23 +1,32 @@
 '\" t
 .\"     Title: validate-website-static
-.\"    Author: [see the "AUTHORS" section]
-.\" Generator: Asciidoctor 1.5.6.1
-.\"      Date: 2018-01-20
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 1.5.8
+.\"      Date: 2019-01-09
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "VALIDATE\-WEBSITE\-STATIC" "1" "2018-01-20" "\ \&" "\ \&"
+.TH "VALIDATE\-WEBSITE\-STATIC" "1" "2019-01-09" "\ \&" "\ \&"
 .ie \n(.g .ds Aq \(aq
 .el .ds Aq '
 .ss \n[.ss] 0
 .nh
 .ad l
 .de URL
-\\$2 \(laURL: \\$1 \(ra\\$3
+\fI\\$2\fP <\\$1>\\$3
 ..
-.if \n[.g] .mso www.tmac
-.LINKSTYLE blue R < >
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
 .SH "NAME"
 validate\-website\-static \- check the validity of your documents
 .SH "SYNOPSIS"
@@ -119,5 +128,5 @@ Laurent Arnoud \c
 .sp
 The MIT License
 .sp
-Copyright (c) 2009\-2018 \c
+Copyright (c) 2009\-2019 \c
 .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""

@@ -1,23 +1,32 @@
 '\" t
 .\"     Title: validate-website
-.\"    Author: [see the "AUTHORS" section]
-.\" Generator: Asciidoctor 1.5.6.1
-.\"      Date: 2018-01-20
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 1.5.8
+.\"      Date: 2019-01-09
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "VALIDATE\-WEBSITE" "1" "2018-01-20" "\ \&" "\ \&"
+.TH "VALIDATE\-WEBSITE" "1" "2019-01-09" "\ \&" "\ \&"
 .ie \n(.g .ds Aq \(aq
 .el .ds Aq '
 .ss \n[.ss] 0
 .nh
 .ad l
 .de URL
-\\$2 \(laURL: \\$1 \(ra\\$3
+\fI\\$2\fP <\\$1>\\$3
 ..
-.if \n[.g] .mso www.tmac
-.LINKSTYLE blue R < >
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
 .SH "NAME"
 validate\-website \- Web crawler for checking the validity of your documents
 .SH "SYNOPSIS"
@@ -124,5 +133,5 @@ Laurent Arnoud \c
 .sp
 The MIT License
 .sp
-Copyright (c) 2009\-2018 \c
+Copyright (c) 2009\-2019 \c
 .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""

@@ -1,9 +1,11 @@
-require File.expand_path('../test_helper', __FILE__)
+# frozen_string_literal: true
+
+require File.expand_path('test_helper', __dir__)
 
 describe ValidateWebsite::Core do
   describe 'invalid options' do
     it 'raise ArgumentError on wrong validation_type' do
-      proc { ValidateWebsite::Core.new({ color: false }, :fail) }
+      _(proc { ValidateWebsite::Core.new({ color: false }, :fail) })
         .must_raise ArgumentError
     end
   end
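
This and the following test changes wrap values in Minitest's `_()` helper, which returns an explicit `Minitest::Expectation`; calling `must_equal`/`must_raise` directly on objects depends on a global monkey-patch of `Object` that is deprecated and scheduled for removal in Minitest 6. The two styles side by side:

    require 'minitest/autorun'

    describe 'expectation styles' do
      it 'compares values' do
        (1 + 1).must_equal 2  # old style: deprecated Object monkey-patch
        _(1 + 1).must_equal 2 # new style: explicit Minitest::Expectation
      end

      it 'checks raised errors' do
        _(proc { Integer('x') }).must_raise ArgumentError
      end
    end
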
@@ -1,4 +1,6 @@
-require File.expand_path('../test_helper', __FILE__)
+# frozen_string_literal: true
+
+require File.expand_path('test_helper', __dir__)
 
 # rubocop:disable Metrics/BlockLength
 describe ValidateWebsite::Crawl do
@@ -21,7 +23,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         v = ValidateWebsite::Crawl.new(site: TEST_DOMAIN, user_agent: ua)
         v.crawl
-        v.crawler.user_agent.must_equal ua
+        _(v.crawler.user_agent).must_equal ua
       end
     end
 
@@ -31,7 +33,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         ValidateWebsite::Crawl.new(site: TEST_DOMAIN,
                                    html5_validator_service_url: new)
-        validator.html5_validator_service_url.must_equal new
+        _(validator.html5_validator_service_url).must_equal new
         validator.html5_validator_service_url = original
       end
     end
@@ -43,7 +45,8 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         v = ValidateWebsite::Crawl.new(site: TEST_DOMAIN, cookies: cookies)
         v.crawl
-        v.crawler.cookies.cookies_for_host(v.host).must_equal v.default_cookies
+        _(v.crawler.cookies.cookies_for_host(v.host))
+          .must_equal v.default_cookies
       end
     end
   end
@@ -53,37 +56,53 @@ describe ValidateWebsite::Crawl do
       name = 'xhtml1-strict'
       file = File.join('test', 'data', "#{name}.html")
       page = FakePage.new(name,
-                          body: open(file).read,
+                          body: File.open(file).read,
                           content_type: 'text/html')
       @validate_website.site = page.url
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 5
+      _(@validate_website.history_count).must_equal 5
     end
 
     it 'extract link' do
       name = 'html4-strict'
       file = File.join('test', 'data', "#{name}.html")
       page = FakePage.new(name,
-                          body: open(file).read,
+                          body: File.open(file).read,
                           content_type: 'text/html')
       @validate_website.site = page.url
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 98
+      _(@validate_website.history_count).must_equal 98
+    end
+
+    it 'can change validator' do
+      name = 'html5-fail'
+      file = File.join('test', 'data', "#{name}.html")
+      page = FakePage.new(name,
+                          body: File.open(file).read,
+                          content_type: 'text/html')
+      validator_res = File.join('test', 'data', 'validator.nu-failure.json')
+      stub_request(:any, /#{validator.html5_validator_service_url}/)
+        .to_return(body: File.open(validator_res).read)
+      @validate_website.site = page.url
+      _out, _err = capture_io do
+        @validate_website.crawl(html5_validator: :nu, ignore: /Warning/)
+      end
+      _(@validate_website.errors_count).must_equal 1
     end
 
     it 'crawl when URLs are not ascii only' do
       name = 'cozy-community'
       file = File.join('test', 'data', "#{name}.html")
       page = FakePage.new(name,
-                          body: open(file).read,
+                          body: File.open(file).read,
                           content_type: 'text/html')
       validator_res = File.join('test', 'data', 'validator.nu-failure.json')
       stub_request(:any, /#{validator.html5_validator_service_url}/)
-        .to_return(body: open(validator_res).read)
+        .to_return(body: File.open(validator_res).read)
       @validate_website.site = page.url
       _out, _err = capture_io do
         @validate_website.crawl
@@ -119,7 +138,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 5
+      _(@validate_website.history_count).must_equal 5
     end
 
     it 'should extract url with single quote' do
@@ -130,7 +149,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 2
+      _(@validate_website.history_count).must_equal 2
     end
 
     it 'should extract url with double quote' do
@@ -141,7 +160,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 2
+      _(@validate_website.history_count).must_equal 2
     end
 
     it 'should extract url with params' do
@@ -152,7 +171,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 2
+      _(@validate_website.history_count).must_equal 2
    end
 
     it 'should not extract invalid urls' do
@@ -163,7 +182,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 1
+      _(@validate_website.history_count).must_equal 1
     end
   end
 
@@ -182,7 +201,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.errors_count.must_equal 1
+      _(@validate_website.errors_count).must_equal 1
     end
 
     it 'should be invalid with syntax error' do
@@ -193,7 +212,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.errors_count.must_equal 1
+      _(@validate_website.errors_count).must_equal 1
     end
   end
 end
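
The new 'can change validator' test above exercises the `html5_validator:` option introduced in this release (note the default service URL also moved from checker.html5.org to validator.nu). A presumed public-API usage, inferred from that test (the require path and site URL are illustrative assumptions):

    require 'validate_website/crawl' # path assumed

    # Crawl a site, sending HTML5 pages to the validator.nu web service
    # and filtering out messages that match the ignore pattern.
    crawler = ValidateWebsite::Crawl.new(site: 'http://localhost:4567/')
    crawler.crawl(html5_validator: :nu, ignore: /Warning/)
    puts crawler.errors_count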