validate-website 1.8.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/lib/validate_website/runner.rb
+++ b/lib/validate_website/runner.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'validate_website/core'
 
 module ValidateWebsite
@@ -5,7 +7,7 @@ module ValidateWebsite
   class Runner
     def self.trap_interrupt
       trap('INT') do
-        STDERR.puts "\nExiting..."
+        warn "\nExiting..."
         exit!(1)
       end
     end
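
Swapping `STDERR.puts` for `Kernel#warn` (RuboCop's Style/StderrPuts preference) keeps the message on standard error but routes it through the reassignable `$stderr` global, and `warn` is skipped entirely when warnings are disabled. A small illustration (the log path is hypothetical, not gem code):

    $stderr = File.open('/tmp/err.log', 'w')  # hypothetical redirection
    warn "\nExiting..."         # follows the reassigned $stderr; silent under ruby -W0
    STDERR.puts "\nExiting..."  # always writes to the original stderr stream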
--- a/lib/validate_website/static.rb
+++ b/lib/validate_website/static.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'validate_website/core'
 require 'validate_website/utils'
 
@@ -5,12 +7,14 @@ module ValidateWebsite
   # Class for validation Static website
   class Static < Core
     CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
+    START_MESSAGE_TYPE = 'files'
 
     attr_reader :history_count
 
     def initialize(options = {}, validation_type = :static)
       @history_count = 0
       super
+      start_message("#{START_MESSAGE_TYPE} in #{Dir.pwd} (#{@site} as site)")
     end
 
     # @param [Hash] options
@@ -20,11 +24,12 @@ module ValidateWebsite
       @site = @options[:site]
 
       files = Dir.glob(@options[:pattern])
-      files.each do |f|
-        next unless File.file?(f)
-        next if @options[:exclude].is_a?(Regexp) && @options[:exclude].match(f)
+      files.each do |file|
+        next unless File.file?(file)
+        next if @options[:exclude]&.match(file)
+
         @history_count += 1
-        check_static_file(f)
+        check_static_file(file)
       end
       print_status_line(files.size, 0, @not_founds_count, @errors_count)
     end
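
The exclude check above trades the explicit `is_a?(Regexp)` guard for the safe-navigation operator (Ruby 2.3+): when no exclude pattern is set, `nil&.match(file)` is just `nil` and nothing is skipped. A standalone sketch with illustrative values:

    exclude = nil
    exclude&.match('lib/foo.rb')       # => nil, file is checked
    exclude = /vendor/
    exclude&.match('vendor/cache.rb')  # => MatchData, file is skipped
    # Unlike the old guard, any non-nil object responding to #match
    # (a String, for instance) would now be consulted too.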
@@ -47,15 +52,18 @@ module ValidateWebsite
 
     private
 
-    def check_static_file(f)
-      page = StaticLink.new(f, @site).page
-      check_page(f, page)
+    def check_static_file(file)
+      page = StaticLink.new(file, @site).page
+      check_page(file, page)
       check_css_syntax(page) if page.css? && options[:css_syntax]
     end
 
-    def check_page(f, page)
+    def check_page(file, page)
       if page.html? && options[:markup]
-        validate(page.doc, page.body, f, options[:ignore])
+        keys = %i[ignore html5_validator]
+        # slice does not exists on Ruby <= 2.4
+        slice = Hash[[keys, options.values_at(*keys)].transpose]
+        validate(page.doc, page.body, file, slice)
       end
       check_static_not_found(page.links) if options[:not_found]
     end
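
`validate` now receives a two-key options hash instead of the bare ignore pattern, so the choice of HTML5 validator travels along with it. The `Hash[...transpose]` idiom is a backport of `Hash#slice`, which only arrived in Ruby 2.5 (hence the upstream comment, typo included). A sketch of the equivalence with illustrative values:

    options = { ignore: /Warning/, html5_validator: :nu, markup: true }
    keys = %i[ignore html5_validator]

    # values_at returns values in key order; transpose pairs them back up.
    Hash[[keys, options.values_at(*keys)].transpose]
    # => { ignore: /Warning/, html5_validator: :nu }
    options.slice(*keys)  # same result on Ruby >= 2.5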
@@ -66,11 +74,13 @@ module ValidateWebsite
       static_links = links.map { |l| StaticLink.new(l, @site) }
       static_links.each do |static_link|
         next unless static_link.check?
+
         unless File.exist?(static_link.file_path)
           not_found_error(static_link.file_path)
           next
         end
         next unless static_link.css?
+
         check_static_not_found static_link.extract_urls_from_fake_css_response
       end
     end
--- a/lib/validate_website/static_link.rb
+++ b/lib/validate_website/static_link.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'uri'
 require 'validate_website/utils'
 require 'validate_website/static'
@@ -25,9 +27,9 @@ StaticLink = Struct.new(:link, :site) do
 
   def body
     if File.exist?(link)
-      open(link).read
+      File.open(link).read
     else
-      open(file_path).read
+      File.open(file_path).read
     end
   end
 
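
Replacing bare `open` with `File.open` (here and throughout the tests below) closes a classic Ruby foot-gun: `Kernel#open` dispatches on its argument, so a path starting with a pipe character spawns a shell command, and with open-uri loaded (on Rubies where it still hooks `Kernel#open`) an `http://` string triggers a network fetch. An illustration of the hazard, not gem code:

    open('|date').read       # Kernel#open runs the `date` command
    File.open('|date').read  # raises Errno::ENOENT instead

(`File.read` would additionally close the descriptor promptly; the release keeps the open-then-read shape.)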
--- a/lib/validate_website/utils.rb
+++ b/lib/validate_website/utils.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # Base module ValidateWebsite
 module ValidateWebsite
   # Utils class for CSS helpers
@@ -10,6 +12,7 @@ module ValidateWebsite
     def self.extract_urls_from_css(page)
       return Set[] unless page
       return Set[] if page.body.nil?
+
       nodes = Crass::Parser.parse_stylesheet(page.body)
       extract_urls_from_nodes nodes, page
     end
--- a/lib/validate_website/validator.rb
+++ b/lib/validate_website/validator.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'uri'
 
 require 'nokogiri'
@@ -10,27 +12,25 @@ module ValidateWebsite
   class Validator
     extend ValidatorClassMethods
 
-    @html5_validator_service_url = 'https://checker.html5.org/'
+    @html5_validator_service_url = 'https://validator.nu/'
+    XHTML_SCHEMA_PATH = File.expand_path('../../data/schemas', __dir__)
+    @mutex = Mutex.new
+
     class << self
       attr_accessor :html5_validator_service_url
-    end
 
-    XHTML_PATH = File.expand_path('../../../data/schemas', __FILE__)
-
-    @xsd_schemas ||= {}
-
-    # `Dir.chdir` is needed by `Nokogiri::XML::Schema` to validate with local
-    # files and cannot use file absolute path.
-    Dir.glob(File.join(XHTML_PATH, '*.xsd')).each do |schema|
-      Dir.chdir(XHTML_PATH) do
-        schema_name = File.basename(schema, '.xsd')
-        schema_content = File.read(File.basename(schema))
-        begin
-          @xsd_schemas[schema_name] = Nokogiri::XML::Schema(schema_content)
-        rescue Nokogiri::XML::SyntaxError
-          STDERR.puts "XSD SCHEMA: #{schema} cannot be loaded"
+      # http://www.w3.org/TR/xhtml1-schema/
+      def schema(namespace)
+        @mutex.synchronize do
+          Dir.chdir(XHTML_SCHEMA_PATH) do
+            if File.exist?("#{namespace}.xsd")
+              Nokogiri::XML::Schema(File.read("#{namespace}.xsd"))
+            end
+          end
         end
       end
+
+      alias xsd schema
     end
 
     attr_reader :original_doc, :body, :dtd, :doc, :namespace, :html5_validator
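
Schema handling changes shape here: 1.8.1 eagerly parsed every `.xsd` at class-load time into the `@xsd_schemas` cache, while 1.10.0 parses the requested namespace lazily on each call. The `Dir.chdir` is still required because `Nokogiri::XML::Schema` resolves a schema's relative includes against the working directory, and since chdir is process-global the call is now serialized behind a class-level `Mutex` so concurrent crawler threads cannot interleave directory changes. One trade-off: the schema is re-parsed on every call. A hypothetical memoized variant (my illustration, not the gem's code) would combine both approaches:

    @schemas = {}  # hypothetical cache, keyed by namespace

    def schema(namespace)
      @mutex.synchronize do
        @schemas[namespace] ||= Dir.chdir(XHTML_SCHEMA_PATH) do
          Nokogiri::XML::Schema(File.read("#{namespace}.xsd")) if File.exist?("#{namespace}.xsd")
        end
      end
    end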
@@ -65,25 +65,22 @@ module ValidateWebsite
       @ignore ? @errors.reject { |e| @ignore =~ e } : @errors
     end
 
-    # http://www.w3.org/TR/xhtml1-schema/
-    def self.xsd(namespace)
-      return unless namespace
-      @xsd_schemas[namespace] if @xsd_schemas.key? namespace
-    end
-
     private
 
     # http://www.w3.org/TR/xhtml1/#dtds
     def find_namespace(dtd)
       return unless dtd.system_id
+
       dtd_uri = URI.parse(dtd.system_id)
       return unless dtd_uri.path
+
       @dtd_uri = dtd_uri
       File.basename(@dtd_uri.path, '.dtd')
     end
 
     def document
       return @document if @document
+
       @document = if @dtd_uri && @body.match(@dtd_uri.to_s)
                     @body.sub(@dtd_uri.to_s, @namespace + '.dtd')
                   else
@@ -95,11 +92,11 @@ module ValidateWebsite
     def validate
       if document =~ /^\<!DOCTYPE html\>/i
         html5_validate
-      elsif self.class.xsd(@namespace)
-        self.class.xsd(@namespace).validate(xhtml_doc)
+      elsif self.class.schema(@namespace)
+        self.class.schema(@namespace).validate(xhtml_doc)
       else
         # dont have xsd fall back to dtd
-        Dir.chdir(XHTML_PATH) do
+        Dir.chdir(XHTML_SCHEMA_PATH) do
           Nokogiri::HTML.parse(document)
         end.errors
       end
@@ -138,8 +135,8 @@ module ValidateWebsite
     end
 
     def xhtml_doc
-      Dir.chdir(XHTML_PATH) do
-        Nokogiri::XML(document) { |cfg| cfg.noent.dtdload.dtdvalid }
+      Dir.chdir(XHTML_SCHEMA_PATH) do
+        Nokogiri::XML(document) { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
       end
     end
   end
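
The parse-option change in `xhtml_doc` is a hardening fix: `noent` (substitute entities into the document) becomes its negation `nonoent`, and `nonet` forbids libxml2 from touching the network while parsing. `dtdload`/`dtdvalid` stay on because DTD validation is the point here, but with entity substitution off and network access blocked, a hostile XHTML document can no longer leak local files or trigger requests through XXE-style entity tricks. A standalone sketch of what the flags mean:

    require 'nokogiri'

    doc = Nokogiri::XML('<root/>') { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
    doc.errors  # parse/validation errors collected by libxml2

    opts = Nokogiri::XML::ParseOptions.new.dtdload.dtdvalid.nonet
    opts.noent?  # => false: entities are left unexpanded
    opts.nonet?  # => true:  network fetches are refused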
--- a/lib/validate_website/validator_class_methods.rb
+++ b/lib/validate_website/validator_class_methods.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 require 'tidy_ffi'
 
 # Validator Class Methods
@@ -9,6 +11,7 @@ module ValidatorClassMethods
 
   def tidy
     return @tidy if defined?(@tidy)
+
     @lib_tidy = TidyFFI::LibTidy
     @tidy = TidyFFI::Tidy
   rescue TidyFFI::LibTidyNotInstalled
--- a/lib/validate_website/version.rb
+++ b/lib/validate_website/version.rb
@@ -2,5 +2,5 @@
 
 # Version file for ValidateWebsite
 module ValidateWebsite
-  VERSION = '1.8.1'.freeze
+  VERSION = '1.10.0'
 end
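
The explicit `.freeze` can go because every file in this release now opens with the `# frozen_string_literal: true` pragma (Ruby 2.3+), which freezes all string literals in the file at parse time:

    # frozen_string_literal: true

    VERSION = '1.10.0'
    VERSION.frozen?   # => true without an explicit .freeze
    VERSION << '-rc'  # raises FrozenError (RuntimeError before Ruby 2.5)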
--- a/man/man1/validate-website-static.1
+++ b/man/man1/validate-website-static.1
@@ -1,23 +1,32 @@
 '\" t
 .\"     Title: validate-website-static
-.\"    Author: [see the "AUTHORS" section]
-.\" Generator: Asciidoctor 1.5.6.1
-.\"      Date: 2018-01-20
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 1.5.8
+.\"      Date: 2019-01-09
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "VALIDATE\-WEBSITE\-STATIC" "1" "2018-01-20" "\ \&" "\ \&"
+.TH "VALIDATE\-WEBSITE\-STATIC" "1" "2019-01-09" "\ \&" "\ \&"
 .ie \n(.g .ds Aq \(aq
 .el .ds Aq '
 .ss \n[.ss] 0
 .nh
 .ad l
 .de URL
-\\$2 \(laURL: \\$1 \(ra\\$3
+\fI\\$2\fP <\\$1>\\$3
 ..
-.if \n[.g] .mso www.tmac
-.LINKSTYLE blue R < >
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
 .SH "NAME"
 validate\-website\-static \- check the validity of your documents
 .SH "SYNOPSIS"
@@ -119,5 +128,5 @@ Laurent Arnoud \c
 .sp
 The MIT License
 .sp
-Copyright (c) 2009\-2018 \c
+Copyright (c) 2009\-2019 \c
 .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""
--- a/man/man1/validate-website.1
+++ b/man/man1/validate-website.1
@@ -1,23 +1,32 @@
 '\" t
 .\"     Title: validate-website
-.\"    Author: [see the "AUTHORS" section]
-.\" Generator: Asciidoctor 1.5.6.1
-.\"      Date: 2018-01-20
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 1.5.8
+.\"      Date: 2019-01-09
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "VALIDATE\-WEBSITE" "1" "2018-01-20" "\ \&" "\ \&"
+.TH "VALIDATE\-WEBSITE" "1" "2019-01-09" "\ \&" "\ \&"
 .ie \n(.g .ds Aq \(aq
 .el .ds Aq '
 .ss \n[.ss] 0
 .nh
 .ad l
 .de URL
-\\$2 \(laURL: \\$1 \(ra\\$3
+\fI\\$2\fP <\\$1>\\$3
 ..
-.if \n[.g] .mso www.tmac
-.LINKSTYLE blue R < >
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
 .SH "NAME"
 validate\-website \- Web crawler for checking the validity of your documents
 .SH "SYNOPSIS"
@@ -124,5 +133,5 @@ Laurent Arnoud \c
 .sp
 The MIT License
 .sp
-Copyright (c) 2009\-2018 \c
+Copyright (c) 2009\-2019 \c
 .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""
--- a/test/core_test.rb
+++ b/test/core_test.rb
@@ -1,9 +1,11 @@
-require File.expand_path('../test_helper', __FILE__)
+# frozen_string_literal: true
+
+require File.expand_path('test_helper', __dir__)
 
 describe ValidateWebsite::Core do
   describe 'invalid options' do
     it 'raise ArgumentError on wrong validation_type' do
-      proc { ValidateWebsite::Core.new({ color: false }, :fail) }
+      _(proc { ValidateWebsite::Core.new({ color: false }, :fail) })
         .must_raise ArgumentError
     end
   end
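
The test suite moves off the monkey-patched spec style (`value.must_equal x`), which recent Minitest releases deprecate and slate for removal in Minitest 6, to the wrapper form: `_()` (aliases `value` and `expect`) returns a `Minitest::Expectation` bound to the current test. A minimal sketch of the two styles:

    require 'minitest/autorun'

    describe 'expectation styles' do
      it 'uses the wrapper form' do
        _(1 + 1).must_equal 2  # new style: no Object monkey-patch needed
        _(proc { raise ArgumentError }).must_raise ArgumentError
        # (1 + 1).must_equal 2 # old style: emits a deprecation warning
      end
    end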
--- a/test/crawler_test.rb
+++ b/test/crawler_test.rb
@@ -1,4 +1,6 @@
-require File.expand_path('../test_helper', __FILE__)
+# frozen_string_literal: true
+
+require File.expand_path('test_helper', __dir__)
 
 # rubocop:disable Metrics/BlockLength
 describe ValidateWebsite::Crawl do
@@ -21,7 +23,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         v = ValidateWebsite::Crawl.new(site: TEST_DOMAIN, user_agent: ua)
         v.crawl
-        v.crawler.user_agent.must_equal ua
+        _(v.crawler.user_agent).must_equal ua
       end
     end
 
@@ -31,7 +33,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         ValidateWebsite::Crawl.new(site: TEST_DOMAIN,
                                    html5_validator_service_url: new)
-        validator.html5_validator_service_url.must_equal new
+        _(validator.html5_validator_service_url).must_equal new
         validator.html5_validator_service_url = original
       end
     end
@@ -43,7 +45,8 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         v = ValidateWebsite::Crawl.new(site: TEST_DOMAIN, cookies: cookies)
         v.crawl
-        v.crawler.cookies.cookies_for_host(v.host).must_equal v.default_cookies
+        _(v.crawler.cookies.cookies_for_host(v.host))
+          .must_equal v.default_cookies
       end
     end
   end
@@ -53,37 +56,53 @@ describe ValidateWebsite::Crawl do
       name = 'xhtml1-strict'
       file = File.join('test', 'data', "#{name}.html")
       page = FakePage.new(name,
-                          body: open(file).read,
+                          body: File.open(file).read,
                           content_type: 'text/html')
       @validate_website.site = page.url
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 5
+      _(@validate_website.history_count).must_equal 5
     end
 
     it 'extract link' do
       name = 'html4-strict'
       file = File.join('test', 'data', "#{name}.html")
       page = FakePage.new(name,
-                          body: open(file).read,
+                          body: File.open(file).read,
                           content_type: 'text/html')
       @validate_website.site = page.url
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 98
+      _(@validate_website.history_count).must_equal 98
+    end
+
+    it 'can change validator' do
+      name = 'html5-fail'
+      file = File.join('test', 'data', "#{name}.html")
+      page = FakePage.new(name,
+                          body: File.open(file).read,
+                          content_type: 'text/html')
+      validator_res = File.join('test', 'data', 'validator.nu-failure.json')
+      stub_request(:any, /#{validator.html5_validator_service_url}/)
+        .to_return(body: File.open(validator_res).read)
+      @validate_website.site = page.url
+      _out, _err = capture_io do
+        @validate_website.crawl(html5_validator: :nu, ignore: /Warning/)
+      end
+      _(@validate_website.errors_count).must_equal 1
     end
 
     it 'crawl when URLs are not ascii only' do
       name = 'cozy-community'
       file = File.join('test', 'data', "#{name}.html")
       page = FakePage.new(name,
-                          body: open(file).read,
+                          body: File.open(file).read,
                           content_type: 'text/html')
       validator_res = File.join('test', 'data', 'validator.nu-failure.json')
       stub_request(:any, /#{validator.html5_validator_service_url}/)
-        .to_return(body: open(validator_res).read)
+        .to_return(body: File.open(validator_res).read)
       @validate_website.site = page.url
       _out, _err = capture_io do
         @validate_website.crawl
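
The new 'can change validator' spec exercises the `html5_validator: :nu` crawl option added alongside the validator.nu default, and stubs the remote service so the suite stays offline; the canned `validator.nu-failure.json` fixture should produce exactly one error once `/Warning/` messages are ignored. The stubbing relies on WebMock's `stub_request`, roughly:

    require 'webmock/minitest'

    # Any request whose URL matches the validator endpoint gets the canned
    # JSON body back; no real HTTP leaves the test process.
    stub_request(:any, /validator\.nu/)
      .to_return(body: File.read('test/data/validator.nu-failure.json'))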
@@ -119,7 +138,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 5
+      _(@validate_website.history_count).must_equal 5
     end
 
     it 'should extract url with single quote' do
@@ -130,7 +149,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 2
+      _(@validate_website.history_count).must_equal 2
     end
 
     it 'should extract url with double quote' do
@@ -141,7 +160,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 2
+      _(@validate_website.history_count).must_equal 2
     end
 
     it 'should extract url with params' do
@@ -152,7 +171,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 2
+      _(@validate_website.history_count).must_equal 2
     end
 
     it 'should not extract invalid urls' do
@@ -163,7 +182,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.history_count.must_equal 1
+      _(@validate_website.history_count).must_equal 1
     end
   end
 
@@ -182,7 +201,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.errors_count.must_equal 1
+      _(@validate_website.errors_count).must_equal 1
     end
 
     it 'should be invalid with syntax error' do
@@ -193,7 +212,7 @@ describe ValidateWebsite::Crawl do
       _out, _err = capture_io do
         @validate_website.crawl
       end
-      @validate_website.errors_count.must_equal 1
+      _(@validate_website.errors_count).must_equal 1
     end
   end
 end