validate-website 1.5.4 → 1.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.md +8 -0
- data/LICENSE +1 -1
- data/README.md +6 -5
- data/lib/validate_website/colorful_messages.rb +1 -1
- data/lib/validate_website/crawl.rb +7 -2
- data/lib/validate_website/option_parser.rb +20 -8
- data/lib/validate_website/static.rb +17 -17
- data/lib/validate_website/validator.rb +6 -6
- data/lib/validate_website/version.rb +3 -1
- data/test/crawler_test.rb +16 -1
- data/test/test_helper.rb +3 -1
- metadata +18 -36
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 36bfd11556df68d70d38ead12dd46b45054638de
|
4
|
+
data.tar.gz: 4172bdd3bce9e93a8a9d86e12009943e27c0cc00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb3290172ad8af25ea097144603e94c739aa7039fd8a29dc1a58daf5987f8a2844584e0585c64baf0cb760cffc4c9d88907f7b0b6ccd3802de128b30a14d52d4
|
7
|
+
data.tar.gz: 5eea33893e997b004f08bb9604b3e0006e4d7daf10bf9e10dad28d383d55f329ffbbb54aa8ddbf1ac72ef11fc18d82f0d5404ea000cfb0cf1c2c88447ce110c7
|
data/History.md
CHANGED
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2009-
|
3
|
+
Copyright (c) 2009-2016 Laurent Arnoud <laurent@spkdev.net>
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -11,7 +11,7 @@ Web crawler for checking the validity of your documents
|
|
11
11
|
### Debian
|
12
12
|
|
13
13
|
~~~ console
|
14
|
-
|
14
|
+
apt install ruby-dev libxslt-dev libxml2-dev
|
15
15
|
~~~
|
16
16
|
|
17
17
|
### RubyGems
|
@@ -120,11 +120,12 @@ See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
|
|
120
120
|
|
121
121
|
The MIT License
|
122
122
|
|
123
|
-
Copyright (c) 2009-
|
123
|
+
Copyright (c) 2009-2016 Laurent Arnoud <laurent@spkdev.net>
|
124
124
|
|
125
125
|
---
|
126
|
-
[](https://travis-ci.org/spk/validate-website)
|
127
|
+
[](https://rubygems.org/gems/validate-website)
|
128
|
+
[](http://www.rubydoc.info/gems/validate-website)
|
129
|
+
[](http://opensource.org/licenses/MIT "MIT")
|
128
130
|
[](https://codeclimate.com/github/spk/validate-website)
|
129
131
|
[](http://inch-ci.org/github/spk/validate-website)
|
130
|
-
[](http://opensource.org/licenses/MIT)
|
@@ -16,7 +16,7 @@ module ValidateWebsite
|
|
16
16
|
#
|
17
17
|
def crawl(options = {})
|
18
18
|
@options = @options.merge(options)
|
19
|
-
@options
|
19
|
+
@options[:ignore_links] = @options[:exclude] if @options[:exclude]
|
20
20
|
|
21
21
|
@crawler = spidr_crawler(@site, @options)
|
22
22
|
print_status_line(@crawler.history.size,
|
@@ -33,6 +33,7 @@ module ValidateWebsite
|
|
33
33
|
# @return [Array] Lists of urls
|
34
34
|
#
|
35
35
|
def extract_imgs_from_page(page)
|
36
|
+
return Set[] if page.is_redirect?
|
36
37
|
page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
|
37
38
|
u = elem.attributes['src'].content
|
38
39
|
result << page.to_absolute(URI.parse(URI.encode(u)))
|
@@ -58,13 +59,17 @@ module ValidateWebsite
|
|
58
59
|
end
|
59
60
|
end
|
60
61
|
|
62
|
+
def validate?(page)
|
63
|
+
options[:markup] && page.html? && !page.is_redirect?
|
64
|
+
end
|
65
|
+
|
61
66
|
def on_every_html_page(crawler)
|
62
67
|
crawler.every_html_page do |page|
|
63
68
|
extract_imgs_from_page(page).each do |i|
|
64
69
|
crawler.enqueue(i)
|
65
70
|
end
|
66
71
|
|
67
|
-
if
|
72
|
+
if validate?(page)
|
68
73
|
validate(page.doc, page.body, page.url, options[:ignore])
|
69
74
|
end
|
70
75
|
end
|
@@ -23,11 +23,11 @@ module ValidateWebsite
|
|
23
23
|
color: true,
|
24
24
|
# internal verbose for ValidateWebsite
|
25
25
|
verbose: false
|
26
|
-
}
|
26
|
+
}.freeze
|
27
27
|
|
28
28
|
# Generic parse method for crawl or static options
|
29
29
|
def self.parse(options, type)
|
30
|
-
|
30
|
+
raise ArgumentError unless VALID_TYPES.include?(type)
|
31
31
|
# We are in command line (ARGV)
|
32
32
|
if options.is_a?(Array)
|
33
33
|
send("command_line_parse_#{type}", options)
|
@@ -42,14 +42,19 @@ module ValidateWebsite
|
|
42
42
|
yield o if block_given?
|
43
43
|
markup_syntax(o)
|
44
44
|
boolean_options(o)
|
45
|
-
o
|
46
|
-
|
47
|
-
o
|
48
|
-
'Change default html5 validator service URL')
|
49
|
-
verbose_help_options(o)
|
45
|
+
ignore_html5_options(o)
|
46
|
+
verbose_option(o)
|
47
|
+
version_help(o)
|
50
48
|
end
|
51
49
|
end
|
52
50
|
|
51
|
+
def self.ignore_html5_options(o)
|
52
|
+
o.regexp('-i', '--ignore',
|
53
|
+
'Validation errors to ignore (ex: "valign|autocorrect")')
|
54
|
+
o.string('-5', '--html5-validator-service-url',
|
55
|
+
'Change default html5 validator service URL')
|
56
|
+
end
|
57
|
+
|
53
58
|
def self.markup_syntax(o)
|
54
59
|
o.bool('-m', '--markup',
|
55
60
|
"Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
|
@@ -68,10 +73,17 @@ module ValidateWebsite
|
|
68
73
|
default: DEFAULT_OPTIONS[:color])
|
69
74
|
end
|
70
75
|
|
71
|
-
def self.verbose_help_options(o)
|
76
|
+
def self.verbose_option(o)
|
72
77
|
o.bool('-v', '--verbose',
|
73
78
|
"Show validator errors (default: #{DEFAULT_OPTIONS[:verbose]})",
|
74
79
|
default: DEFAULT_OPTIONS[:verbose])
|
80
|
+
end
|
81
|
+
|
82
|
+
def self.version_help(o)
|
83
|
+
o.on('--version', 'Display version.') do
|
84
|
+
puts ValidateWebsite::VERSION
|
85
|
+
exit
|
86
|
+
end
|
75
87
|
o.on('-h', '--help', 'Display this help message.') do
|
76
88
|
puts o
|
77
89
|
exit
|
@@ -4,7 +4,7 @@ require 'validate_website/utils'
|
|
4
4
|
module ValidateWebsite
|
5
5
|
# Class for validation Static website
|
6
6
|
class Static < Core
|
7
|
-
CONTENT_TYPES = ['text/html', 'text/xhtml+xml']
|
7
|
+
CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
|
8
8
|
|
9
9
|
def initialize(options = {}, validation_type = :static)
|
10
10
|
super
|
@@ -24,6 +24,22 @@ module ValidateWebsite
|
|
24
24
|
print_status_line(files.size, 0, @not_founds_count, @errors_count)
|
25
25
|
end
|
26
26
|
|
27
|
+
# Fake http response for Spidr static crawling
|
28
|
+
# see https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb
|
29
|
+
#
|
30
|
+
# @param [String] response body
|
31
|
+
# @param [Array] content types
|
32
|
+
# @return [Net::HTTPResponse] fake http response
|
33
|
+
def self.fake_httpresponse(body, content_types = CONTENT_TYPES)
|
34
|
+
response = Net::HTTPResponse.new '1.1', 200, 'OK'
|
35
|
+
response.instance_variable_set(:@read, true)
|
36
|
+
response.body = body
|
37
|
+
content_types.each do |c|
|
38
|
+
response.add_field('content-type', c)
|
39
|
+
end
|
40
|
+
response
|
41
|
+
end
|
42
|
+
|
27
43
|
private
|
28
44
|
|
29
45
|
def check_static_file(f)
|
@@ -111,21 +127,5 @@ module ValidateWebsite
|
|
111
127
|
check_static_not_found static_link.extract_urls_from_fake_css_response
|
112
128
|
end
|
113
129
|
end
|
114
|
-
|
115
|
-
# Fake http response for Spidr static crawling
|
116
|
-
# see https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb
|
117
|
-
#
|
118
|
-
# @param [String] response body
|
119
|
-
# @param [Array] content types
|
120
|
-
# @return [Net::HTTPResponse] fake http response
|
121
|
-
def self.fake_httpresponse(body, content_types = CONTENT_TYPES)
|
122
|
-
response = Net::HTTPResponse.new '1.1', 200, 'OK'
|
123
|
-
response.instance_variable_set(:@read, true)
|
124
|
-
response.body = body
|
125
|
-
content_types.each do |c|
|
126
|
-
response.add_field('content-type', c)
|
127
|
-
end
|
128
|
-
response
|
129
|
-
end
|
130
130
|
end
|
131
131
|
end
|
@@ -33,7 +33,7 @@ module ValidateWebsite
|
|
33
33
|
# @return [Boolean]
|
34
34
|
def valid?
|
35
35
|
find_errors
|
36
|
-
errors.
|
36
|
+
errors.empty?
|
37
37
|
end
|
38
38
|
|
39
39
|
# @return [Array] of errors
|
@@ -55,11 +55,11 @@ module ValidateWebsite
|
|
55
55
|
|
56
56
|
def document
|
57
57
|
return @document if @document
|
58
|
-
if @dtd_uri && @body.match(@dtd_uri.to_s)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
@document = if @dtd_uri && @body.match(@dtd_uri.to_s)
|
59
|
+
@body.sub(@dtd_uri.to_s, @namespace + '.dtd')
|
60
|
+
else
|
61
|
+
@body
|
62
|
+
end
|
63
63
|
end
|
64
64
|
|
65
65
|
# http://www.w3.org/TR/xhtml1-schema/
|
data/test/crawler_test.rb
CHANGED
@@ -84,6 +84,21 @@ describe ValidateWebsite::Crawl do
|
|
84
84
|
@validate_website.crawl
|
85
85
|
end
|
86
86
|
end
|
87
|
+
|
88
|
+
it 'dont try to extract imgs for redirect' do
|
89
|
+
url = 'https://wordpress.org/support/bb-login.php'
|
90
|
+
stub_request(:get, url).to_return(
|
91
|
+
status: 302,
|
92
|
+
headers: {
|
93
|
+
'Location' => 'https://login.wordpress.org/',
|
94
|
+
'Content-Type' => 'text/html; charset=UTF-8'
|
95
|
+
}
|
96
|
+
)
|
97
|
+
@validate_website.site = url
|
98
|
+
_out, _err = capture_io do
|
99
|
+
@validate_website.crawl
|
100
|
+
end
|
101
|
+
end
|
87
102
|
end
|
88
103
|
|
89
104
|
describe('css') do
|
@@ -115,7 +130,7 @@ describe ValidateWebsite::Crawl do
|
|
115
130
|
|
116
131
|
it 'should extract url with double quote' do
|
117
132
|
page = FakePage.new('test.css',
|
118
|
-
body:
|
133
|
+
body: '.test {background-image: url("pouet");}',
|
119
134
|
content_type: 'text/css')
|
120
135
|
@validate_website.site = page.url
|
121
136
|
_out, _err = capture_io do
|
data/test/test_helper.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'minitest/autorun'
|
2
4
|
require 'spidr'
|
3
5
|
|
@@ -5,5 +7,5 @@ require 'validate_website/core'
|
|
5
7
|
|
6
8
|
require File.expand_path('../webmock_helper', __FILE__)
|
7
9
|
|
8
|
-
TEST_DOMAIN = 'http://www.example.com/'
|
10
|
+
TEST_DOMAIN = 'http://www.example.com/'.freeze
|
9
11
|
ENV['LC_ALL'] = 'C.UTF-8' if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: validate-website
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.4
|
4
|
+
version: 1.5.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Arnoud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|
@@ -16,42 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0.
|
19
|
+
version: '0.5'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0.
|
26
|
+
version: '0.5'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: crass
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1
|
33
|
+
version: '1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1
|
40
|
+
version: '1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: paint
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1
|
47
|
+
version: '1'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1
|
54
|
+
version: '1'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: multipart_body
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,70 +86,56 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '10
|
89
|
+
version: '10'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '10
|
96
|
+
version: '10'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: minitest
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '5
|
103
|
+
version: '5'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '5
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: minitest-line
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - "~>"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '0.6'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '0.6'
|
110
|
+
version: '5'
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
112
|
name: webmock
|
127
113
|
requirement: !ruby/object:Gem::Requirement
|
128
114
|
requirements:
|
129
115
|
- - "~>"
|
130
116
|
- !ruby/object:Gem::Version
|
131
|
-
version: '1
|
117
|
+
version: '1'
|
132
118
|
type: :development
|
133
119
|
prerelease: false
|
134
120
|
version_requirements: !ruby/object:Gem::Requirement
|
135
121
|
requirements:
|
136
122
|
- - "~>"
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '1
|
124
|
+
version: '1'
|
139
125
|
- !ruby/object:Gem::Dependency
|
140
126
|
name: rubocop
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
142
128
|
requirements:
|
143
129
|
- - "~>"
|
144
130
|
- !ruby/object:Gem::Version
|
145
|
-
version: '0'
|
131
|
+
version: '0.37'
|
146
132
|
type: :development
|
147
133
|
prerelease: false
|
148
134
|
version_requirements: !ruby/object:Gem::Requirement
|
149
135
|
requirements:
|
150
136
|
- - "~>"
|
151
137
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0'
|
138
|
+
version: '0.37'
|
153
139
|
description: validate-website is a web crawler for checking the markup validity with
|
154
140
|
XML Schema / DTD and not found urls.
|
155
141
|
email: laurent@spkdev.net
|
@@ -297,16 +283,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
297
283
|
requirements:
|
298
284
|
- - ">="
|
299
285
|
- !ruby/object:Gem::Version
|
300
|
-
version:
|
286
|
+
version: 2.0.0
|
301
287
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
302
288
|
requirements:
|
303
289
|
- - ">="
|
304
290
|
- !ruby/object:Gem::Version
|
305
291
|
version: '0'
|
306
|
-
requirements:
|
307
|
-
- spidr
|
308
|
-
- rainbow
|
309
|
-
- multipart_body
|
292
|
+
requirements: []
|
310
293
|
rubyforge_project:
|
311
294
|
rubygems_version: 2.4.5.1
|
312
295
|
signing_key:
|
@@ -317,4 +300,3 @@ test_files:
|
|
317
300
|
- test/core_test.rb
|
318
301
|
- test/validator_test.rb
|
319
302
|
- test/crawler_test.rb
|
320
|
-
has_rdoc:
|