web-checker 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +50 -0
- data/Rakefile +9 -2
- data/lib/web-checker/version.rb +1 -1
- data/lib/web-checker.rb +47 -81
- data/web-checker.gemspec +5 -7
- metadata +18 -45
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b78dbee67359c44fa8f2bed401f9d308f700c18602babff9177e65d77b1623e
|
4
|
+
data.tar.gz: 517db6e636ebc4d7ff6f136c6fef49549848bba9a99140efbf6392c645b6a178
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 354cdfcb90dd17744ccb1e925405ce6b564530f07b5e46414e99727671c2d8dd9afbad4cd4b4ba41b961e3290ad8906782d801bdb7364980efe08a1fd535a46b
|
7
|
+
data.tar.gz: aee34beaad0221d622e52dd95b9002d24d07401fb4f7e3ec2ae08efcf55646585ab8b01a47bff654f316f89b3b69280cbf0d2ab35ce76aa7347c95d912a86cf5
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
web-checker (0.5)
|
5
|
+
addressable (~> 2.8)
|
6
|
+
http (~> 5.0)
|
7
|
+
nokogiri (~> 1.12)
|
8
|
+
path (~> 2.0)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
addressable (2.8.0)
|
14
|
+
public_suffix (>= 2.0.2, < 5.0)
|
15
|
+
domain_name (0.5.20190701)
|
16
|
+
unf (>= 0.0.5, < 1.0.0)
|
17
|
+
ffi (1.15.4)
|
18
|
+
ffi-compiler (1.0.1)
|
19
|
+
ffi (>= 1.0.0)
|
20
|
+
rake
|
21
|
+
http (5.0.2)
|
22
|
+
addressable (~> 2.8)
|
23
|
+
http-cookie (~> 1.0)
|
24
|
+
http-form_data (~> 2.2)
|
25
|
+
llhttp-ffi (~> 0.4.0)
|
26
|
+
http-cookie (1.0.4)
|
27
|
+
domain_name (~> 0.5)
|
28
|
+
http-form_data (2.3.0)
|
29
|
+
llhttp-ffi (0.4.0)
|
30
|
+
ffi-compiler (~> 1.0)
|
31
|
+
rake (~> 13.0)
|
32
|
+
nokogiri (1.12.4-arm64-darwin)
|
33
|
+
racc (~> 1.4)
|
34
|
+
path (2.0.1)
|
35
|
+
public_suffix (4.0.6)
|
36
|
+
racc (1.5.2)
|
37
|
+
rake (13.0.6)
|
38
|
+
unf (0.1.4)
|
39
|
+
unf_ext
|
40
|
+
unf_ext (0.0.8)
|
41
|
+
|
42
|
+
PLATFORMS
|
43
|
+
arm64-darwin-20
|
44
|
+
|
45
|
+
DEPENDENCIES
|
46
|
+
rake (~> 13.0)
|
47
|
+
web-checker!
|
48
|
+
|
49
|
+
BUNDLED WITH
|
50
|
+
2.2.27
|
data/Rakefile
CHANGED
data/lib/web-checker/version.rb
CHANGED
data/lib/web-checker.rb
CHANGED
@@ -1,26 +1,14 @@
|
|
1
1
|
require 'addressable'
|
2
2
|
require 'http'
|
3
3
|
require 'nokogiri'
|
4
|
-
require 'nokogumbo'
|
5
4
|
require 'path'
|
6
5
|
|
7
6
|
class WebChecker
|
8
7
|
|
9
|
-
IgnoreErrors = %Q{
|
10
|
-
<table> lacks "summary" attribute
|
11
|
-
<img> lacks "alt" attribute
|
12
|
-
<form> proprietary attribute "novalidate"
|
13
|
-
<input> attribute "type" has invalid value "email"
|
14
|
-
<input> attribute "tabindex" has invalid value "-1"
|
15
|
-
<input> proprietary attribute "border"
|
16
|
-
trimming empty <p>
|
17
|
-
<iframe> proprietary attribute "allowfullscreen"
|
18
|
-
}.split(/\n/).map(&:strip)
|
19
|
-
LinkElementsXPath = '//@href | //@src'
|
20
8
|
SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'
|
21
|
-
|
22
|
-
'feed' =>
|
23
|
-
'urlset' =>
|
9
|
+
SchemaNames = {
|
10
|
+
'feed' => 'atom',
|
11
|
+
'urlset' => 'sitemap',
|
24
12
|
}
|
25
13
|
|
26
14
|
class Error < Exception; end
|
@@ -29,7 +17,8 @@ class WebChecker
|
|
29
17
|
@site_uri = Addressable::URI.parse(site_uri)
|
30
18
|
@site_dir = Path.new(site_dir)
|
31
19
|
@schemas = {}
|
32
|
-
@
|
20
|
+
@seen = {}
|
21
|
+
@files = []
|
33
22
|
end
|
34
23
|
|
35
24
|
def check
|
@@ -41,26 +30,27 @@ class WebChecker
|
|
41
30
|
def check_uri(uri)
|
42
31
|
uri = Addressable::URI.parse(uri)
|
43
32
|
uri.normalize!
|
44
|
-
return
|
45
|
-
|
33
|
+
return if seen?(uri)
|
34
|
+
return unless http?(uri)
|
35
|
+
is_local = local?(uri)
|
36
|
+
;;warn "CHECKING: #{uri}"
|
46
37
|
response = HTTP.get(uri)
|
47
38
|
# ;;pp(response: response)
|
48
|
-
@
|
39
|
+
@seen[uri] = true
|
49
40
|
case response.code
|
50
41
|
when 200...300
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
;;warn "skipping unknown resource type: #{uri} (#{type})"
|
42
|
+
if is_local
|
43
|
+
data = response.body.to_s
|
44
|
+
case (type = response.headers['Content-Type'])
|
45
|
+
when 'text/html', 'text/xml', 'application/xml'
|
46
|
+
check_markup(uri, data)
|
47
|
+
when 'text/css'
|
48
|
+
check_css(uri, data)
|
49
|
+
when %r{^image/}, 'application/javascript'
|
50
|
+
# ignore
|
51
|
+
else
|
52
|
+
;;warn "skipping unknown resource type: #{uri} (#{type})"
|
53
|
+
end
|
64
54
|
end
|
65
55
|
when 300...400
|
66
56
|
redirect_uri = Addressable::URI.parse(response.headers['Location'])
|
@@ -72,60 +62,32 @@ class WebChecker
|
|
72
62
|
end
|
73
63
|
end
|
74
64
|
|
75
|
-
def
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
errors = errors.map { |str|
|
85
|
-
# line 82 column 1 - Warning: <table> lacks "summary" attribute
|
86
|
-
str =~ /^line (\d+) column (\d+) - (.*?): (.*)$/ or raise "Can't parse error: #{str.inspect}"
|
87
|
-
{
|
88
|
-
msg: str,
|
89
|
-
line: $1.to_i,
|
90
|
-
column: $2.to_i,
|
91
|
-
type: $3.downcase.to_sym,
|
92
|
-
error: $4.strip,
|
93
|
-
}
|
94
|
-
}.reject { |e|
|
95
|
-
IgnoreErrors.include?(e[:error])
|
96
|
-
}
|
97
|
-
unless errors.empty?
|
98
|
-
warn "#{uri} has invalid HTML"
|
99
|
-
show_errors(errors)
|
100
|
-
raise Error, "HTML parsing failed (via Tidy)"
|
65
|
+
def check_markup(uri, data)
|
66
|
+
# ;;warn "validating markup: #{uri}"
|
67
|
+
doc = case data
|
68
|
+
when /^<\?xml/i
|
69
|
+
Nokogiri::XML(data) { |c| c.strict }
|
70
|
+
when /^<!DOCTYPE html>/i
|
71
|
+
Nokogiri::HTML5(data, max_errors: -1)
|
72
|
+
else
|
73
|
+
Nokogiri::HTML4(data) { |c| c.strict }
|
101
74
|
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def check_html_nokogiri(uri, html)
|
105
|
-
doc_class = (html =~ /<!DOCTYPE html>/i) ? Nokogiri::HTML5 : Nokogiri::HTML
|
106
|
-
doc = doc_class.parse(html) { |config| config.strict }
|
107
75
|
unless doc.errors.empty?
|
108
76
|
show_errors(doc.errors)
|
109
|
-
raise Error, "
|
77
|
+
raise Error, "markup parsing failed"
|
110
78
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
79
|
+
if (schema_name = SchemaNames[doc.root.name])
|
80
|
+
schema_file = (SchemasDir / schema_name).add_extension('.xsd')
|
81
|
+
schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
|
82
|
+
validation_errors = schema.validate(doc)
|
83
|
+
unless validation_errors.empty?
|
84
|
+
show_errors(validation_errors)
|
85
|
+
raise Error, "schema validation failed"
|
86
|
+
end
|
119
87
|
end
|
120
|
-
|
121
|
-
|
122
|
-
schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
|
123
|
-
validation_errors = schema.validate(xml_doc)
|
124
|
-
unless validation_errors.empty?
|
125
|
-
show_errors(validation_errors)
|
126
|
-
raise Error, "XML validation failed"
|
88
|
+
doc.xpath('//@href | //@src').each do |elem|
|
89
|
+
check_uri(uri + elem.value)
|
127
90
|
end
|
128
|
-
xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
|
129
91
|
end
|
130
92
|
|
131
93
|
def show_errors(errors)
|
@@ -140,13 +102,17 @@ class WebChecker
|
|
140
102
|
end
|
141
103
|
end
|
142
104
|
|
105
|
+
def http?(uri)
|
106
|
+
!uri.scheme || %w[http https].include?(uri.scheme)
|
107
|
+
end
|
108
|
+
|
143
109
|
def local?(uri)
|
144
110
|
(!uri.scheme && !uri.host) ||
|
145
111
|
(uri.scheme == @site_uri.scheme && uri.host == @site_uri.host && uri.port == @site_uri.port)
|
146
112
|
end
|
147
113
|
|
148
114
|
def seen?(uri)
|
149
|
-
@
|
115
|
+
@seen[uri]
|
150
116
|
end
|
151
117
|
|
152
118
|
def report
|
data/web-checker.gemspec
CHANGED
@@ -18,12 +18,10 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
19
19
|
s.require_path = 'lib'
|
20
20
|
|
21
|
-
s.add_dependency 'addressable', '~> 2'
|
22
|
-
s.add_dependency 'http', '~>
|
23
|
-
s.add_dependency 'nokogiri', '~> 1'
|
24
|
-
s.add_dependency '
|
25
|
-
s.add_dependency 'path', '~> 2'
|
21
|
+
s.add_dependency 'addressable', '~> 2.8'
|
22
|
+
s.add_dependency 'http', '~> 5.0'
|
23
|
+
s.add_dependency 'nokogiri', '~> 1.12'
|
24
|
+
s.add_dependency 'path', '~> 2.0'
|
26
25
|
|
27
|
-
s.add_development_dependency 'rake', '~>
|
28
|
-
s.add_development_dependency 'rubygems-tasks', '~> 0.2'
|
26
|
+
s.add_development_dependency 'rake', '~> 13.0'
|
29
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web-checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Labovitz
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -16,98 +16,70 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2'
|
19
|
+
version: '2.8'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2'
|
26
|
+
version: '2.8'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: http
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '5.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1'
|
47
|
+
version: '1.12'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: nokogumbo
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '2'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '2'
|
54
|
+
version: '1.12'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: path
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - "~>"
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version: '2'
|
61
|
+
version: '2.0'
|
76
62
|
type: :runtime
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
66
|
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version: '2'
|
68
|
+
version: '2.0'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
70
|
name: rake
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
73
|
- - "~>"
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '12'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: rubygems-tasks
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0.2'
|
75
|
+
version: '13.0'
|
104
76
|
type: :development
|
105
77
|
prerelease: false
|
106
78
|
version_requirements: !ruby/object:Gem::Requirement
|
107
79
|
requirements:
|
108
80
|
- - "~>"
|
109
81
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0
|
82
|
+
version: '13.0'
|
111
83
|
description: "\n WebChecker checks static websites for consistency.\n "
|
112
84
|
email: johnl@johnlabovitz.com
|
113
85
|
executables:
|
@@ -116,6 +88,8 @@ extensions: []
|
|
116
88
|
extra_rdoc_files: []
|
117
89
|
files:
|
118
90
|
- ".gitignore"
|
91
|
+
- Gemfile
|
92
|
+
- Gemfile.lock
|
119
93
|
- Rakefile
|
120
94
|
- bin/web-checker
|
121
95
|
- lib/web-checker.rb
|
@@ -127,7 +101,7 @@ homepage: http://github.com/jslabovitz/web-checker
|
|
127
101
|
licenses:
|
128
102
|
- MIT
|
129
103
|
metadata: {}
|
130
|
-
post_install_message:
|
104
|
+
post_install_message:
|
131
105
|
rdoc_options: []
|
132
106
|
require_paths:
|
133
107
|
- lib
|
@@ -142,9 +116,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
116
|
- !ruby/object:Gem::Version
|
143
117
|
version: '0'
|
144
118
|
requirements: []
|
145
|
-
|
146
|
-
|
147
|
-
signing_key:
|
119
|
+
rubygems_version: 3.2.27
|
120
|
+
signing_key:
|
148
121
|
specification_version: 4
|
149
122
|
summary: Check static websites for consistency.
|
150
123
|
test_files: []
|