web-checker 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +50 -0
- data/Rakefile +9 -2
- data/lib/web-checker/version.rb +1 -1
- data/lib/web-checker.rb +47 -81
- data/web-checker.gemspec +5 -7
- metadata +18 -45
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b78dbee67359c44fa8f2bed401f9d308f700c18602babff9177e65d77b1623e
|
4
|
+
data.tar.gz: 517db6e636ebc4d7ff6f136c6fef49549848bba9a99140efbf6392c645b6a178
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 354cdfcb90dd17744ccb1e925405ce6b564530f07b5e46414e99727671c2d8dd9afbad4cd4b4ba41b961e3290ad8906782d801bdb7364980efe08a1fd535a46b
|
7
|
+
data.tar.gz: aee34beaad0221d622e52dd95b9002d24d07401fb4f7e3ec2ae08efcf55646585ab8b01a47bff654f316f89b3b69280cbf0d2ab35ce76aa7347c95d912a86cf5
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
web-checker (0.5)
|
5
|
+
addressable (~> 2.8)
|
6
|
+
http (~> 5.0)
|
7
|
+
nokogiri (~> 1.12)
|
8
|
+
path (~> 2.0)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
addressable (2.8.0)
|
14
|
+
public_suffix (>= 2.0.2, < 5.0)
|
15
|
+
domain_name (0.5.20190701)
|
16
|
+
unf (>= 0.0.5, < 1.0.0)
|
17
|
+
ffi (1.15.4)
|
18
|
+
ffi-compiler (1.0.1)
|
19
|
+
ffi (>= 1.0.0)
|
20
|
+
rake
|
21
|
+
http (5.0.2)
|
22
|
+
addressable (~> 2.8)
|
23
|
+
http-cookie (~> 1.0)
|
24
|
+
http-form_data (~> 2.2)
|
25
|
+
llhttp-ffi (~> 0.4.0)
|
26
|
+
http-cookie (1.0.4)
|
27
|
+
domain_name (~> 0.5)
|
28
|
+
http-form_data (2.3.0)
|
29
|
+
llhttp-ffi (0.4.0)
|
30
|
+
ffi-compiler (~> 1.0)
|
31
|
+
rake (~> 13.0)
|
32
|
+
nokogiri (1.12.4-arm64-darwin)
|
33
|
+
racc (~> 1.4)
|
34
|
+
path (2.0.1)
|
35
|
+
public_suffix (4.0.6)
|
36
|
+
racc (1.5.2)
|
37
|
+
rake (13.0.6)
|
38
|
+
unf (0.1.4)
|
39
|
+
unf_ext
|
40
|
+
unf_ext (0.0.8)
|
41
|
+
|
42
|
+
PLATFORMS
|
43
|
+
arm64-darwin-20
|
44
|
+
|
45
|
+
DEPENDENCIES
|
46
|
+
rake (~> 13.0)
|
47
|
+
web-checker!
|
48
|
+
|
49
|
+
BUNDLED WITH
|
50
|
+
2.2.27
|
data/Rakefile
CHANGED
data/lib/web-checker/version.rb
CHANGED
data/lib/web-checker.rb
CHANGED
@@ -1,26 +1,14 @@
|
|
1
1
|
require 'addressable'
|
2
2
|
require 'http'
|
3
3
|
require 'nokogiri'
|
4
|
-
require 'nokogumbo'
|
5
4
|
require 'path'
|
6
5
|
|
7
6
|
class WebChecker
|
8
7
|
|
9
|
-
IgnoreErrors = %Q{
|
10
|
-
<table> lacks "summary" attribute
|
11
|
-
<img> lacks "alt" attribute
|
12
|
-
<form> proprietary attribute "novalidate"
|
13
|
-
<input> attribute "type" has invalid value "email"
|
14
|
-
<input> attribute "tabindex" has invalid value "-1"
|
15
|
-
<input> proprietary attribute "border"
|
16
|
-
trimming empty <p>
|
17
|
-
<iframe> proprietary attribute "allowfullscreen"
|
18
|
-
}.split(/\n/).map(&:strip)
|
19
|
-
LinkElementsXPath = '//@href | //@src'
|
20
8
|
SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'
|
21
|
-
|
22
|
-
'feed' =>
|
23
|
-
'urlset' =>
|
9
|
+
SchemaNames = {
|
10
|
+
'feed' => 'atom',
|
11
|
+
'urlset' => 'sitemap',
|
24
12
|
}
|
25
13
|
|
26
14
|
class Error < Exception; end
|
@@ -29,7 +17,8 @@ class WebChecker
|
|
29
17
|
@site_uri = Addressable::URI.parse(site_uri)
|
30
18
|
@site_dir = Path.new(site_dir)
|
31
19
|
@schemas = {}
|
32
|
-
@
|
20
|
+
@seen = {}
|
21
|
+
@files = []
|
33
22
|
end
|
34
23
|
|
35
24
|
def check
|
@@ -41,26 +30,27 @@ class WebChecker
|
|
41
30
|
def check_uri(uri)
|
42
31
|
uri = Addressable::URI.parse(uri)
|
43
32
|
uri.normalize!
|
44
|
-
return
|
45
|
-
|
33
|
+
return if seen?(uri)
|
34
|
+
return unless http?(uri)
|
35
|
+
is_local = local?(uri)
|
36
|
+
;;warn "CHECKING: #{uri}"
|
46
37
|
response = HTTP.get(uri)
|
47
38
|
# ;;pp(response: response)
|
48
|
-
@
|
39
|
+
@seen[uri] = true
|
49
40
|
case response.code
|
50
41
|
when 200...300
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
;;warn "skipping unknown resource type: #{uri} (#{type})"
|
42
|
+
if is_local
|
43
|
+
data = response.body.to_s
|
44
|
+
case (type = response.headers['Content-Type'])
|
45
|
+
when 'text/html', 'text/xml', 'application/xml'
|
46
|
+
check_markup(uri, data)
|
47
|
+
when 'text/css'
|
48
|
+
check_css(uri, data)
|
49
|
+
when %r{^image/}, 'application/javascript'
|
50
|
+
# ignore
|
51
|
+
else
|
52
|
+
;;warn "skipping unknown resource type: #{uri} (#{type})"
|
53
|
+
end
|
64
54
|
end
|
65
55
|
when 300...400
|
66
56
|
redirect_uri = Addressable::URI.parse(response.headers['Location'])
|
@@ -72,60 +62,32 @@ class WebChecker
|
|
72
62
|
end
|
73
63
|
end
|
74
64
|
|
75
|
-
def
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
errors = errors.map { |str|
|
85
|
-
# line 82 column 1 - Warning: <table> lacks "summary" attribute
|
86
|
-
str =~ /^line (\d+) column (\d+) - (.*?): (.*)$/ or raise "Can't parse error: #{str.inspect}"
|
87
|
-
{
|
88
|
-
msg: str,
|
89
|
-
line: $1.to_i,
|
90
|
-
column: $2.to_i,
|
91
|
-
type: $3.downcase.to_sym,
|
92
|
-
error: $4.strip,
|
93
|
-
}
|
94
|
-
}.reject { |e|
|
95
|
-
IgnoreErrors.include?(e[:error])
|
96
|
-
}
|
97
|
-
unless errors.empty?
|
98
|
-
warn "#{uri} has invalid HTML"
|
99
|
-
show_errors(errors)
|
100
|
-
raise Error, "HTML parsing failed (via Tidy)"
|
65
|
+
def check_markup(uri, data)
|
66
|
+
# ;;warn "validating markup: #{uri}"
|
67
|
+
doc = case data
|
68
|
+
when /^<\?xml/i
|
69
|
+
Nokogiri::XML(data) { |c| c.strict }
|
70
|
+
when /^<!DOCTYPE html>/i
|
71
|
+
Nokogiri::HTML5(data, max_errors: -1)
|
72
|
+
else
|
73
|
+
Nokogiri::HTML4(data) { |c| c.strict }
|
101
74
|
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def check_html_nokogiri(uri, html)
|
105
|
-
doc_class = (html =~ /<!DOCTYPE html>/i) ? Nokogiri::HTML5 : Nokogiri::HTML
|
106
|
-
doc = doc_class.parse(html) { |config| config.strict }
|
107
75
|
unless doc.errors.empty?
|
108
76
|
show_errors(doc.errors)
|
109
|
-
raise Error, "
|
77
|
+
raise Error, "markup parsing failed"
|
110
78
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
79
|
+
if (schema_name = SchemaNames[doc.root.name])
|
80
|
+
schema_file = (SchemasDir / schema_name).add_extension('.xsd')
|
81
|
+
schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
|
82
|
+
validation_errors = schema.validate(doc)
|
83
|
+
unless validation_errors.empty?
|
84
|
+
show_errors(validation_errors)
|
85
|
+
raise Error, "schema validation failed"
|
86
|
+
end
|
119
87
|
end
|
120
|
-
|
121
|
-
|
122
|
-
schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
|
123
|
-
validation_errors = schema.validate(xml_doc)
|
124
|
-
unless validation_errors.empty?
|
125
|
-
show_errors(validation_errors)
|
126
|
-
raise Error, "XML validation failed"
|
88
|
+
doc.xpath('//@href | //@src').each do |elem|
|
89
|
+
check_uri(uri + elem.value)
|
127
90
|
end
|
128
|
-
xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
|
129
91
|
end
|
130
92
|
|
131
93
|
def show_errors(errors)
|
@@ -140,13 +102,17 @@ class WebChecker
|
|
140
102
|
end
|
141
103
|
end
|
142
104
|
|
105
|
+
def http?(uri)
|
106
|
+
!uri.scheme || %w[http https].include?(uri.scheme)
|
107
|
+
end
|
108
|
+
|
143
109
|
def local?(uri)
|
144
110
|
(!uri.scheme && !uri.host) ||
|
145
111
|
(uri.scheme == @site_uri.scheme && uri.host == @site_uri.host && uri.port == @site_uri.port)
|
146
112
|
end
|
147
113
|
|
148
114
|
def seen?(uri)
|
149
|
-
@
|
115
|
+
@seen[uri]
|
150
116
|
end
|
151
117
|
|
152
118
|
def report
|
data/web-checker.gemspec
CHANGED
@@ -18,12 +18,10 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
19
19
|
s.require_path = 'lib'
|
20
20
|
|
21
|
-
s.add_dependency 'addressable', '~> 2'
|
22
|
-
s.add_dependency 'http', '~>
|
23
|
-
s.add_dependency 'nokogiri', '~> 1'
|
24
|
-
s.add_dependency '
|
25
|
-
s.add_dependency 'path', '~> 2'
|
21
|
+
s.add_dependency 'addressable', '~> 2.8'
|
22
|
+
s.add_dependency 'http', '~> 5.0'
|
23
|
+
s.add_dependency 'nokogiri', '~> 1.12'
|
24
|
+
s.add_dependency 'path', '~> 2.0'
|
26
25
|
|
27
|
-
s.add_development_dependency 'rake', '~>
|
28
|
-
s.add_development_dependency 'rubygems-tasks', '~> 0.2'
|
26
|
+
s.add_development_dependency 'rake', '~> 13.0'
|
29
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web-checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Labovitz
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -16,98 +16,70 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2'
|
19
|
+
version: '2.8'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2'
|
26
|
+
version: '2.8'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: http
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '5.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1'
|
47
|
+
version: '1.12'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: nokogumbo
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '2'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '2'
|
54
|
+
version: '1.12'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: path
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - "~>"
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version: '2'
|
61
|
+
version: '2.0'
|
76
62
|
type: :runtime
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
66
|
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version: '2'
|
68
|
+
version: '2.0'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
70
|
name: rake
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
73
|
- - "~>"
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '12'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: rubygems-tasks
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0.2'
|
75
|
+
version: '13.0'
|
104
76
|
type: :development
|
105
77
|
prerelease: false
|
106
78
|
version_requirements: !ruby/object:Gem::Requirement
|
107
79
|
requirements:
|
108
80
|
- - "~>"
|
109
81
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0
|
82
|
+
version: '13.0'
|
111
83
|
description: "\n WebChecker checks static websites for consistency.\n "
|
112
84
|
email: johnl@johnlabovitz.com
|
113
85
|
executables:
|
@@ -116,6 +88,8 @@ extensions: []
|
|
116
88
|
extra_rdoc_files: []
|
117
89
|
files:
|
118
90
|
- ".gitignore"
|
91
|
+
- Gemfile
|
92
|
+
- Gemfile.lock
|
119
93
|
- Rakefile
|
120
94
|
- bin/web-checker
|
121
95
|
- lib/web-checker.rb
|
@@ -127,7 +101,7 @@ homepage: http://github.com/jslabovitz/web-checker
|
|
127
101
|
licenses:
|
128
102
|
- MIT
|
129
103
|
metadata: {}
|
130
|
-
post_install_message:
|
104
|
+
post_install_message:
|
131
105
|
rdoc_options: []
|
132
106
|
require_paths:
|
133
107
|
- lib
|
@@ -142,9 +116,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
116
|
- !ruby/object:Gem::Version
|
143
117
|
version: '0'
|
144
118
|
requirements: []
|
145
|
-
|
146
|
-
|
147
|
-
signing_key:
|
119
|
+
rubygems_version: 3.2.27
|
120
|
+
signing_key:
|
148
121
|
specification_version: 4
|
149
122
|
summary: Check static websites for consistency.
|
150
123
|
test_files: []
|