web-checker 0.4 → 0.5

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d2121dbc5997e578f935a0cc26174e9c714d4d7d0ea57968a3161310290b630
4
- data.tar.gz: 8799aef79293dcc714a7abae2d917c9e665367a57e4b61481065966f02874aa4
3
+ metadata.gz: 5b78dbee67359c44fa8f2bed401f9d308f700c18602babff9177e65d77b1623e
4
+ data.tar.gz: 517db6e636ebc4d7ff6f136c6fef49549848bba9a99140efbf6392c645b6a178
5
5
  SHA512:
6
- metadata.gz: 59c2cf78d72952d6989968cfdfb43d6919833398aa662293d16805e418f0a9684d354bc6edf59f8bcbf24c66dc0cf7138e872a3e54abda4bd417a3bed0942799
7
- data.tar.gz: 9d280283a9067380d5196e9b87cc5c965fa7cb587f10ed195423df415aeeb887cd7117169ec72da7fde486dc3490df56a2532fbd19141891054d5e7857648131
6
+ metadata.gz: 354cdfcb90dd17744ccb1e925405ce6b564530f07b5e46414e99727671c2d8dd9afbad4cd4b4ba41b961e3290ad8906782d801bdb7364980efe08a1fd535a46b
7
+ data.tar.gz: aee34beaad0221d622e52dd95b9002d24d07401fb4f7e3ec2ae08efcf55646585ab8b01a47bff654f316f89b3b69280cbf0d2ab35ce76aa7347c95d912a86cf5
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,50 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ web-checker (0.5)
5
+ addressable (~> 2.8)
6
+ http (~> 5.0)
7
+ nokogiri (~> 1.12)
8
+ path (~> 2.0)
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ addressable (2.8.0)
14
+ public_suffix (>= 2.0.2, < 5.0)
15
+ domain_name (0.5.20190701)
16
+ unf (>= 0.0.5, < 1.0.0)
17
+ ffi (1.15.4)
18
+ ffi-compiler (1.0.1)
19
+ ffi (>= 1.0.0)
20
+ rake
21
+ http (5.0.2)
22
+ addressable (~> 2.8)
23
+ http-cookie (~> 1.0)
24
+ http-form_data (~> 2.2)
25
+ llhttp-ffi (~> 0.4.0)
26
+ http-cookie (1.0.4)
27
+ domain_name (~> 0.5)
28
+ http-form_data (2.3.0)
29
+ llhttp-ffi (0.4.0)
30
+ ffi-compiler (~> 1.0)
31
+ rake (~> 13.0)
32
+ nokogiri (1.12.4-arm64-darwin)
33
+ racc (~> 1.4)
34
+ path (2.0.1)
35
+ public_suffix (4.0.6)
36
+ racc (1.5.2)
37
+ rake (13.0.6)
38
+ unf (0.1.4)
39
+ unf_ext
40
+ unf_ext (0.0.8)
41
+
42
+ PLATFORMS
43
+ arm64-darwin-20
44
+
45
+ DEPENDENCIES
46
+ rake (~> 13.0)
47
+ web-checker!
48
+
49
+ BUNDLED WITH
50
+ 2.2.27
data/Rakefile CHANGED
@@ -1,2 +1,9 @@
1
- require 'rubygems/tasks'
2
- Gem::Tasks.new
1
+ require 'bundler/gem_tasks'
2
+ Bundler.require
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.test_files = FileList['test/*test.rb']
7
+ end
8
+
9
+ task :default => :test
@@ -1,5 +1,5 @@
1
1
  class WebChecker
2
2
 
3
- VERSION = '0.4'
3
+ VERSION = '0.5'
4
4
 
5
5
  end
data/lib/web-checker.rb CHANGED
@@ -1,26 +1,14 @@
1
1
  require 'addressable'
2
2
  require 'http'
3
3
  require 'nokogiri'
4
- require 'nokogumbo'
5
4
  require 'path'
6
5
 
7
6
  class WebChecker
8
7
 
9
- IgnoreErrors = %Q{
10
- <table> lacks "summary" attribute
11
- <img> lacks "alt" attribute
12
- <form> proprietary attribute "novalidate"
13
- <input> attribute "type" has invalid value "email"
14
- <input> attribute "tabindex" has invalid value "-1"
15
- <input> proprietary attribute "border"
16
- trimming empty <p>
17
- <iframe> proprietary attribute "allowfullscreen"
18
- }.split(/\n/).map(&:strip)
19
- LinkElementsXPath = '//@href | //@src'
20
8
  SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'
21
- Schemas = {
22
- 'feed' => SchemasDir / 'atom.xsd',
23
- 'urlset' => SchemasDir / 'sitemap.xsd',
9
+ SchemaNames = {
10
+ 'feed' => 'atom',
11
+ 'urlset' => 'sitemap',
24
12
  }
25
13
 
26
14
  class Error < Exception; end
@@ -29,7 +17,8 @@ class WebChecker
29
17
  @site_uri = Addressable::URI.parse(site_uri)
30
18
  @site_dir = Path.new(site_dir)
31
19
  @schemas = {}
32
- @visited = {}
20
+ @seen = {}
21
+ @files = []
33
22
  end
34
23
 
35
24
  def check
@@ -41,26 +30,27 @@ class WebChecker
41
30
  def check_uri(uri)
42
31
  uri = Addressable::URI.parse(uri)
43
32
  uri.normalize!
44
- return unless local?(uri) && !seen?(uri)
45
- # ;;warn "CHECKING: #{uri}"
33
+ return if seen?(uri)
34
+ return unless http?(uri)
35
+ is_local = local?(uri)
36
+ ;;warn "CHECKING: #{uri}"
46
37
  response = HTTP.get(uri)
47
38
  # ;;pp(response: response)
48
- @visited[uri] = true
39
+ @seen[uri] = true
49
40
  case response.code
50
41
  when 200...300
51
- body = response.body.to_s
52
- # ;;pp(body: body)
53
- case (type = response.headers['Content-Type'])
54
- when 'text/html'
55
- check_html(uri, body)
56
- when 'text/css'
57
- check_css(uri, body)
58
- when 'application/xml', 'text/xml'
59
- check_xml(uri, body)
60
- when 'image/jpeg', 'image/png', 'image/gif', 'application/javascript'
61
- # ignore
62
- else
63
- ;;warn "skipping unknown resource type: #{uri} (#{type})"
42
+ if is_local
43
+ data = response.body.to_s
44
+ case (type = response.headers['Content-Type'])
45
+ when 'text/html', 'text/xml', 'application/xml'
46
+ check_markup(uri, data)
47
+ when 'text/css'
48
+ check_css(uri, data)
49
+ when %r{^image/}, 'application/javascript'
50
+ # ignore
51
+ else
52
+ ;;warn "skipping unknown resource type: #{uri} (#{type})"
53
+ end
64
54
  end
65
55
  when 300...400
66
56
  redirect_uri = Addressable::URI.parse(response.headers['Location'])
@@ -72,60 +62,32 @@ class WebChecker
72
62
  end
73
63
  end
74
64
 
75
- def check_html(uri, html)
76
- check_html_tidy(uri, html)
77
- check_html_nokogiri(uri, html)
78
- end
79
-
80
- def check_html_tidy(uri, html)
81
- tmp_file = Path.tmpfile
82
- tmp_file.write(html)
83
- errors = %x{tidy -utf8 -quiet -errors #{tmp_file} 2>&1}.split("\n")
84
- errors = errors.map { |str|
85
- # line 82 column 1 - Warning: <table> lacks "summary" attribute
86
- str =~ /^line (\d+) column (\d+) - (.*?): (.*)$/ or raise "Can't parse error: #{str.inspect}"
87
- {
88
- msg: str,
89
- line: $1.to_i,
90
- column: $2.to_i,
91
- type: $3.downcase.to_sym,
92
- error: $4.strip,
93
- }
94
- }.reject { |e|
95
- IgnoreErrors.include?(e[:error])
96
- }
97
- unless errors.empty?
98
- warn "#{uri} has invalid HTML"
99
- show_errors(errors)
100
- raise Error, "HTML parsing failed (via Tidy)"
65
+ def check_markup(uri, data)
66
+ # ;;warn "validating markup: #{uri}"
67
+ doc = case data
68
+ when /^<\?xml/i
69
+ Nokogiri::XML(data) { |c| c.strict }
70
+ when /^<!DOCTYPE html>/i
71
+ Nokogiri::HTML5(data, max_errors: -1)
72
+ else
73
+ Nokogiri::HTML4(data) { |c| c.strict }
101
74
  end
102
- end
103
-
104
- def check_html_nokogiri(uri, html)
105
- doc_class = (html =~ /<!DOCTYPE html>/i) ? Nokogiri::HTML5 : Nokogiri::HTML
106
- doc = doc_class.parse(html) { |config| config.strict }
107
75
  unless doc.errors.empty?
108
76
  show_errors(doc.errors)
109
- raise Error, "HTML parsing failed (via Nokogiri)"
77
+ raise Error, "markup parsing failed"
110
78
  end
111
- doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
112
- end
113
-
114
- def check_xml(uri, xml)
115
- xml_doc = Nokogiri::XML::Document.parse(xml) { |config| config.strict }
116
- unless xml_doc.errors.empty?
117
- show_errors(xml_doc.errors)
118
- raise Error, "XML parsing failed"
79
+ if (schema_name = SchemaNames[doc.root.name])
80
+ schema_file = (SchemasDir / schema_name).add_extension('.xsd')
81
+ schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
82
+ validation_errors = schema.validate(doc)
83
+ unless validation_errors.empty?
84
+ show_errors(validation_errors)
85
+ raise Error, "schema validation failed"
86
+ end
119
87
  end
120
- root_name = xml_doc.root.name
121
- schema_file = Schemas[root_name] or raise Error, "Unknown schema: #{root_name.inspect}"
122
- schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
123
- validation_errors = schema.validate(xml_doc)
124
- unless validation_errors.empty?
125
- show_errors(validation_errors)
126
- raise Error, "XML validation failed"
88
+ doc.xpath('//@href | //@src').each do |elem|
89
+ check_uri(uri + elem.value)
127
90
  end
128
- xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
129
91
  end
130
92
 
131
93
  def show_errors(errors)
@@ -140,13 +102,17 @@ class WebChecker
140
102
  end
141
103
  end
142
104
 
105
+ def http?(uri)
106
+ !uri.scheme || %w[http https].include?(uri.scheme)
107
+ end
108
+
143
109
  def local?(uri)
144
110
  (!uri.scheme && !uri.host) ||
145
111
  (uri.scheme == @site_uri.scheme && uri.host == @site_uri.host && uri.port == @site_uri.port)
146
112
  end
147
113
 
148
114
  def seen?(uri)
149
- @visited[uri]
115
+ @seen[uri]
150
116
  end
151
117
 
152
118
  def report
data/web-checker.gemspec CHANGED
@@ -18,12 +18,10 @@ Gem::Specification.new do |s|
18
18
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
19
19
  s.require_path = 'lib'
20
20
 
21
- s.add_dependency 'addressable', '~> 2'
22
- s.add_dependency 'http', '~> 4'
23
- s.add_dependency 'nokogiri', '~> 1'
24
- s.add_dependency 'nokogumbo', '~> 2'
25
- s.add_dependency 'path', '~> 2'
21
+ s.add_dependency 'addressable', '~> 2.8'
22
+ s.add_dependency 'http', '~> 5.0'
23
+ s.add_dependency 'nokogiri', '~> 1.12'
24
+ s.add_dependency 'path', '~> 2.0'
26
25
 
27
- s.add_development_dependency 'rake', '~> 12'
28
- s.add_development_dependency 'rubygems-tasks', '~> 0.2'
26
+ s.add_development_dependency 'rake', '~> 13.0'
29
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web-checker
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Labovitz
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-29 00:00:00.000000000 Z
11
+ date: 2021-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,98 +16,70 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2'
19
+ version: '2.8'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2'
26
+ version: '2.8'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: http
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '4'
33
+ version: '5.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '4'
40
+ version: '5.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1'
47
+ version: '1.12'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1'
55
- - !ruby/object:Gem::Dependency
56
- name: nokogumbo
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '2'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '2'
54
+ version: '1.12'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: path
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '2'
61
+ version: '2.0'
76
62
  type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '2'
68
+ version: '2.0'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: rake
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '12'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - "~>"
95
- - !ruby/object:Gem::Version
96
- version: '12'
97
- - !ruby/object:Gem::Dependency
98
- name: rubygems-tasks
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '0.2'
75
+ version: '13.0'
104
76
  type: :development
105
77
  prerelease: false
106
78
  version_requirements: !ruby/object:Gem::Requirement
107
79
  requirements:
108
80
  - - "~>"
109
81
  - !ruby/object:Gem::Version
110
- version: '0.2'
82
+ version: '13.0'
111
83
  description: "\n WebChecker checks static websites for consistency.\n "
112
84
  email: johnl@johnlabovitz.com
113
85
  executables:
@@ -116,6 +88,8 @@ extensions: []
116
88
  extra_rdoc_files: []
117
89
  files:
118
90
  - ".gitignore"
91
+ - Gemfile
92
+ - Gemfile.lock
119
93
  - Rakefile
120
94
  - bin/web-checker
121
95
  - lib/web-checker.rb
@@ -127,7 +101,7 @@ homepage: http://github.com/jslabovitz/web-checker
127
101
  licenses:
128
102
  - MIT
129
103
  metadata: {}
130
- post_install_message:
104
+ post_install_message:
131
105
  rdoc_options: []
132
106
  require_paths:
133
107
  - lib
@@ -142,9 +116,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
116
  - !ruby/object:Gem::Version
143
117
  version: '0'
144
118
  requirements: []
145
- rubyforge_project:
146
- rubygems_version: 2.7.7
147
- signing_key:
119
+ rubygems_version: 3.2.27
120
+ signing_key:
148
121
  specification_version: 4
149
122
  summary: Check static websites for consistency.
150
123
  test_files: []