web-checker 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d2121dbc5997e578f935a0cc26174e9c714d4d7d0ea57968a3161310290b630
4
- data.tar.gz: 8799aef79293dcc714a7abae2d917c9e665367a57e4b61481065966f02874aa4
3
+ metadata.gz: 5b78dbee67359c44fa8f2bed401f9d308f700c18602babff9177e65d77b1623e
4
+ data.tar.gz: 517db6e636ebc4d7ff6f136c6fef49549848bba9a99140efbf6392c645b6a178
5
5
  SHA512:
6
- metadata.gz: 59c2cf78d72952d6989968cfdfb43d6919833398aa662293d16805e418f0a9684d354bc6edf59f8bcbf24c66dc0cf7138e872a3e54abda4bd417a3bed0942799
7
- data.tar.gz: 9d280283a9067380d5196e9b87cc5c965fa7cb587f10ed195423df415aeeb887cd7117169ec72da7fde486dc3490df56a2532fbd19141891054d5e7857648131
6
+ metadata.gz: 354cdfcb90dd17744ccb1e925405ce6b564530f07b5e46414e99727671c2d8dd9afbad4cd4b4ba41b961e3290ad8906782d801bdb7364980efe08a1fd535a46b
7
+ data.tar.gz: aee34beaad0221d622e52dd95b9002d24d07401fb4f7e3ec2ae08efcf55646585ab8b01a47bff654f316f89b3b69280cbf0d2ab35ce76aa7347c95d912a86cf5
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,50 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ web-checker (0.5)
5
+ addressable (~> 2.8)
6
+ http (~> 5.0)
7
+ nokogiri (~> 1.12)
8
+ path (~> 2.0)
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ addressable (2.8.0)
14
+ public_suffix (>= 2.0.2, < 5.0)
15
+ domain_name (0.5.20190701)
16
+ unf (>= 0.0.5, < 1.0.0)
17
+ ffi (1.15.4)
18
+ ffi-compiler (1.0.1)
19
+ ffi (>= 1.0.0)
20
+ rake
21
+ http (5.0.2)
22
+ addressable (~> 2.8)
23
+ http-cookie (~> 1.0)
24
+ http-form_data (~> 2.2)
25
+ llhttp-ffi (~> 0.4.0)
26
+ http-cookie (1.0.4)
27
+ domain_name (~> 0.5)
28
+ http-form_data (2.3.0)
29
+ llhttp-ffi (0.4.0)
30
+ ffi-compiler (~> 1.0)
31
+ rake (~> 13.0)
32
+ nokogiri (1.12.4-arm64-darwin)
33
+ racc (~> 1.4)
34
+ path (2.0.1)
35
+ public_suffix (4.0.6)
36
+ racc (1.5.2)
37
+ rake (13.0.6)
38
+ unf (0.1.4)
39
+ unf_ext
40
+ unf_ext (0.0.8)
41
+
42
+ PLATFORMS
43
+ arm64-darwin-20
44
+
45
+ DEPENDENCIES
46
+ rake (~> 13.0)
47
+ web-checker!
48
+
49
+ BUNDLED WITH
50
+ 2.2.27
data/Rakefile CHANGED
@@ -1,2 +1,9 @@
1
- require 'rubygems/tasks'
2
- Gem::Tasks.new
1
+ require 'bundler/gem_tasks'
2
+ Bundler.require
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.test_files = FileList['test/*test.rb']
7
+ end
8
+
9
+ task :default => :test
@@ -1,5 +1,5 @@
1
1
  class WebChecker
2
2
 
3
- VERSION = '0.4'
3
+ VERSION = '0.5'
4
4
 
5
5
  end
data/lib/web-checker.rb CHANGED
@@ -1,26 +1,14 @@
1
1
  require 'addressable'
2
2
  require 'http'
3
3
  require 'nokogiri'
4
- require 'nokogumbo'
5
4
  require 'path'
6
5
 
7
6
  class WebChecker
8
7
 
9
- IgnoreErrors = %Q{
10
- <table> lacks "summary" attribute
11
- <img> lacks "alt" attribute
12
- <form> proprietary attribute "novalidate"
13
- <input> attribute "type" has invalid value "email"
14
- <input> attribute "tabindex" has invalid value "-1"
15
- <input> proprietary attribute "border"
16
- trimming empty <p>
17
- <iframe> proprietary attribute "allowfullscreen"
18
- }.split(/\n/).map(&:strip)
19
- LinkElementsXPath = '//@href | //@src'
20
8
  SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'
21
- Schemas = {
22
- 'feed' => SchemasDir / 'atom.xsd',
23
- 'urlset' => SchemasDir / 'sitemap.xsd',
9
+ SchemaNames = {
10
+ 'feed' => 'atom',
11
+ 'urlset' => 'sitemap',
24
12
  }
25
13
 
26
14
  class Error < Exception; end
@@ -29,7 +17,8 @@ class WebChecker
29
17
  @site_uri = Addressable::URI.parse(site_uri)
30
18
  @site_dir = Path.new(site_dir)
31
19
  @schemas = {}
32
- @visited = {}
20
+ @seen = {}
21
+ @files = []
33
22
  end
34
23
 
35
24
  def check
@@ -41,26 +30,27 @@ class WebChecker
41
30
  def check_uri(uri)
42
31
  uri = Addressable::URI.parse(uri)
43
32
  uri.normalize!
44
- return unless local?(uri) && !seen?(uri)
45
- # ;;warn "CHECKING: #{uri}"
33
+ return if seen?(uri)
34
+ return unless http?(uri)
35
+ is_local = local?(uri)
36
+ ;;warn "CHECKING: #{uri}"
46
37
  response = HTTP.get(uri)
47
38
  # ;;pp(response: response)
48
- @visited[uri] = true
39
+ @seen[uri] = true
49
40
  case response.code
50
41
  when 200...300
51
- body = response.body.to_s
52
- # ;;pp(body: body)
53
- case (type = response.headers['Content-Type'])
54
- when 'text/html'
55
- check_html(uri, body)
56
- when 'text/css'
57
- check_css(uri, body)
58
- when 'application/xml', 'text/xml'
59
- check_xml(uri, body)
60
- when 'image/jpeg', 'image/png', 'image/gif', 'application/javascript'
61
- # ignore
62
- else
63
- ;;warn "skipping unknown resource type: #{uri} (#{type})"
42
+ if is_local
43
+ data = response.body.to_s
44
+ case (type = response.headers['Content-Type'])
45
+ when 'text/html', 'text/xml', 'application/xml'
46
+ check_markup(uri, data)
47
+ when 'text/css'
48
+ check_css(uri, data)
49
+ when %r{^image/}, 'application/javascript'
50
+ # ignore
51
+ else
52
+ ;;warn "skipping unknown resource type: #{uri} (#{type})"
53
+ end
64
54
  end
65
55
  when 300...400
66
56
  redirect_uri = Addressable::URI.parse(response.headers['Location'])
@@ -72,60 +62,32 @@ class WebChecker
72
62
  end
73
63
  end
74
64
 
75
- def check_html(uri, html)
76
- check_html_tidy(uri, html)
77
- check_html_nokogiri(uri, html)
78
- end
79
-
80
- def check_html_tidy(uri, html)
81
- tmp_file = Path.tmpfile
82
- tmp_file.write(html)
83
- errors = %x{tidy -utf8 -quiet -errors #{tmp_file} 2>&1}.split("\n")
84
- errors = errors.map { |str|
85
- # line 82 column 1 - Warning: <table> lacks "summary" attribute
86
- str =~ /^line (\d+) column (\d+) - (.*?): (.*)$/ or raise "Can't parse error: #{str.inspect}"
87
- {
88
- msg: str,
89
- line: $1.to_i,
90
- column: $2.to_i,
91
- type: $3.downcase.to_sym,
92
- error: $4.strip,
93
- }
94
- }.reject { |e|
95
- IgnoreErrors.include?(e[:error])
96
- }
97
- unless errors.empty?
98
- warn "#{uri} has invalid HTML"
99
- show_errors(errors)
100
- raise Error, "HTML parsing failed (via Tidy)"
65
+ def check_markup(uri, data)
66
+ # ;;warn "validating markup: #{uri}"
67
+ doc = case data
68
+ when /^<\?xml/i
69
+ Nokogiri::XML(data) { |c| c.strict }
70
+ when /^<!DOCTYPE html>/i
71
+ Nokogiri::HTML5(data, max_errors: -1)
72
+ else
73
+ Nokogiri::HTML4(data) { |c| c.strict }
101
74
  end
102
- end
103
-
104
- def check_html_nokogiri(uri, html)
105
- doc_class = (html =~ /<!DOCTYPE html>/i) ? Nokogiri::HTML5 : Nokogiri::HTML
106
- doc = doc_class.parse(html) { |config| config.strict }
107
75
  unless doc.errors.empty?
108
76
  show_errors(doc.errors)
109
- raise Error, "HTML parsing failed (via Nokogiri)"
77
+ raise Error, "markup parsing failed"
110
78
  end
111
- doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
112
- end
113
-
114
- def check_xml(uri, xml)
115
- xml_doc = Nokogiri::XML::Document.parse(xml) { |config| config.strict }
116
- unless xml_doc.errors.empty?
117
- show_errors(xml_doc.errors)
118
- raise Error, "XML parsing failed"
79
+ if (schema_name = SchemaNames[doc.root.name])
80
+ schema_file = (SchemasDir / schema_name).add_extension('.xsd')
81
+ schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
82
+ validation_errors = schema.validate(doc)
83
+ unless validation_errors.empty?
84
+ show_errors(validation_errors)
85
+ raise Error, "schema validation failed"
86
+ end
119
87
  end
120
- root_name = xml_doc.root.name
121
- schema_file = Schemas[root_name] or raise Error, "Unknown schema: #{root_name.inspect}"
122
- schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.open))
123
- validation_errors = schema.validate(xml_doc)
124
- unless validation_errors.empty?
125
- show_errors(validation_errors)
126
- raise Error, "XML validation failed"
88
+ doc.xpath('//@href | //@src').each do |elem|
89
+ check_uri(uri + elem.value)
127
90
  end
128
- xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
129
91
  end
130
92
 
131
93
  def show_errors(errors)
@@ -140,13 +102,17 @@ class WebChecker
140
102
  end
141
103
  end
142
104
 
105
+ def http?(uri)
106
+ !uri.scheme || %w[http https].include?(uri.scheme)
107
+ end
108
+
143
109
  def local?(uri)
144
110
  (!uri.scheme && !uri.host) ||
145
111
  (uri.scheme == @site_uri.scheme && uri.host == @site_uri.host && uri.port == @site_uri.port)
146
112
  end
147
113
 
148
114
  def seen?(uri)
149
- @visited[uri]
115
+ @seen[uri]
150
116
  end
151
117
 
152
118
  def report
data/web-checker.gemspec CHANGED
@@ -18,12 +18,10 @@ Gem::Specification.new do |s|
18
18
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
19
19
  s.require_path = 'lib'
20
20
 
21
- s.add_dependency 'addressable', '~> 2'
22
- s.add_dependency 'http', '~> 4'
23
- s.add_dependency 'nokogiri', '~> 1'
24
- s.add_dependency 'nokogumbo', '~> 2'
25
- s.add_dependency 'path', '~> 2'
21
+ s.add_dependency 'addressable', '~> 2.8'
22
+ s.add_dependency 'http', '~> 5.0'
23
+ s.add_dependency 'nokogiri', '~> 1.12'
24
+ s.add_dependency 'path', '~> 2.0'
26
25
 
27
- s.add_development_dependency 'rake', '~> 12'
28
- s.add_development_dependency 'rubygems-tasks', '~> 0.2'
26
+ s.add_development_dependency 'rake', '~> 13.0'
29
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web-checker
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Labovitz
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-29 00:00:00.000000000 Z
11
+ date: 2021-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,98 +16,70 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2'
19
+ version: '2.8'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2'
26
+ version: '2.8'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: http
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '4'
33
+ version: '5.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '4'
40
+ version: '5.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1'
47
+ version: '1.12'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1'
55
- - !ruby/object:Gem::Dependency
56
- name: nokogumbo
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '2'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '2'
54
+ version: '1.12'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: path
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '2'
61
+ version: '2.0'
76
62
  type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '2'
68
+ version: '2.0'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: rake
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '12'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - "~>"
95
- - !ruby/object:Gem::Version
96
- version: '12'
97
- - !ruby/object:Gem::Dependency
98
- name: rubygems-tasks
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '0.2'
75
+ version: '13.0'
104
76
  type: :development
105
77
  prerelease: false
106
78
  version_requirements: !ruby/object:Gem::Requirement
107
79
  requirements:
108
80
  - - "~>"
109
81
  - !ruby/object:Gem::Version
110
- version: '0.2'
82
+ version: '13.0'
111
83
  description: "\n WebChecker checks static websites for consistency.\n "
112
84
  email: johnl@johnlabovitz.com
113
85
  executables:
@@ -116,6 +88,8 @@ extensions: []
116
88
  extra_rdoc_files: []
117
89
  files:
118
90
  - ".gitignore"
91
+ - Gemfile
92
+ - Gemfile.lock
119
93
  - Rakefile
120
94
  - bin/web-checker
121
95
  - lib/web-checker.rb
@@ -127,7 +101,7 @@ homepage: http://github.com/jslabovitz/web-checker
127
101
  licenses:
128
102
  - MIT
129
103
  metadata: {}
130
- post_install_message:
104
+ post_install_message:
131
105
  rdoc_options: []
132
106
  require_paths:
133
107
  - lib
@@ -142,9 +116,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
116
  - !ruby/object:Gem::Version
143
117
  version: '0'
144
118
  requirements: []
145
- rubyforge_project:
146
- rubygems_version: 2.7.7
147
- signing_key:
119
+ rubygems_version: 3.2.27
120
+ signing_key:
148
121
  specification_version: 4
149
122
  summary: Check static websites for consistency.
150
123
  test_files: []