web-checker 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Rakefile +2 -0
- data/bin/web-checker +9 -0
- data/lib/web-checker.rb +161 -0
- data/lib/web-checker/schemas/atom.xsd +244 -0
- data/lib/web-checker/schemas/sitemap.xsd +116 -0
- data/lib/web-checker/version.rb +5 -0
- data/web-checker.gemspec +29 -0
- metadata +150 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 89ddbaf96851d6efffd0acf6c7f86ac498710238f8ff0e078bed0fb5d8c40a4e
|
4
|
+
data.tar.gz: c776d89e28ec22d8230aebce749abc2ab475a8c62735b2fe40d36fc539a5e3fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 386bf65bcb250c8a0a192a97f200e5df15b5fec67a2dd80a65d0d964a7ccfbb47919fcbed5acf3d690e192bb95fe823e5f6671d2949e7703e48320b63346a6fb
|
7
|
+
data.tar.gz: ec7f58a6a3a1eb360b219aa135213c2b17d375e793961762ab1fac3471d0864904c4133a0ca63475a636cbb7e93d5117e47230f825df3f36aef9fca3cf7d4793
|
data/.gitignore
ADDED
data/Rakefile
ADDED
data/bin/web-checker
ADDED
data/lib/web-checker.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'addressable'
|
2
|
+
require 'http'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'nokogumbo'
|
5
|
+
require 'path'
|
6
|
+
|
7
|
+
class WebChecker
|
8
|
+
|
9
|
+
# Tidy diagnostics that are tolerated. check_html_tidy compares each entry
# verbatim against the message part of a Tidy output line (the text after
# "Warning: " / "Error: ").
#
# Blank entries produced by the first and last lines of the literal are
# dropped so that an empty message is never treated as ignorable, and the
# list is frozen because it is shared, read-only data.
IgnoreErrors = %Q{
  <table> lacks "summary" attribute
  <img> lacks "alt" attribute
  <form> proprietary attribute "novalidate"
  <input> attribute "type" has invalid value "email"
  <input> attribute "tabindex" has invalid value "-1"
  <input> proprietary attribute "border"
  trimming empty <p>
  <iframe> proprietary attribute "allowfullscreen"
}.split(/\n/).map(&:strip).reject(&:empty?).freeze
|
19
|
+
# XPath selecting every href and src attribute in a document; the attribute
# values are the links that get followed.
LinkElementsXPath = '//@href | //@src'.freeze

# Directory holding the XSD files, located relative to this source file.
SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'

# Maps the name of an XML document's root element to the schema file used
# to validate it. Frozen: this is a fixed lookup table.
Schemas = {
  'feed' => SchemasDir / 'atom.xsd',
  'urlset' => SchemasDir / 'sitemap.xsd',
}.freeze
|
25
|
+
|
26
|
+
# Raised when a checked resource is missing, malformed or invalid.
# Derives from StandardError (not Exception) so that an ordinary `rescue`
# catches it and it is not lumped together with signals and interpreter exits.
class Error < StandardError; end
|
27
|
+
|
28
|
+
# Set up a checker for one site.
#
# site_uri:: root URI of the site; anything Addressable::URI.parse accepts
# site_dir:: directory of the site's files; wrapped in a Path
def initialize(site_uri:, site_dir:)
  # check_uri normalises every candidate URI before asking local? whether it
  # belongs to the site, so the site URI is normalised too. Otherwise a
  # mixed-case host or an explicit default port in site_uri would make every
  # URI (including the root itself) compare as foreign.
  @site_uri = Addressable::URI.parse(site_uri).normalize
  @site_dir = Path.new(site_dir)
  @schemas = {}   # compiled XML schemas, keyed by schema file (see check_xml)
  @visited = {}   # URIs already fetched (see check_uri / seen?)
  @files = []     # read by #report; nothing in this class fills it yet
end
|
34
|
+
|
35
|
+
# Entry point: check the site, starting from the configured site URI.
# Everything reachable from there is handled by check_uri.
def check
  # TODO: get/parse robots
  # TODO: get/parse sitemap
  check_uri(@site_uri)
end
|
40
|
+
|
41
|
+
# Fetch one URI and dispatch its body to the checker for its media type.
#
# URIs that are not local to the site, or that were already fetched, are
# skipped silently. Redirects are followed by checking the target URI.
#
# uri:: String or Addressable::URI
#
# Raises Error for a 404, for a redirect without a Location header and for
# any other unexpected status.
def check_uri(uri)
  uri = Addressable::URI.parse(uri)
  uri.normalize!
  # The fragment is never sent to the server, so "page#a" and "page#b" are
  # the same resource; drop it to avoid fetching the page once per anchor.
  uri.fragment = nil
  return unless local?(uri) && !seen?(uri)

  response = HTTP.get(uri)
  @visited[uri] = true

  case response.code
  when 200...300
    body = response.body.to_s
    raw_type = response.headers['Content-Type']
    # Compare only the media type: the header commonly carries parameters
    # ("text/html; charset=utf-8") and may differ in letter case.
    media_type = raw_type.to_s.split(';', 2).first.to_s.strip.downcase
    case media_type
    when 'text/html'
      check_html(uri, body)
    when 'text/css'
      check_css(uri, body)
    when 'application/xml', 'text/xml'
      check_xml(uri, body)
    when 'image/jpeg', 'image/png', 'image/gif', 'application/javascript'
      # ignore
    else
      warn "skipping unknown resource type: #{uri} (#{raw_type})"
    end
  when 300...400
    location = response.headers['Location']
    raise Error, "Redirect without Location header: #{uri}" unless location
    # Location may be relative; resolve it against the URI just fetched.
    check_uri(uri + Addressable::URI.parse(location))
  when 404
    raise Error, "URI not found: #{uri}"
  else
    raise Error, "Bad status: #{response.inspect}"
  end
end
|
74
|
+
|
75
|
+
# Validate one HTML document with both checkers, Tidy first and then
# Nokogiri. Either checker raises Error when it finds a problem.
#
# uri::  URI the document was fetched from
# html:: document source as a String
def check_html(uri, html)
  check_html_tidy(uri, html)
  check_html_nokogiri(uri, html)
end
|
79
|
+
|
80
|
+
# Run the external `tidy` program over the document and fail on any
# diagnostic that is not listed in IgnoreErrors.
#
# uri::  URI the document was fetched from (used in the warning only)
# html:: document source as a String
#
# Raises Error when diagnostics remain after filtering, and RuntimeError
# when a line of tidy output does not have the expected shape.
def check_html_tidy(uri, html)
  tmp_file = Path.tmpfile
  begin
    tmp_file.write(html)
    output = %x{tidy -utf8 -quiet -errors #{tmp_file} 2>&1}
  ensure
    # Do not leave one temporary file behind per checked page.
    tmp_file.unlink if tmp_file.exist?
  end

  errors = output.split("\n").map { |str|
    # line 82 column 1 - Warning: <table> lacks "summary" attribute
    match = /^line (\d+) column (\d+) - (.*?): (.*)$/.match(str) or raise "Can't parse error: #{str.inspect}"
    {
      msg: str,
      line: match[1].to_i,
      column: match[2].to_i,
      type: match[3].downcase.to_sym,
      error: match[4].strip,
    }
  }.reject { |e|
    IgnoreErrors.include?(e[:error])
  }
  return if errors.empty?

  warn "#{uri} has invalid HTML"
  show_errors(errors)
  raise Error, "HTML parsing failed (via Tidy)"
end
|
103
|
+
|
104
|
+
# Parse the document with Nokogiri, fail on any parse error, then follow
# every href/src attribute found in it.
#
# Documents containing "<!DOCTYPE html>" (case-insensitive) are parsed with
# Nokogiri::HTML5, all others with Nokogiri::HTML.
#
# uri::  URI the document was fetched from; links are resolved against it
# html:: document source as a String
#
# Raises Error when the parser reports errors.
def check_html_nokogiri(uri, html)
  parser = (html =~ /<!DOCTYPE html>/i) ? Nokogiri::HTML5 : Nokogiri::HTML
  doc = parser.parse(html) { |config| config.strict }
  unless doc.errors.empty?
    # Say which document failed, as check_html_tidy does.
    warn "#{uri} has invalid HTML"
    show_errors(doc.errors)
    raise Error, "HTML parsing failed (via Nokogiri)"
  end
  doc.xpath(LinkElementsXPath).each { |attr| check_uri(uri + attr.value) }
end
|
113
|
+
|
114
|
+
# Parse an XML document, validate it against the schema registered for its
# root element, then follow every href/src attribute found in it.
#
# uri:: URI the document was fetched from; links are resolved against it
# xml:: document source as a String
#
# Raises Error when parsing reports errors, when no schema is registered
# for the root element, or when schema validation fails.
def check_xml(uri, xml)
  xml_doc = Nokogiri::XML::Document.parse(xml) { |config| config.strict }
  unless xml_doc.errors.empty?
    warn "#{uri} has invalid XML"
    show_errors(xml_doc.errors)
    raise Error, "XML parsing failed"
  end

  root_name = xml_doc.root.name
  schema_file = Schemas[root_name] or raise Error, "Unknown schema: #{root_name.inspect}"
  # Each schema is compiled once and cached. The block form of open closes
  # the file handle as soon as the schema has been read.
  schema = (@schemas[schema_file] ||= schema_file.open { |io| Nokogiri::XML::Schema(io) })

  validation_errors = schema.validate(xml_doc)
  unless validation_errors.empty?
    warn "#{uri} has invalid XML"
    show_errors(validation_errors)
    raise Error, "XML validation failed"
  end

  xml_doc.xpath(LinkElementsXPath).each { |attr| check_uri(uri + attr.value) }
end
|
130
|
+
|
131
|
+
# Print one line per error to standard error.
#
# errors:: a list whose entries are either
#          * Hashes with :error, :line and :column keys, as built by
#            check_html_tidy, or
#          * objects responding to #line and #column (and #to_s), such as
#            the error objects passed in by check_html_nokogiri / check_xml.
#
# Indexing with [:line] only works for the Hash form, so the two shapes are
# told apart here instead of raising NoMethodError on parser error objects.
def show_errors(errors)
  errors.each do |error|
    if error.respond_to?(:line)
      text = error.to_s
      line = error.line
      column = error.column
    else
      # Print the parsed message rather than a dump of the whole Hash.
      text = error[:error]
      line = error[:line]
      column = error[:column]
    end
    warn "#{text} [line #{line}, column #{column}]"
  end
end
|
136
|
+
|
137
|
+
# Follow every url(...) reference in a stylesheet.
#
# Double-quoted, single-quoted and unquoted forms are all recognised; each
# reference is resolved against the stylesheet's own URI and handed to
# check_uri.
#
# uri:: URI the stylesheet was fetched from
# css:: stylesheet source as a String
def check_css(uri, css)
  url_ref = /\burl\(\s*(?:"([^"]*)"|'([^']*)'|([^\s"'()]+))\s*\)/
  # scan, not gsub: the text is only inspected, never rewritten.
  css.scan(url_ref) do |double_quoted, single_quoted, bare|
    check_uri(uri + (double_quoted || single_quoted || bare))
  end
end
|
142
|
+
|
143
|
+
# Whether a URI belongs to the site being checked.
#
# A URI with neither scheme nor host is local. Otherwise scheme, host and
# port must all match the site URI. Scheme and host are compared without
# regard to letter case, and ports are compared after filling in the
# scheme's default, so "HTTP://Example.com:80" and "http://example.com"
# count as the same site.
#
# uri:: an Addressable::URI (anything responding to scheme, host and
#       inferred_port)
def local?(uri)
  return true if !uri.scheme && !uri.host

  site = @site_uri
  uri.scheme.to_s.downcase == site.scheme.to_s.downcase &&
    uri.host.to_s.downcase == site.host.to_s.downcase &&
    uri.inferred_port == site.inferred_port
end
|
147
|
+
|
148
|
+
# Whether the URI has already been recorded in @visited.
# Always returns true or false (never nil), as a predicate should.
def seen?(uri)
  @visited.key?(uri)
end
|
151
|
+
|
152
|
+
# Print the sorted list of unreferenced files held in @files to standard
# output. Prints nothing when the list is empty or was never assigned
# (calling empty? on an unassigned @files would raise NoMethodError).
def report
  files = Array(@files)
  return if files.empty?

  puts "\t" + "unreferenced files:"
  files.sort.each do |path|
    puts "\t\t" + path.to_s
  end
end
|
160
|
+
|
161
|
+
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8" ?>
|
2
|
+
<xs:schema targetNamespace="http://www.w3.org/2005/Atom" elementFormDefault="qualified"
|
3
|
+
attributeFormDefault="unqualified"
|
4
|
+
xmlns:atom="http://www.w3.org/2005/Atom"
|
5
|
+
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
6
|
+
xmlns:xml="http://www.w3.org/XML/1998/namespace">
|
7
|
+
<xs:annotation>
|
8
|
+
<xs:documentation>
|
9
|
+
This version of the Atom schema is based on version 1.0 of the format specifications,
|
10
|
+
found here http://www.atomenabled.org/developers/syndication/atom-format-spec.php.
|
11
|
+
</xs:documentation>
|
12
|
+
</xs:annotation>
|
13
|
+
<xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/03/xml.xsd" />
|
14
|
+
<xs:annotation>
|
15
|
+
<xs:documentation>
|
16
|
+
An Atom document may have two root elements, feed and entry, as defined in section 2.
|
17
|
+
</xs:documentation>
|
18
|
+
</xs:annotation>
|
19
|
+
<xs:element name="feed" type="atom:feedType"/>
|
20
|
+
<xs:element name="entry" type="atom:entryType"/>
|
21
|
+
<xs:complexType name="textType" mixed="true">
|
22
|
+
<xs:annotation>
|
23
|
+
<xs:documentation>
|
24
|
+
The Atom text construct is defined in section 3.1 of the format spec.
|
25
|
+
</xs:documentation>
|
26
|
+
</xs:annotation>
|
27
|
+
<xs:sequence>
|
28
|
+
<xs:any namespace="http://www.w3.org/1999/xhtml" minOccurs="0"/>
|
29
|
+
</xs:sequence>
|
30
|
+
<xs:attribute name="type" >
|
31
|
+
<xs:simpleType>
|
32
|
+
<xs:restriction base="xs:token">
|
33
|
+
<xs:enumeration value="text"/>
|
34
|
+
<xs:enumeration value="html"/>
|
35
|
+
<xs:enumeration value="xhtml"/>
|
36
|
+
</xs:restriction>
|
37
|
+
</xs:simpleType>
|
38
|
+
</xs:attribute>
|
39
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
40
|
+
</xs:complexType>
|
41
|
+
<xs:complexType name="personType">
|
42
|
+
<xs:annotation>
|
43
|
+
<xs:documentation>
|
44
|
+
The Atom person construct is defined in section 3.2 of the format spec.
|
45
|
+
</xs:documentation>
|
46
|
+
</xs:annotation>
|
47
|
+
<xs:choice minOccurs="1" maxOccurs="unbounded">
|
48
|
+
<xs:element name="name" type="xs:string" minOccurs="1" maxOccurs="1" />
|
49
|
+
<xs:element name="uri" type="atom:uriType" minOccurs="0" maxOccurs="1" />
|
50
|
+
<xs:element name="email" type="atom:emailType" minOccurs="0" maxOccurs="1" />
|
51
|
+
<xs:any namespace="##other"/>
|
52
|
+
</xs:choice>
|
53
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
54
|
+
</xs:complexType>
|
55
|
+
<xs:simpleType name="emailType">
|
56
|
+
<xs:annotation>
|
57
|
+
<xs:documentation>
|
58
|
+
Schema definition for an email address.
|
59
|
+
</xs:documentation>
|
60
|
+
</xs:annotation>
|
61
|
+
<xs:restriction base="xs:normalizedString">
|
62
|
+
<!--
|
63
|
+
<xs:pattern value="\w+@(\w+\.)+\w+" />
|
64
|
+
-->
|
65
|
+
<xs:maxLength value="254"/>
|
66
|
+
<xs:pattern value="[_\-a-zA-Z0-9\.\+]+@[a-zA-Z0-9](\.?[\-a-zA-Z0-9]*[a-zA-Z0-9])*"/>
|
67
|
+
</xs:restriction>
|
68
|
+
</xs:simpleType>
|
69
|
+
<xs:complexType name="feedType">
|
70
|
+
<xs:annotation>
|
71
|
+
<xs:documentation>
|
72
|
+
The Atom feed construct is defined in section 4.1.1 of the format spec.
|
73
|
+
</xs:documentation>
|
74
|
+
</xs:annotation>
|
75
|
+
<xs:choice minOccurs="3" maxOccurs="unbounded">
|
76
|
+
<xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
77
|
+
<xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
|
78
|
+
<xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
79
|
+
<xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1" />
|
80
|
+
<xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1" />
|
81
|
+
<xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
|
82
|
+
<xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
|
83
|
+
<xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1" />
|
84
|
+
<xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
85
|
+
<xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
86
|
+
<xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
|
87
|
+
<xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
|
88
|
+
<xs:element name="entry" type="atom:entryType" minOccurs="0" maxOccurs="unbounded" />
|
89
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
|
90
|
+
</xs:choice>
|
91
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
92
|
+
</xs:complexType>
|
93
|
+
<xs:complexType name="entryType">
|
94
|
+
<xs:annotation>
|
95
|
+
<xs:documentation>
|
96
|
+
The Atom entry construct is defined in section 4.1.2 of the format spec.
|
97
|
+
</xs:documentation>
|
98
|
+
</xs:annotation>
|
99
|
+
<xs:choice maxOccurs="unbounded">
|
100
|
+
<xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
101
|
+
<xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
|
102
|
+
<xs:element name="content" type="atom:contentType" minOccurs="0" maxOccurs="1" />
|
103
|
+
<xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
104
|
+
<xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
|
105
|
+
<xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
|
106
|
+
<xs:element name="published" type="atom:dateTimeType" minOccurs="0" maxOccurs="1" />
|
107
|
+
<xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
108
|
+
<!-- atom:source carries feed metadata elements (format spec section 4.2.11), so it uses sourceType, which is defined in this schema, rather than textType. -->
<xs:element name="source" type="atom:sourceType" minOccurs="0" maxOccurs="1" />
|
109
|
+
<xs:element name="summary" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
110
|
+
<xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
|
111
|
+
<xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
|
112
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
|
113
|
+
</xs:choice>
|
114
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
115
|
+
</xs:complexType>
|
116
|
+
<xs:complexType name="contentType" mixed="true">
|
117
|
+
<xs:annotation>
|
118
|
+
<xs:documentation>
|
119
|
+
The Atom content construct is defined in section 4.1.3 of the format spec.
|
120
|
+
</xs:documentation>
|
121
|
+
</xs:annotation>
|
122
|
+
<xs:sequence>
|
123
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded" />
|
124
|
+
</xs:sequence>
|
125
|
+
<xs:attribute name="type" type="xs:string"/>
|
126
|
+
<xs:attribute name="src" type="xs:anyURI"/>
|
127
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
128
|
+
</xs:complexType>
|
129
|
+
<xs:complexType name="categoryType">
|
130
|
+
<xs:annotation>
|
131
|
+
<xs:documentation>
|
132
|
+
The Atom category construct is defined in section 4.2.2 of the format spec.
|
133
|
+
</xs:documentation>
|
134
|
+
</xs:annotation>
|
135
|
+
<xs:attribute name="term" type="xs:string" use="required"/>
|
136
|
+
<xs:attribute name="scheme" type="xs:anyURI" use="optional"/>
|
137
|
+
<xs:attribute name="label" type="xs:string" use="optional"/>
|
138
|
+
<xs:attributeGroup ref="atom:commonAttributes" />
|
139
|
+
</xs:complexType>
|
140
|
+
<xs:complexType name="generatorType">
|
141
|
+
<xs:annotation>
|
142
|
+
<xs:documentation>
|
143
|
+
The Atom generator element is defined in section 4.2.4 of the format spec.
|
144
|
+
</xs:documentation>
|
145
|
+
</xs:annotation>
|
146
|
+
<xs:simpleContent>
|
147
|
+
<xs:extension base="xs:string">
|
148
|
+
<xs:attribute name="uri" use="optional" type="xs:anyURI" />
|
149
|
+
<xs:attribute name="version" use="optional" type="xs:string" />
|
150
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
151
|
+
</xs:extension>
|
152
|
+
</xs:simpleContent>
|
153
|
+
</xs:complexType>
|
154
|
+
<xs:complexType name="iconType">
|
155
|
+
<xs:annotation>
|
156
|
+
<xs:documentation>
|
157
|
+
The Atom icon construct is defined in section 4.2.5 of the format spec.
|
158
|
+
</xs:documentation>
|
159
|
+
</xs:annotation>
|
160
|
+
<xs:simpleContent>
|
161
|
+
<xs:extension base="xs:anyURI">
|
162
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
163
|
+
</xs:extension>
|
164
|
+
</xs:simpleContent>
|
165
|
+
</xs:complexType>
|
166
|
+
<xs:complexType name="idType">
|
167
|
+
<xs:annotation>
|
168
|
+
<xs:documentation>
|
169
|
+
The Atom id construct is defined in section 4.2.6 of the format spec.
|
170
|
+
</xs:documentation>
|
171
|
+
</xs:annotation>
|
172
|
+
<xs:simpleContent>
|
173
|
+
<xs:extension base="xs:anyURI">
|
174
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
175
|
+
</xs:extension>
|
176
|
+
</xs:simpleContent>
|
177
|
+
</xs:complexType>
|
178
|
+
<xs:complexType name="linkType" mixed="true">
|
179
|
+
<xs:annotation>
|
180
|
+
<xs:documentation>
|
181
|
+
The Atom link construct is defined in section 3.4 of the format spec.
|
182
|
+
</xs:documentation>
|
183
|
+
</xs:annotation>
|
184
|
+
<xs:attribute name="href" use="required" type="xs:anyURI" />
|
185
|
+
<xs:attribute name="rel" type="xs:string" use="optional"/>
|
186
|
+
<xs:attribute name="type" use="optional" type="xs:string" />
|
187
|
+
<xs:attribute name="hreflang" use="optional" type="xs:NMTOKEN" />
|
188
|
+
<xs:attribute name="title" use="optional" type="xs:string" />
|
189
|
+
<xs:attribute name="length" use="optional" type="xs:positiveInteger" />
|
190
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
191
|
+
</xs:complexType>
|
192
|
+
<xs:complexType name="logoType">
|
193
|
+
<xs:annotation>
|
194
|
+
<xs:documentation>
|
195
|
+
The Atom logo construct is defined in section 4.2.8 of the format spec.
|
196
|
+
</xs:documentation>
|
197
|
+
</xs:annotation>
|
198
|
+
<xs:simpleContent>
|
199
|
+
<xs:extension base="xs:anyURI">
|
200
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
201
|
+
</xs:extension>
|
202
|
+
</xs:simpleContent>
|
203
|
+
</xs:complexType>
|
204
|
+
<xs:complexType name="sourceType">
|
205
|
+
<xs:annotation>
|
206
|
+
<xs:documentation>
|
207
|
+
The Atom source construct is defined in section 4.2.11 of the format spec.
|
208
|
+
</xs:documentation>
|
209
|
+
</xs:annotation>
|
210
|
+
<xs:choice maxOccurs="unbounded">
|
211
|
+
<xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
|
212
|
+
<xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded"/>
|
213
|
+
<xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
|
214
|
+
<xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1"/>
|
215
|
+
<xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1"/>
|
216
|
+
<xs:element name="id" type="atom:idType" minOccurs="0" maxOccurs="1"/>
|
217
|
+
<xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded"/>
|
218
|
+
<xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1"/>
|
219
|
+
<xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1"/>
|
220
|
+
<xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1"/>
|
221
|
+
<xs:element name="title" type="atom:textType" minOccurs="0" maxOccurs="1"/>
|
222
|
+
<xs:element name="updated" type="atom:dateTimeType" minOccurs="0" maxOccurs="1"/>
|
223
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
|
224
|
+
</xs:choice>
|
225
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
226
|
+
</xs:complexType>
|
227
|
+
<xs:complexType name="uriType">
|
228
|
+
<xs:simpleContent>
|
229
|
+
<xs:extension base="xs:anyURI">
|
230
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
231
|
+
</xs:extension>
|
232
|
+
</xs:simpleContent>
|
233
|
+
</xs:complexType>
|
234
|
+
<xs:complexType name="dateTimeType">
|
235
|
+
<xs:simpleContent>
|
236
|
+
<xs:extension base="xs:dateTime">
|
237
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
238
|
+
</xs:extension>
|
239
|
+
</xs:simpleContent>
|
240
|
+
</xs:complexType>
|
241
|
+
<xs:attributeGroup name="commonAttributes">
|
242
|
+
<xs:anyAttribute namespace="##other"/>
|
243
|
+
</xs:attributeGroup>
|
244
|
+
</xs:schema>
|
@@ -0,0 +1,116 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
|
4
|
+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
<xsd:annotation>
|
7
|
+
<xsd:documentation>
|
8
|
+
XML Schema for Sitemap files.
|
9
|
+
Last Modified 2008-03-26
|
10
|
+
</xsd:documentation>
|
11
|
+
</xsd:annotation>
|
12
|
+
|
13
|
+
<xsd:element name="urlset">
|
14
|
+
<xsd:annotation>
|
15
|
+
<xsd:documentation>
|
16
|
+
Container for a set of up to 50,000 document elements.
|
17
|
+
This is the root element of the XML file.
|
18
|
+
</xsd:documentation>
|
19
|
+
</xsd:annotation>
|
20
|
+
<xsd:complexType>
|
21
|
+
<xsd:sequence>
|
22
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
23
|
+
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
|
24
|
+
</xsd:sequence>
|
25
|
+
</xsd:complexType>
|
26
|
+
</xsd:element>
|
27
|
+
|
28
|
+
<xsd:complexType name="tUrl">
|
29
|
+
<xsd:annotation>
|
30
|
+
<xsd:documentation>
|
31
|
+
Container for the data needed to describe a document to crawl.
|
32
|
+
</xsd:documentation>
|
33
|
+
</xsd:annotation>
|
34
|
+
<xsd:sequence>
|
35
|
+
<xsd:element name="loc" type="tLoc"/>
|
36
|
+
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
|
37
|
+
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
|
38
|
+
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
|
39
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
40
|
+
</xsd:sequence>
|
41
|
+
</xsd:complexType>
|
42
|
+
|
43
|
+
<xsd:simpleType name="tLoc">
|
44
|
+
<xsd:annotation>
|
45
|
+
<xsd:documentation>
|
46
|
+
REQUIRED: The location URI of a document.
|
47
|
+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
|
48
|
+
</xsd:documentation>
|
49
|
+
</xsd:annotation>
|
50
|
+
<xsd:restriction base="xsd:anyURI">
|
51
|
+
<xsd:minLength value="12"/>
|
52
|
+
<xsd:maxLength value="2048"/>
|
53
|
+
</xsd:restriction>
|
54
|
+
</xsd:simpleType>
|
55
|
+
|
56
|
+
<xsd:simpleType name="tLastmod">
|
57
|
+
<xsd:annotation>
|
58
|
+
<xsd:documentation>
|
59
|
+
OPTIONAL: The date the document was last modified. The date must conform
|
60
|
+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
|
61
|
+
Example: 2005-05-10
|
62
|
+
Lastmod may also contain a timestamp.
|
63
|
+
Example: 2005-05-10T17:33:30+08:00
|
64
|
+
</xsd:documentation>
|
65
|
+
</xsd:annotation>
|
66
|
+
<xsd:union>
|
67
|
+
<xsd:simpleType>
|
68
|
+
<xsd:restriction base="xsd:date"/>
|
69
|
+
</xsd:simpleType>
|
70
|
+
<xsd:simpleType>
|
71
|
+
<xsd:restriction base="xsd:dateTime"/>
|
72
|
+
</xsd:simpleType>
|
73
|
+
</xsd:union>
|
74
|
+
</xsd:simpleType>
|
75
|
+
|
76
|
+
<xsd:simpleType name="tChangeFreq">
|
77
|
+
<xsd:annotation>
|
78
|
+
<xsd:documentation>
|
79
|
+
OPTIONAL: Indicates how frequently the content at a particular URL is
|
80
|
+
likely to change. The value "always" should be used to describe
|
81
|
+
documents that change each time they are accessed. The value "never"
|
82
|
+
should be used to describe archived URLs. Please note that web
|
83
|
+
crawlers may not necessarily crawl pages marked "always" more often.
|
84
|
+
Consider this element as a friendly suggestion and not a command.
|
85
|
+
</xsd:documentation>
|
86
|
+
</xsd:annotation>
|
87
|
+
<xsd:restriction base="xsd:string">
|
88
|
+
<xsd:enumeration value="always"/>
|
89
|
+
<xsd:enumeration value="hourly"/>
|
90
|
+
<xsd:enumeration value="daily"/>
|
91
|
+
<xsd:enumeration value="weekly"/>
|
92
|
+
<xsd:enumeration value="monthly"/>
|
93
|
+
<xsd:enumeration value="yearly"/>
|
94
|
+
<xsd:enumeration value="never"/>
|
95
|
+
</xsd:restriction>
|
96
|
+
</xsd:simpleType>
|
97
|
+
|
98
|
+
<xsd:simpleType name="tPriority">
|
99
|
+
<xsd:annotation>
|
100
|
+
<xsd:documentation>
|
101
|
+
OPTIONAL: The priority of a particular URL relative to other pages
|
102
|
+
on the same site. The value for this element is a number between
|
103
|
+
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
|
104
|
+
The default priority of a page is 0.5. Priority is used to select
|
105
|
+
between pages on your site. Setting a priority of 1.0 for all URLs
|
106
|
+
will not help you, as the relative priority of pages on your site
|
107
|
+
is what will be considered.
|
108
|
+
</xsd:documentation>
|
109
|
+
</xsd:annotation>
|
110
|
+
<xsd:restriction base="xsd:decimal">
|
111
|
+
<xsd:minInclusive value="0.0"/>
|
112
|
+
<xsd:maxInclusive value="1.0"/>
|
113
|
+
</xsd:restriction>
|
114
|
+
</xsd:simpleType>
|
115
|
+
|
116
|
+
</xsd:schema>
|
data/web-checker.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
require_relative 'lib/web-checker/version'
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = 'web-checker'
|
7
|
+
s.version = WebChecker::VERSION
|
8
|
+
s.summary = 'Check static websites for consistency.'
|
9
|
+
s.author = 'John Labovitz'
|
10
|
+
s.email = 'johnl@johnlabovitz.com'
|
11
|
+
s.description = %q{
|
12
|
+
WebChecker checks static websites for consistency.
|
13
|
+
}
|
14
|
+
s.license = 'MIT'
|
15
|
+
s.homepage = 'http://github.com/jslabovitz/web-checker'
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
19
|
+
s.require_path = 'lib'
|
20
|
+
|
21
|
+
s.add_dependency 'addressable', '~> 2.5'
|
22
|
+
s.add_dependency 'http', '~> 3.0'
|
23
|
+
s.add_dependency 'nokogiri', '~> 1.8'
|
24
|
+
s.add_dependency 'nokogumbo', '~> 1.4'
|
25
|
+
s.add_dependency 'path', '~> 2.0'
|
26
|
+
|
27
|
+
s.add_development_dependency 'rake', '~> 12.3'
|
28
|
+
s.add_development_dependency 'rubygems-tasks', '~> 0.2'
|
29
|
+
end
|
metadata
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: web-checker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.2'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Labovitz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-01-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: addressable
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: http
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.8'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.8'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogumbo
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: path
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '12.3'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '12.3'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubygems-tasks
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.2'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.2'
|
111
|
+
description: "\n WebChecker checks static websites for consistency.\n "
|
112
|
+
email: johnl@johnlabovitz.com
|
113
|
+
executables:
|
114
|
+
- web-checker
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".gitignore"
|
119
|
+
- Rakefile
|
120
|
+
- bin/web-checker
|
121
|
+
- lib/web-checker.rb
|
122
|
+
- lib/web-checker/schemas/atom.xsd
|
123
|
+
- lib/web-checker/schemas/sitemap.xsd
|
124
|
+
- lib/web-checker/version.rb
|
125
|
+
- web-checker.gemspec
|
126
|
+
homepage: http://github.com/jslabovitz/web-checker
|
127
|
+
licenses:
|
128
|
+
- MIT
|
129
|
+
metadata: {}
|
130
|
+
post_install_message:
|
131
|
+
rdoc_options: []
|
132
|
+
require_paths:
|
133
|
+
- lib
|
134
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
requirements: []
|
145
|
+
rubyforge_project:
|
146
|
+
rubygems_version: 2.7.4
|
147
|
+
signing_key:
|
148
|
+
specification_version: 4
|
149
|
+
summary: Check static websites for consistency.
|
150
|
+
test_files: []
|