web-checker 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Rakefile +2 -0
- data/bin/web-checker +9 -0
- data/lib/web-checker.rb +161 -0
- data/lib/web-checker/schemas/atom.xsd +244 -0
- data/lib/web-checker/schemas/sitemap.xsd +116 -0
- data/lib/web-checker/version.rb +5 -0
- data/web-checker.gemspec +29 -0
- metadata +150 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 89ddbaf96851d6efffd0acf6c7f86ac498710238f8ff0e078bed0fb5d8c40a4e
|
4
|
+
data.tar.gz: c776d89e28ec22d8230aebce749abc2ab475a8c62735b2fe40d36fc539a5e3fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 386bf65bcb250c8a0a192a97f200e5df15b5fec67a2dd80a65d0d964a7ccfbb47919fcbed5acf3d690e192bb95fe823e5f6671d2949e7703e48320b63346a6fb
|
7
|
+
data.tar.gz: ec7f58a6a3a1eb360b219aa135213c2b17d375e793961762ab1fac3471d0864904c4133a0ca63475a636cbb7e93d5117e47230f825df3f36aef9fca3cf7d4793
|
data/.gitignore
ADDED
data/Rakefile
ADDED
data/bin/web-checker
ADDED
data/lib/web-checker.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'addressable'
|
2
|
+
require 'http'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'nokogumbo'
|
5
|
+
require 'path'
|
6
|
+
|
7
|
+
class WebChecker
|
8
|
+
|
9
|
+
# Tidy diagnostics that are tolerated.  check_html_tidy compares each entry
# verbatim against the message part of a Tidy report line.
#
# Blank lines of the literal are dropped so the list never contains "" (which
# would make an empty diagnostic message count as ignorable), and the list is
# frozen because it is shared, constant data.
IgnoreErrors = %q{
  <table> lacks "summary" attribute
  <img> lacks "alt" attribute
  <form> proprietary attribute "novalidate"
  <input> attribute "type" has invalid value "email"
  <input> attribute "tabindex" has invalid value "-1"
  <input> proprietary attribute "border"
  trimming empty <p>
  <iframe> proprietary attribute "allowfullscreen"
}.lines.map(&:strip).reject(&:empty?).freeze
|
19
|
+
# XPath expression selecting every href and src attribute of a document.
LinkElementsXPath = '//@href | //@src'

# Directory holding the XSD files, resolved relative to this source file.
SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'

# Maps an XML root element name to the XSD file used to validate documents
# having that root.
Schemas = [
  ['feed', 'atom.xsd'],
  ['urlset', 'sitemap.xsd'],
].map { |root_name, file_name| [root_name, SchemasDir / file_name] }.to_h
|
25
|
+
|
26
|
+
# Raised by the checks in this class when a resource is missing, returns a bad
# status, or fails parsing/validation.  Derives from StandardError (not
# Exception) so that an ordinary `rescue` clause can handle it.
class Error < StandardError; end
|
27
|
+
|
28
|
+
# Sets up a checker for one site.
#
# site_uri - root URI of the site; parsed with Addressable::URI.
# site_dir - directory supplied by the caller; wrapped in a Path.
def initialize(site_uri:, site_dir:)
  @visited = {}  # URIs that have already been fetched
  @schemas = {}  # loaded XML schemas, keyed by schema file
  @site_dir = Path.new(site_dir)
  @site_uri = Addressable::URI.parse(site_uri)
end
|
34
|
+
|
35
|
+
# Runs the check, starting at the site's root URI.
#
# TODO: get/parse robots and get/parse sitemap before crawling.
def check
  check_uri(@site_uri)
end
|
40
|
+
|
41
|
+
# Fetches +uri+ and hands the response body to the checker matching its media
# type; redirects are followed.  URIs that are not local to the site, or that
# were fetched before, are skipped.
#
# uri - String or Addressable::URI of the resource to check.
#
# Raises Error on a 404, on a redirect that carries no Location header, and on
# any other status outside 2xx/3xx.
def check_uri(uri)
  uri = Addressable::URI.parse(uri)
  uri.normalize!
  return if !local?(uri) || seen?(uri)

  response = HTTP.get(uri)
  @visited[uri] = true
  case response.code
  when 200...300
    body = response.body.to_s
    type = response.headers['Content-Type']
    # Dispatch on the bare media type: the header value commonly carries
    # parameters ("text/html; charset=utf-8") and its case is not significant.
    case type.to_s.split(';').first.to_s.strip.downcase
    when 'text/html'
      check_html(uri, body)
    when 'text/css'
      check_css(uri, body)
    when 'application/xml', 'text/xml'
      check_xml(uri, body)
    when 'image/jpeg', 'image/png', 'image/gif', 'application/javascript'
      # ignore
    else
      warn "skipping unknown resource type: #{uri} (#{type})"
    end
  when 300...400
    location = response.headers['Location']
    raise Error, "Redirect without Location header: #{uri}" unless location

    # The Location value may be relative, so resolve it against this URI.
    check_uri(uri + Addressable::URI.parse(location))
  when 404
    raise Error, "URI not found: #{uri}"
  else
    raise Error, "Bad status: #{response.inspect}"
  end
end
|
74
|
+
|
75
|
+
# Checks one HTML document with both validators: first Tidy, then Nokogiri.
#
# uri  - URI the document was fetched from.
# html - String containing the document's markup.
def check_html(uri, html)
  check_html_tidy(uri, html)
  check_html_nokogiri(uri, html)
end
|
79
|
+
|
80
|
+
# Validates +html+ with the external `tidy` program and raises if it reports a
# diagnostic that is not listed in IgnoreErrors.
#
# The markup is piped to Tidy's standard input, so no temporary file is left
# behind and nothing is interpolated into a shell command line.
#
# uri  - URI of the document; used only in the warning output.
# html - String containing the document's markup.
#
# Raises Error if Tidy cannot be started or unignored diagnostics remain, and
# RuntimeError if a non-blank Tidy output line has an unexpected format.
def check_html_tidy(uri, html)
  require 'open3' # stdlib; required here so this method is self-contained

  begin
    # capture2e merges stderr into stdout, like the former `2>&1`.
    output, _status = Open3.capture2e('tidy', '-utf8', '-quiet', '-errors', stdin_data: html)
  rescue Errno::ENOENT
    raise Error, "Can't run tidy: executable not found"
  end

  reports = output.split("\n").reject { |line| line.strip.empty? }
  errors = reports.map do |line|
    # Expected shape:
    #   line 82 column 1 - Warning: <table> lacks "summary" attribute
    fields = /^line (\d+) column (\d+) - (.*?): (.*)$/.match(line)
    raise "Can't parse error: #{line.inspect}" unless fields

    {
      msg: line,
      line: fields[1].to_i,
      column: fields[2].to_i,
      type: fields[3].downcase.to_sym,
      error: fields[4].strip,
    }
  end
  errors = errors.reject { |e| IgnoreErrors.include?(e[:error]) }
  return if errors.empty?

  warn "#{uri} has invalid HTML"
  show_errors(errors)
  raise Error, "HTML parsing failed (via Tidy)"
end
|
103
|
+
|
104
|
+
# Parses +html+ with Nokogiri, raises Error if the parser recorded any errors,
# and then checks the target of every href/src attribute in the document.
#
# Documents containing an HTML5 doctype are parsed with Nokogiri::HTML5, all
# others with Nokogiri::HTML.
#
# uri  - URI the document was fetched from; link targets are resolved
#        against it.
# html - String containing the document's markup.
def check_html_nokogiri(uri, html)
  parser =
    if html =~ /<!DOCTYPE html>/i
      Nokogiri::HTML5
    else
      Nokogiri::HTML
    end
  document = parser.parse(html) { |config| config.strict }

  problems = document.errors
  unless problems.empty?
    show_errors(problems)
    raise Error, "HTML parsing failed (via Nokogiri)"
  end

  document.xpath(LinkElementsXPath).each do |attribute|
    check_uri(uri + attribute.value)
  end
end
|
113
|
+
|
114
|
+
# Parses +xml+ strictly, validates it against the XSD registered in Schemas
# for its root element, and then checks every href/src target it contains.
#
# uri - URI the document was fetched from; link targets are resolved
#       against it.
# xml - String containing the XML document.
#
# Raises Error if the document is malformed, has no registered schema, or
# fails schema validation.
def check_xml(uri, xml)
  begin
    xml_doc = Nokogiri::XML::Document.parse(xml) { |config| config.strict }
  rescue Nokogiri::XML::SyntaxError => e
    # With the strict option the parser raises on malformed input; report it
    # like the other failures instead of leaking a Nokogiri exception.
    warn "#{e} [line #{e.line}, column #{e.column}]"
    raise Error, "XML parsing failed"
  end
  unless xml_doc.errors.empty?
    show_errors(xml_doc.errors)
    raise Error, "XML parsing failed"
  end

  # A document without a root element yields nil here and is rejected by the
  # unknown-schema check below.
  root_name = xml_doc.root&.name
  schema_file = Schemas[root_name]
  raise Error, "Unknown schema: #{root_name.inspect}" unless schema_file

  # Read the XSD as a string so that no file handle is left open.
  schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.read))
  validation_errors = schema.validate(xml_doc)
  unless validation_errors.empty?
    show_errors(validation_errors)
    raise Error, "XML validation failed"
  end

  xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
end
|
130
|
+
|
131
|
+
# Prints one warning line per error in the form "MESSAGE [line L, column C]".
#
# errors - Enumerable whose items are either
#          * objects exposing #line and #column (their #to_s is the message;
#            presumably the Nokogiri errors passed by the HTML/XML checks), or
#          * Hashes with :type, :error, :line and :column keys, as built by
#            check_html_tidy.
#
# The two shapes are handled separately because only the Hash form supports
# [:line]/[:column] lookups.
def show_errors(errors)
  errors.each do |error|
    message, line, column =
      if error.respond_to?(:line)
        [error.to_s, error.line, error.column]
      else
        ["#{error[:type]}: #{error[:error]}", error[:line], error[:column]]
      end
    warn "#{message} [line #{line}, column #{column}]"
  end
end
|
136
|
+
|
137
|
+
# Checks every resource referenced through url(...) in a stylesheet.
#
# Both quoted (url("a.png"), url('a.png')) and unquoted (url(a.png)) forms
# are recognised; an opening quote must be closed by the same quote.
#
# uri - URI the stylesheet was fetched from; targets are resolved against it.
# css - String containing the stylesheet text.
def check_css(uri, css)
  css.scan(/\burl\(\s*(["']?)(.*?)\1\s*\)/) do |_quote, target|
    check_uri(uri + target)
  end
end
|
142
|
+
|
143
|
+
# Tells whether +uri+ belongs to the site being checked.
#
# A URI with neither scheme nor host (a relative reference) is local.
# Otherwise scheme, host and port must equal those of the site URI.  The site
# URI is normalized before comparing, because check_uri normalizes the URIs
# it passes in; without this a site URI written with an upper-case host would
# never match its own pages.
#
# uri - URI object responding to #scheme, #host and #port.
#
# Returns true or false.
def local?(uri)
  return true unless uri.scheme || uri.host

  site = @site_uri.normalize
  [uri.scheme, uri.host, uri.port] == [site.scheme, site.host, site.port]
end
|
147
|
+
|
148
|
+
# Tells whether +uri+ has already been recorded in @visited.
#
# Returns true or false (a strict boolean, as expected of a predicate).
def seen?(uri)
  @visited.key?(uri)
end
|
151
|
+
|
152
|
+
# Prints the list of unreferenced files held in @files, sorted, to stdout.
# Nothing is printed when the list is empty or was never assigned.
def report
  # @files is not assigned by any code visible in this class, so treat an
  # unset value as "no files" instead of failing on nil.
  files = @files || []
  return if files.empty?

  puts "\t" + "unreferenced files:"
  files.sort.each do |path|
    puts "\t\t" + path.to_s
  end
end
|
160
|
+
|
161
|
+
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8" ?>
|
2
|
+
<xs:schema targetNamespace="http://www.w3.org/2005/Atom" elementFormDefault="qualified"
|
3
|
+
attributeFormDefault="unqualified"
|
4
|
+
xmlns:atom="http://www.w3.org/2005/Atom"
|
5
|
+
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
6
|
+
xmlns:xml="http://www.w3.org/XML/1998/namespace">
|
7
|
+
<xs:annotation>
|
8
|
+
<xs:documentation>
|
9
|
+
This version of the Atom schema is based on version 1.0 of the format specifications,
|
10
|
+
found here http://www.atomenabled.org/developers/syndication/atom-format-spec.php.
|
11
|
+
</xs:documentation>
|
12
|
+
</xs:annotation>
|
13
|
+
<xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/03/xml.xsd" />
|
14
|
+
<xs:annotation>
|
15
|
+
<xs:documentation>
|
16
|
+
An Atom document may have two root elements, feed and entry, as defined in section 2.
|
17
|
+
</xs:documentation>
|
18
|
+
</xs:annotation>
|
19
|
+
<xs:element name="feed" type="atom:feedType"/>
|
20
|
+
<xs:element name="entry" type="atom:entryType"/>
|
21
|
+
<xs:complexType name="textType" mixed="true">
|
22
|
+
<xs:annotation>
|
23
|
+
<xs:documentation>
|
24
|
+
The Atom text construct is defined in section 3.1 of the format spec.
|
25
|
+
</xs:documentation>
|
26
|
+
</xs:annotation>
|
27
|
+
<xs:sequence>
|
28
|
+
<xs:any namespace="http://www.w3.org/1999/xhtml" minOccurs="0"/>
|
29
|
+
</xs:sequence>
|
30
|
+
<xs:attribute name="type" >
|
31
|
+
<xs:simpleType>
|
32
|
+
<xs:restriction base="xs:token">
|
33
|
+
<xs:enumeration value="text"/>
|
34
|
+
<xs:enumeration value="html"/>
|
35
|
+
<xs:enumeration value="xhtml"/>
|
36
|
+
</xs:restriction>
|
37
|
+
</xs:simpleType>
|
38
|
+
</xs:attribute>
|
39
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
40
|
+
</xs:complexType>
|
41
|
+
<xs:complexType name="personType">
|
42
|
+
<xs:annotation>
|
43
|
+
<xs:documentation>
|
44
|
+
The Atom person construct is defined in section 3.2 of the format spec.
|
45
|
+
</xs:documentation>
|
46
|
+
</xs:annotation>
|
47
|
+
<xs:choice minOccurs="1" maxOccurs="unbounded">
|
48
|
+
<xs:element name="name" type="xs:string" minOccurs="1" maxOccurs="1" />
|
49
|
+
<xs:element name="uri" type="atom:uriType" minOccurs="0" maxOccurs="1" />
|
50
|
+
<xs:element name="email" type="atom:emailType" minOccurs="0" maxOccurs="1" />
|
51
|
+
<xs:any namespace="##other"/>
|
52
|
+
</xs:choice>
|
53
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
54
|
+
</xs:complexType>
|
55
|
+
<xs:simpleType name="emailType">
|
56
|
+
<xs:annotation>
|
57
|
+
<xs:documentation>
|
58
|
+
Schema definition for an email address.
|
59
|
+
</xs:documentation>
|
60
|
+
</xs:annotation>
|
61
|
+
<xs:restriction base="xs:normalizedString">
|
62
|
+
<!--
|
63
|
+
<xs:pattern value="\w+@(\w+\.)+\w+" />
|
64
|
+
-->
|
65
|
+
<xs:maxLength value="254"/>
|
66
|
+
<xs:pattern value="[_\-a-zA-Z0-9\.\+]+@[a-zA-Z0-9](\.?[\-a-zA-Z0-9]*[a-zA-Z0-9])*"/>
|
67
|
+
</xs:restriction>
|
68
|
+
</xs:simpleType>
|
69
|
+
<xs:complexType name="feedType">
|
70
|
+
<xs:annotation>
|
71
|
+
<xs:documentation>
|
72
|
+
The Atom feed construct is defined in section 4.1.1 of the format spec.
|
73
|
+
</xs:documentation>
|
74
|
+
</xs:annotation>
|
75
|
+
<xs:choice minOccurs="3" maxOccurs="unbounded">
|
76
|
+
<xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
77
|
+
<xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
|
78
|
+
<xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
79
|
+
<xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1" />
|
80
|
+
<xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1" />
|
81
|
+
<xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
|
82
|
+
<xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
|
83
|
+
<xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1" />
|
84
|
+
<xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
85
|
+
<xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
86
|
+
<xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
|
87
|
+
<xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
|
88
|
+
<xs:element name="entry" type="atom:entryType" minOccurs="0" maxOccurs="unbounded" />
|
89
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
|
90
|
+
</xs:choice>
|
91
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
92
|
+
</xs:complexType>
|
93
|
+
<xs:complexType name="entryType">
|
94
|
+
<xs:annotation>
|
95
|
+
<xs:documentation>
|
96
|
+
The Atom entry construct is defined in section 4.1.2 of the format spec.
|
97
|
+
</xs:documentation>
|
98
|
+
</xs:annotation>
|
99
|
+
<xs:choice maxOccurs="unbounded">
|
100
|
+
<xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
101
|
+
<xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
|
102
|
+
<xs:element name="content" type="atom:contentType" minOccurs="0" maxOccurs="1" />
|
103
|
+
<xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
|
104
|
+
<xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
|
105
|
+
<xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
|
106
|
+
<xs:element name="published" type="atom:dateTimeType" minOccurs="0" maxOccurs="1" />
|
107
|
+
<xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
108
|
+
<!-- atom:source holds feed metadata elements, so it uses sourceType (declared in this schema) rather than the text construct. -->
<xs:element name="source" type="atom:sourceType" minOccurs="0" maxOccurs="1" />
|
109
|
+
<xs:element name="summary" type="atom:textType" minOccurs="0" maxOccurs="1" />
|
110
|
+
<xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
|
111
|
+
<xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
|
112
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
|
113
|
+
</xs:choice>
|
114
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
115
|
+
</xs:complexType>
|
116
|
+
<xs:complexType name="contentType" mixed="true">
|
117
|
+
<xs:annotation>
|
118
|
+
<xs:documentation>
|
119
|
+
The Atom content construct is defined in section 4.1.3 of the format spec.
|
120
|
+
</xs:documentation>
|
121
|
+
</xs:annotation>
|
122
|
+
<xs:sequence>
|
123
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded" />
|
124
|
+
</xs:sequence>
|
125
|
+
<xs:attribute name="type" type="xs:string"/>
|
126
|
+
<xs:attribute name="src" type="xs:anyURI"/>
|
127
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
128
|
+
</xs:complexType>
|
129
|
+
<xs:complexType name="categoryType">
|
130
|
+
<xs:annotation>
|
131
|
+
<xs:documentation>
|
132
|
+
The Atom category construct is defined in section 4.2.2 of the format spec.
|
133
|
+
</xs:documentation>
|
134
|
+
</xs:annotation>
|
135
|
+
<xs:attribute name="term" type="xs:string" use="required"/>
|
136
|
+
<xs:attribute name="scheme" type="xs:anyURI" use="optional"/>
|
137
|
+
<xs:attribute name="label" type="xs:string" use="optional"/>
|
138
|
+
<xs:attributeGroup ref="atom:commonAttributes" />
|
139
|
+
</xs:complexType>
|
140
|
+
<xs:complexType name="generatorType">
|
141
|
+
<xs:annotation>
|
142
|
+
<xs:documentation>
|
143
|
+
The Atom generator element is defined in section 4.2.4 of the format spec.
|
144
|
+
</xs:documentation>
|
145
|
+
</xs:annotation>
|
146
|
+
<xs:simpleContent>
|
147
|
+
<xs:extension base="xs:string">
|
148
|
+
<xs:attribute name="uri" use="optional" type="xs:anyURI" />
|
149
|
+
<xs:attribute name="version" use="optional" type="xs:string" />
|
150
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
151
|
+
</xs:extension>
|
152
|
+
</xs:simpleContent>
|
153
|
+
</xs:complexType>
|
154
|
+
<xs:complexType name="iconType">
|
155
|
+
<xs:annotation>
|
156
|
+
<xs:documentation>
|
157
|
+
The Atom icon construct is defined in section 4.2.5 of the format spec.
|
158
|
+
</xs:documentation>
|
159
|
+
</xs:annotation>
|
160
|
+
<xs:simpleContent>
|
161
|
+
<xs:extension base="xs:anyURI">
|
162
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
163
|
+
</xs:extension>
|
164
|
+
</xs:simpleContent>
|
165
|
+
</xs:complexType>
|
166
|
+
<xs:complexType name="idType">
|
167
|
+
<xs:annotation>
|
168
|
+
<xs:documentation>
|
169
|
+
The Atom id construct is defined in section 4.2.6 of the format spec.
|
170
|
+
</xs:documentation>
|
171
|
+
</xs:annotation>
|
172
|
+
<xs:simpleContent>
|
173
|
+
<xs:extension base="xs:anyURI">
|
174
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
175
|
+
</xs:extension>
|
176
|
+
</xs:simpleContent>
|
177
|
+
</xs:complexType>
|
178
|
+
<xs:complexType name="linkType" mixed="true">
|
179
|
+
<xs:annotation>
|
180
|
+
<xs:documentation>
|
181
|
+
The Atom link construct is defined in section 3.4 of the format spec.
|
182
|
+
</xs:documentation>
|
183
|
+
</xs:annotation>
|
184
|
+
<xs:attribute name="href" use="required" type="xs:anyURI" />
|
185
|
+
<xs:attribute name="rel" type="xs:string" use="optional"/>
|
186
|
+
<xs:attribute name="type" use="optional" type="xs:string" />
|
187
|
+
<xs:attribute name="hreflang" use="optional" type="xs:NMTOKEN" />
|
188
|
+
<xs:attribute name="title" use="optional" type="xs:string" />
|
189
|
+
<xs:attribute name="length" use="optional" type="xs:positiveInteger" />
|
190
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
191
|
+
</xs:complexType>
|
192
|
+
<xs:complexType name="logoType">
|
193
|
+
<xs:annotation>
|
194
|
+
<xs:documentation>
|
195
|
+
The Atom logo construct is defined in section 4.2.8 of the format spec.
|
196
|
+
</xs:documentation>
|
197
|
+
</xs:annotation>
|
198
|
+
<xs:simpleContent>
|
199
|
+
<xs:extension base="xs:anyURI">
|
200
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
201
|
+
</xs:extension>
|
202
|
+
</xs:simpleContent>
|
203
|
+
</xs:complexType>
|
204
|
+
<xs:complexType name="sourceType">
|
205
|
+
<xs:annotation>
|
206
|
+
<xs:documentation>
|
207
|
+
The Atom source construct is defined in section 4.2.11 of the format spec.
|
208
|
+
</xs:documentation>
|
209
|
+
</xs:annotation>
|
210
|
+
<xs:choice maxOccurs="unbounded">
|
211
|
+
<xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
|
212
|
+
<xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded"/>
|
213
|
+
<xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
|
214
|
+
<xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1"/>
|
215
|
+
<xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1"/>
|
216
|
+
<xs:element name="id" type="atom:idType" minOccurs="0" maxOccurs="1"/>
|
217
|
+
<xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded"/>
|
218
|
+
<xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1"/>
|
219
|
+
<xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1"/>
|
220
|
+
<xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1"/>
|
221
|
+
<xs:element name="title" type="atom:textType" minOccurs="0" maxOccurs="1"/>
|
222
|
+
<xs:element name="updated" type="atom:dateTimeType" minOccurs="0" maxOccurs="1"/>
|
223
|
+
<xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
|
224
|
+
</xs:choice>
|
225
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
226
|
+
</xs:complexType>
|
227
|
+
<xs:complexType name="uriType">
|
228
|
+
<xs:simpleContent>
|
229
|
+
<xs:extension base="xs:anyURI">
|
230
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
231
|
+
</xs:extension>
|
232
|
+
</xs:simpleContent>
|
233
|
+
</xs:complexType>
|
234
|
+
<xs:complexType name="dateTimeType">
|
235
|
+
<xs:simpleContent>
|
236
|
+
<xs:extension base="xs:dateTime">
|
237
|
+
<xs:attributeGroup ref="atom:commonAttributes"/>
|
238
|
+
</xs:extension>
|
239
|
+
</xs:simpleContent>
|
240
|
+
</xs:complexType>
|
241
|
+
<xs:attributeGroup name="commonAttributes">
|
242
|
+
<xs:anyAttribute namespace="##other"/>
|
243
|
+
</xs:attributeGroup>
|
244
|
+
</xs:schema>
|
@@ -0,0 +1,116 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
|
4
|
+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
<xsd:annotation>
|
7
|
+
<xsd:documentation>
|
8
|
+
XML Schema for Sitemap files.
|
9
|
+
Last Modified 2008-03-26
|
10
|
+
</xsd:documentation>
|
11
|
+
</xsd:annotation>
|
12
|
+
|
13
|
+
<xsd:element name="urlset">
|
14
|
+
<xsd:annotation>
|
15
|
+
<xsd:documentation>
|
16
|
+
Container for a set of up to 50,000 document elements.
|
17
|
+
This is the root element of the XML file.
|
18
|
+
</xsd:documentation>
|
19
|
+
</xsd:annotation>
|
20
|
+
<xsd:complexType>
|
21
|
+
<xsd:sequence>
|
22
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
23
|
+
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
|
24
|
+
</xsd:sequence>
|
25
|
+
</xsd:complexType>
|
26
|
+
</xsd:element>
|
27
|
+
|
28
|
+
<xsd:complexType name="tUrl">
|
29
|
+
<xsd:annotation>
|
30
|
+
<xsd:documentation>
|
31
|
+
Container for the data needed to describe a document to crawl.
|
32
|
+
</xsd:documentation>
|
33
|
+
</xsd:annotation>
|
34
|
+
<xsd:sequence>
|
35
|
+
<xsd:element name="loc" type="tLoc"/>
|
36
|
+
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
|
37
|
+
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
|
38
|
+
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
|
39
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
40
|
+
</xsd:sequence>
|
41
|
+
</xsd:complexType>
|
42
|
+
|
43
|
+
<xsd:simpleType name="tLoc">
|
44
|
+
<xsd:annotation>
|
45
|
+
<xsd:documentation>
|
46
|
+
REQUIRED: The location URI of a document.
|
47
|
+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
|
48
|
+
</xsd:documentation>
|
49
|
+
</xsd:annotation>
|
50
|
+
<xsd:restriction base="xsd:anyURI">
|
51
|
+
<xsd:minLength value="12"/>
|
52
|
+
<xsd:maxLength value="2048"/>
|
53
|
+
</xsd:restriction>
|
54
|
+
</xsd:simpleType>
|
55
|
+
|
56
|
+
<xsd:simpleType name="tLastmod">
|
57
|
+
<xsd:annotation>
|
58
|
+
<xsd:documentation>
|
59
|
+
OPTIONAL: The date the document was last modified. The date must conform
|
60
|
+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
|
61
|
+
Example: 2005-05-10
|
62
|
+
Lastmod may also contain a timestamp.
|
63
|
+
Example: 2005-05-10T17:33:30+08:00
|
64
|
+
</xsd:documentation>
|
65
|
+
</xsd:annotation>
|
66
|
+
<xsd:union>
|
67
|
+
<xsd:simpleType>
|
68
|
+
<xsd:restriction base="xsd:date"/>
|
69
|
+
</xsd:simpleType>
|
70
|
+
<xsd:simpleType>
|
71
|
+
<xsd:restriction base="xsd:dateTime"/>
|
72
|
+
</xsd:simpleType>
|
73
|
+
</xsd:union>
|
74
|
+
</xsd:simpleType>
|
75
|
+
|
76
|
+
<xsd:simpleType name="tChangeFreq">
|
77
|
+
<xsd:annotation>
|
78
|
+
<xsd:documentation>
|
79
|
+
OPTIONAL: Indicates how frequently the content at a particular URL is
|
80
|
+
likely to change. The value "always" should be used to describe
|
81
|
+
documents that change each time they are accessed. The value "never"
|
82
|
+
should be used to describe archived URLs. Please note that web
|
83
|
+
crawlers may not necessarily crawl pages marked "always" more often.
|
84
|
+
Consider this element as a friendly suggestion and not a command.
|
85
|
+
</xsd:documentation>
|
86
|
+
</xsd:annotation>
|
87
|
+
<xsd:restriction base="xsd:string">
|
88
|
+
<xsd:enumeration value="always"/>
|
89
|
+
<xsd:enumeration value="hourly"/>
|
90
|
+
<xsd:enumeration value="daily"/>
|
91
|
+
<xsd:enumeration value="weekly"/>
|
92
|
+
<xsd:enumeration value="monthly"/>
|
93
|
+
<xsd:enumeration value="yearly"/>
|
94
|
+
<xsd:enumeration value="never"/>
|
95
|
+
</xsd:restriction>
|
96
|
+
</xsd:simpleType>
|
97
|
+
|
98
|
+
<xsd:simpleType name="tPriority">
|
99
|
+
<xsd:annotation>
|
100
|
+
<xsd:documentation>
|
101
|
+
OPTIONAL: The priority of a particular URL relative to other pages
|
102
|
+
on the same site. The value for this element is a number between
|
103
|
+
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
|
104
|
+
The default priority of a page is 0.5. Priority is used to select
|
105
|
+
between pages on your site. Setting a priority of 1.0 for all URLs
|
106
|
+
will not help you, as the relative priority of pages on your site
|
107
|
+
is what will be considered.
|
108
|
+
</xsd:documentation>
|
109
|
+
</xsd:annotation>
|
110
|
+
<xsd:restriction base="xsd:decimal">
|
111
|
+
<xsd:minInclusive value="0.0"/>
|
112
|
+
<xsd:maxInclusive value="1.0"/>
|
113
|
+
</xsd:restriction>
|
114
|
+
</xsd:simpleType>
|
115
|
+
|
116
|
+
</xsd:schema>
|
data/web-checker.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
require_relative 'lib/web-checker/version'
|
4
|
+
|
5
|
+
# Gem specification for web-checker.  The file lists are taken from
# `git ls-files`, so the gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name          = 'web-checker'
  s.version       = WebChecker::VERSION
  s.summary       = 'Check static websites for consistency.'
  s.author        = 'John Labovitz'
  s.email         = 'johnl@johnlabovitz.com'
  # Plain string: a multi-line %q{...} literal would publish its surrounding
  # newlines and indentation as part of the description.
  s.description   = 'WebChecker checks static websites for consistency.'
  s.license       = 'MIT'
  s.homepage      = 'http://github.com/jslabovitz/web-checker'
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_path  = 'lib'

  s.add_dependency 'addressable', '~> 2.5'
  s.add_dependency 'http', '~> 3.0'
  s.add_dependency 'nokogiri', '~> 1.8'
  s.add_dependency 'nokogumbo', '~> 1.4'
  s.add_dependency 'path', '~> 2.0'

  s.add_development_dependency 'rake', '~> 12.3'
  s.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
|
metadata
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: web-checker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.2'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Labovitz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-01-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: addressable
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: http
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.8'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.8'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: nokogumbo
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: path
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '12.3'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '12.3'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubygems-tasks
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.2'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.2'
|
111
|
+
description: "\n WebChecker checks static websites for consistency.\n "
|
112
|
+
email: johnl@johnlabovitz.com
|
113
|
+
executables:
|
114
|
+
- web-checker
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".gitignore"
|
119
|
+
- Rakefile
|
120
|
+
- bin/web-checker
|
121
|
+
- lib/web-checker.rb
|
122
|
+
- lib/web-checker/schemas/atom.xsd
|
123
|
+
- lib/web-checker/schemas/sitemap.xsd
|
124
|
+
- lib/web-checker/version.rb
|
125
|
+
- web-checker.gemspec
|
126
|
+
homepage: http://github.com/jslabovitz/web-checker
|
127
|
+
licenses:
|
128
|
+
- MIT
|
129
|
+
metadata: {}
|
130
|
+
post_install_message:
|
131
|
+
rdoc_options: []
|
132
|
+
require_paths:
|
133
|
+
- lib
|
134
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
requirements: []
|
145
|
+
rubyforge_project:
|
146
|
+
rubygems_version: 2.7.4
|
147
|
+
signing_key:
|
148
|
+
specification_version: 4
|
149
|
+
summary: Check static websites for consistency.
|
150
|
+
test_files: []
|