web-checker 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 89ddbaf96851d6efffd0acf6c7f86ac498710238f8ff0e078bed0fb5d8c40a4e
4
+ data.tar.gz: c776d89e28ec22d8230aebce749abc2ab475a8c62735b2fe40d36fc539a5e3fd
5
+ SHA512:
6
+ metadata.gz: 386bf65bcb250c8a0a192a97f200e5df15b5fec67a2dd80a65d0d964a7ccfbb47919fcbed5acf3d690e192bb95fe823e5f6671d2949e7703e48320b63346a6fb
7
+ data.tar.gz: ec7f58a6a3a1eb360b219aa135213c2b17d375e793961762ab1fac3471d0864904c4133a0ca63475a636cbb7e93d5117e47230f825df3f36aef9fca3cf7d4793
@@ -0,0 +1,2 @@
1
+ *.gem
2
+ .DS_Store
@@ -0,0 +1,2 @@
1
+ require 'rubygems/tasks'
2
+ Gem::Tasks.new
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby

# Command-line entry point for WebChecker.
#
# Usage: web-checker SITE_URI SITE_DIR
#
# Both arguments are handed to WebChecker.new as site_uri: and site_dir:.
# The site is then checked and the report is printed. Exits with status 1
# when arguments are missing or when a WebChecker::Error is raised.

require 'web-checker'

program = File.basename($PROGRAM_NAME)
uri, dir = ARGV

# Without both arguments the library would fail later with an obscure
# NoMethodError on nil, so stop here with a usage message instead.
unless uri && dir
  warn "usage: #{program} SITE_URI SITE_DIR"
  exit 1
end

begin
  checker = WebChecker.new(site_uri: uri, site_dir: dir)
  checker.check
  checker.report
rescue WebChecker::Error => e
  # Checker failures carry a user-facing message; show it without a backtrace.
  abort "#{program}: #{e.message}"
end
@@ -0,0 +1,161 @@
1
require 'open3'

require 'addressable'
require 'http'
require 'nokogiri'
require 'nokogumbo'
require 'path'
6
+
7
# Crawls a website starting from a root URI, validating every local resource
# it reaches: HTML is checked with the external `tidy` program and with
# Nokogiri, XML is validated against a bundled schema chosen by root element
# name, and CSS is scanned for url(...) references. Links found in documents
# are followed recursively. Problems are reported by raising WebChecker::Error.
class WebChecker

  # Tidy messages that are tolerated; an error whose text equals one of these
  # entries is dropped before deciding whether a page is invalid.
  IgnoreErrors = [
    '<table> lacks "summary" attribute',
    '<img> lacks "alt" attribute',
    '<form> proprietary attribute "novalidate"',
    '<input> attribute "type" has invalid value "email"',
    '<input> attribute "tabindex" has invalid value "-1"',
    '<input> proprietary attribute "border"',
    'trimming empty <p>',
    '<iframe> proprietary attribute "allowfullscreen"',
  ]

  # XPath selecting every href and src attribute in a document.
  LinkElementsXPath = '//@href | //@src'

  # Directory holding the bundled XSD files.
  SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'

  # Maps an XML root element name to the schema used to validate it.
  Schemas = {
    'feed' => SchemasDir / 'atom.xsd',
    'urlset' => SchemasDir / 'sitemap.xsd',
  }

  # Raised for every problem the checker detects. Derives from StandardError
  # so that an ordinary `rescue` catches it.
  class Error < StandardError; end

  # @param site_uri [String, Addressable::URI] root URI of the site to crawl
  # @param site_dir [String] local directory of the site (stored only; no
  #   method in this class reads it yet)
  def initialize(site_uri:, site_dir:)
    @site_uri = Addressable::URI.parse(site_uri)
    @site_dir = Path.new(site_dir)
    @schemas = {}
    @visited = {}
    # Nothing in this class populates @files yet; it starts empty so that
    # #report can run safely.
    @files = []
  end

  # Checks the whole site, starting at the root URI.
  def check
    # TODO: get/parse robots
    # TODO: get/parse sitemap
    check_uri(@site_uri)
  end

  # Fetches +uri+ and validates the response according to its media type,
  # following redirects. URIs that are not local to the site, or that were
  # already visited, are skipped.
  #
  # @param uri [String, Addressable::URI]
  # @raise [Error] on 404, on any other non-2xx/3xx status, on a redirect
  #   without a Location header, or when validation of the body fails
  def check_uri(uri)
    uri = Addressable::URI.parse(uri)
    uri.normalize!
    # A fragment only addresses a position inside the same resource, so drop
    # it to avoid fetching one page once per distinct anchor.
    uri.fragment = nil
    return unless local?(uri) && !seen?(uri)
    @visited[uri] = true
    response = HTTP.get(uri)
    case response.code
    when 200...300
      check_body(uri, response)
    when 300...400
      location = response.headers['Location']
      raise Error, "Redirect without Location: #{uri}" unless location
      check_uri(uri + Addressable::URI.parse(location))
    when 404
      raise Error, "URI not found: #{uri}"
    else
      raise Error, "Bad status: #{response.inspect}"
    end
  end

  # Runs both HTML validators over +html+.
  def check_html(uri, html)
    check_html_tidy(uri, html)
    check_html_nokogiri(uri, html)
  end

  # Validates +html+ with the external `tidy` program, ignoring the messages
  # listed in IgnoreErrors.
  #
  # @raise [Error] if tidy reports any message that is not ignored, or if
  #   the tidy executable cannot be found
  # @raise [RuntimeError] if a line of tidy output cannot be parsed
  def check_html_tidy(uri, html)
    errors = parse_tidy_output(run_tidy(html)).reject do |e|
      IgnoreErrors.include?(e[:error])
    end
    return if errors.empty?
    warn "#{uri} has invalid HTML"
    show_errors(errors)
    raise Error, "HTML parsing failed (via Tidy)"
  end

  # Parses +html+ with Nokogiri (the HTML5 parser when an HTML5 doctype is
  # present), then follows every href/src found in the document.
  #
  # @raise [Error] if the parser recorded any errors
  def check_html_nokogiri(uri, html)
    doc_class = html =~ /<!DOCTYPE html>/i ? Nokogiri::HTML5 : Nokogiri::HTML
    doc = doc_class.parse(html) { |config| config.strict }
    unless doc.errors.empty?
      warn "#{uri} has invalid HTML"
      show_errors(doc.errors)
      raise Error, "HTML parsing failed (via Nokogiri)"
    end
    doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
  end

  # Parses +xml+ strictly, validates it against the schema registered for its
  # root element name, then follows every href/src found in the document.
  #
  # @raise [Error] if the XML is malformed, has no known schema, or fails
  #   schema validation
  def check_xml(uri, xml)
    begin
      xml_doc = Nokogiri::XML::Document.parse(xml) { |config| config.strict }
    rescue Nokogiri::XML::SyntaxError => e
      # Strict parsing raises on malformed input instead of collecting the
      # problem in #errors; report it the same way as collected errors.
      show_errors([e])
      raise Error, "XML parsing failed"
    end
    unless xml_doc.errors.empty?
      show_errors(xml_doc.errors)
      raise Error, "XML parsing failed"
    end
    root_name = xml_doc.root.name
    schema_file = Schemas[root_name]
    raise Error, "Unknown schema: #{root_name.inspect}" unless schema_file
    schema = (@schemas[schema_file] ||= load_schema(schema_file))
    validation_errors = schema.validate(xml_doc)
    unless validation_errors.empty?
      show_errors(validation_errors)
      raise Error, "XML validation failed"
    end
    xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
  end

  # Prints each error to stderr together with its line and column.
  #
  # @param errors [Array] tidy error hashes (as built by #check_html_tidy)
  #   or parser/validator error objects exposing #line and #column
  def show_errors(errors)
    errors.each { |error| warn describe_error(error) }
  end

  # Follows every url(...) reference in +css+, quoted or unquoted.
  def check_css(uri, css)
    css.scan(/\burl\(\s*(["']?)(.*?)\1\s*\)/) do |_quote, target|
      check_uri(uri + target)
    end
  end

  # @return [Boolean] true when +uri+ is relative, or has the same scheme,
  #   host and port as the site root
  def local?(uri)
    (!uri.scheme && !uri.host) ||
      (uri.scheme == @site_uri.scheme && uri.host == @site_uri.host && uri.port == @site_uri.port)
  end

  # @return [Boolean] true when +uri+ has already been visited
  def seen?(uri)
    @visited.key?(uri)
  end

  # Prints the list of unreferenced files, if there are any.
  def report
    files = Array(@files)
    return if files.empty?
    puts "\tunreferenced files:"
    files.sort.each { |path| puts "\t\t#{path}" }
  end

  private

  # Dispatches a successful response to the checker for its media type.
  def check_body(uri, response)
    body = response.body.to_s
    content_type = response.headers['Content-Type']
    case media_type(content_type)
    when 'text/html'
      check_html(uri, body)
    when 'text/css'
      check_css(uri, body)
    when 'application/xml', 'text/xml'
      check_xml(uri, body)
    when 'image/jpeg', 'image/png', 'image/gif', 'application/javascript'
      # ignore
    else
      warn "skipping unknown resource type: #{uri} (#{content_type})"
    end
  end

  # Reduces a Content-Type header value such as "text/html; charset=utf-8"
  # to its bare, lower-cased media type ("text/html").
  def media_type(content_type)
    Array(content_type).first.to_s.split(';').first.to_s.strip.downcase
  end

  # Feeds +html+ to tidy on stdin and returns its combined stdout/stderr.
  # No temporary file and no shell are involved.
  def run_tidy(html)
    output, _status = Open3.capture2e('tidy', '-utf8', '-quiet', '-errors', stdin_data: html)
    output
  rescue Errno::ENOENT
    raise Error, "tidy executable not found"
  end

  # Converts tidy output lines, e.g.
  #   line 82 column 1 - Warning: <table> lacks "summary" attribute
  # into hashes with :msg, :line, :column, :type and :error keys.
  def parse_tidy_output(output)
    lines = output.split("\n").reject { |line| line.strip.empty? }
    lines.map do |line|
      match = /^line (\d+) column (\d+) - (.*?): (.*)$/.match(line)
      raise "Can't parse error: #{line.inspect}" unless match
      {
        msg: line,
        line: match[1].to_i,
        column: match[2].to_i,
        type: match[3].downcase.to_sym,
        error: match[4].strip,
      }
    end
  end

  # Loads an XSD file, closing the file handle once the schema is built.
  def load_schema(schema_file)
    File.open(schema_file.to_s) { |io| Nokogiri::XML::Schema(io) }
  end

  # Formats one error for display, whichever shape it has.
  def describe_error(error)
    if error.is_a?(Hash)
      "#{error[:type]}: #{error[:error]} [line #{error[:line]}, column #{error[:column]}]"
    elsif error.respond_to?(:line) && error.respond_to?(:column)
      "#{error} [line #{error.line}, column #{error.column}]"
    else
      error.to_s
    end
  end

end
@@ -0,0 +1,244 @@
1
+ <?xml version="1.0" encoding="utf-8" ?>
2
+ <xs:schema targetNamespace="http://www.w3.org/2005/Atom" elementFormDefault="qualified"
3
+ attributeFormDefault="unqualified"
4
+ xmlns:atom="http://www.w3.org/2005/Atom"
5
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
6
+ xmlns:xml="http://www.w3.org/XML/1998/namespace">
7
+ <xs:annotation>
8
+ <xs:documentation>
9
+ This version of the Atom schema is based on version 1.0 of the format specifications,
10
+ found here http://www.atomenabled.org/developers/syndication/atom-format-spec.php.
11
+ </xs:documentation>
12
+ </xs:annotation>
13
+ <xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/03/xml.xsd" />
14
+ <xs:annotation>
15
+ <xs:documentation>
16
+ An Atom document may have one of two root elements, feed or entry, as defined in section 2.
17
+ </xs:documentation>
18
+ </xs:annotation>
19
+ <xs:element name="feed" type="atom:feedType"/>
20
+ <xs:element name="entry" type="atom:entryType"/>
21
+ <xs:complexType name="textType" mixed="true">
22
+ <xs:annotation>
23
+ <xs:documentation>
24
+ The Atom text construct is defined in section 3.1 of the format spec.
25
+ </xs:documentation>
26
+ </xs:annotation>
27
+ <xs:sequence>
28
+ <xs:any namespace="http://www.w3.org/1999/xhtml" minOccurs="0"/>
29
+ </xs:sequence>
30
+ <xs:attribute name="type" >
31
+ <xs:simpleType>
32
+ <xs:restriction base="xs:token">
33
+ <xs:enumeration value="text"/>
34
+ <xs:enumeration value="html"/>
35
+ <xs:enumeration value="xhtml"/>
36
+ </xs:restriction>
37
+ </xs:simpleType>
38
+ </xs:attribute>
39
+ <xs:attributeGroup ref="atom:commonAttributes"/>
40
+ </xs:complexType>
41
+ <xs:complexType name="personType">
42
+ <xs:annotation>
43
+ <xs:documentation>
44
+ The Atom person construct is defined in section 3.2 of the format spec.
45
+ </xs:documentation>
46
+ </xs:annotation>
47
+ <xs:choice minOccurs="1" maxOccurs="unbounded">
48
+ <xs:element name="name" type="xs:string" minOccurs="1" maxOccurs="1" />
49
+ <xs:element name="uri" type="atom:uriType" minOccurs="0" maxOccurs="1" />
50
+ <xs:element name="email" type="atom:emailType" minOccurs="0" maxOccurs="1" />
51
+ <xs:any namespace="##other"/>
52
+ </xs:choice>
53
+ <xs:attributeGroup ref="atom:commonAttributes"/>
54
+ </xs:complexType>
55
+ <xs:simpleType name="emailType">
56
+ <xs:annotation>
57
+ <xs:documentation>
58
+ Schema definition for an email address.
59
+ </xs:documentation>
60
+ </xs:annotation>
61
+ <xs:restriction base="xs:normalizedString">
62
+ <!--
63
+ <xs:pattern value="\w+@(\w+\.)+\w+" />
64
+ -->
65
+ <xs:maxLength value="254"/>
66
+ <xs:pattern value="[_\-a-zA-Z0-9\.\+]+@[a-zA-Z0-9](\.?[\-a-zA-Z0-9]*[a-zA-Z0-9])*"/>
67
+ </xs:restriction>
68
+ </xs:simpleType>
69
+ <xs:complexType name="feedType">
70
+ <xs:annotation>
71
+ <xs:documentation>
72
+ The Atom feed construct is defined in section 4.1.1 of the format spec.
73
+ </xs:documentation>
74
+ </xs:annotation>
75
+ <xs:choice minOccurs="3" maxOccurs="unbounded">
76
+ <xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
77
+ <xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
78
+ <xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
79
+ <xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1" />
80
+ <xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1" />
81
+ <xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
82
+ <xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
83
+ <xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1" />
84
+ <xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
85
+ <xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1" />
86
+ <xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
87
+ <xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
88
+ <xs:element name="entry" type="atom:entryType" minOccurs="0" maxOccurs="unbounded" />
89
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
90
+ </xs:choice>
91
+ <xs:attributeGroup ref="atom:commonAttributes"/>
92
+ </xs:complexType>
93
+ <xs:complexType name="entryType">
94
+ <xs:annotation>
95
+ <xs:documentation>
96
+ The Atom entry construct is defined in section 4.1.2 of the format spec.
97
+ </xs:documentation>
98
+ </xs:annotation>
99
+ <xs:choice maxOccurs="unbounded">
100
+ <xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
101
+ <xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
102
+ <xs:element name="content" type="atom:contentType" minOccurs="0" maxOccurs="1" />
103
+ <xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
104
+ <xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
105
+ <xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
106
+ <xs:element name="published" type="atom:dateTimeType" minOccurs="0" maxOccurs="1" />
107
+ <xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
108
+ <xs:element name="source" type="atom:textType" minOccurs="0" maxOccurs="1" />
109
+ <xs:element name="summary" type="atom:textType" minOccurs="0" maxOccurs="1" />
110
+ <xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
111
+ <xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
112
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
113
+ </xs:choice>
114
+ <xs:attributeGroup ref="atom:commonAttributes"/>
115
+ </xs:complexType>
116
+ <xs:complexType name="contentType" mixed="true">
117
+ <xs:annotation>
118
+ <xs:documentation>
119
+ The Atom content construct is defined in section 4.1.3 of the format spec.
120
+ </xs:documentation>
121
+ </xs:annotation>
122
+ <xs:sequence>
123
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded" />
124
+ </xs:sequence>
125
+ <xs:attribute name="type" type="xs:string"/>
126
+ <xs:attribute name="src" type="xs:anyURI"/>
127
+ <xs:attributeGroup ref="atom:commonAttributes"/>
128
+ </xs:complexType>
129
+ <xs:complexType name="categoryType">
130
+ <xs:annotation>
131
+ <xs:documentation>
132
+ The Atom category construct is defined in section 4.2.2 of the format spec.
133
+ </xs:documentation>
134
+ </xs:annotation>
135
+ <xs:attribute name="term" type="xs:string" use="required"/>
136
+ <xs:attribute name="scheme" type="xs:anyURI" use="optional"/>
137
+ <xs:attribute name="label" type="xs:string" use="optional"/>
138
+ <xs:attributeGroup ref="atom:commonAttributes" />
139
+ </xs:complexType>
140
+ <xs:complexType name="generatorType">
141
+ <xs:annotation>
142
+ <xs:documentation>
143
+ The Atom generator element is defined in section 4.2.4 of the format spec.
144
+ </xs:documentation>
145
+ </xs:annotation>
146
+ <xs:simpleContent>
147
+ <xs:extension base="xs:string">
148
+ <xs:attribute name="uri" use="optional" type="xs:anyURI" />
149
+ <xs:attribute name="version" use="optional" type="xs:string" />
150
+ <xs:attributeGroup ref="atom:commonAttributes"/>
151
+ </xs:extension>
152
+ </xs:simpleContent>
153
+ </xs:complexType>
154
+ <xs:complexType name="iconType">
155
+ <xs:annotation>
156
+ <xs:documentation>
157
+ The Atom icon construct is defined in section 4.2.5 of the format spec.
158
+ </xs:documentation>
159
+ </xs:annotation>
160
+ <xs:simpleContent>
161
+ <xs:extension base="xs:anyURI">
162
+ <xs:attributeGroup ref="atom:commonAttributes"/>
163
+ </xs:extension>
164
+ </xs:simpleContent>
165
+ </xs:complexType>
166
+ <xs:complexType name="idType">
167
+ <xs:annotation>
168
+ <xs:documentation>
169
+ The Atom id construct is defined in section 4.2.6 of the format spec.
170
+ </xs:documentation>
171
+ </xs:annotation>
172
+ <xs:simpleContent>
173
+ <xs:extension base="xs:anyURI">
174
+ <xs:attributeGroup ref="atom:commonAttributes"/>
175
+ </xs:extension>
176
+ </xs:simpleContent>
177
+ </xs:complexType>
178
+ <xs:complexType name="linkType" mixed="true">
179
+ <xs:annotation>
180
+ <xs:documentation>
181
+ The Atom link construct is defined in section 4.2.7 of the format spec.
182
+ </xs:documentation>
183
+ </xs:annotation>
184
+ <xs:attribute name="href" use="required" type="xs:anyURI" />
185
+ <xs:attribute name="rel" type="xs:string" use="optional"/>
186
+ <xs:attribute name="type" use="optional" type="xs:string" />
187
+ <xs:attribute name="hreflang" use="optional" type="xs:NMTOKEN" />
188
+ <xs:attribute name="title" use="optional" type="xs:string" />
189
+ <xs:attribute name="length" use="optional" type="xs:positiveInteger" />
190
+ <xs:attributeGroup ref="atom:commonAttributes"/>
191
+ </xs:complexType>
192
+ <xs:complexType name="logoType">
193
+ <xs:annotation>
194
+ <xs:documentation>
195
+ The Atom logo construct is defined in section 4.2.8 of the format spec.
196
+ </xs:documentation>
197
+ </xs:annotation>
198
+ <xs:simpleContent>
199
+ <xs:extension base="xs:anyURI">
200
+ <xs:attributeGroup ref="atom:commonAttributes"/>
201
+ </xs:extension>
202
+ </xs:simpleContent>
203
+ </xs:complexType>
204
+ <xs:complexType name="sourceType">
205
+ <xs:annotation>
206
+ <xs:documentation>
207
+ The Atom source construct is defined in section 4.2.11 of the format spec.
208
+ </xs:documentation>
209
+ </xs:annotation>
210
+ <xs:choice maxOccurs="unbounded">
211
+ <xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
212
+ <xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded"/>
213
+ <xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
214
+ <xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1"/>
215
+ <xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1"/>
216
+ <xs:element name="id" type="atom:idType" minOccurs="0" maxOccurs="1"/>
217
+ <xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded"/>
218
+ <xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1"/>
219
+ <xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1"/>
220
+ <xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1"/>
221
+ <xs:element name="title" type="atom:textType" minOccurs="0" maxOccurs="1"/>
222
+ <xs:element name="updated" type="atom:dateTimeType" minOccurs="0" maxOccurs="1"/>
223
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
224
+ </xs:choice>
225
+ <xs:attributeGroup ref="atom:commonAttributes"/>
226
+ </xs:complexType>
227
+ <xs:complexType name="uriType">
228
+ <xs:simpleContent>
229
+ <xs:extension base="xs:anyURI">
230
+ <xs:attributeGroup ref="atom:commonAttributes"/>
231
+ </xs:extension>
232
+ </xs:simpleContent>
233
+ </xs:complexType>
234
+ <xs:complexType name="dateTimeType">
235
+ <xs:simpleContent>
236
+ <xs:extension base="xs:dateTime">
237
+ <xs:attributeGroup ref="atom:commonAttributes"/>
238
+ </xs:extension>
239
+ </xs:simpleContent>
240
+ </xs:complexType>
241
+ <xs:attributeGroup name="commonAttributes">
242
+ <xs:anyAttribute namespace="##other"/>
243
+ </xs:attributeGroup>
244
+ </xs:schema>
@@ -0,0 +1,116 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
3
+ targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
4
+ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
5
+ elementFormDefault="qualified">
6
+ <xsd:annotation>
7
+ <xsd:documentation>
8
+ XML Schema for Sitemap files.
9
+ Last Modified 2008-03-26
10
+ </xsd:documentation>
11
+ </xsd:annotation>
12
+
13
+ <xsd:element name="urlset">
14
+ <xsd:annotation>
15
+ <xsd:documentation>
16
+ Container for a set of up to 50,000 document elements.
17
+ This is the root element of the XML file.
18
+ </xsd:documentation>
19
+ </xsd:annotation>
20
+ <xsd:complexType>
21
+ <xsd:sequence>
22
+ <xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
23
+ <xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
24
+ </xsd:sequence>
25
+ </xsd:complexType>
26
+ </xsd:element>
27
+
28
+ <xsd:complexType name="tUrl">
29
+ <xsd:annotation>
30
+ <xsd:documentation>
31
+ Container for the data needed to describe a document to crawl.
32
+ </xsd:documentation>
33
+ </xsd:annotation>
34
+ <xsd:sequence>
35
+ <xsd:element name="loc" type="tLoc"/>
36
+ <xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
37
+ <xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
38
+ <xsd:element name="priority" type="tPriority" minOccurs="0"/>
39
+ <xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
40
+ </xsd:sequence>
41
+ </xsd:complexType>
42
+
43
+ <xsd:simpleType name="tLoc">
44
+ <xsd:annotation>
45
+ <xsd:documentation>
46
+ REQUIRED: The location URI of a document.
47
+ The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
48
+ </xsd:documentation>
49
+ </xsd:annotation>
50
+ <xsd:restriction base="xsd:anyURI">
51
+ <xsd:minLength value="12"/>
52
+ <xsd:maxLength value="2048"/>
53
+ </xsd:restriction>
54
+ </xsd:simpleType>
55
+
56
+ <xsd:simpleType name="tLastmod">
57
+ <xsd:annotation>
58
+ <xsd:documentation>
59
+ OPTIONAL: The date the document was last modified. The date must conform
60
+ to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
61
+ Example: 2005-05-10
62
+ Lastmod may also contain a timestamp.
63
+ Example: 2005-05-10T17:33:30+08:00
64
+ </xsd:documentation>
65
+ </xsd:annotation>
66
+ <xsd:union>
67
+ <xsd:simpleType>
68
+ <xsd:restriction base="xsd:date"/>
69
+ </xsd:simpleType>
70
+ <xsd:simpleType>
71
+ <xsd:restriction base="xsd:dateTime"/>
72
+ </xsd:simpleType>
73
+ </xsd:union>
74
+ </xsd:simpleType>
75
+
76
+ <xsd:simpleType name="tChangeFreq">
77
+ <xsd:annotation>
78
+ <xsd:documentation>
79
+ OPTIONAL: Indicates how frequently the content at a particular URL is
80
+ likely to change. The value "always" should be used to describe
81
+ documents that change each time they are accessed. The value "never"
82
+ should be used to describe archived URLs. Please note that web
83
+ crawlers may not necessarily crawl pages marked "always" more often.
84
+ Consider this element as a friendly suggestion and not a command.
85
+ </xsd:documentation>
86
+ </xsd:annotation>
87
+ <xsd:restriction base="xsd:string">
88
+ <xsd:enumeration value="always"/>
89
+ <xsd:enumeration value="hourly"/>
90
+ <xsd:enumeration value="daily"/>
91
+ <xsd:enumeration value="weekly"/>
92
+ <xsd:enumeration value="monthly"/>
93
+ <xsd:enumeration value="yearly"/>
94
+ <xsd:enumeration value="never"/>
95
+ </xsd:restriction>
96
+ </xsd:simpleType>
97
+
98
+ <xsd:simpleType name="tPriority">
99
+ <xsd:annotation>
100
+ <xsd:documentation>
101
+ OPTIONAL: The priority of a particular URL relative to other pages
102
+ on the same site. The value for this element is a number between
103
+ 0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
104
+ The default priority of a page is 0.5. Priority is used to select
105
+ between pages on your site. Setting a priority of 1.0 for all URLs
106
+ will not help you, as the relative priority of pages on your site
107
+ is what will be considered.
108
+ </xsd:documentation>
109
+ </xsd:annotation>
110
+ <xsd:restriction base="xsd:decimal">
111
+ <xsd:minInclusive value="0.0"/>
112
+ <xsd:maxInclusive value="1.0"/>
113
+ </xsd:restriction>
114
+ </xsd:simpleType>
115
+
116
+ </xsd:schema>
@@ -0,0 +1,5 @@
1
class WebChecker

  # Release version of the gem; web-checker.gemspec reads this constant.
  # Frozen so the version string cannot be mutated at runtime.
  VERSION = '0.2'.freeze

end
@@ -0,0 +1,29 @@
1
# encoding: utf-8

# Gem specification for web-checker. The version comes from
# lib/web-checker/version.rb; the file lists come from `git ls-files`, so the
# gem must be built from inside a git checkout.

require_relative 'lib/web-checker/version'

Gem::Specification.new do |s|
  s.name          = 'web-checker'
  s.version       = WebChecker::VERSION
  s.summary       = 'Check static websites for consistency.'
  s.author        = 'John Labovitz'
  s.email         = 'johnl@johnlabovitz.com'
  s.description   = %q{
    WebChecker checks static websites for consistency.
  }
  s.license       = 'MIT'
  s.homepage      = 'http://github.com/jslabovitz/web-checker'
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_path  = 'lib'

  # WebChecker#initialize uses required keyword arguments, which need
  # Ruby 2.1 or later.
  s.required_ruby_version = '>= 2.1'

  # The library runs the external `tidy` program to validate HTML.
  s.requirements << 'tidy (HTML Tidy command-line tool)'

  s.add_dependency 'addressable', '~> 2.5'
  s.add_dependency 'http', '~> 3.0'
  s.add_dependency 'nokogiri', '~> 1.8'
  s.add_dependency 'nokogumbo', '~> 1.4'
  s.add_dependency 'path', '~> 2.0'

  s.add_development_dependency 'rake', '~> 12.3'
  s.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
metadata ADDED
@@ -0,0 +1,150 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: web-checker
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.2'
5
+ platform: ruby
6
+ authors:
7
+ - John Labovitz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: addressable
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: http
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.8'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.8'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogumbo
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: path
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '12.3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '12.3'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubygems-tasks
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.2'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.2'
111
+ description: "\n WebChecker checks static websites for consistency.\n "
112
+ email: johnl@johnlabovitz.com
113
+ executables:
114
+ - web-checker
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - Rakefile
120
+ - bin/web-checker
121
+ - lib/web-checker.rb
122
+ - lib/web-checker/schemas/atom.xsd
123
+ - lib/web-checker/schemas/sitemap.xsd
124
+ - lib/web-checker/version.rb
125
+ - web-checker.gemspec
126
+ homepage: http://github.com/jslabovitz/web-checker
127
+ licenses:
128
+ - MIT
129
+ metadata: {}
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubyforge_project:
146
+ rubygems_version: 2.7.4
147
+ signing_key:
148
+ specification_version: 4
149
+ summary: Check static websites for consistency.
150
+ test_files: []