web-checker 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 89ddbaf96851d6efffd0acf6c7f86ac498710238f8ff0e078bed0fb5d8c40a4e
4
+ data.tar.gz: c776d89e28ec22d8230aebce749abc2ab475a8c62735b2fe40d36fc539a5e3fd
5
+ SHA512:
6
+ metadata.gz: 386bf65bcb250c8a0a192a97f200e5df15b5fec67a2dd80a65d0d964a7ccfbb47919fcbed5acf3d690e192bb95fe823e5f6671d2949e7703e48320b63346a6fb
7
+ data.tar.gz: ec7f58a6a3a1eb360b219aa135213c2b17d375e793961762ab1fac3471d0864904c4133a0ca63475a636cbb7e93d5117e47230f825df3f36aef9fca3cf7d4793
@@ -0,0 +1,2 @@
1
+ *.gem
2
+ .DS_Store
@@ -0,0 +1,2 @@
1
+ require 'rubygems/tasks'
2
+ Gem::Tasks.new
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby

# Command-line entry point.
#
# Usage: web-checker SITE_URI SITE_DIR
#
# Builds a WebChecker for the given site URI and site directory, runs the
# check, then prints the report.

require 'web-checker'

uri, dir = ARGV

# Both arguments are handed straight to WebChecker.new; stop here with a usage
# message instead of letting a missing one surface as an error inside the
# library.
abort "usage: #{File.basename($PROGRAM_NAME)} SITE_URI SITE_DIR" unless uri && dir

checker = WebChecker.new(site_uri: uri, site_dir: dir)
checker.check
checker.report
@@ -0,0 +1,161 @@
1
+ require 'addressable'
2
+ require 'http'
3
+ require 'nokogiri'
4
+ require 'nokogumbo'
5
+ require 'path'
6
+
7
# Crawls a website starting from a root URI and checks every local resource
# it reaches.
#
# * HTML responses are checked with the external `tidy` command and parsed
#   with Nokogiri.
# * XML responses are parsed strictly and validated against a bundled XSD
#   selected by the document's root element name.
# * CSS responses are scanned for url(...) references.
#
# Links found in HTML, XML and CSS are followed recursively as long as they
# stay on the same scheme, host and port as the site URI.
class WebChecker

  # Tidy diagnostics that are tolerated. Entries are compared against the
  # message text that follows the "Warning:" / "Error:" prefix.
  IgnoreErrors = [
    '<table> lacks "summary" attribute',
    '<img> lacks "alt" attribute',
    '<form> proprietary attribute "novalidate"',
    '<input> attribute "type" has invalid value "email"',
    '<input> attribute "tabindex" has invalid value "-1"',
    '<input> proprietary attribute "border"',
    'trimming empty <p>',
    '<iframe> proprietary attribute "allowfullscreen"',
  ].freeze

  # Every href/src attribute in a document is treated as a link to follow.
  LinkElementsXPath = '//@href | //@src'

  # Directory holding the bundled XSD files.
  SchemasDir = Path.new(__FILE__).dirname / 'web-checker' / 'schemas'

  # XSD used to validate an XML document, keyed by its root element name.
  Schemas = {
    'feed' => SchemasDir / 'atom.xsd',
    'urlset' => SchemasDir / 'sitemap.xsd',
  }.freeze

  # Shape of a single tidy diagnostic, e.g.
  #   line 82 column 1 - Warning: <table> lacks "summary" attribute
  TidyMessagePattern = /^line (\d+) column (\d+) - (.*?): (.*)$/

  # CSS url(...) reference: double-quoted, single-quoted or bare.
  CssUrlPattern = /\burl\(\s*(?:"([^"]*)"|'([^']*)'|([^\s"')]+))\s*\)/

  # Raised when a resource is missing, malformed or invalid.
  # Derives from StandardError so that a plain `rescue` can handle it.
  class Error < StandardError; end

  # @param site_uri [String, Addressable::URI] root URI where crawling starts
  # @param site_dir [String] directory of the site on disk (stored as a Path)
  def initialize(site_uri:, site_dir:)
    @site_uri = Addressable::URI.parse(site_uri)
    @site_dir = Path.new(site_dir)
    @schemas = {}
    @visited = {}
    # #report lists @files, but nothing in this class fills it in yet; start
    # with an empty list so that #report does not fail on nil.
    @files = []
  end

  # Checks the whole site, starting at the site URI.
  def check
    # get/parse robots
    # get/parse sitemap
    check_uri(@site_uri)
  end

  # Fetches +uri+ and checks the response according to its media type.
  # URIs outside the site, and URIs already fetched, are ignored.
  #
  # @param uri [String, Addressable::URI]
  # @raise [Error] on a 404, an unexpected status, a redirect without a
  #   Location header, or invalid content
  def check_uri(uri)
    uri = Addressable::URI.parse(uri)
    uri.normalize!
    # A fragment only addresses a position inside the same resource; drop it
    # so "page.html#a" and "page.html#b" are fetched once.
    uri.fragment = nil
    return unless local?(uri) && !seen?(uri)
    response = HTTP.get(uri)
    @visited[uri] = true
    case response.code
    when 200...300
      check_body(uri, response)
    when 300...400
      location = response.headers['Location']
      raise Error, "Redirect without Location header: #{uri}" unless location
      check_uri(uri + Addressable::URI.parse(location))
    when 404
      raise Error, "URI not found: #{uri}"
    else
      raise Error, "Bad status: #{response.inspect}"
    end
  end

  # Runs both HTML checks (tidy, then Nokogiri) on a document.
  def check_html(uri, html)
    check_html_tidy(uri, html)
    check_html_nokogiri(uri, html)
  end

  # Feeds +html+ to the external `tidy` command and fails if it reports
  # anything that is not listed in IgnoreErrors.
  #
  # @raise [Error] if tidy reports non-ignored diagnostics
  # @raise [RuntimeError] if a tidy output line has an unexpected shape
  def check_html_tidy(uri, html)
    tmp_file = Path.tmpfile
    begin
      tmp_file.write(html)
      output = %x{tidy -utf8 -quiet -errors #{tmp_file} 2>&1}
    ensure
      # Do not leave one temporary file behind per checked page.
      tmp_file.unlink if tmp_file.exist?
    end
    errors = output.split("\n").reject { |line| line.strip.empty? }.map { |line|
      match = TidyMessagePattern.match(line)
      raise "Can't parse error: #{line.inspect}" unless match
      {
        msg: line,
        line: match[1].to_i,
        column: match[2].to_i,
        type: match[3].downcase.to_sym,
        error: match[4].strip,
      }
    }.reject { |e| IgnoreErrors.include?(e[:error]) }
    unless errors.empty?
      warn "#{uri} has invalid HTML"
      show_errors(errors)
      raise Error, "HTML parsing failed (via Tidy)"
    end
  end

  # Parses +html+ with Nokogiri (the HTML5 parser when the document declares
  # an HTML5 doctype), fails on parse errors, then follows every link.
  #
  # @raise [Error] if the parser reports errors
  def check_html_nokogiri(uri, html)
    doc_class = (html =~ /<!DOCTYPE html>/i) ? Nokogiri::HTML5 : Nokogiri::HTML
    doc = doc_class.parse(html) { |config| config.strict }
    unless doc.errors.empty?
      show_errors(doc.errors)
      raise Error, "HTML parsing failed (via Nokogiri)"
    end
    doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
  end

  # Parses +xml+ strictly, validates it against the schema registered for its
  # root element, then follows every link.
  #
  # @raise [Error] if the document is malformed, has an unknown root element,
  #   or fails schema validation
  def check_xml(uri, xml)
    begin
      xml_doc = Nokogiri::XML::Document.parse(xml) { |config| config.strict }
    rescue Nokogiri::XML::SyntaxError => e
      # Strict parsing raises instead of collecting errors; report it the same
      # way as collected errors.
      show_errors([e])
      raise Error, "XML parsing failed"
    end
    unless xml_doc.errors.empty?
      show_errors(xml_doc.errors)
      raise Error, "XML parsing failed"
    end
    root_name = xml_doc.root.name
    schema_file = Schemas[root_name]
    raise Error, "Unknown schema: #{root_name.inspect}" unless schema_file
    # Compiled schemas are cached per file; reading the file as a string
    # avoids leaving an open file handle behind.
    schema = (@schemas[schema_file] ||= Nokogiri::XML::Schema(schema_file.read))
    validation_errors = schema.validate(xml_doc)
    unless validation_errors.empty?
      show_errors(validation_errors)
      raise Error, "XML validation failed"
    end
    xml_doc.xpath(LinkElementsXPath).each { |e| check_uri(uri + e.value) }
  end

  # Prints each error to stderr with its line and column.
  #
  # @param errors [Array] tidy diagnostics (hashes built by #check_html_tidy)
  #   or parser/validator error objects
  def show_errors(errors)
    errors.each { |error| warn format_error(error) }
  end

  # Follows every url(...) reference in a stylesheet.
  def check_css(uri, css)
    css.scan(CssUrlPattern) do |captures|
      # Exactly one of the three capture groups is set for a match.
      check_uri(uri + captures.compact.first)
    end
  end

  # True if +uri+ is relative, or has the same scheme, host and port as the
  # site URI.
  def local?(uri)
    (!uri.scheme && !uri.host) ||
      (uri.scheme == @site_uri.scheme && uri.host == @site_uri.host && uri.port == @site_uri.port)
  end

  # Truthy if +uri+ has already been fetched.
  def seen?(uri)
    @visited[uri]
  end

  # Prints the list of unreferenced files, if there are any.
  def report
    return if @files.empty?
    puts "\t" + "unreferenced files:"
    @files.sort.each { |path| puts "\t\t" + path.to_s }
  end

  private

  # Dispatches a successful response to the checker for its media type.
  def check_body(uri, response)
    body = response.body.to_s
    type = response.headers['Content-Type']
    case media_type(type)
    when 'text/html'
      check_html(uri, body)
    when 'text/css'
      check_css(uri, body)
    when 'application/xml', 'text/xml'
      check_xml(uri, body)
    when 'image/jpeg', 'image/png', 'image/gif', 'application/javascript'
      # ignore
    else
      warn "skipping unknown resource type: #{uri} (#{type})"
    end
  end

  # Reduces a Content-Type header value to its bare media type, so that
  # "text/html; charset=utf-8" is recognised as "text/html".
  def media_type(content_type)
    content_type.to_s.split(';').first.to_s.strip.downcase
  end

  # Builds one line of output for either kind of error passed to #show_errors.
  def format_error(error)
    if error.is_a?(Hash)
      "#{error[:msg]} [line #{error[:line]}, column #{error[:column]}]"
    elsif error.respond_to?(:line) && error.respond_to?(:column)
      "#{error} [line #{error.line}, column #{error.column}]"
    else
      error.to_s
    end
  end

end
@@ -0,0 +1,244 @@
1
+ <?xml version="1.0" encoding="utf-8" ?>
2
+ <xs:schema targetNamespace="http://www.w3.org/2005/Atom" elementFormDefault="qualified"
3
+ attributeFormDefault="unqualified"
4
+ xmlns:atom="http://www.w3.org/2005/Atom"
5
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
6
+ xmlns:xml="http://www.w3.org/XML/1998/namespace">
7
+ <xs:annotation>
8
+ <xs:documentation>
9
+ This version of the Atom schema is based on version 1.0 of the format specifications,
10
+ found here http://www.atomenabled.org/developers/syndication/atom-format-spec.php.
11
+ </xs:documentation>
12
+ </xs:annotation>
13
+ <xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/03/xml.xsd" />
14
+ <xs:annotation>
15
+ <xs:documentation>
16
+ An Atom document may have two root elements, feed and entry, as defined in section 2.
17
+ </xs:documentation>
18
+ </xs:annotation>
19
+ <xs:element name="feed" type="atom:feedType"/>
20
+ <xs:element name="entry" type="atom:entryType"/>
21
+ <xs:complexType name="textType" mixed="true">
22
+ <xs:annotation>
23
+ <xs:documentation>
24
+ The Atom text construct is defined in section 3.1 of the format spec.
25
+ </xs:documentation>
26
+ </xs:annotation>
27
+ <xs:sequence>
28
+ <xs:any namespace="http://www.w3.org/1999/xhtml" minOccurs="0"/>
29
+ </xs:sequence>
30
+ <xs:attribute name="type" >
31
+ <xs:simpleType>
32
+ <xs:restriction base="xs:token">
33
+ <xs:enumeration value="text"/>
34
+ <xs:enumeration value="html"/>
35
+ <xs:enumeration value="xhtml"/>
36
+ </xs:restriction>
37
+ </xs:simpleType>
38
+ </xs:attribute>
39
+ <xs:attributeGroup ref="atom:commonAttributes"/>
40
+ </xs:complexType>
41
+ <xs:complexType name="personType">
42
+ <xs:annotation>
43
+ <xs:documentation>
44
+ The Atom person construct is defined in section 3.2 of the format spec.
45
+ </xs:documentation>
46
+ </xs:annotation>
47
+ <xs:choice minOccurs="1" maxOccurs="unbounded">
48
+ <xs:element name="name" type="xs:string" minOccurs="1" maxOccurs="1" />
49
+ <xs:element name="uri" type="atom:uriType" minOccurs="0" maxOccurs="1" />
50
+ <xs:element name="email" type="atom:emailType" minOccurs="0" maxOccurs="1" />
51
+ <xs:any namespace="##other"/>
52
+ </xs:choice>
53
+ <xs:attributeGroup ref="atom:commonAttributes"/>
54
+ </xs:complexType>
55
+ <xs:simpleType name="emailType">
56
+ <xs:annotation>
57
+ <xs:documentation>
58
+ Schema definition for an email address.
59
+ </xs:documentation>
60
+ </xs:annotation>
61
+ <xs:restriction base="xs:normalizedString">
62
+ <!--
63
+ <xs:pattern value="\w+@(\w+\.)+\w+" />
64
+ -->
65
+ <xs:maxLength value="254"/>
66
+ <xs:pattern value="[_\-a-zA-Z0-9\.\+]+@[a-zA-Z0-9](\.?[\-a-zA-Z0-9]*[a-zA-Z0-9])*"/>
67
+ </xs:restriction>
68
+ </xs:simpleType>
69
+ <xs:complexType name="feedType">
70
+ <xs:annotation>
71
+ <xs:documentation>
72
+ The Atom feed construct is defined in section 4.1.1 of the format spec.
73
+ </xs:documentation>
74
+ </xs:annotation>
75
+ <xs:choice minOccurs="3" maxOccurs="unbounded">
76
+ <xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
77
+ <xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
78
+ <xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
79
+ <xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1" />
80
+ <xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1" />
81
+ <xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
82
+ <xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
83
+ <xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1" />
84
+ <xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
85
+ <xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1" />
86
+ <xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
87
+ <xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
88
+ <xs:element name="entry" type="atom:entryType" minOccurs="0" maxOccurs="unbounded" />
89
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
90
+ </xs:choice>
91
+ <xs:attributeGroup ref="atom:commonAttributes"/>
92
+ </xs:complexType>
93
+ <xs:complexType name="entryType">
94
+ <xs:annotation>
95
+ <xs:documentation>
96
+ The Atom entry construct is defined in section 4.1.2 of the format spec.
97
+ </xs:documentation>
98
+ </xs:annotation>
99
+ <xs:choice maxOccurs="unbounded">
100
+ <xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
101
+ <xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded" />
102
+ <xs:element name="content" type="atom:contentType" minOccurs="0" maxOccurs="1" />
103
+ <xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded" />
104
+ <xs:element name="id" type="atom:idType" minOccurs="1" maxOccurs="1" />
105
+ <xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded" />
106
+ <xs:element name="published" type="atom:dateTimeType" minOccurs="0" maxOccurs="1" />
107
+ <xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1" />
108
+ <xs:element name="source" type="atom:textType" minOccurs="0" maxOccurs="1" />
109
+ <xs:element name="summary" type="atom:textType" minOccurs="0" maxOccurs="1" />
110
+ <xs:element name="title" type="atom:textType" minOccurs="1" maxOccurs="1" />
111
+ <xs:element name="updated" type="atom:dateTimeType" minOccurs="1" maxOccurs="1" />
112
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
113
+ </xs:choice>
114
+ <xs:attributeGroup ref="atom:commonAttributes"/>
115
+ </xs:complexType>
116
+ <xs:complexType name="contentType" mixed="true">
117
+ <xs:annotation>
118
+ <xs:documentation>
119
+ The Atom content construct is defined in section 4.1.3 of the format spec.
120
+ </xs:documentation>
121
+ </xs:annotation>
122
+ <xs:sequence>
123
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded" />
124
+ </xs:sequence>
125
+ <xs:attribute name="type" type="xs:string"/>
126
+ <xs:attribute name="src" type="xs:anyURI"/>
127
+ <xs:attributeGroup ref="atom:commonAttributes"/>
128
+ </xs:complexType>
129
+ <xs:complexType name="categoryType">
130
+ <xs:annotation>
131
+ <xs:documentation>
132
+ The Atom category construct is defined in section 4.2.2 of the format spec.
133
+ </xs:documentation>
134
+ </xs:annotation>
135
+ <xs:attribute name="term" type="xs:string" use="required"/>
136
+ <xs:attribute name="scheme" type="xs:anyURI" use="optional"/>
137
+ <xs:attribute name="label" type="xs:string" use="optional"/>
138
+ <xs:attributeGroup ref="atom:commonAttributes" />
139
+ </xs:complexType>
140
+ <xs:complexType name="generatorType">
141
+ <xs:annotation>
142
+ <xs:documentation>
143
+ The Atom generator element is defined in section 4.2.4 of the format spec.
144
+ </xs:documentation>
145
+ </xs:annotation>
146
+ <xs:simpleContent>
147
+ <xs:extension base="xs:string">
148
+ <xs:attribute name="uri" use="optional" type="xs:anyURI" />
149
+ <xs:attribute name="version" use="optional" type="xs:string" />
150
+ <xs:attributeGroup ref="atom:commonAttributes"/>
151
+ </xs:extension>
152
+ </xs:simpleContent>
153
+ </xs:complexType>
154
+ <xs:complexType name="iconType">
155
+ <xs:annotation>
156
+ <xs:documentation>
157
+ The Atom icon construct is defined in section 4.2.5 of the format spec.
158
+ </xs:documentation>
159
+ </xs:annotation>
160
+ <xs:simpleContent>
161
+ <xs:extension base="xs:anyURI">
162
+ <xs:attributeGroup ref="atom:commonAttributes"/>
163
+ </xs:extension>
164
+ </xs:simpleContent>
165
+ </xs:complexType>
166
+ <xs:complexType name="idType">
167
+ <xs:annotation>
168
+ <xs:documentation>
169
+ The Atom id construct is defined in section 4.2.6 of the format spec.
170
+ </xs:documentation>
171
+ </xs:annotation>
172
+ <xs:simpleContent>
173
+ <xs:extension base="xs:anyURI">
174
+ <xs:attributeGroup ref="atom:commonAttributes"/>
175
+ </xs:extension>
176
+ </xs:simpleContent>
177
+ </xs:complexType>
178
+ <xs:complexType name="linkType" mixed="true">
179
+ <xs:annotation>
180
+ <xs:documentation>
181
+ The Atom link construct is defined in section 4.2.7 of the format spec.
182
+ </xs:documentation>
183
+ </xs:annotation>
184
+ <xs:attribute name="href" use="required" type="xs:anyURI" />
185
+ <xs:attribute name="rel" type="xs:string" use="optional"/>
186
+ <xs:attribute name="type" use="optional" type="xs:string" />
187
+ <xs:attribute name="hreflang" use="optional" type="xs:NMTOKEN" />
188
+ <xs:attribute name="title" use="optional" type="xs:string" />
189
+ <xs:attribute name="length" use="optional" type="xs:positiveInteger" />
190
+ <xs:attributeGroup ref="atom:commonAttributes"/>
191
+ </xs:complexType>
192
+ <xs:complexType name="logoType">
193
+ <xs:annotation>
194
+ <xs:documentation>
195
+ The Atom logo construct is defined in section 4.2.8 of the format spec.
196
+ </xs:documentation>
197
+ </xs:annotation>
198
+ <xs:simpleContent>
199
+ <xs:extension base="xs:anyURI">
200
+ <xs:attributeGroup ref="atom:commonAttributes"/>
201
+ </xs:extension>
202
+ </xs:simpleContent>
203
+ </xs:complexType>
204
+ <xs:complexType name="sourceType">
205
+ <xs:annotation>
206
+ <xs:documentation>
207
+ The Atom source construct is defined in section 4.2.11 of the format spec.
208
+ </xs:documentation>
209
+ </xs:annotation>
210
+ <xs:choice maxOccurs="unbounded">
211
+ <xs:element name="author" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
212
+ <xs:element name="category" type="atom:categoryType" minOccurs="0" maxOccurs="unbounded"/>
213
+ <xs:element name="contributor" type="atom:personType" minOccurs="0" maxOccurs="unbounded"/>
214
+ <xs:element name="generator" type="atom:generatorType" minOccurs="0" maxOccurs="1"/>
215
+ <xs:element name="icon" type="atom:iconType" minOccurs="0" maxOccurs="1"/>
216
+ <xs:element name="id" type="atom:idType" minOccurs="0" maxOccurs="1"/>
217
+ <xs:element name="link" type="atom:linkType" minOccurs="0" maxOccurs="unbounded"/>
218
+ <xs:element name="logo" type="atom:logoType" minOccurs="0" maxOccurs="1"/>
219
+ <xs:element name="rights" type="atom:textType" minOccurs="0" maxOccurs="1"/>
220
+ <xs:element name="subtitle" type="atom:textType" minOccurs="0" maxOccurs="1"/>
221
+ <xs:element name="title" type="atom:textType" minOccurs="0" maxOccurs="1"/>
222
+ <xs:element name="updated" type="atom:dateTimeType" minOccurs="0" maxOccurs="1"/>
223
+ <xs:any namespace="##other" minOccurs="0" maxOccurs="unbounded"/>
224
+ </xs:choice>
225
+ <xs:attributeGroup ref="atom:commonAttributes"/>
226
+ </xs:complexType>
227
+ <xs:complexType name="uriType">
228
+ <xs:simpleContent>
229
+ <xs:extension base="xs:anyURI">
230
+ <xs:attributeGroup ref="atom:commonAttributes"/>
231
+ </xs:extension>
232
+ </xs:simpleContent>
233
+ </xs:complexType>
234
+ <xs:complexType name="dateTimeType">
235
+ <xs:simpleContent>
236
+ <xs:extension base="xs:dateTime">
237
+ <xs:attributeGroup ref="atom:commonAttributes"/>
238
+ </xs:extension>
239
+ </xs:simpleContent>
240
+ </xs:complexType>
241
+ <xs:attributeGroup name="commonAttributes">
242
+ <xs:anyAttribute namespace="##other"/>
243
+ </xs:attributeGroup>
244
+ </xs:schema>
@@ -0,0 +1,116 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
3
+ targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
4
+ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
5
+ elementFormDefault="qualified">
6
+ <xsd:annotation>
7
+ <xsd:documentation>
8
+ XML Schema for Sitemap files.
9
+ Last Modified 2008-03-26
10
+ </xsd:documentation>
11
+ </xsd:annotation>
12
+
13
+ <xsd:element name="urlset">
14
+ <xsd:annotation>
15
+ <xsd:documentation>
16
+ Container for a set of up to 50,000 document elements.
17
+ This is the root element of the XML file.
18
+ </xsd:documentation>
19
+ </xsd:annotation>
20
+ <xsd:complexType>
21
+ <xsd:sequence>
22
+ <xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
23
+ <xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
24
+ </xsd:sequence>
25
+ </xsd:complexType>
26
+ </xsd:element>
27
+
28
+ <xsd:complexType name="tUrl">
29
+ <xsd:annotation>
30
+ <xsd:documentation>
31
+ Container for the data needed to describe a document to crawl.
32
+ </xsd:documentation>
33
+ </xsd:annotation>
34
+ <xsd:sequence>
35
+ <xsd:element name="loc" type="tLoc"/>
36
+ <xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
37
+ <xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
38
+ <xsd:element name="priority" type="tPriority" minOccurs="0"/>
39
+ <xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
40
+ </xsd:sequence>
41
+ </xsd:complexType>
42
+
43
+ <xsd:simpleType name="tLoc">
44
+ <xsd:annotation>
45
+ <xsd:documentation>
46
+ REQUIRED: The location URI of a document.
47
+ The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
48
+ </xsd:documentation>
49
+ </xsd:annotation>
50
+ <xsd:restriction base="xsd:anyURI">
51
+ <xsd:minLength value="12"/>
52
+ <xsd:maxLength value="2048"/>
53
+ </xsd:restriction>
54
+ </xsd:simpleType>
55
+
56
+ <xsd:simpleType name="tLastmod">
57
+ <xsd:annotation>
58
+ <xsd:documentation>
59
+ OPTIONAL: The date the document was last modified. The date must conform
60
+ to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
61
+ Example: 2005-05-10
62
+ Lastmod may also contain a timestamp.
63
+ Example: 2005-05-10T17:33:30+08:00
64
+ </xsd:documentation>
65
+ </xsd:annotation>
66
+ <xsd:union>
67
+ <xsd:simpleType>
68
+ <xsd:restriction base="xsd:date"/>
69
+ </xsd:simpleType>
70
+ <xsd:simpleType>
71
+ <xsd:restriction base="xsd:dateTime"/>
72
+ </xsd:simpleType>
73
+ </xsd:union>
74
+ </xsd:simpleType>
75
+
76
+ <xsd:simpleType name="tChangeFreq">
77
+ <xsd:annotation>
78
+ <xsd:documentation>
79
+ OPTIONAL: Indicates how frequently the content at a particular URL is
80
+ likely to change. The value "always" should be used to describe
81
+ documents that change each time they are accessed. The value "never"
82
+ should be used to describe archived URLs. Please note that web
83
+ crawlers may not necessarily crawl pages marked "always" more often.
84
+ Consider this element as a friendly suggestion and not a command.
85
+ </xsd:documentation>
86
+ </xsd:annotation>
87
+ <xsd:restriction base="xsd:string">
88
+ <xsd:enumeration value="always"/>
89
+ <xsd:enumeration value="hourly"/>
90
+ <xsd:enumeration value="daily"/>
91
+ <xsd:enumeration value="weekly"/>
92
+ <xsd:enumeration value="monthly"/>
93
+ <xsd:enumeration value="yearly"/>
94
+ <xsd:enumeration value="never"/>
95
+ </xsd:restriction>
96
+ </xsd:simpleType>
97
+
98
+ <xsd:simpleType name="tPriority">
99
+ <xsd:annotation>
100
+ <xsd:documentation>
101
+ OPTIONAL: The priority of a particular URL relative to other pages
102
+ on the same site. The value for this element is a number between
103
+ 0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
104
+ The default priority of a page is 0.5. Priority is used to select
105
+ between pages on your site. Setting a priority of 1.0 for all URLs
106
+ will not help you, as the relative priority of pages on your site
107
+ is what will be considered.
108
+ </xsd:documentation>
109
+ </xsd:annotation>
110
+ <xsd:restriction base="xsd:decimal">
111
+ <xsd:minInclusive value="0.0"/>
112
+ <xsd:maxInclusive value="1.0"/>
113
+ </xsd:restriction>
114
+ </xsd:simpleType>
115
+
116
+ </xsd:schema>
@@ -0,0 +1,5 @@
1
class WebChecker

  # Gem version; frozen so the shared constant string cannot be mutated.
  VERSION = '0.2'.freeze

end
@@ -0,0 +1,29 @@
1
# Gem specification for web-checker.

require_relative 'lib/web-checker/version'

Gem::Specification.new do |s|
  s.name          = 'web-checker'
  s.version       = WebChecker::VERSION
  s.summary       = 'Check static websites for consistency.'
  s.author        = 'John Labovitz'
  s.email         = 'johnl@johnlabovitz.com'
  # Plain one-line literal: a multi-line %q{} block puts a leading newline and
  # trailing indentation into the published description.
  s.description   = 'WebChecker checks static websites for consistency.'
  s.license       = 'MIT'
  s.homepage      = 'https://github.com/jslabovitz/web-checker'

  # NUL-separated listing, so file names containing whitespace or newlines
  # are not split apart.
  tracked_files = `git ls-files -z`.split("\x0")
  s.files         = tracked_files
  s.test_files    = tracked_files.grep(%r{\A(?:test|spec|features)/})
  s.executables   = tracked_files.grep(%r{\Abin/}) { |f| File.basename(f) }
  s.require_path  = 'lib'

  s.add_dependency 'addressable', '~> 2.5'
  s.add_dependency 'http', '~> 3.0'
  s.add_dependency 'nokogiri', '~> 1.8'
  s.add_dependency 'nokogumbo', '~> 1.4'
  s.add_dependency 'path', '~> 2.0'

  s.add_development_dependency 'rake', '~> 12.3'
  s.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
metadata ADDED
@@ -0,0 +1,150 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: web-checker
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.2'
5
+ platform: ruby
6
+ authors:
7
+ - John Labovitz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: addressable
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: http
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.8'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.8'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogumbo
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: path
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '12.3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '12.3'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubygems-tasks
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.2'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.2'
111
+ description: "\n WebChecker checks static websites for consistency.\n "
112
+ email: johnl@johnlabovitz.com
113
+ executables:
114
+ - web-checker
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - Rakefile
120
+ - bin/web-checker
121
+ - lib/web-checker.rb
122
+ - lib/web-checker/schemas/atom.xsd
123
+ - lib/web-checker/schemas/sitemap.xsd
124
+ - lib/web-checker/version.rb
125
+ - web-checker.gemspec
126
+ homepage: http://github.com/jslabovitz/web-checker
127
+ licenses:
128
+ - MIT
129
+ metadata: {}
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubyforge_project:
146
+ rubygems_version: 2.7.4
147
+ signing_key:
148
+ specification_version: 4
149
+ summary: Check static websites for consistency.
150
+ test_files: []