resync 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +92 -0
- data/Rakefile +56 -0
- data/example.rb +100 -0
- data/lib/resync/capability_list.rb +85 -0
- data/lib/resync/change_dump.rb +15 -0
- data/lib/resync/change_dump_manifest.rb +15 -0
- data/lib/resync/change_list.rb +15 -0
- data/lib/resync/change_list_index.rb +26 -0
- data/lib/resync/link.rb +87 -0
- data/lib/resync/metadata.rb +112 -0
- data/lib/resync/resource.rb +72 -0
- data/lib/resync/resource_dump.rb +15 -0
- data/lib/resync/resource_dump_manifest.rb +15 -0
- data/lib/resync/resource_list.rb +15 -0
- data/lib/resync/resource_list_index.rb +15 -0
- data/lib/resync/shared/augmented.rb +76 -0
- data/lib/resync/shared/base_resource_list.rb +117 -0
- data/lib/resync/shared/descriptor.rb +135 -0
- data/lib/resync/shared/sitemap_index.rb +32 -0
- data/lib/resync/shared/sorted_resource_list.rb +60 -0
- data/lib/resync/source_description.rb +14 -0
- data/lib/resync/types/change.rb +14 -0
- data/lib/resync/types/change_frequency.rb +18 -0
- data/lib/resync/types.rb +6 -0
- data/lib/resync/version.rb +4 -0
- data/lib/resync/xml.rb +216 -0
- data/lib/resync/xml_parser.rb +65 -0
- data/lib/resync.rb +4 -0
- data/resync.gemspec +36 -0
- data/spec/acceptance/xml_parser_spec.rb +1049 -0
- data/spec/data/examples/README.md +1 -0
- data/spec/data/examples/example-1.xml +12 -0
- data/spec/data/examples/example-12.xml +25 -0
- data/spec/data/examples/example-13.xml +25 -0
- data/spec/data/examples/example-14.xml +23 -0
- data/spec/data/examples/example-15.xml +21 -0
- data/spec/data/examples/example-16.xml +24 -0
- data/spec/data/examples/example-17.xml +39 -0
- data/spec/data/examples/example-18.xml +25 -0
- data/spec/data/examples/example-19.xml +28 -0
- data/spec/data/examples/example-2.xml +18 -0
- data/spec/data/examples/example-20.xml +22 -0
- data/spec/data/examples/example-21.xml +31 -0
- data/spec/data/examples/example-22.xml +41 -0
- data/spec/data/examples/example-23.xml +41 -0
- data/spec/data/examples/example-24.xml +28 -0
- data/spec/data/examples/example-25.xml +21 -0
- data/spec/data/examples/example-26.xml +18 -0
- data/spec/data/examples/example-27.xml +36 -0
- data/spec/data/examples/example-28.xml +34 -0
- data/spec/data/examples/example-29.xml +27 -0
- data/spec/data/examples/example-3.xml +17 -0
- data/spec/data/examples/example-30.xml +18 -0
- data/spec/data/examples/example-31.xml +16 -0
- data/spec/data/examples/example-32.xml +22 -0
- data/spec/data/examples/example-33.xml +22 -0
- data/spec/data/examples/example-4.xml +10 -0
- data/spec/data/examples/example-5.xml +18 -0
- data/spec/data/examples/example-6.xml +21 -0
- data/spec/data/examples/example-7.xml +13 -0
- data/spec/data/examples/example-8.xml +12 -0
- data/spec/data/resourcesync.xsd +148 -0
- data/spec/data/siteindex.xsd +75 -0
- data/spec/data/sitemap.xsd +116 -0
- data/spec/rspec_custom_matchers.rb +89 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/todo.rb +11 -0
- data/spec/unit/resync/capability_list_spec.rb +138 -0
- data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
- data/spec/unit/resync/change_dump_spec.rb +61 -0
- data/spec/unit/resync/change_list_index_spec.rb +49 -0
- data/spec/unit/resync/change_list_spec.rb +75 -0
- data/spec/unit/resync/link_spec.rb +93 -0
- data/spec/unit/resync/metadata_spec.rb +169 -0
- data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
- data/spec/unit/resync/resource_dump_spec.rb +62 -0
- data/spec/unit/resync/resource_list_index_spec.rb +53 -0
- data/spec/unit/resync/resource_list_spec.rb +60 -0
- data/spec/unit/resync/resource_spec.rb +176 -0
- data/spec/unit/resync/shared/augmented_examples.rb +58 -0
- data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
- data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
- data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
- data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
- data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
- data/spec/unit/resync/source_description_spec.rb +55 -0
- data/spec/unit/resync/xml/timenode_spec.rb +48 -0
- data/spec/unit/resync/xml/xml_spec.rb +40 -0
- data/spec/unit/resync/xml_parser_spec.rb +82 -0
- metadata +340 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="up"
|
5
|
+
href="http://aggregator1.example.com/dataset1/capabilitylist.xml"/>
|
6
|
+
<rs:md capability="changelist"
|
7
|
+
from="2013-01-03T11:00:00Z"/>
|
8
|
+
<url>
|
9
|
+
<loc>http://aggregator1.example.com/res1.html</loc>
|
10
|
+
<lastmod>2013-01-03T20:00:00Z</lastmod>
|
11
|
+
<rs:md hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
12
|
+
length="8876"
|
13
|
+
type="text/html"
|
14
|
+
change="updated"/>
|
15
|
+
<rs:ln rel="via"
|
16
|
+
href="http://original.example.com/res1.html"
|
17
|
+
modified="2013-01-03T07:00:00Z"
|
18
|
+
hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
19
|
+
length="8876"
|
20
|
+
type="text/html"/>
|
21
|
+
</url>
|
22
|
+
</urlset>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="up"
|
5
|
+
href="http://aggregator2.example.com/dataset1/capabilitylist.xml"/>
|
6
|
+
<rs:md capability="changelist"
|
7
|
+
from="2013-01-03T12:00:00Z"/>
|
8
|
+
<url>
|
9
|
+
<loc>http://aggregator2.example.com/res1.html</loc>
|
10
|
+
<lastmod>2013-01-04T09:00:00Z</lastmod>
|
11
|
+
<rs:md hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
12
|
+
length="8876"
|
13
|
+
type="text/html"
|
14
|
+
change="updated"/>
|
15
|
+
<rs:ln rel="via"
|
16
|
+
href="http://original.example.com/res1.html"
|
17
|
+
modified="2013-01-03T07:00:00Z"
|
18
|
+
hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
19
|
+
length="8876"
|
20
|
+
type="text/html"/>
|
21
|
+
</url>
|
22
|
+
</urlset>
|
@@ -0,0 +1,10 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:md capability="resourcedump"
|
5
|
+
at="2013-01-03T09:00:00Z"/>
|
6
|
+
<url>
|
7
|
+
<loc>http://example.com/resourcedump.zip</loc>
|
8
|
+
<lastmod>2013-01-03T09:00:00Z</lastmod>
|
9
|
+
</url>
|
10
|
+
</urlset>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:md capability="resourcedump-manifest"
|
5
|
+
at="2013-01-03T09:00:00Z"/>
|
6
|
+
<url>
|
7
|
+
<loc>http://example.com/res1</loc>
|
8
|
+
<lastmod>2013-01-03T03:00:00Z</lastmod>
|
9
|
+
<rs:md hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
10
|
+
path="/resources/res1"/>
|
11
|
+
</url>
|
12
|
+
<url>
|
13
|
+
<loc>http://example.com/res2</loc>
|
14
|
+
<lastmod>2013-01-03T04:00:00Z</lastmod>
|
15
|
+
<rs:md hash="md5:1e0d5cb8ef6ba40c99b14c0237be735e"
|
16
|
+
path="/resources/res2"/>
|
17
|
+
</url>
|
18
|
+
</urlset>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="describedby"
|
5
|
+
href="http://example.com/info_about_set1_of_resources.xml"/>
|
6
|
+
<rs:ln rel="up"
|
7
|
+
href="http://example.com/resourcesync_description.xml"/>
|
8
|
+
<rs:md capability="capabilitylist"/>
|
9
|
+
<url>
|
10
|
+
<loc>http://example.com/dataset1/resourcelist.xml</loc>
|
11
|
+
<rs:md capability="resourcelist"/>
|
12
|
+
</url>
|
13
|
+
<url>
|
14
|
+
<loc>http://example.com/dataset1/resourcedump.xml</loc>
|
15
|
+
<rs:md capability="resourcedump"/>
|
16
|
+
</url>
|
17
|
+
<url>
|
18
|
+
<loc>http://example.com/dataset1/changelist.xml</loc>
|
19
|
+
<rs:md capability="changelist"/>
|
20
|
+
</url>
|
21
|
+
</urlset>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="describedby"
|
5
|
+
href="http://example.com/info-about-source.xml"/>
|
6
|
+
<rs:md capability="description"/>
|
7
|
+
<url>
|
8
|
+
<loc>http://example.com/dataset1/capabilitylist.xml</loc>
|
9
|
+
<rs:md capability="capabilitylist"/>
|
10
|
+
<rs:ln rel="describedby"
|
11
|
+
href="http://example.com/info_about_set1_of_resources.xml"/>
|
12
|
+
</url>
|
13
|
+
</urlset>
|
@@ -0,0 +1,12 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:md capability="resourcelist"
|
5
|
+
at="2013-01-03T09:00:00Z"/>
|
6
|
+
<sitemap>
|
7
|
+
<loc>http://example.com/resourcelist-part1.xml</loc>
|
8
|
+
</sitemap>
|
9
|
+
<sitemap>
|
10
|
+
<loc>http://example.com/resourcelist-part2.xml</loc>
|
11
|
+
</sitemap>
|
12
|
+
</sitemapindex>
|
@@ -0,0 +1,148 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.openarchives.org/rs/terms/"
|
4
|
+
xmlns="http://www.openarchives.org/rs/terms/"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
|
7
|
+
<xsd:annotation>
|
8
|
+
<xsd:documentation>
|
9
|
+
XML Schema for ResourceSync extensions to Sitemap files.
|
10
|
+
See: http://www.openarchives.org/rs/ .
|
11
|
+
2013-01-06 first stab [Simeon Warner],
|
12
|
+
2013-08-15 for v0.9.1 [Simeon Warner].
|
13
|
+
</xsd:documentation>
|
14
|
+
</xsd:annotation>
|
15
|
+
|
16
|
+
<xsd:element name="md">
|
17
|
+
<xsd:annotation>
|
18
|
+
<xsd:documentation>
|
19
|
+
Metadata about a resource
|
20
|
+
</xsd:documentation>
|
21
|
+
</xsd:annotation>
|
22
|
+
<xsd:complexType>
|
23
|
+
<xsd:complexContent>
|
24
|
+
<xsd:restriction base="xsd:anyType">
|
25
|
+
<xsd:attribute name="at" type="xsd:dateTime"/>
|
26
|
+
<xsd:attribute name="capability" type="xsd:string"/>
|
27
|
+
<xsd:attribute name="change" type="changeType"/>
|
28
|
+
<xsd:attribute name="completed" type="xsd:dateTime"/>
|
29
|
+
<xsd:attribute name="encoding" type="xsd:string"/>
|
30
|
+
<xsd:attribute name="from" type="xsd:dateTime"/>
|
31
|
+
<xsd:attribute name="hash" type="xsd:string"/>
|
32
|
+
<xsd:attribute name="length" type="xsd:integer"/>
|
33
|
+
<xsd:attribute name="modified" type="xsd:dateTime"/>
|
34
|
+
<xsd:attribute name="path" type="pathType"/>
|
35
|
+
<xsd:attribute name="type" type="contentType"/>
|
36
|
+
<xsd:attribute name="until" type="xsd:dateTime"/>
|
37
|
+
<xsd:anyAttribute namespace="##other" processContents="lax"/>
|
38
|
+
</xsd:restriction>
|
39
|
+
</xsd:complexContent>
|
40
|
+
</xsd:complexType>
|
41
|
+
</xsd:element>
|
42
|
+
|
43
|
+
<xsd:element name="ln">
|
44
|
+
<xsd:annotation>
|
45
|
+
<xsd:documentation>
|
46
|
+
Links to related resources
|
47
|
+
</xsd:documentation>
|
48
|
+
</xsd:annotation>
|
49
|
+
<xsd:complexType>
|
50
|
+
<xsd:complexContent>
|
51
|
+
<xsd:restriction base="xsd:anyType">
|
52
|
+
<xsd:attribute name="encoding" type="xsd:string"/>
|
53
|
+
<xsd:attribute name="hash" type="xsd:string"/>
|
54
|
+
<xsd:attribute name="href" type="xsd:anyURI"/>
|
55
|
+
<xsd:attribute name="length" type="xsd:nonNegativeInteger"/>
|
56
|
+
<xsd:attribute name="modified" type="xsd:dateTime"/>
|
57
|
+
<xsd:attribute name="path" type="pathType"/>
|
58
|
+
<xsd:attribute name="pri" type="priType"/>
|
59
|
+
<xsd:attribute name="rel" type="relType"/>
|
60
|
+
<xsd:attribute name="type" type="contentType"/>
|
61
|
+
<xsd:anyAttribute namespace="##other" processContents="lax"/>
|
62
|
+
</xsd:restriction>
|
63
|
+
</xsd:complexContent>
|
64
|
+
</xsd:complexType>
|
65
|
+
</xsd:element>
|
66
|
+
|
67
|
+
<xsd:simpleType name="changeType">
|
68
|
+
<xsd:annotation>
|
69
|
+
<xsd:documentation>
|
70
|
+
Allowed values of the change attribute are
|
71
|
+
created, updated, deleted
|
72
|
+
</xsd:documentation>
|
73
|
+
</xsd:annotation>
|
74
|
+
<xsd:restriction base="xsd:string">
|
75
|
+
<xsd:enumeration value="created"/>
|
76
|
+
<xsd:enumeration value="updated"/>
|
77
|
+
<xsd:enumeration value="deleted"/>
|
78
|
+
</xsd:restriction>
|
79
|
+
</xsd:simpleType>
|
80
|
+
|
81
|
+
<xsd:simpleType name="pathType">
|
82
|
+
<xsd:annotation>
|
83
|
+
<xsd:documentation>
|
84
|
+
Path values must start with a slash, must not end with a slash
|
85
|
+
</xsd:documentation>
|
86
|
+
</xsd:annotation>
|
87
|
+
<xsd:restriction base="xsd:string">
|
88
|
+
<xsd:pattern value="/.*[^/]"/>
|
89
|
+
</xsd:restriction>
|
90
|
+
</xsd:simpleType>
|
91
|
+
|
92
|
+
<xsd:simpleType name="contentType">
|
93
|
+
<xsd:annotation>
|
94
|
+
<xsd:documentation>
|
95
|
+
MIME Content Types are described in RFCs 2045,2046
|
96
|
+
http://tools.ietf.org/html/rfc2045
|
97
|
+
http://tools.ietf.org/html/rfc2046
|
98
|
+
</xsd:documentation>
|
99
|
+
</xsd:annotation>
|
100
|
+
<xsd:restriction base="xsd:string">
|
101
|
+
<xsd:pattern value="[\w-]+/[\w-]+"/>
|
102
|
+
</xsd:restriction>
|
103
|
+
</xsd:simpleType>
|
104
|
+
|
105
|
+
<xsd:simpleType name="priType">
|
106
|
+
<xsd:annotation>
|
107
|
+
<xsd:documentation>
|
108
|
+
The priority attribute may have values 1 through 999999
|
109
|
+
</xsd:documentation>
|
110
|
+
</xsd:annotation>
|
111
|
+
<xsd:restriction base="xsd:integer">
|
112
|
+
<xsd:minInclusive value="1"/>
|
113
|
+
<xsd:maxInclusive value="999999"/>
|
114
|
+
</xsd:restriction>
|
115
|
+
</xsd:simpleType>
|
116
|
+
|
117
|
+
<xsd:simpleType name="relType">
|
118
|
+
<xsd:annotation>
|
119
|
+
<xsd:documentation>
|
120
|
+
Syntax for link relation values is specified by
|
121
|
+
http://tools.ietf.org/html/rfc5988 and the relevant
|
122
|
+
portion is:
|
123
|
+
|
124
|
+
# link-param = ( ( "rel" "=" relation-types )
|
125
|
+
# ...
|
126
|
+
# relation-type = reg-rel-type | ext-rel-type
|
127
|
+
# reg-rel-type = LOALPHA *( LOALPHA | DIGIT | "." | "-" )
|
128
|
+
# ext-rel-type = URI
|
129
|
+
#
|
130
|
+
|
131
|
+
Registered link relations (reg-rel-type) are listed at
|
132
|
+
http://www.iana.org/assignments/link-relations/link-relations.xhtml
|
133
|
+
and URIs are allowed for extension (ext-rel-type)
|
134
|
+
</xsd:documentation>
|
135
|
+
</xsd:annotation>
|
136
|
+
<xsd:union>
|
137
|
+
<xsd:simpleType>
|
138
|
+
<xsd:restriction base="xsd:string">
|
139
|
+
<xsd:pattern value="[a-z][a-z0-9\.\-]*"/>
|
140
|
+
</xsd:restriction>
|
141
|
+
</xsd:simpleType>
|
142
|
+
<xsd:simpleType>
|
143
|
+
<xsd:restriction base="xsd:anyURI"/>
|
144
|
+
</xsd:simpleType>
|
145
|
+
</xsd:union>
|
146
|
+
</xsd:simpleType>
|
147
|
+
|
148
|
+
</xsd:schema>
|
@@ -0,0 +1,75 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
|
4
|
+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
<xsd:annotation>
|
7
|
+
<xsd:documentation>
|
8
|
+
XML Schema for Sitemap index files.
|
9
|
+
Last Modified 2009-04-08
|
10
|
+
</xsd:documentation>
|
11
|
+
</xsd:annotation>
|
12
|
+
|
13
|
+
<xsd:element name="sitemapindex">
|
14
|
+
<xsd:annotation>
|
15
|
+
<xsd:documentation>
|
16
|
+
Container for a set of up to 50,000 sitemap URLs.
|
17
|
+
This is the root element of the XML file.
|
18
|
+
</xsd:documentation>
|
19
|
+
</xsd:annotation>
|
20
|
+
<xsd:complexType>
|
21
|
+
<xsd:sequence>
|
22
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
23
|
+
<xsd:element name="sitemap" type="tSitemap" maxOccurs="unbounded"/>
|
24
|
+
</xsd:sequence>
|
25
|
+
</xsd:complexType>
|
26
|
+
</xsd:element>
|
27
|
+
|
28
|
+
<xsd:complexType name="tSitemap">
|
29
|
+
<xsd:annotation>
|
30
|
+
<xsd:documentation>
|
31
|
+
Container for the data needed to describe a sitemap.
|
32
|
+
</xsd:documentation>
|
33
|
+
</xsd:annotation>
|
34
|
+
<xsd:sequence>
|
35
|
+
<xsd:element name="loc" type="tLocSitemap"/>
|
36
|
+
<xsd:element name="lastmod" type="tLastmodSitemap" minOccurs="0"/>
|
37
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
38
|
+
</xsd:sequence>
|
39
|
+
</xsd:complexType>
|
40
|
+
|
41
|
+
<xsd:simpleType name="tLocSitemap">
|
42
|
+
<xsd:annotation>
|
43
|
+
<xsd:documentation>
|
44
|
+
REQUIRED: The location URI of a sitemap.
|
45
|
+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
|
46
|
+
</xsd:documentation>
|
47
|
+
</xsd:annotation>
|
48
|
+
<xsd:restriction base="xsd:anyURI">
|
49
|
+
<xsd:minLength value="12"/>
|
50
|
+
<xsd:maxLength value="2048"/>
|
51
|
+
</xsd:restriction>
|
52
|
+
</xsd:simpleType>
|
53
|
+
|
54
|
+
<xsd:simpleType name="tLastmodSitemap">
|
55
|
+
<xsd:annotation>
|
56
|
+
<xsd:documentation>
|
57
|
+
OPTIONAL: The date the document was last modified. The date must conform
|
58
|
+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
|
59
|
+
Example: 2005-05-10
|
60
|
+
Lastmod may also contain a timestamp.
|
61
|
+
Example: 2005-05-10T17:33:30+08:00
|
62
|
+
</xsd:documentation>
|
63
|
+
</xsd:annotation>
|
64
|
+
<xsd:union>
|
65
|
+
<xsd:simpleType>
|
66
|
+
<xsd:restriction base="xsd:date"/>
|
67
|
+
</xsd:simpleType>
|
68
|
+
<xsd:simpleType>
|
69
|
+
<xsd:restriction base="xsd:dateTime"/>
|
70
|
+
</xsd:simpleType>
|
71
|
+
</xsd:union>
|
72
|
+
</xsd:simpleType>
|
73
|
+
|
74
|
+
|
75
|
+
</xsd:schema>
|
@@ -0,0 +1,116 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
|
4
|
+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
<xsd:annotation>
|
7
|
+
<xsd:documentation>
|
8
|
+
XML Schema for Sitemap files.
|
9
|
+
Last Modified 2008-03-26
|
10
|
+
</xsd:documentation>
|
11
|
+
</xsd:annotation>
|
12
|
+
|
13
|
+
<xsd:element name="urlset">
|
14
|
+
<xsd:annotation>
|
15
|
+
<xsd:documentation>
|
16
|
+
Container for a set of up to 50,000 document elements.
|
17
|
+
This is the root element of the XML file.
|
18
|
+
</xsd:documentation>
|
19
|
+
</xsd:annotation>
|
20
|
+
<xsd:complexType>
|
21
|
+
<xsd:sequence>
|
22
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
23
|
+
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
|
24
|
+
</xsd:sequence>
|
25
|
+
</xsd:complexType>
|
26
|
+
</xsd:element>
|
27
|
+
|
28
|
+
<xsd:complexType name="tUrl">
|
29
|
+
<xsd:annotation>
|
30
|
+
<xsd:documentation>
|
31
|
+
Container for the data needed to describe a document to crawl.
|
32
|
+
</xsd:documentation>
|
33
|
+
</xsd:annotation>
|
34
|
+
<xsd:sequence>
|
35
|
+
<xsd:element name="loc" type="tLoc"/>
|
36
|
+
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
|
37
|
+
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
|
38
|
+
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
|
39
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
40
|
+
</xsd:sequence>
|
41
|
+
</xsd:complexType>
|
42
|
+
|
43
|
+
<xsd:simpleType name="tLoc">
|
44
|
+
<xsd:annotation>
|
45
|
+
<xsd:documentation>
|
46
|
+
REQUIRED: The location URI of a document.
|
47
|
+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
|
48
|
+
</xsd:documentation>
|
49
|
+
</xsd:annotation>
|
50
|
+
<xsd:restriction base="xsd:anyURI">
|
51
|
+
<xsd:minLength value="12"/>
|
52
|
+
<xsd:maxLength value="2048"/>
|
53
|
+
</xsd:restriction>
|
54
|
+
</xsd:simpleType>
|
55
|
+
|
56
|
+
<xsd:simpleType name="tLastmod">
|
57
|
+
<xsd:annotation>
|
58
|
+
<xsd:documentation>
|
59
|
+
OPTIONAL: The date the document was last modified. The date must conform
|
60
|
+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
|
61
|
+
Example: 2005-05-10
|
62
|
+
Lastmod may also contain a timestamp.
|
63
|
+
Example: 2005-05-10T17:33:30+08:00
|
64
|
+
</xsd:documentation>
|
65
|
+
</xsd:annotation>
|
66
|
+
<xsd:union>
|
67
|
+
<xsd:simpleType>
|
68
|
+
<xsd:restriction base="xsd:date"/>
|
69
|
+
</xsd:simpleType>
|
70
|
+
<xsd:simpleType>
|
71
|
+
<xsd:restriction base="xsd:dateTime"/>
|
72
|
+
</xsd:simpleType>
|
73
|
+
</xsd:union>
|
74
|
+
</xsd:simpleType>
|
75
|
+
|
76
|
+
<xsd:simpleType name="tChangeFreq">
|
77
|
+
<xsd:annotation>
|
78
|
+
<xsd:documentation>
|
79
|
+
OPTIONAL: Indicates how frequently the content at a particular URL is
|
80
|
+
likely to change. The value "always" should be used to describe
|
81
|
+
documents that change each time they are accessed. The value "never"
|
82
|
+
should be used to describe archived URLs. Please note that web
|
83
|
+
crawlers may not necessarily crawl pages marked "always" more often.
|
84
|
+
Consider this element as a friendly suggestion and not a command.
|
85
|
+
</xsd:documentation>
|
86
|
+
</xsd:annotation>
|
87
|
+
<xsd:restriction base="xsd:string">
|
88
|
+
<xsd:enumeration value="always"/>
|
89
|
+
<xsd:enumeration value="hourly"/>
|
90
|
+
<xsd:enumeration value="daily"/>
|
91
|
+
<xsd:enumeration value="weekly"/>
|
92
|
+
<xsd:enumeration value="monthly"/>
|
93
|
+
<xsd:enumeration value="yearly"/>
|
94
|
+
<xsd:enumeration value="never"/>
|
95
|
+
</xsd:restriction>
|
96
|
+
</xsd:simpleType>
|
97
|
+
|
98
|
+
<xsd:simpleType name="tPriority">
|
99
|
+
<xsd:annotation>
|
100
|
+
<xsd:documentation>
|
101
|
+
OPTIONAL: The priority of a particular URL relative to other pages
|
102
|
+
on the same site. The value for this element is a number between
|
103
|
+
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
|
104
|
+
The default priority of a page is 0.5. Priority is used to select
|
105
|
+
between pages on your site. Setting a priority of 1.0 for all URLs
|
106
|
+
will not help you, as the relative priority of pages on your site
|
107
|
+
is what will be considered.
|
108
|
+
</xsd:documentation>
|
109
|
+
</xsd:annotation>
|
110
|
+
<xsd:restriction base="xsd:decimal">
|
111
|
+
<xsd:minInclusive value="0.0"/>
|
112
|
+
<xsd:maxInclusive value="1.0"/>
|
113
|
+
</xsd:restriction>
|
114
|
+
</xsd:simpleType>
|
115
|
+
|
116
|
+
</xsd:schema>
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'rspec/expectations'
|
2
|
+
require 'equivalent-xml'
|
3
|
+
|
4
|
+
# Custom matcher that passes when the actual value is XML equivalent to
# +expected+. Equivalence is delegated to EquivalentXml with element order
# ignored and whitespace normalized. Either side may be a Nokogiri element,
# a Nokogiri document, an XML string, or a REXML element.
RSpec::Matchers.define :be_xml do |expected|

  # Coerces a supported XML representation to a Nokogiri element.
  # Strings are parsed with Nokogiri; REXML elements are serialized and
  # re-parsed. Returns nil for any other type (the case has no else branch).
  def to_nokogiri(xml)
    case xml
    when Nokogiri::XML::Element
      xml
    when Nokogiri::XML::Document
      xml.root
    when String
      to_nokogiri(Nokogiri::XML(xml))
    when REXML::Element
      to_nokogiri(xml.to_s)
    end
  end

  # Serializes a Nokogiri node as indented UTF-8 XML without the XML
  # declaration. Returns nil when given nil.
  def to_pretty(nokogiri)
    return nil unless nokogiri
    out = StringIO.new
    save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
    nokogiri.write_xml_to(out, encoding: 'UTF-8', indent: 2, save_with: save_options)
    out.string
  end

  match do |actual|
    # A non-XML expected value is a mistake in the spec itself, so raise
    # rather than report an ordinary mismatch.
    expected_xml = to_nokogiri(expected) || fail("expected value #{expected} does not appear to be XML")
    actual_xml = to_nokogiri(actual)

    EquivalentXml.equivalent?(expected_xml, actual_xml, element_order: false, normalize_whitespace: true)
  end

  failure_message do |actual|
    expected_string = to_pretty(to_nokogiri(expected))
    # Fall back to the raw value when actual could not be coerced to XML.
    actual_string = to_pretty(to_nokogiri(actual)) || actual
    "expected XML:\n#{expected_string}\n\nbut was:\n#{actual_string}"
  end

  failure_message_when_negated do |actual|
    # Uses to_nokogiri: the previously referenced to_element helper is not
    # defined in this file, so a failing negated expectation raised
    # NoMethodError instead of producing this message.
    actual_xml = to_nokogiri(actual) || actual
    "expected not to get XML:\n\t#{actual_xml}"
  end
end
|
45
|
+
|
46
|
+
# Custom matcher that passes when the actual value is a Time equal to
# +expected+ once both are converted to UTC and rounded to two fractional
# digits of a second. When +expected+ is nil (or false), it passes only if
# the actual value is nil.
RSpec::Matchers.define :be_time do |expected|

  # Renders a Time as a UTC ISO 8601 string with two fractional digits;
  # any other value is rendered with to_s.
  def to_string(time)
    # getutc returns a new Time; Time#utc would convert the receiver in
    # place and so mutate the objects being compared.
    time.is_a?(Time) ? time.getutc.round(2).iso8601(2) : time.to_s
  end

  match do |actual|
    if expected
      # A non-Time expected value is a mistake in the spec itself.
      fail "Expected value #{expected} is not a Time" unless expected.is_a?(Time)
      actual.is_a?(Time) && (to_string(expected) == to_string(actual))
    else
      actual.nil?
    end
  end

  failure_message do |actual|
    expected_str = to_string(expected)
    actual_str = to_string(actual)
    "expected time:\n#{expected_str}\n\nbut was:\n#{actual_str}"
  end
end
|
67
|
+
|
68
|
+
# Coerces a value to a MIME::Type.
#
# Returns nil for nil/false, returns an existing MIME::Type unchanged, and
# otherwise takes the first entry that MIME::Types yields for the value,
# constructing a new MIME::Type when there is none.
def to_mime_type(mime_type)
  case mime_type
  when nil, false
    nil
  when MIME::Type
    mime_type
  else
    MIME::Types[mime_type].first || MIME::Type.new(mime_type)
  end
end
|
77
|
+
|
78
|
+
# Custom matcher that passes when the actual value == the expected MIME
# type. The expected value is coerced once, up front, with to_mime_type.
RSpec::Matchers.define :be_mime_type do |expected|
  wanted = to_mime_type(expected)

  match { |actual| actual == wanted }

  failure_message { |actual| "expected MIME type:\n#{wanted}\nbut was:\n#{actual}" }
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# ------------------------------------------------------------
|
2
|
+
# SimpleCov setup
|
3
|
+
|
4
|
+
# Coverage is opt-in: SimpleCov is loaded and started only when the
# COVERAGE environment variable is set. The minimum coverage is set to 100,
# files under /spec/ are filtered out, and results go to both the HTML and
# console formatters.
if ENV['COVERAGE']
  require 'simplecov'
  require 'simplecov-console'

  SimpleCov.minimum_coverage(100)
  SimpleCov.start do
    add_filter('/spec/')

    report_formatters = [
      SimpleCov::Formatter::HTMLFormatter,
      SimpleCov::Formatter::Console
    ]
    SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*report_formatters]
  end
end
|
17
|
+
|
18
|
+
# ------------------------------------------------------------
|
19
|
+
# Rspec configuration
|
20
|
+
|
21
|
+
# Global RSpec settings: deprecation warnings are raised as errors, and
# RSpec's own mocking framework is used.
RSpec.configure do |rspec|
  rspec.raise_errors_for_deprecations!
  rspec.mock_with(:rspec)
end
|
25
|
+
|
26
|
+
require 'rspec_custom_matchers'
|
27
|
+
|
28
|
+
# ------------------------------------------------------------
|
29
|
+
# Resync
|
30
|
+
|
31
|
+
require 'resync'
|