resync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +92 -0
- data/Rakefile +56 -0
- data/example.rb +100 -0
- data/lib/resync/capability_list.rb +85 -0
- data/lib/resync/change_dump.rb +15 -0
- data/lib/resync/change_dump_manifest.rb +15 -0
- data/lib/resync/change_list.rb +15 -0
- data/lib/resync/change_list_index.rb +26 -0
- data/lib/resync/link.rb +87 -0
- data/lib/resync/metadata.rb +112 -0
- data/lib/resync/resource.rb +72 -0
- data/lib/resync/resource_dump.rb +15 -0
- data/lib/resync/resource_dump_manifest.rb +15 -0
- data/lib/resync/resource_list.rb +15 -0
- data/lib/resync/resource_list_index.rb +15 -0
- data/lib/resync/shared/augmented.rb +76 -0
- data/lib/resync/shared/base_resource_list.rb +117 -0
- data/lib/resync/shared/descriptor.rb +135 -0
- data/lib/resync/shared/sitemap_index.rb +32 -0
- data/lib/resync/shared/sorted_resource_list.rb +60 -0
- data/lib/resync/source_description.rb +14 -0
- data/lib/resync/types/change.rb +14 -0
- data/lib/resync/types/change_frequency.rb +18 -0
- data/lib/resync/types.rb +6 -0
- data/lib/resync/version.rb +4 -0
- data/lib/resync/xml.rb +216 -0
- data/lib/resync/xml_parser.rb +65 -0
- data/lib/resync.rb +4 -0
- data/resync.gemspec +36 -0
- data/spec/acceptance/xml_parser_spec.rb +1049 -0
- data/spec/data/examples/README.md +1 -0
- data/spec/data/examples/example-1.xml +12 -0
- data/spec/data/examples/example-12.xml +25 -0
- data/spec/data/examples/example-13.xml +25 -0
- data/spec/data/examples/example-14.xml +23 -0
- data/spec/data/examples/example-15.xml +21 -0
- data/spec/data/examples/example-16.xml +24 -0
- data/spec/data/examples/example-17.xml +39 -0
- data/spec/data/examples/example-18.xml +25 -0
- data/spec/data/examples/example-19.xml +28 -0
- data/spec/data/examples/example-2.xml +18 -0
- data/spec/data/examples/example-20.xml +22 -0
- data/spec/data/examples/example-21.xml +31 -0
- data/spec/data/examples/example-22.xml +41 -0
- data/spec/data/examples/example-23.xml +41 -0
- data/spec/data/examples/example-24.xml +28 -0
- data/spec/data/examples/example-25.xml +21 -0
- data/spec/data/examples/example-26.xml +18 -0
- data/spec/data/examples/example-27.xml +36 -0
- data/spec/data/examples/example-28.xml +34 -0
- data/spec/data/examples/example-29.xml +27 -0
- data/spec/data/examples/example-3.xml +17 -0
- data/spec/data/examples/example-30.xml +18 -0
- data/spec/data/examples/example-31.xml +16 -0
- data/spec/data/examples/example-32.xml +22 -0
- data/spec/data/examples/example-33.xml +22 -0
- data/spec/data/examples/example-4.xml +10 -0
- data/spec/data/examples/example-5.xml +18 -0
- data/spec/data/examples/example-6.xml +21 -0
- data/spec/data/examples/example-7.xml +13 -0
- data/spec/data/examples/example-8.xml +12 -0
- data/spec/data/resourcesync.xsd +148 -0
- data/spec/data/siteindex.xsd +75 -0
- data/spec/data/sitemap.xsd +116 -0
- data/spec/rspec_custom_matchers.rb +89 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/todo.rb +11 -0
- data/spec/unit/resync/capability_list_spec.rb +138 -0
- data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
- data/spec/unit/resync/change_dump_spec.rb +61 -0
- data/spec/unit/resync/change_list_index_spec.rb +49 -0
- data/spec/unit/resync/change_list_spec.rb +75 -0
- data/spec/unit/resync/link_spec.rb +93 -0
- data/spec/unit/resync/metadata_spec.rb +169 -0
- data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
- data/spec/unit/resync/resource_dump_spec.rb +62 -0
- data/spec/unit/resync/resource_list_index_spec.rb +53 -0
- data/spec/unit/resync/resource_list_spec.rb +60 -0
- data/spec/unit/resync/resource_spec.rb +176 -0
- data/spec/unit/resync/shared/augmented_examples.rb +58 -0
- data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
- data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
- data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
- data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
- data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
- data/spec/unit/resync/source_description_spec.rb +55 -0
- data/spec/unit/resync/xml/timenode_spec.rb +48 -0
- data/spec/unit/resync/xml/xml_spec.rb +40 -0
- data/spec/unit/resync/xml_parser_spec.rb +82 -0
- metadata +340 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="up"
|
5
|
+
href="http://aggregator1.example.com/dataset1/capabilitylist.xml"/>
|
6
|
+
<rs:md capability="changelist"
|
7
|
+
from="2013-01-03T11:00:00Z"/>
|
8
|
+
<url>
|
9
|
+
<loc>http://aggregator1.example.com/res1.html</loc>
|
10
|
+
<lastmod>2013-01-03T20:00:00Z</lastmod>
|
11
|
+
<rs:md hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
12
|
+
length="8876"
|
13
|
+
type="text/html"
|
14
|
+
change="updated"/>
|
15
|
+
<rs:ln rel="via"
|
16
|
+
href="http://original.example.com/res1.html"
|
17
|
+
modified="2013-01-03T07:00:00Z"
|
18
|
+
hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
19
|
+
length="8876"
|
20
|
+
type="text/html"/>
|
21
|
+
</url>
|
22
|
+
</urlset>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="up"
|
5
|
+
href="http://aggregator2.example.com/dataset1/capabilitylist.xml"/>
|
6
|
+
<rs:md capability="changelist"
|
7
|
+
from="2013-01-03T12:00:00Z"/>
|
8
|
+
<url>
|
9
|
+
<loc>http://aggregator2.example.com/res1.html</loc>
|
10
|
+
<lastmod>2013-01-04T09:00:00Z</lastmod>
|
11
|
+
<rs:md hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
12
|
+
length="8876"
|
13
|
+
type="text/html"
|
14
|
+
change="updated"/>
|
15
|
+
<rs:ln rel="via"
|
16
|
+
href="http://original.example.com/res1.html"
|
17
|
+
modified="2013-01-03T07:00:00Z"
|
18
|
+
hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
19
|
+
length="8876"
|
20
|
+
type="text/html"/>
|
21
|
+
</url>
|
22
|
+
</urlset>
|
@@ -0,0 +1,10 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:md capability="resourcedump"
|
5
|
+
at="2013-01-03T09:00:00Z"/>
|
6
|
+
<url>
|
7
|
+
<loc>http://example.com/resourcedump.zip</loc>
|
8
|
+
<lastmod>2013-01-03T09:00:00Z</lastmod>
|
9
|
+
</url>
|
10
|
+
</urlset>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:md capability="resourcedump-manifest"
|
5
|
+
at="2013-01-03T09:00:00Z"/>
|
6
|
+
<url>
|
7
|
+
<loc>http://example.com/res1</loc>
|
8
|
+
<lastmod>2013-01-03T03:00:00Z</lastmod>
|
9
|
+
<rs:md hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6"
|
10
|
+
path="/resources/res1"/>
|
11
|
+
</url>
|
12
|
+
<url>
|
13
|
+
<loc>http://example.com/res2</loc>
|
14
|
+
<lastmod>2013-01-03T04:00:00Z</lastmod>
|
15
|
+
<rs:md hash="md5:1e0d5cb8ef6ba40c99b14c0237be735e"
|
16
|
+
path="/resources/res2"/>
|
17
|
+
</url>
|
18
|
+
</urlset>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="describedby"
|
5
|
+
href="http://example.com/info_about_set1_of_resources.xml"/>
|
6
|
+
<rs:ln rel="up"
|
7
|
+
href="http://example.com/resourcesync_description.xml"/>
|
8
|
+
<rs:md capability="capabilitylist"/>
|
9
|
+
<url>
|
10
|
+
<loc>http://example.com/dataset1/resourcelist.xml</loc>
|
11
|
+
<rs:md capability="resourcelist"/>
|
12
|
+
</url>
|
13
|
+
<url>
|
14
|
+
<loc>http://example.com/dataset1/resourcedump.xml</loc>
|
15
|
+
<rs:md capability="resourcedump"/>
|
16
|
+
</url>
|
17
|
+
<url>
|
18
|
+
<loc>http://example.com/dataset1/changelist.xml</loc>
|
19
|
+
<rs:md capability="changelist"/>
|
20
|
+
</url>
|
21
|
+
</urlset>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:ln rel="describedby"
|
5
|
+
href="http://example.com/info-about-source.xml"/>
|
6
|
+
<rs:md capability="description"/>
|
7
|
+
<url>
|
8
|
+
<loc>http://example.com/dataset1/capabilitylist.xml</loc>
|
9
|
+
<rs:md capability="capabilitylist"/>
|
10
|
+
<rs:ln rel="describedby"
|
11
|
+
href="http://example.com/info_about_set1_of_resources.xml"/>
|
12
|
+
</url>
|
13
|
+
</urlset>
|
@@ -0,0 +1,12 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
3
|
+
xmlns:rs="http://www.openarchives.org/rs/terms/">
|
4
|
+
<rs:md capability="resourcelist"
|
5
|
+
at="2013-01-03T09:00:00Z"/>
|
6
|
+
<sitemap>
|
7
|
+
<loc>http://example.com/resourcelist-part1.xml</loc>
|
8
|
+
</sitemap>
|
9
|
+
<sitemap>
|
10
|
+
<loc>http://example.com/resourcelist-part2.xml</loc>
|
11
|
+
</sitemap>
|
12
|
+
</sitemapindex>
|
@@ -0,0 +1,148 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.openarchives.org/rs/terms/"
|
4
|
+
xmlns="http://www.openarchives.org/rs/terms/"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
|
7
|
+
<xsd:annotation>
|
8
|
+
<xsd:documentation>
|
9
|
+
XML Schema for ResourceSync extensions to Sitemap files.
|
10
|
+
See: http://www.openarchives.org/rs/ .
|
11
|
+
2013-01-06 first stab [Simeon Warner],
|
12
|
+
2013-08-15 for v0.9.1 [Simeon Warner].
|
13
|
+
</xsd:documentation>
|
14
|
+
</xsd:annotation>
|
15
|
+
|
16
|
+
<xsd:element name="md">
|
17
|
+
<xsd:annotation>
|
18
|
+
<xsd:documentation>
|
19
|
+
Metadata about a resource
|
20
|
+
</xsd:documentation>
|
21
|
+
</xsd:annotation>
|
22
|
+
<xsd:complexType>
|
23
|
+
<xsd:complexContent>
|
24
|
+
<xsd:restriction base="xsd:anyType">
|
25
|
+
<xsd:attribute name="at" type="xsd:dateTime"/>
|
26
|
+
<xsd:attribute name="capability" type="xsd:string"/>
|
27
|
+
<xsd:attribute name="change" type="changeType"/>
|
28
|
+
<xsd:attribute name="completed" type="xsd:dateTime"/>
|
29
|
+
<xsd:attribute name="encoding" type="xsd:string"/>
|
30
|
+
<xsd:attribute name="from" type="xsd:dateTime"/>
|
31
|
+
<xsd:attribute name="hash" type="xsd:string"/>
|
32
|
+
<xsd:attribute name="length" type="xsd:integer"/>
|
33
|
+
<xsd:attribute name="modified" type="xsd:dateTime"/>
|
34
|
+
<xsd:attribute name="path" type="pathType"/>
|
35
|
+
<xsd:attribute name="type" type="contentType"/>
|
36
|
+
<xsd:attribute name="until" type="xsd:dateTime"/>
|
37
|
+
<xsd:anyAttribute namespace="##other" processContents="lax"/>
|
38
|
+
</xsd:restriction>
|
39
|
+
</xsd:complexContent>
|
40
|
+
</xsd:complexType>
|
41
|
+
</xsd:element>
|
42
|
+
|
43
|
+
<xsd:element name="ln">
|
44
|
+
<xsd:annotation>
|
45
|
+
<xsd:documentation>
|
46
|
+
Links to related resources
|
47
|
+
</xsd:documentation>
|
48
|
+
</xsd:annotation>
|
49
|
+
<xsd:complexType>
|
50
|
+
<xsd:complexContent>
|
51
|
+
<xsd:restriction base="xsd:anyType">
|
52
|
+
<xsd:attribute name="encoding" type="xsd:string"/>
|
53
|
+
<xsd:attribute name="hash" type="xsd:string"/>
|
54
|
+
<xsd:attribute name="href" type="xsd:anyURI"/>
|
55
|
+
<xsd:attribute name="length" type="xsd:nonNegativeInteger"/>
|
56
|
+
<xsd:attribute name="modified" type="xsd:dateTime"/>
|
57
|
+
<xsd:attribute name="path" type="pathType"/>
|
58
|
+
<xsd:attribute name="pri" type="priType"/>
|
59
|
+
<xsd:attribute name="rel" type="relType"/>
|
60
|
+
<xsd:attribute name="type" type="contentType"/>
|
61
|
+
<xsd:anyAttribute namespace="##other" processContents="lax"/>
|
62
|
+
</xsd:restriction>
|
63
|
+
</xsd:complexContent>
|
64
|
+
</xsd:complexType>
|
65
|
+
</xsd:element>
|
66
|
+
|
67
|
+
<xsd:simpleType name="changeType">
|
68
|
+
<xsd:annotation>
|
69
|
+
<xsd:documentation>
|
70
|
+
Allowed values of the change attribute are
|
71
|
+
created, updated, deleted
|
72
|
+
</xsd:documentation>
|
73
|
+
</xsd:annotation>
|
74
|
+
<xsd:restriction base="xsd:string">
|
75
|
+
<xsd:enumeration value="created"/>
|
76
|
+
<xsd:enumeration value="updated"/>
|
77
|
+
<xsd:enumeration value="deleted"/>
|
78
|
+
</xsd:restriction>
|
79
|
+
</xsd:simpleType>
|
80
|
+
|
81
|
+
<xsd:simpleType name="pathType">
|
82
|
+
<xsd:annotation>
|
83
|
+
<xsd:documentation>
|
84
|
+
Path values must start with a slash, must not end with a slash
|
85
|
+
</xsd:documentation>
|
86
|
+
</xsd:annotation>
|
87
|
+
<xsd:restriction base="xsd:string">
|
88
|
+
<xsd:pattern value="/.*[^/]"/>
|
89
|
+
</xsd:restriction>
|
90
|
+
</xsd:simpleType>
|
91
|
+
|
92
|
+
<xsd:simpleType name="contentType">
|
93
|
+
<xsd:annotation>
|
94
|
+
<xsd:documentation>
|
95
|
+
MIME Content Types are described in RFCs 2045,2046
|
96
|
+
http://tools.ietf.org/html/rfc2045
|
97
|
+
http://tools.ietf.org/html/rfc2046
|
98
|
+
</xsd:documentation>
|
99
|
+
</xsd:annotation>
|
100
|
+
<xsd:restriction base="xsd:string">
|
101
|
+
<xsd:pattern value="[\w-]+/[\w-]+"/>
|
102
|
+
</xsd:restriction>
|
103
|
+
</xsd:simpleType>
|
104
|
+
|
105
|
+
<xsd:simpleType name="priType">
|
106
|
+
<xsd:annotation>
|
107
|
+
<xsd:documentation>
|
108
|
+
The priority attribute may have values 1 through 999999
|
109
|
+
</xsd:documentation>
|
110
|
+
</xsd:annotation>
|
111
|
+
<xsd:restriction base="xsd:integer">
|
112
|
+
<xsd:minInclusive value="1"/>
|
113
|
+
<xsd:maxInclusive value="999999"/>
|
114
|
+
</xsd:restriction>
|
115
|
+
</xsd:simpleType>
|
116
|
+
|
117
|
+
<xsd:simpleType name="relType">
|
118
|
+
<xsd:annotation>
|
119
|
+
<xsd:documentation>
|
120
|
+
Syntax for link relation values is specified by
|
121
|
+
http://tools.ietf.org/html/rfc5988 and the relevant
|
122
|
+
portion is:
|
123
|
+
|
124
|
+
# link-param = ( ( "rel" "=" relation-types )
|
125
|
+
# ...
|
126
|
+
# relation-type = reg-rel-type | ext-rel-type
|
127
|
+
# reg-rel-type = LOALPHA *( LOALPHA | DIGIT | "." | "-" )
|
128
|
+
# ext-rel-type = URI
|
129
|
+
#
|
130
|
+
|
131
|
+
Registered link relations (reg-rel-type) are listed at
|
132
|
+
http://www.iana.org/assignments/link-relations/link-relations.xhtml
|
133
|
+
and URIs are allowed for extension (ext-rel-type)
|
134
|
+
</xsd:documentation>
|
135
|
+
</xsd:annotation>
|
136
|
+
<xsd:union>
|
137
|
+
<xsd:simpleType>
|
138
|
+
<xsd:restriction base="xsd:string">
|
139
|
+
<xsd:pattern value="[a-z][a-z0-9\.\-]*"/>
|
140
|
+
</xsd:restriction>
|
141
|
+
</xsd:simpleType>
|
142
|
+
<xsd:simpleType>
|
143
|
+
<xsd:restriction base="xsd:anyURI"/>
|
144
|
+
</xsd:simpleType>
|
145
|
+
</xsd:union>
|
146
|
+
</xsd:simpleType>
|
147
|
+
|
148
|
+
</xsd:schema>
|
@@ -0,0 +1,75 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
|
4
|
+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
<xsd:annotation>
|
7
|
+
<xsd:documentation>
|
8
|
+
XML Schema for Sitemap index files.
|
9
|
+
Last Modified 2009-04-08
|
10
|
+
</xsd:documentation>
|
11
|
+
</xsd:annotation>
|
12
|
+
|
13
|
+
<xsd:element name="sitemapindex">
|
14
|
+
<xsd:annotation>
|
15
|
+
<xsd:documentation>
|
16
|
+
Container for a set of up to 50,000 sitemap URLs.
|
17
|
+
This is the root element of the XML file.
|
18
|
+
</xsd:documentation>
|
19
|
+
</xsd:annotation>
|
20
|
+
<xsd:complexType>
|
21
|
+
<xsd:sequence>
|
22
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
23
|
+
<xsd:element name="sitemap" type="tSitemap" maxOccurs="unbounded"/>
|
24
|
+
</xsd:sequence>
|
25
|
+
</xsd:complexType>
|
26
|
+
</xsd:element>
|
27
|
+
|
28
|
+
<xsd:complexType name="tSitemap">
|
29
|
+
<xsd:annotation>
|
30
|
+
<xsd:documentation>
|
31
|
+
Container for the data needed to describe a sitemap.
|
32
|
+
</xsd:documentation>
|
33
|
+
</xsd:annotation>
|
34
|
+
<xsd:sequence>
|
35
|
+
<xsd:element name="loc" type="tLocSitemap"/>
|
36
|
+
<xsd:element name="lastmod" type="tLastmodSitemap" minOccurs="0"/>
|
37
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
38
|
+
</xsd:sequence>
|
39
|
+
</xsd:complexType>
|
40
|
+
|
41
|
+
<xsd:simpleType name="tLocSitemap">
|
42
|
+
<xsd:annotation>
|
43
|
+
<xsd:documentation>
|
44
|
+
REQUIRED: The location URI of a sitemap.
|
45
|
+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
|
46
|
+
</xsd:documentation>
|
47
|
+
</xsd:annotation>
|
48
|
+
<xsd:restriction base="xsd:anyURI">
|
49
|
+
<xsd:minLength value="12"/>
|
50
|
+
<xsd:maxLength value="2048"/>
|
51
|
+
</xsd:restriction>
|
52
|
+
</xsd:simpleType>
|
53
|
+
|
54
|
+
<xsd:simpleType name="tLastmodSitemap">
|
55
|
+
<xsd:annotation>
|
56
|
+
<xsd:documentation>
|
57
|
+
OPTIONAL: The date the document was last modified. The date must conform
|
58
|
+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
|
59
|
+
Example: 2005-05-10
|
60
|
+
Lastmod may also contain a timestamp.
|
61
|
+
Example: 2005-05-10T17:33:30+08:00
|
62
|
+
</xsd:documentation>
|
63
|
+
</xsd:annotation>
|
64
|
+
<xsd:union>
|
65
|
+
<xsd:simpleType>
|
66
|
+
<xsd:restriction base="xsd:date"/>
|
67
|
+
</xsd:simpleType>
|
68
|
+
<xsd:simpleType>
|
69
|
+
<xsd:restriction base="xsd:dateTime"/>
|
70
|
+
</xsd:simpleType>
|
71
|
+
</xsd:union>
|
72
|
+
</xsd:simpleType>
|
73
|
+
|
74
|
+
|
75
|
+
</xsd:schema>
|
@@ -0,0 +1,116 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
3
|
+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
|
4
|
+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
5
|
+
elementFormDefault="qualified">
|
6
|
+
<xsd:annotation>
|
7
|
+
<xsd:documentation>
|
8
|
+
XML Schema for Sitemap files.
|
9
|
+
Last Modified 2008-03-26
|
10
|
+
</xsd:documentation>
|
11
|
+
</xsd:annotation>
|
12
|
+
|
13
|
+
<xsd:element name="urlset">
|
14
|
+
<xsd:annotation>
|
15
|
+
<xsd:documentation>
|
16
|
+
Container for a set of up to 50,000 document elements.
|
17
|
+
This is the root element of the XML file.
|
18
|
+
</xsd:documentation>
|
19
|
+
</xsd:annotation>
|
20
|
+
<xsd:complexType>
|
21
|
+
<xsd:sequence>
|
22
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
23
|
+
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
|
24
|
+
</xsd:sequence>
|
25
|
+
</xsd:complexType>
|
26
|
+
</xsd:element>
|
27
|
+
|
28
|
+
<xsd:complexType name="tUrl">
|
29
|
+
<xsd:annotation>
|
30
|
+
<xsd:documentation>
|
31
|
+
Container for the data needed to describe a document to crawl.
|
32
|
+
</xsd:documentation>
|
33
|
+
</xsd:annotation>
|
34
|
+
<xsd:sequence>
|
35
|
+
<xsd:element name="loc" type="tLoc"/>
|
36
|
+
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
|
37
|
+
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
|
38
|
+
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
|
39
|
+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
|
40
|
+
</xsd:sequence>
|
41
|
+
</xsd:complexType>
|
42
|
+
|
43
|
+
<xsd:simpleType name="tLoc">
|
44
|
+
<xsd:annotation>
|
45
|
+
<xsd:documentation>
|
46
|
+
REQUIRED: The location URI of a document.
|
47
|
+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
|
48
|
+
</xsd:documentation>
|
49
|
+
</xsd:annotation>
|
50
|
+
<xsd:restriction base="xsd:anyURI">
|
51
|
+
<xsd:minLength value="12"/>
|
52
|
+
<xsd:maxLength value="2048"/>
|
53
|
+
</xsd:restriction>
|
54
|
+
</xsd:simpleType>
|
55
|
+
|
56
|
+
<xsd:simpleType name="tLastmod">
|
57
|
+
<xsd:annotation>
|
58
|
+
<xsd:documentation>
|
59
|
+
OPTIONAL: The date the document was last modified. The date must conform
|
60
|
+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
|
61
|
+
Example: 2005-05-10
|
62
|
+
Lastmod may also contain a timestamp.
|
63
|
+
Example: 2005-05-10T17:33:30+08:00
|
64
|
+
</xsd:documentation>
|
65
|
+
</xsd:annotation>
|
66
|
+
<xsd:union>
|
67
|
+
<xsd:simpleType>
|
68
|
+
<xsd:restriction base="xsd:date"/>
|
69
|
+
</xsd:simpleType>
|
70
|
+
<xsd:simpleType>
|
71
|
+
<xsd:restriction base="xsd:dateTime"/>
|
72
|
+
</xsd:simpleType>
|
73
|
+
</xsd:union>
|
74
|
+
</xsd:simpleType>
|
75
|
+
|
76
|
+
<xsd:simpleType name="tChangeFreq">
|
77
|
+
<xsd:annotation>
|
78
|
+
<xsd:documentation>
|
79
|
+
OPTIONAL: Indicates how frequently the content at a particular URL is
|
80
|
+
likely to change. The value "always" should be used to describe
|
81
|
+
documents that change each time they are accessed. The value "never"
|
82
|
+
should be used to describe archived URLs. Please note that web
|
83
|
+
crawlers may not necessarily crawl pages marked "always" more often.
|
84
|
+
Consider this element as a friendly suggestion and not a command.
|
85
|
+
</xsd:documentation>
|
86
|
+
</xsd:annotation>
|
87
|
+
<xsd:restriction base="xsd:string">
|
88
|
+
<xsd:enumeration value="always"/>
|
89
|
+
<xsd:enumeration value="hourly"/>
|
90
|
+
<xsd:enumeration value="daily"/>
|
91
|
+
<xsd:enumeration value="weekly"/>
|
92
|
+
<xsd:enumeration value="monthly"/>
|
93
|
+
<xsd:enumeration value="yearly"/>
|
94
|
+
<xsd:enumeration value="never"/>
|
95
|
+
</xsd:restriction>
|
96
|
+
</xsd:simpleType>
|
97
|
+
|
98
|
+
<xsd:simpleType name="tPriority">
|
99
|
+
<xsd:annotation>
|
100
|
+
<xsd:documentation>
|
101
|
+
OPTIONAL: The priority of a particular URL relative to other pages
|
102
|
+
on the same site. The value for this element is a number between
|
103
|
+
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
|
104
|
+
The default priority of a page is 0.5. Priority is used to select
|
105
|
+
between pages on your site. Setting a priority of 1.0 for all URLs
|
106
|
+
will not help you, as the relative priority of pages on your site
|
107
|
+
is what will be considered.
|
108
|
+
</xsd:documentation>
|
109
|
+
</xsd:annotation>
|
110
|
+
<xsd:restriction base="xsd:decimal">
|
111
|
+
<xsd:minInclusive value="0.0"/>
|
112
|
+
<xsd:maxInclusive value="1.0"/>
|
113
|
+
</xsd:restriction>
|
114
|
+
</xsd:simpleType>
|
115
|
+
|
116
|
+
</xsd:schema>
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'rspec/expectations'
|
2
|
+
require 'equivalent-xml'
|
3
|
+
|
4
|
+
# Custom matcher that passes when the actual value is XML equivalent to the
# expected XML. Equivalence is delegated to EquivalentXml, ignoring element
# order and normalizing whitespace. Both sides may be given as a Nokogiri
# element or document, a String, or a REXML element.
RSpec::Matchers.define :be_xml do |expected|

  # Converts a supported XML representation into a Nokogiri node:
  # an Element is returned as-is, a Document yields its root, a String is
  # parsed, and a REXML::Element is serialized and re-parsed.
  # Any other input falls through the `case` and yields nil.
  def to_nokogiri(xml)
    case xml
    when Nokogiri::XML::Element
      xml
    when Nokogiri::XML::Document
      xml.root
    when String
      to_nokogiri(Nokogiri::XML(xml))
    when REXML::Element
      to_nokogiri(xml.to_s)
    end
  end

  # Serializes a Nokogiri node as indented UTF-8 XML without an XML
  # declaration, for use in failure messages. Returns nil for nil input.
  def to_pretty(nokogiri)
    return nil unless nokogiri
    out = StringIO.new
    save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
    nokogiri.write_xml_to(out, encoding: 'UTF-8', indent: 2, save_with: save_options)
    out.string
  end

  match do |actual|
    # An expectation that cannot be converted is a test-authoring error, so raise rather than fail the match.
    expected_xml = to_nokogiri(expected) || fail("expected value #{expected} does not appear to be XML")
    actual_xml = to_nokogiri(actual)

    EquivalentXml.equivalent?(expected_xml, actual_xml, element_order: false, normalize_whitespace: true)
  end

  failure_message do |actual|
    expected_string = to_pretty(to_nokogiri(expected))
    # Fall back to the raw actual value when it could not be converted to XML.
    actual_string = to_pretty(to_nokogiri(actual)) || actual
    "expected XML:\n#{expected_string}\n\nbut was:\n#{actual_string}"
  end

  failure_message_when_negated do |actual|
    # Uses to_nokogiri: the previously referenced `to_element` helper is not
    # defined in this file, so a negated failure raised NoMethodError.
    actual_xml = to_nokogiri(actual) || actual
    "expected not to get XML:\n\t#{actual_xml}"
  end
end
|
45
|
+
|
46
|
+
# Custom matcher comparing Time values by their UTC ISO 8601 rendering,
# rounded to two fractional digits. When no expected value is given
# (nil/false), the matcher passes only if the actual value is nil.
RSpec::Matchers.define :be_time do |expected|

  # Canonical string form used for both comparison and failure messages:
  # Time instances are converted to UTC, rounded to two decimal places and
  # formatted as ISO 8601; any other object is rendered with #to_s.
  def to_string(time)
    return time.to_s unless time.is_a?(Time)

    time.utc.round(2).iso8601(2)
  end

  match do |actual|
    # No expectation: only a nil actual value matches.
    next actual.nil? unless expected

    raise "Expected value #{expected} is not a Time" unless expected.is_a?(Time)

    actual.is_a?(Time) && (to_string(expected) == to_string(actual))
  end

  failure_message do |actual|
    "expected time:\n#{to_string(expected)}\n\nbut was:\n#{to_string(actual)}"
  end
end
|
67
|
+
|
68
|
+
# Coerces a value to a MIME::Type.
#
# Returns nil for a nil/false argument and returns an existing MIME::Type
# unchanged. Otherwise the value is looked up in MIME::Types and the first
# match is used; when the lookup yields nothing, a new MIME::Type is built
# from the value.
def to_mime_type(mime_type)
  return nil unless mime_type

  if mime_type.is_a?(MIME::Type)
    mime_type
  else
    MIME::Types[mime_type].first || MIME::Type.new(mime_type)
  end
end
|
77
|
+
|
78
|
+
# Custom matcher that passes when the actual value equals the expected MIME
# type. The expectation is coerced once, up front, with to_mime_type; the
# actual value is compared as given.
RSpec::Matchers.define :be_mime_type do |expected|

  wanted = to_mime_type(expected)

  match { |actual| actual == wanted }

  failure_message do |actual|
    "expected MIME type:\n#{wanted}\nbut was:\n#{actual}"
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# ------------------------------------------------------------
|
2
|
+
# SimpleCov setup
|
3
|
+
|
4
|
+
if ENV['COVERAGE']
|
5
|
+
require 'simplecov'
|
6
|
+
require 'simplecov-console'
|
7
|
+
|
8
|
+
SimpleCov.minimum_coverage 100
|
9
|
+
SimpleCov.start do
|
10
|
+
add_filter '/spec/'
|
11
|
+
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
|
12
|
+
SimpleCov::Formatter::HTMLFormatter,
|
13
|
+
SimpleCov::Formatter::Console,
|
14
|
+
]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# ------------------------------------------------------------
|
19
|
+
# Rspec configuration
|
20
|
+
|
21
|
+
RSpec.configure do |config|
|
22
|
+
config.raise_errors_for_deprecations!
|
23
|
+
config.mock_with :rspec
|
24
|
+
end
|
25
|
+
|
26
|
+
require 'rspec_custom_matchers'
|
27
|
+
|
28
|
+
# ------------------------------------------------------------
|
29
|
+
# Resync
|
30
|
+
|
31
|
+
require 'resync'
|