resync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +92 -0
- data/Rakefile +56 -0
- data/example.rb +100 -0
- data/lib/resync/capability_list.rb +85 -0
- data/lib/resync/change_dump.rb +15 -0
- data/lib/resync/change_dump_manifest.rb +15 -0
- data/lib/resync/change_list.rb +15 -0
- data/lib/resync/change_list_index.rb +26 -0
- data/lib/resync/link.rb +87 -0
- data/lib/resync/metadata.rb +112 -0
- data/lib/resync/resource.rb +72 -0
- data/lib/resync/resource_dump.rb +15 -0
- data/lib/resync/resource_dump_manifest.rb +15 -0
- data/lib/resync/resource_list.rb +15 -0
- data/lib/resync/resource_list_index.rb +15 -0
- data/lib/resync/shared/augmented.rb +76 -0
- data/lib/resync/shared/base_resource_list.rb +117 -0
- data/lib/resync/shared/descriptor.rb +135 -0
- data/lib/resync/shared/sitemap_index.rb +32 -0
- data/lib/resync/shared/sorted_resource_list.rb +60 -0
- data/lib/resync/source_description.rb +14 -0
- data/lib/resync/types/change.rb +14 -0
- data/lib/resync/types/change_frequency.rb +18 -0
- data/lib/resync/types.rb +6 -0
- data/lib/resync/version.rb +4 -0
- data/lib/resync/xml.rb +216 -0
- data/lib/resync/xml_parser.rb +65 -0
- data/lib/resync.rb +4 -0
- data/resync.gemspec +36 -0
- data/spec/acceptance/xml_parser_spec.rb +1049 -0
- data/spec/data/examples/README.md +1 -0
- data/spec/data/examples/example-1.xml +12 -0
- data/spec/data/examples/example-12.xml +25 -0
- data/spec/data/examples/example-13.xml +25 -0
- data/spec/data/examples/example-14.xml +23 -0
- data/spec/data/examples/example-15.xml +21 -0
- data/spec/data/examples/example-16.xml +24 -0
- data/spec/data/examples/example-17.xml +39 -0
- data/spec/data/examples/example-18.xml +25 -0
- data/spec/data/examples/example-19.xml +28 -0
- data/spec/data/examples/example-2.xml +18 -0
- data/spec/data/examples/example-20.xml +22 -0
- data/spec/data/examples/example-21.xml +31 -0
- data/spec/data/examples/example-22.xml +41 -0
- data/spec/data/examples/example-23.xml +41 -0
- data/spec/data/examples/example-24.xml +28 -0
- data/spec/data/examples/example-25.xml +21 -0
- data/spec/data/examples/example-26.xml +18 -0
- data/spec/data/examples/example-27.xml +36 -0
- data/spec/data/examples/example-28.xml +34 -0
- data/spec/data/examples/example-29.xml +27 -0
- data/spec/data/examples/example-3.xml +17 -0
- data/spec/data/examples/example-30.xml +18 -0
- data/spec/data/examples/example-31.xml +16 -0
- data/spec/data/examples/example-32.xml +22 -0
- data/spec/data/examples/example-33.xml +22 -0
- data/spec/data/examples/example-4.xml +10 -0
- data/spec/data/examples/example-5.xml +18 -0
- data/spec/data/examples/example-6.xml +21 -0
- data/spec/data/examples/example-7.xml +13 -0
- data/spec/data/examples/example-8.xml +12 -0
- data/spec/data/resourcesync.xsd +148 -0
- data/spec/data/siteindex.xsd +75 -0
- data/spec/data/sitemap.xsd +116 -0
- data/spec/rspec_custom_matchers.rb +89 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/todo.rb +11 -0
- data/spec/unit/resync/capability_list_spec.rb +138 -0
- data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
- data/spec/unit/resync/change_dump_spec.rb +61 -0
- data/spec/unit/resync/change_list_index_spec.rb +49 -0
- data/spec/unit/resync/change_list_spec.rb +75 -0
- data/spec/unit/resync/link_spec.rb +93 -0
- data/spec/unit/resync/metadata_spec.rb +169 -0
- data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
- data/spec/unit/resync/resource_dump_spec.rb +62 -0
- data/spec/unit/resync/resource_list_index_spec.rb +53 -0
- data/spec/unit/resync/resource_list_spec.rb +60 -0
- data/spec/unit/resync/resource_spec.rb +176 -0
- data/spec/unit/resync/shared/augmented_examples.rb +58 -0
- data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
- data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
- data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
- data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
- data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
- data/spec/unit/resync/source_description_spec.rb +55 -0
- data/spec/unit/resync/xml/timenode_spec.rb +48 -0
- data/spec/unit/resync/xml/xml_spec.rb +40 -0
- data/spec/unit/resync/xml_parser_spec.rb +82 -0
- metadata +340 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'ruby-enum'
|
2
|
+
|
3
|
+
module Resync
  module Types
    # Enumeration of the kinds of change that a reference in a +ChangeList+,
    # +ChangeDump+, etc. can indicate.
    class Change
      include Ruby::Enum

      # Enum keys paired with the string value registered for each of them,
      # defined in this order.
      {
        CREATED: 'created',
        UPDATED: 'updated',
        DELETED: 'deleted'
      }.each { |key, value| define key, value }
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'ruby-enum'
|
2
|
+
|
3
|
+
module Resync
  module Types
    # Enumeration of how frequently a resource is expected to change.
    class ChangeFrequency
      include Ruby::Enum

      # Enum keys paired with the string value registered for each of them,
      # defined in this order.
      {
        ALWAYS: 'always',
        HOURLY: 'hourly',
        DAILY: 'daily',
        WEEKLY: 'weekly',
        MONTHLY: 'monthly',
        YEARLY: 'yearly',
        NEVER: 'never'
      }.each { |key, value| define key, value }
    end
  end
end
|
data/lib/resync/types.rb
ADDED
data/lib/resync/xml.rb
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'time'
|
3
|
+
require 'xml/mapping'
|
4
|
+
|
5
|
+
module Resync
|
6
|
+
# Helper methods and modules related to reading and writing XML.
|
7
|
+
module XML
|
8
|
+
|
9
|
+
# ------------------------------------------------------
|
10
|
+
# Module shared methods
|
11
|
+
|
12
|
+
# Coerces the argument to a +URI+, leaving existing +URI+ instances untouched.
#
# @param url [URI, String, nil] the value to convert.
# @return [URI, nil] +url+ itself if it is already a +URI+, the result of
#   +URI.parse(url)+ otherwise, or +nil+ if +url+ is +nil+ (or +false+).
# @raise [URI::InvalidURIError] if +url+ cannot be converted to a URI.
def self.to_uri(url)
  return nil unless url
  return url if url.is_a?(URI)

  URI.parse(url)
end
|
20
|
+
|
21
|
+
# Resolves the argument to a +REXML::Element+.
#
# @param xml [String, IO, REXML::Document, REXML::Element] a string or IO-like
#   object containing an XML document (with or without XML declaration), or an
#   already-parsed XML document, or an XML element.
# @return [REXML::Element] the root element of the document, or +xml+ itself
#   if it is already an element.
# @raise [ArgumentError] if +xml+ is neither a document, an element, nor
#   something +can_parse+ accepts.
def self.element(xml)
  # Document is tested before Element, in the same order as the type dispatch
  # it replaces.
  return xml.root if xml.is_a?(REXML::Document)
  return xml if xml.is_a?(REXML::Element)

  unless can_parse(xml)
    raise ArgumentError, "Unexpected argument type; expected XML document, String, or IO source, was #{xml.class}"
  end

  REXML::Document.new(xml).root
end
|
39
|
+
|
40
|
+
# ------------------------------------------------------------
|
41
|
+
# Private class methods
|
42
|
+
|
43
|
+
# Decides whether the argument looks like something +REXML::Document.new+
# can read: either a +String+, or an object that responds to all of
# +read+, +readline+, +nil?+ and +eof?+.
#
# @param arg [Object] the candidate XML source.
# @return [Boolean] true if +arg+ is a +String+ or responds to every one of
#   the methods listed above, false otherwise.
def self.can_parse(arg)
  return true if arg.is_a?(String)

  [:read, :readline, :nil?, :eof?].all? { |message| arg.respond_to?(message) }
end
|
54
|
+
private_class_method :can_parse
|
55
|
+
|
56
|
+
# ------------------------------------------------------------
|
57
|
+
# Time
|
58
|
+
|
59
|
+
# Node type that converts between ISO 8601 text in the XML and +Time+ objects.
class TimeNode < ::XML::Mapping::SingleAttributeNode
  # Hands all arguments to the superclass initializer and destructures what
  # it returns: the first element is compiled into the XPath used by this
  # node, the remaining elements are returned to the caller.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @param xml the XML node to read from.
  # @return [Time, nil] the text of the first node matching the path, parsed
  #   with +Time.iso8601+ and converted to UTC, or +nil+ if there is no text.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    return nil unless text

    Time.iso8601(text).utc
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  #
  # @param xml the XML node to write to.
  # @param value [Time] the time to write, serialized with +#iso8601+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.iso8601
  end
end

::XML::Mapping.add_node_class TimeNode
|
80
|
+
|
81
|
+
# ------------------------------------------------------------
|
82
|
+
# URI
|
83
|
+
|
84
|
+
# Node type that converts between URI text in the XML and +URI+ objects.
class UriNode < ::XML::Mapping::SingleAttributeNode
  # Hands all arguments to the superclass initializer and destructures what
  # it returns: the first element is compiled into the XPath used by this
  # node, the remaining elements are returned to the caller.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @param xml the XML node to read from.
  # @return [URI, nil] the URI parsed from the text of the first node matching
  #   the path, or +nil+ if that node has no text.
  # @raise [URI::InvalidURIError] if the text is not a valid URI.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    # The text can be nil (e.g. an element without text content); URI(nil)
    # would raise ArgumentError, so report "no value" like the other nodes do.
    return nil unless text

    # Surrounding whitespace is not part of the URI and would make URI() fail.
    URI(text.strip)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  #
  # @param xml the XML node to write to.
  # @param value [URI, #to_s] the URI to write, serialized with +#to_s+.
  def set_attr_value(xml, value)
    @path.first(xml, ensure_created: true).text = value.to_s
  end
end

::XML::Mapping.add_node_class UriNode
|
104
|
+
|
105
|
+
# ------------------------------------------------------------
|
106
|
+
# Resync::Types::Change
|
107
|
+
|
108
|
+
# Node type that converts between text in the XML and
# +Resync::Types::Change+ values.
class ChangeNode < ::XML::Mapping::SingleAttributeNode
  # Hands all arguments to the superclass initializer and destructures what
  # it returns: the first element is compiled into the XPath used by this
  # node, the remaining elements are returned to the caller.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @param xml the XML node to read from.
  # @return whatever +Resync::Types::Change.parse+ returns for the text of
  #   the first node matching the path.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::Change.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  #
  # @param xml the XML node to write to.
  # @param value the change value to write, serialized with +#to_s+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class ChangeNode
|
128
|
+
|
129
|
+
# ------------------------------------------------------------
|
130
|
+
# Resync::Types::ChangeFrequency
|
131
|
+
|
132
|
+
# Node type that converts between text in the XML and
# +Resync::Types::ChangeFrequency+ values.
class ChangefreqNode < ::XML::Mapping::SingleAttributeNode
  # Hands all arguments to the superclass initializer and destructures what
  # it returns: the first element is compiled into the XPath used by this
  # node, the remaining elements are returned to the caller.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @param xml the XML node to read from.
  # @return whatever +Resync::Types::ChangeFrequency.parse+ returns for the
  #   text of the first node matching the path.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::ChangeFrequency.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  #
  # @param xml the XML node to write to.
  # @param value the change frequency to write, serialized with +#to_s+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class ChangefreqNode
|
152
|
+
|
153
|
+
# ------------------------------------------------------------
|
154
|
+
# MIME::Type
|
155
|
+
|
156
|
+
# Node type that converts between MIME type text in the XML and
# +MIME::Type+ objects.
class MimeTypeNode < ::XML::Mapping::SingleAttributeNode
  # Hands all arguments to the superclass initializer and destructures what
  # it returns: the first element is compiled into the XPath used by this
  # node, the remaining elements are returned to the caller.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @param xml the XML node to read from.
  # @return [MIME::Type, nil] +nil+ if the first node matching the path has no
  #   text; otherwise the first entry +MIME::Types+ returns for that text, or
  #   a new +MIME::Type+ built from the text if there is no such entry.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    return nil unless text
    # NOTE(review): the value comes from +#text+, so this guard looks
    # unreachable -- confirm before removing.
    return text if text.is_a?(MIME::Type)

    MIME::Types[text].first || MIME::Type.new(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  #
  # @param xml the XML node to write to.
  # @param value [MIME::Type, #to_s] the type to write, serialized with +#to_s+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class MimeTypeNode
|
183
|
+
|
184
|
+
# ------------------------------------------------------------
|
185
|
+
# Whitespace-separated hashcode list
|
186
|
+
|
187
|
+
# Maps the whitespace-separated list of hash codes in a +<rs:ln>+
# or +<rs:md>+ tag to a hash of digest values keyed by hash algorithm.
# (See {Resync::Descriptor#hashes}.)
class HashCodesNode < ::XML::Mapping::SingleAttributeNode
  # Hands all arguments to the superclass initializer and destructures what
  # it returns: the first element is compiled into the XPath used by this
  # node, the remaining elements are returned to the caller.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @param xml the XML node to read from.
  # @return [Hash<String, String>] digest values keyed by algorithm name;
  #   empty if the first node matching the path has no text.
  def extract_attr_value(xml)
    hashes = default_when_xpath_err { @path.first(xml).text }
    return {} unless hashes
    return hashes if hashes.is_a?(Hash)

    hashes.split(/[[:space:]]+/)
      .reject(&:empty?) # leading whitespace yields an empty first token
      .map { |entry| entry.split(':', 2) } # split on the first colon only, so a digest may contain ':'
      .to_h
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes nothing when +value+ is empty.
  #
  # @param xml the XML node to write to.
  # @param value [Hash] digest values keyed by algorithm name; written as
  #   space-separated +algorithm:digest+ pairs.
  def set_attr_value(xml, value)
    return if value.empty?
    hash_str = value.map { |k, v| "#{k}:#{v}" }.join(' ')
    @path.first(xml, ensure_created: true).text = hash_str
  end
end

::XML::Mapping.add_node_class HashCodesNode
|
214
|
+
|
215
|
+
end
|
216
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Resync

  # Parses ResourceSync XML documents and returns appropriate objects.
  module XMLParser

    # The list of parseable types, organized by XML mapping. Frozen because
    # it is only ever used as a lookup table.
    ROOT_TYPES = {
      _default: [
        CapabilityList,
        ChangeDump,
        ChangeDumpManifest,
        ChangeList,
        ResourceDump,
        ResourceDumpManifest,
        ResourceList,
        SourceDescription
      ].freeze,
      sitemapindex: [
        ChangeListIndex,
        ResourceListIndex
      ].freeze
    }.freeze
    private_constant :ROOT_TYPES

    # XPath used to look up the +capability+ attribute of the root element's
    # metadata (i.e. +<rs:md>+).
    CAPABILITY_ATTRIBUTE = "/*/[namespace-uri() = 'http://www.openarchives.org/rs/terms/' and local-name() = 'md']/@capability"
    private_constant :CAPABILITY_ATTRIBUTE

    # Parses the specified ResourceSync document and returns the appropriate object
    # based on the +capability+ attribute of the root element's metadata (i.e. +<rs:md>+).
    #
    # @param xml [String, IO, REXML::Document, REXML::Element] a ResourceSync XML document
    #   (or its root element), in any form accepted by +XML.element+
    # @return the result of +load_from_xml+ on the type whose +CAPABILITY+
    #   matches the document's capability
    # @raise [ArgumentError] if the document has no root element, if its capability
    #   cannot be identified, or if no type is mapped for that capability
    def self.parse(xml)
      root_element = XML.element(xml)
      # An empty document has no root; fail with a descriptive error instead
      # of a NoMethodError on nil below.
      fail ArgumentError, 'unable to find root element in XML document' unless root_element
      mapping = root_element.name == 'sitemapindex' ? :sitemapindex : :_default
      root_type = find_root_type(ROOT_TYPES[mapping], root_element)
      root_type.load_from_xml(root_element, mapping: mapping)
    end

    # Finds the type among +types+ whose +CAPABILITY+ constant equals the
    # capability of +root_element+.
    #
    # @raise [ArgumentError] if none of +types+ matches
    def self.find_root_type(types, root_element)
      capability = capability_for(root_element)
      root_type = types.find { |t| t::CAPABILITY == capability }
      fail ArgumentError, "no mapped type for capability '#{capability}'" unless root_type
      root_type
    end

    private_class_method :find_root_type

    # Returns the value of the capability attribute found for +root_element+.
    #
    # @raise [ArgumentError] if the attribute is missing or has no value
    def self.capability_for(root_element)
      capability = capability_attribute_for(root_element).value
      fail ArgumentError, "unable to identify capability of root element in #{root_element}" unless capability
      capability
    end

    private_class_method :capability_for

    # Returns the first match of +CAPABILITY_ATTRIBUTE+ evaluated against
    # +root_element+.
    #
    # @raise [ArgumentError] if the XPath matches nothing
    def self.capability_attribute_for(root_element)
      capability_attr = REXML::XPath.first(root_element, CAPABILITY_ATTRIBUTE)
      fail ArgumentError, "unable to identify capability of root element in #{root_element}" unless capability_attr
      capability_attr
    end

    private_class_method :capability_attribute_for
  end
end
|
data/lib/resync.rb
ADDED
data/resync.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for resync. Puts the local lib directory on the load path
# so the version constant can be read from the source tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'resync/version'
require 'uri'

Gem::Specification.new do |spec|
  spec.name          = 'resync'
  spec.version       = Resync::VERSION
  spec.authors       = ['David Moles']
  spec.email         = ['david.moles@ucop.edu']
  spec.summary       = 'Utility library for ResourceSync'
  spec.description   = 'A Ruby gem for working with the ResourceSync web synchronization framework'
  spec.license       = 'MIT'

  # The homepage is derived from the git remote. scp-style remotes
  # (user@host:path) are not valid URIs and would make URI() raise, so they
  # are rewritten to ssh://host/path before parsing; other forms are untouched.
  origin = `git config --get remote.origin.url`.chomp
  origin_uri = URI(origin.sub(%r{\A(?:[^@/]+@)?([^:/]+):(?!//)}, 'ssh://\1/'))
  spec.homepage = URI::HTTP.build(host: origin_uri.host, path: origin_uri.path.chomp('.git')).to_s

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'mime-types', '~> 2.5'
  spec.add_dependency 'ruby-enum', '~> 0.4'
  spec.add_dependency 'xml-mapping', '~> 0.10'

  spec.add_development_dependency 'equivalent-xml', '~> 0.6.0'
  spec.add_development_dependency 'rake', '~> 10.4'
  spec.add_development_dependency 'rspec', '~> 3.2'
  spec.add_development_dependency 'rubocop', '~> 0.29.1'
  spec.add_development_dependency 'simplecov', '~> 0.9.2'
  spec.add_development_dependency 'simplecov-console', '~> 0.2.0'
  spec.add_development_dependency 'yard', '~> 0.8'
end
|