resync 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +92 -0
- data/Rakefile +56 -0
- data/example.rb +100 -0
- data/lib/resync/capability_list.rb +85 -0
- data/lib/resync/change_dump.rb +15 -0
- data/lib/resync/change_dump_manifest.rb +15 -0
- data/lib/resync/change_list.rb +15 -0
- data/lib/resync/change_list_index.rb +26 -0
- data/lib/resync/link.rb +87 -0
- data/lib/resync/metadata.rb +112 -0
- data/lib/resync/resource.rb +72 -0
- data/lib/resync/resource_dump.rb +15 -0
- data/lib/resync/resource_dump_manifest.rb +15 -0
- data/lib/resync/resource_list.rb +15 -0
- data/lib/resync/resource_list_index.rb +15 -0
- data/lib/resync/shared/augmented.rb +76 -0
- data/lib/resync/shared/base_resource_list.rb +117 -0
- data/lib/resync/shared/descriptor.rb +135 -0
- data/lib/resync/shared/sitemap_index.rb +32 -0
- data/lib/resync/shared/sorted_resource_list.rb +60 -0
- data/lib/resync/source_description.rb +14 -0
- data/lib/resync/types/change.rb +14 -0
- data/lib/resync/types/change_frequency.rb +18 -0
- data/lib/resync/types.rb +6 -0
- data/lib/resync/version.rb +4 -0
- data/lib/resync/xml.rb +216 -0
- data/lib/resync/xml_parser.rb +65 -0
- data/lib/resync.rb +4 -0
- data/resync.gemspec +36 -0
- data/spec/acceptance/xml_parser_spec.rb +1049 -0
- data/spec/data/examples/README.md +1 -0
- data/spec/data/examples/example-1.xml +12 -0
- data/spec/data/examples/example-12.xml +25 -0
- data/spec/data/examples/example-13.xml +25 -0
- data/spec/data/examples/example-14.xml +23 -0
- data/spec/data/examples/example-15.xml +21 -0
- data/spec/data/examples/example-16.xml +24 -0
- data/spec/data/examples/example-17.xml +39 -0
- data/spec/data/examples/example-18.xml +25 -0
- data/spec/data/examples/example-19.xml +28 -0
- data/spec/data/examples/example-2.xml +18 -0
- data/spec/data/examples/example-20.xml +22 -0
- data/spec/data/examples/example-21.xml +31 -0
- data/spec/data/examples/example-22.xml +41 -0
- data/spec/data/examples/example-23.xml +41 -0
- data/spec/data/examples/example-24.xml +28 -0
- data/spec/data/examples/example-25.xml +21 -0
- data/spec/data/examples/example-26.xml +18 -0
- data/spec/data/examples/example-27.xml +36 -0
- data/spec/data/examples/example-28.xml +34 -0
- data/spec/data/examples/example-29.xml +27 -0
- data/spec/data/examples/example-3.xml +17 -0
- data/spec/data/examples/example-30.xml +18 -0
- data/spec/data/examples/example-31.xml +16 -0
- data/spec/data/examples/example-32.xml +22 -0
- data/spec/data/examples/example-33.xml +22 -0
- data/spec/data/examples/example-4.xml +10 -0
- data/spec/data/examples/example-5.xml +18 -0
- data/spec/data/examples/example-6.xml +21 -0
- data/spec/data/examples/example-7.xml +13 -0
- data/spec/data/examples/example-8.xml +12 -0
- data/spec/data/resourcesync.xsd +148 -0
- data/spec/data/siteindex.xsd +75 -0
- data/spec/data/sitemap.xsd +116 -0
- data/spec/rspec_custom_matchers.rb +89 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/todo.rb +11 -0
- data/spec/unit/resync/capability_list_spec.rb +138 -0
- data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
- data/spec/unit/resync/change_dump_spec.rb +61 -0
- data/spec/unit/resync/change_list_index_spec.rb +49 -0
- data/spec/unit/resync/change_list_spec.rb +75 -0
- data/spec/unit/resync/link_spec.rb +93 -0
- data/spec/unit/resync/metadata_spec.rb +169 -0
- data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
- data/spec/unit/resync/resource_dump_spec.rb +62 -0
- data/spec/unit/resync/resource_list_index_spec.rb +53 -0
- data/spec/unit/resync/resource_list_spec.rb +60 -0
- data/spec/unit/resync/resource_spec.rb +176 -0
- data/spec/unit/resync/shared/augmented_examples.rb +58 -0
- data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
- data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
- data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
- data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
- data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
- data/spec/unit/resync/source_description_spec.rb +55 -0
- data/spec/unit/resync/xml/timenode_spec.rb +48 -0
- data/spec/unit/resync/xml/xml_spec.rb +40 -0
- data/spec/unit/resync/xml_parser_spec.rb +82 -0
- metadata +340 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'ruby-enum'
|
2
|
+
|
3
|
+
module Resync
  module Types
    # Enumerates the kinds of change that a reference in a +ChangeList+,
    # +ChangeDump+, etc. can indicate.
    class Change
      include Ruby::Enum

      # Enum key => string value, defined in the order listed.
      {
        CREATED: 'created',
        UPDATED: 'updated',
        DELETED: 'deleted'
      }.each { |key, value| define key, value }
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'ruby-enum'
|
2
|
+
|
3
|
+
module Resync
  module Types
    # Enumerates how frequently a resource is expected to change.
    class ChangeFrequency
      include Ruby::Enum

      # Enum key => string value, defined in the order listed.
      {
        ALWAYS: 'always',
        HOURLY: 'hourly',
        DAILY: 'daily',
        WEEKLY: 'weekly',
        MONTHLY: 'monthly',
        YEARLY: 'yearly',
        NEVER: 'never'
      }.each { |key, value| define key, value }
    end
  end
end
|
data/lib/resync/types.rb
ADDED
data/lib/resync/xml.rb
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'time'
|
3
|
+
require 'xml/mapping'
|
4
|
+
|
5
|
+
module Resync
|
6
|
+
# Helper methods and modules related to reading and writing XML.
|
7
|
+
module XML
|
8
|
+
|
9
|
+
# ------------------------------------------------------
|
10
|
+
# Module shared methods
|
11
|
+
|
12
|
+
# Ensures that the provided value is a +URI+, parsing it if necessary.
# Leading and trailing whitespace around a string value (as is often
# found in XML text content) is ignored.
#
# @param url [URI, String, nil] the URI, or its string form.
# @return [URI, nil] +url+ itself if it is already a +URI+; otherwise the
#   result of parsing it; +nil+ if +url+ is +nil+ (or +false+).
# @raise [URI::InvalidURIError] if +url+ cannot be converted to a URI.
def self.to_uri(url)
  return nil unless url
  return url if url.is_a?(URI)

  # URI.parse rejects surrounding whitespace, so trim it before parsing.
  URI.parse(url.respond_to?(:strip) ? url.strip : url)
end
|
20
|
+
|
21
|
+
# Resolves the given object to a +REXML::Element+.
#
# @param xml [String, IO, REXML::Document, REXML::Element] a string or
#   IO-like object holding an XML document (the XML declaration is
#   optional), an already-parsed document, or an element.
# @return [REXML::Element] the document's root element, or +xml+ itself
#   when it is already an element.
# @raise [ArgumentError] if +xml+ is none of the accepted types.
def self.element(xml)
  # Check Document first: it must yield its root rather than itself.
  return xml.root if xml.is_a?(REXML::Document)
  return xml if xml.is_a?(REXML::Element)

  unless can_parse(xml)
    fail ArgumentError, "Unexpected argument type; expected XML document, String, or IO source, was #{xml.class}"
  end

  REXML::Document.new(xml).root
end
|
39
|
+
|
40
|
+
# ------------------------------------------------------------
|
41
|
+
# Private class methods
|
42
|
+
|
43
|
+
# Reports whether the argument looks like something
# +REXML::Document.new()+ can read: either a +String+, or an object
# responding to +read+, +readline+, +nil?+ and +eof?+.
#
# @param arg [Object] the candidate XML source.
# @return [Boolean] true if +REXML::Document.new()+ should be able to parse
#   the argument, false otherwise
def self.can_parse(arg)
  return true if arg.is_a?(String)

  [:read, :readline, :nil?, :eof?].all? { |io_method| arg.respond_to?(io_method) }
end
|
54
|
+
private_class_method :can_parse
|
55
|
+
|
56
|
+
# ------------------------------------------------------------
|
57
|
+
# Time
|
58
|
+
|
59
|
+
# Node type that reads XML text as a +Time+ and writes a +Time+ back
# out in ISO 8601 form.
class TimeNode < ::XML::Mapping::SingleAttributeNode
  # Delegates to the superclass initializer, takes the first element of
  # its result as this node's XPath expression, and returns the rest.
  def initialize(*args)
    xpath_expr, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath_expr)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [Time, nil] the ISO 8601 text at this node's path, parsed and
  #   converted to UTC, or +nil+ if there is no text.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    return nil unless text

    Time.iso8601(text).utc
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.iso8601+ at this node's path, creating the target
  # node if it does not exist.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.iso8601
  end
end

::XML::Mapping.add_node_class TimeNode
|
80
|
+
|
81
|
+
# ------------------------------------------------------------
|
82
|
+
# URI
|
83
|
+
|
84
|
+
# Maps +URI+ objects.
class UriNode < ::XML::Mapping::SingleAttributeNode
  # Delegates to the superclass initializer, takes the first element of
  # its result as this node's XPath expression, and returns the rest.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [URI] the text at this node's path, parsed as a URI.
  # @raise [URI::InvalidURIError] if the text is not a valid URI.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    # URI() rejects leading/trailing whitespace (e.g. line breaks around
    # element text), so trim it before parsing.
    text = text.strip if text.respond_to?(:strip)
    URI(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ at this node's path, creating the target node
  # if it does not exist.
  def set_attr_value(xml, value)
    @path.first(xml, ensure_created: true).text = value.to_s
  end
end

::XML::Mapping.add_node_class UriNode
|
104
|
+
|
105
|
+
# ------------------------------------------------------------
|
106
|
+
# Resync::Types::Change
|
107
|
+
|
108
|
+
# Node type that converts XML text to and from
# +Resync::Types::Change+ values.
class ChangeNode < ::XML::Mapping::SingleAttributeNode
  # Delegates to the superclass initializer, takes the first element of
  # its result as this node's XPath expression, and returns the rest.
  def initialize(*args)
    xpath_expr, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath_expr)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Returns whatever +Resync::Types::Change.parse+ yields for the text
  # at this node's path.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::Change.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ at this node's path, creating the target node
  # if it does not exist.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class ChangeNode
|
128
|
+
|
129
|
+
# ------------------------------------------------------------
|
130
|
+
# Resync::Types::ChangeFrequency
|
131
|
+
|
132
|
+
# Node type that converts XML text to and from
# +Resync::Types::ChangeFrequency+ values.
class ChangefreqNode < ::XML::Mapping::SingleAttributeNode
  # Delegates to the superclass initializer, takes the first element of
  # its result as this node's XPath expression, and returns the rest.
  def initialize(*args)
    xpath_expr, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath_expr)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Returns whatever +Resync::Types::ChangeFrequency.parse+ yields for
  # the text at this node's path.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::ChangeFrequency.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ at this node's path, creating the target node
  # if it does not exist.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class ChangefreqNode
|
152
|
+
|
153
|
+
# ------------------------------------------------------------
|
154
|
+
# MIME::Type
|
155
|
+
|
156
|
+
# Node type that converts XML text to and from +MIME::Type+ values.
class MimeTypeNode < ::XML::Mapping::SingleAttributeNode
  # Delegates to the superclass initializer, takes the first element of
  # its result as this node's XPath expression, and returns the rest.
  def initialize(*args)
    xpath_expr, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath_expr)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [MIME::Type, nil] +nil+ when there is no value; the value
  #   itself if it is already a +MIME::Type+; otherwise the first match
  #   from +MIME::Types+, falling back to a new +MIME::Type+ built from
  #   the text when nothing matches.
  def extract_attr_value(xml)
    raw = default_when_xpath_err { @path.first(xml).text }

    case raw
    when nil, false then nil
    when MIME::Type then raw
    else MIME::Types[raw].first || MIME::Type.new(raw)
    end
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ at this node's path, creating the target node
  # if it does not exist.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class MimeTypeNode
|
183
|
+
|
184
|
+
# ------------------------------------------------------------
|
185
|
+
# Whitespace-separated hashcode list
|
186
|
+
|
187
|
+
# Maps the whitespace-separated list of hash codes in a +<rs:ln>+
# or +<rs:md>+ tag to a hash of digest values keyed by hash algorithm.
# (See {Resync::Descriptor#hashes}.)
class HashCodesNode < ::XML::Mapping::SingleAttributeNode
  # Delegates to the superclass initializer, takes the first element of
  # its result as this node's XPath expression, and returns the rest.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [Hash<String, String>] digest values keyed by algorithm name;
  #   empty if there is no value.
  def extract_attr_value(xml)
    hashes = default_when_xpath_err { @path.first(xml).text }
    return {} unless hashes
    return hashes if hashes.is_a?(Hash)

    hashes.split(/[[:space:]]+/)
      .reject(&:empty?) # leading whitespace yields an empty first token
      .map { |entry| entry.split(':', 2) } # split on the first colon only, so digests may contain ':'
      .to_h
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes the entries as space-separated +algorithm:digest+ pairs at
  # this node's path (creating the target node if needed); writes
  # nothing when +value+ is empty.
  def set_attr_value(xml, value)
    return if value.empty?
    hash_str = value.map { |k, v| "#{k}:#{v}" }.join(' ')
    @path.first(xml, ensure_created: true).text = hash_str
  end
end

::XML::Mapping.add_node_class HashCodesNode
|
214
|
+
|
215
|
+
end
|
216
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Resync

  # Parses ResourceSync XML documents and returns appropriate objects.
  module XMLParser

    # The list of parseable types, organized by XML mapping.
    # Frozen so the lookup table cannot be modified at runtime.
    ROOT_TYPES = {
      _default: [
        CapabilityList,
        ChangeDump,
        ChangeDumpManifest,
        ChangeList,
        ResourceDump,
        ResourceDumpManifest,
        ResourceList,
        SourceDescription
      ].freeze,
      sitemapindex: [
        ChangeListIndex,
        ResourceListIndex
      ].freeze
    }.freeze
    private_constant :ROOT_TYPES

    # XPath selecting the +capability+ attribute of the +md+ element (in the
    # ResourceSync terms namespace) directly under the root element.
    CAPABILITY_ATTRIBUTE = "/*/[namespace-uri() = 'http://www.openarchives.org/rs/terms/' and local-name() = 'md']/@capability".freeze
    private_constant :CAPABILITY_ATTRIBUTE

    # Parses the specified ResourceSync document and returns the appropriate object
    # based on the +capability+ attribute of the root element's metadata (i.e. +<rs:md>+).
    #
    # @param xml [String, IO, REXML::Document, REXML::Element] a ResourceSync XML document
    #   (or its root element), in any form accepted by +XML.element+
    # @return [Object] an instance of the type in +ROOT_TYPES+ whose +CAPABILITY+
    #   matches the document, as produced by that type's +load_from_xml+
    # @raise [ArgumentError] if +xml+ has no root element, if the capability
    #   cannot be identified, or if no type is mapped to it
    def self.parse(xml)
      root_element = XML.element(xml)
      # An empty document has no root; report it in the same way as other bad input.
      fail ArgumentError, 'unable to find a root element in the provided XML' unless root_element

      mapping = root_element.name == 'sitemapindex' ? :sitemapindex : :_default
      root_type = find_root_type(ROOT_TYPES[mapping], root_element)
      root_type.load_from_xml(root_element, mapping: mapping)
    end

    # Finds the type among +types+ whose +CAPABILITY+ constant equals the
    # capability declared by +root_element+.
    #
    # @raise [ArgumentError] if no type matches
    def self.find_root_type(types, root_element)
      capability = capability_for(root_element)
      root_type = types.find { |t| t::CAPABILITY == capability }
      fail ArgumentError, "no mapped type for capability '#{capability}'" unless root_type
      root_type
    end

    private_class_method :find_root_type

    # Returns the value of the capability attribute of +root_element+.
    #
    # @raise [ArgumentError] if the attribute is missing or has no value
    def self.capability_for(root_element)
      capability = capability_attribute_for(root_element).value
      fail ArgumentError, "unable to identify capability of root element in #{root_element}" unless capability
      capability
    end

    private_class_method :capability_for

    # Returns the first node matching +CAPABILITY_ATTRIBUTE+ under +root_element+.
    #
    # @raise [ArgumentError] if nothing matches
    def self.capability_attribute_for(root_element)
      capability_attr = REXML::XPath.first(root_element, CAPABILITY_ATTRIBUTE)
      fail ArgumentError, "unable to identify capability of root element in #{root_element}" unless capability_attr
      capability_attr
    end

    private_class_method :capability_attribute_for
  end
end
|
data/lib/resync.rb
ADDED
data/resync.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'resync/version'
require 'uri'

Gem::Specification.new do |spec|
  spec.name          = 'resync'
  spec.version       = Resync::VERSION
  spec.authors       = ['David Moles']
  spec.email         = ['david.moles@ucop.edu']
  spec.summary       = 'Utility library for ResourceSync'
  spec.description   = 'A Ruby gem for working with the ResourceSync web synchronization framework'
  spec.license       = 'MIT'

  # Derive the homepage from the git "origin" remote.
  origin = `git config --get remote.origin.url`.chomp
  # scp-style remotes ("git@host:owner/repo.git") are not valid URIs;
  # rewrite them as "ssh://host/owner/repo.git" so URI() can parse them.
  origin = origin.sub(%r{\A(?:[^@/]+@)?([^:/]+):}, 'ssh://\1/') unless origin.include?('://')
  origin_uri = URI(origin)
  # Skip the homepage when no host can be determined (e.g. no remote is
  # configured), rather than emitting a URL with no host.
  if origin_uri.host
    spec.homepage = URI::HTTP.build(host: origin_uri.host, path: origin_uri.path.chomp('.git')).to_s
  end

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'mime-types', '~> 2.5'
  spec.add_dependency 'ruby-enum', '~> 0.4'
  spec.add_dependency 'xml-mapping', '~> 0.10'

  spec.add_development_dependency 'equivalent-xml', '~> 0.6.0'
  spec.add_development_dependency 'rake', '~> 10.4'
  spec.add_development_dependency 'rspec', '~> 3.2'
  spec.add_development_dependency 'rubocop', '~> 0.29.1'
  spec.add_development_dependency 'simplecov', '~> 0.9.2'
  spec.add_development_dependency 'simplecov-console', '~> 0.2.0'
  spec.add_development_dependency 'yard', '~> 0.8'
end
|