resync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +23 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE.md +22 -0
  8. data/README.md +92 -0
  9. data/Rakefile +56 -0
  10. data/example.rb +100 -0
  11. data/lib/resync/capability_list.rb +85 -0
  12. data/lib/resync/change_dump.rb +15 -0
  13. data/lib/resync/change_dump_manifest.rb +15 -0
  14. data/lib/resync/change_list.rb +15 -0
  15. data/lib/resync/change_list_index.rb +26 -0
  16. data/lib/resync/link.rb +87 -0
  17. data/lib/resync/metadata.rb +112 -0
  18. data/lib/resync/resource.rb +72 -0
  19. data/lib/resync/resource_dump.rb +15 -0
  20. data/lib/resync/resource_dump_manifest.rb +15 -0
  21. data/lib/resync/resource_list.rb +15 -0
  22. data/lib/resync/resource_list_index.rb +15 -0
  23. data/lib/resync/shared/augmented.rb +76 -0
  24. data/lib/resync/shared/base_resource_list.rb +117 -0
  25. data/lib/resync/shared/descriptor.rb +135 -0
  26. data/lib/resync/shared/sitemap_index.rb +32 -0
  27. data/lib/resync/shared/sorted_resource_list.rb +60 -0
  28. data/lib/resync/source_description.rb +14 -0
  29. data/lib/resync/types/change.rb +14 -0
  30. data/lib/resync/types/change_frequency.rb +18 -0
  31. data/lib/resync/types.rb +6 -0
  32. data/lib/resync/version.rb +4 -0
  33. data/lib/resync/xml.rb +216 -0
  34. data/lib/resync/xml_parser.rb +65 -0
  35. data/lib/resync.rb +4 -0
  36. data/resync.gemspec +36 -0
  37. data/spec/acceptance/xml_parser_spec.rb +1049 -0
  38. data/spec/data/examples/README.md +1 -0
  39. data/spec/data/examples/example-1.xml +12 -0
  40. data/spec/data/examples/example-12.xml +25 -0
  41. data/spec/data/examples/example-13.xml +25 -0
  42. data/spec/data/examples/example-14.xml +23 -0
  43. data/spec/data/examples/example-15.xml +21 -0
  44. data/spec/data/examples/example-16.xml +24 -0
  45. data/spec/data/examples/example-17.xml +39 -0
  46. data/spec/data/examples/example-18.xml +25 -0
  47. data/spec/data/examples/example-19.xml +28 -0
  48. data/spec/data/examples/example-2.xml +18 -0
  49. data/spec/data/examples/example-20.xml +22 -0
  50. data/spec/data/examples/example-21.xml +31 -0
  51. data/spec/data/examples/example-22.xml +41 -0
  52. data/spec/data/examples/example-23.xml +41 -0
  53. data/spec/data/examples/example-24.xml +28 -0
  54. data/spec/data/examples/example-25.xml +21 -0
  55. data/spec/data/examples/example-26.xml +18 -0
  56. data/spec/data/examples/example-27.xml +36 -0
  57. data/spec/data/examples/example-28.xml +34 -0
  58. data/spec/data/examples/example-29.xml +27 -0
  59. data/spec/data/examples/example-3.xml +17 -0
  60. data/spec/data/examples/example-30.xml +18 -0
  61. data/spec/data/examples/example-31.xml +16 -0
  62. data/spec/data/examples/example-32.xml +22 -0
  63. data/spec/data/examples/example-33.xml +22 -0
  64. data/spec/data/examples/example-4.xml +10 -0
  65. data/spec/data/examples/example-5.xml +18 -0
  66. data/spec/data/examples/example-6.xml +21 -0
  67. data/spec/data/examples/example-7.xml +13 -0
  68. data/spec/data/examples/example-8.xml +12 -0
  69. data/spec/data/resourcesync.xsd +148 -0
  70. data/spec/data/siteindex.xsd +75 -0
  71. data/spec/data/sitemap.xsd +116 -0
  72. data/spec/rspec_custom_matchers.rb +89 -0
  73. data/spec/spec_helper.rb +31 -0
  74. data/spec/todo.rb +11 -0
  75. data/spec/unit/resync/capability_list_spec.rb +138 -0
  76. data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
  77. data/spec/unit/resync/change_dump_spec.rb +61 -0
  78. data/spec/unit/resync/change_list_index_spec.rb +49 -0
  79. data/spec/unit/resync/change_list_spec.rb +75 -0
  80. data/spec/unit/resync/link_spec.rb +93 -0
  81. data/spec/unit/resync/metadata_spec.rb +169 -0
  82. data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
  83. data/spec/unit/resync/resource_dump_spec.rb +62 -0
  84. data/spec/unit/resync/resource_list_index_spec.rb +53 -0
  85. data/spec/unit/resync/resource_list_spec.rb +60 -0
  86. data/spec/unit/resync/resource_spec.rb +176 -0
  87. data/spec/unit/resync/shared/augmented_examples.rb +58 -0
  88. data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
  89. data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
  90. data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
  91. data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
  92. data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
  93. data/spec/unit/resync/source_description_spec.rb +55 -0
  94. data/spec/unit/resync/xml/timenode_spec.rb +48 -0
  95. data/spec/unit/resync/xml/xml_spec.rb +40 -0
  96. data/spec/unit/resync/xml_parser_spec.rb +82 -0
  97. metadata +340 -0
@@ -0,0 +1,14 @@
1
+ require 'ruby-enum'
2
+
3
module Resync
  module Types
    # Enumerates the kinds of change that a reference in a +ChangeList+,
    # +ChangeDump+, etc. can indicate.
    class Change
      include Ruby::Enum

      # Enum key => string value, registered in declaration order.
      {
        CREATED: 'created',
        UPDATED: 'updated',
        DELETED: 'deleted'
      }.each { |key, value| define key, value }
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'ruby-enum'
2
+
3
module Resync
  module Types
    # Enumerates how frequently a resource is expected to change.
    class ChangeFrequency
      include Ruby::Enum

      # Enum key => string value, registered in declaration order.
      {
        ALWAYS: 'always',
        HOURLY: 'hourly',
        DAILY: 'daily',
        WEEKLY: 'weekly',
        MONTHLY: 'monthly',
        YEARLY: 'yearly',
        NEVER: 'never'
      }.each { |key, value| define key, value }
    end
  end
end
@@ -0,0 +1,6 @@
1
module Resync
  # Enumerated types for controlled vocabularies.
  module Types
    # Require every file in the sibling +types/+ directory. The list is sorted
    # explicitly so the load order is the same on every platform: +Dir.glob+
    # only started returning sorted results by default in Ruby 3.0.
    Dir.glob(File.expand_path('../types/*.rb', __FILE__)).sort.each { |file| require file }
  end
end
@@ -0,0 +1,4 @@
1
module Resync
  # The version of this gem. Frozen so that no caller can mutate the
  # shared string in place.
  VERSION = '0.1.0'.freeze
end
data/lib/resync/xml.rb ADDED
@@ -0,0 +1,216 @@
1
+ require 'uri'
2
+ require 'time'
3
+ require 'xml/mapping'
4
+
5
+ module Resync
6
+ # Helper methods and modules related to reading and writing XML.
7
+ module XML
8
+
9
+ # ------------------------------------------------------
10
+ # Module shared methods
11
+
12
# Ensures that the provided value is a +URI+, parsing it if necessary.
# Leading and trailing whitespace around a string value is removed before
# parsing, so padded values no longer fail to parse.
#
# @param url [URI, String, nil] the URI.
# @return [URI, nil] +url+ itself if it already is a +URI+, the parsed URI
#   otherwise, or +nil+ if +url+ is +nil+ or +false+.
# @raise [URI::InvalidURIError] if +url+ cannot be converted to a URI.
def self.to_uri(url)
  return nil unless url
  return url if url.is_a?(URI)

  # Only strip values that know how to strip; anything else is handed to
  # URI.parse untouched so it is accepted or rejected exactly as before.
  candidate = url.respond_to?(:strip) ? url.strip : url
  URI.parse(candidate)
end
20
+
21
# Resolves the argument to a +REXML::Element+.
#
# @param xml [String, IO, REXML::Document, REXML::Element] A string or IO-like
#   object containing an XML document (with or without XML declaration), or an
#   XML document, or an XML element.
# @return [REXML::Element] the root element of the document, or the element
#   itself if +xml+ is already an element.
# @raise [ArgumentError] if +xml+ is neither a document, an element, nor
#   something +can_parse+ accepts.
def self.element(xml)
  # The document check runs before the element check, so a document is
  # always resolved to its root.
  return xml.root if xml.is_a?(REXML::Document)
  return xml if xml.is_a?(REXML::Element)

  unless can_parse(xml)
    fail ArgumentError, "Unexpected argument type; expected XML document, String, or IO source, was #{xml.class}"
  end
  REXML::Document.new(xml).root
end
39
+
40
+ # ------------------------------------------------------------
41
+ # Private class methods
42
+
43
# Tells whether the argument looks like something +REXML::Document.new+
# can read: either a +String+, or an object responding to all of +read+,
# +readline+, +nil?+ and +eof?+.
#
# @param arg [Object] the candidate XML source
# @return [Boolean] true if +REXML::Document.new()+ should be able to parse
#   the argument, false otherwise
def self.can_parse(arg)
  return true if arg.is_a?(String)

  [:read, :readline, :nil?, :eof?].all? { |message| arg.respond_to?(message) }
end
54
+ private_class_method :can_parse
55
+
56
+ # ------------------------------------------------------------
57
+ # Time
58
+
59
# XML mapping node for +Time+ values: text is parsed as ISO 8601 and
# converted to UTC on read, and written back in ISO 8601 form.
class TimeNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass constructor as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [Time, nil] the UTC time parsed from the node text, or +nil+
  #   if there is no text
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    return nil unless text

    Time.iso8601(text).utc
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Creates the target node if needed and writes +value+ in ISO 8601 form.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.iso8601
  end
end

::XML::Mapping.add_node_class TimeNode
80
+
81
+ # ------------------------------------------------------------
82
+ # URI
83
+
84
# XML mapping node for +URI+ values.
class UriNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass constructor as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [URI] the URI parsed from the node text
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    # Surrounding whitespace would otherwise make +URI()+ reject the value;
    # a nil text is passed through unchanged.
    text = text.strip if text
    URI(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Creates the target node if needed and writes the string form of +value+.
  def set_attr_value(xml, value)
    @path.first(xml, ensure_created: true).text = value.to_s
  end
end

::XML::Mapping.add_node_class UriNode
104
+
105
+ # ------------------------------------------------------------
106
+ # Resync::Types::Change
107
+
108
# XML mapping node for +Resync::Types::Change+ values.
class ChangeNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass constructor as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Hands the node text to +Resync::Types::Change.parse+.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::Change.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Creates the target node if needed and writes the string form of +value+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class ChangeNode
128
+
129
+ # ------------------------------------------------------------
130
+ # Resync::Types::ChangeFrequency
131
+
132
# XML mapping node for +Resync::Types::ChangeFrequency+ values.
class ChangefreqNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass constructor as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Hands the node text to +Resync::Types::ChangeFrequency.parse+.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::ChangeFrequency.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Creates the target node if needed and writes the string form of +value+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class ChangefreqNode
152
+
153
+ # ------------------------------------------------------------
154
+ # MIME::Type
155
+
156
# XML mapping node for +MIME::Type+ values.
class MimeTypeNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass constructor as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [MIME::Type, nil] +nil+ when there is no text; otherwise the
  #   first entry +MIME::Types+ has for the text, or a newly constructed
  #   +MIME::Type+ when it has none
  def extract_attr_value(xml)
    raw = default_when_xpath_err { @path.first(xml).text }
    return nil unless raw
    return raw if raw.is_a?(MIME::Type)

    MIME::Types[raw].first || MIME::Type.new(raw)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Creates the target node if needed and writes the string form of +value+.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end

::XML::Mapping.add_node_class MimeTypeNode
183
+
184
+ # ------------------------------------------------------------
185
+ # Whitespace-separated hashcode list
186
+
187
# Maps the whitespace-separated list of hash codes in a +<rs:ln>+
# or +<rs:md>+ tag to a hash of digest values keyed by hash algorithm.
# (See {Resync::Descriptor#hashes}.)
class HashCodesNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass constructor as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [Hash<String, String>] digest values keyed by algorithm name;
  #   empty if the node has no text
  # @raise [ArgumentError] if an entry has no +:+ separator at all
  def extract_attr_value(xml)
    hashes = default_when_xpath_err { @path.first(xml).text }
    return {} unless hashes
    return hashes if hashes.is_a?(Hash)

    hashes.split(/[[:space:]]+/)
      .reject(&:empty?) # leading whitespace yields an empty first token
      .map { |entry| entry.split(':', 2) } # split on the first ':' only, so the digest may contain ':'
      .to_h
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes nothing when +value+ is +nil+ or empty; otherwise writes the
  # pairs as space-separated +algorithm:digest+ entries.
  def set_attr_value(xml, value)
    return if value.nil? || value.empty?

    hash_str = value.map { |k, v| "#{k}:#{v}" }.join(' ')
    @path.first(xml, ensure_created: true).text = hash_str
  end
end

::XML::Mapping.add_node_class HashCodesNode
214
+
215
+ end
216
+ end
@@ -0,0 +1,65 @@
1
+ module Resync
2
+
3
+ # Parses ResourceSync XML documents and returns appropriate objects.
4
+ module XMLParser
5
+
6
# The list of parseable types, organized by XML mapping. The hash and
# both type lists are frozen so the lookup table cannot be altered at
# runtime.
ROOT_TYPES = {
  _default: [
    CapabilityList,
    ChangeDump,
    ChangeDumpManifest,
    ChangeList,
    ResourceDump,
    ResourceDumpManifest,
    ResourceList,
    SourceDescription
  ].freeze,
  sitemapindex: [
    ChangeListIndex,
    ResourceListIndex
  ].freeze
}.freeze
private_constant :ROOT_TYPES

# XPath used to look up the +capability+ attribute on an +md+ element in
# the ResourceSync terms namespace.
CAPABILITY_ATTRIBUTE = "/*/[namespace-uri() = 'http://www.openarchives.org/rs/terms/' and local-name() = 'md']/@capability".freeze
private_constant :CAPABILITY_ATTRIBUTE
27
+
28
# Parses the specified ResourceSync document and returns the appropriate object
# based on the +capability+ attribute of the root element's metadata (i.e. +<rs:md>+).
#
# @param xml [String, IO, REXML::Document, REXML::Element] a ResourceSync XML document
#   (or its root element), in any form accepted by +XML.element+
# @return the object loaded from +xml+ by the type whose +CAPABILITY+
#   matches the document's capability
# @raise [ArgumentError] if +xml+ has no root element, or if no mapped
#   type matches its capability
def self.parse(xml)
  root_element = XML.element(xml)
  # A document with no root element yields nil here; fail with a clear
  # message instead of a NoMethodError on nil.
  fail ArgumentError, 'unable to parse: no root element found' unless root_element

  # Sitemap index documents use their own mapping; everything else uses the default.
  mapping = root_element.name == 'sitemapindex' ? :sitemapindex : :_default
  root_type = find_root_type(ROOT_TYPES[mapping], root_element)
  root_type.load_from_xml(root_element, mapping: mapping)
end
39
+
40
# Picks, from +types+, the first type whose +CAPABILITY+ constant equals
# the capability of +root_element+.
#
# @param types [Array<Class>] the candidate types
# @param root_element [REXML::Element] the root element of the document
# @raise [ArgumentError] if no candidate matches
def self.find_root_type(types, root_element)
  capability = capability_for(root_element)
  match = types.detect { |candidate| candidate::CAPABILITY == capability }
  return match if match

  fail ArgumentError, "no mapped type for capability '#{capability}'"
end

private_class_method :find_root_type
48
+
49
# Reads the value of the capability attribute found for +root_element+.
#
# @param root_element [REXML::Element] the root element of the document
# @return the attribute's value
# @raise [ArgumentError] if the attribute has no value
def self.capability_for(root_element)
  value = capability_attribute_for(root_element).value
  return value if value

  fail ArgumentError, "unable to identify capability of root element in #{root_element}"
end

private_class_method :capability_for
56
+
57
# Looks up the first node matching +CAPABILITY_ATTRIBUTE+, evaluated
# against +root_element+.
#
# @param root_element [REXML::Element] the root element of the document
# @return the matching attribute node
# @raise [ArgumentError] if nothing matches
def self.capability_attribute_for(root_element)
  attribute = REXML::XPath.first(root_element, CAPABILITY_ATTRIBUTE)
  return attribute if attribute

  fail ArgumentError, "unable to identify capability of root element in #{root_element}"
end

private_class_method :capability_attribute_for
64
+ end
65
+ end
data/lib/resync.rb ADDED
@@ -0,0 +1,4 @@
1
# A Ruby gem for working with the {http://www.openarchives.org/rs/1.0/resourcesync ResourceSync} web synchronization framework.
module Resync
  # Require every file in the sibling +resync/+ directory. The list is sorted
  # explicitly so the load order is the same on every platform: +Dir.glob+
  # only started returning sorted results by default in Ruby 3.0.
  Dir.glob(File.expand_path('../resync/*.rb', __FILE__)).sort.each { |file| require file }
end
data/resync.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ require 'resync/version'
6
+ require 'uri'
7
+
8
# Gem specification for resync. The homepage and file list are read from
# the surrounding git checkout at build time.
Gem::Specification.new do |spec|
  spec.name = 'resync'
  spec.version = Resync::VERSION
  spec.authors = ['David Moles']
  spec.email = ['david.moles@ucop.edu']
  spec.summary = 'Utility library for ResourceSync'
  spec.description = 'A Ruby gem for working with the ResourceSync web synchronization framework'
  spec.license = 'MIT'

  # Derive the homepage from the git origin. scp-style remotes
  # (user@host:path) are not valid URIs, so they are rewritten to
  # ssh://host/path before parsing.
  origin = `git config --get remote.origin.url`.chomp
  origin = origin.sub(%r{\A[^@/]+@([^:/]+):}, 'ssh://\1/')
  origin_uri = URI(origin)
  # With no origin configured there is no host; leave the homepage unset
  # rather than building a URL without one.
  spec.homepage = URI::HTTP.build(host: origin_uri.host, path: origin_uri.path.chomp('.git')).to_s if origin_uri.host

  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'mime-types', '~> 2.5'
  spec.add_dependency 'ruby-enum', '~> 0.4'
  spec.add_dependency 'xml-mapping', '~> 0.10'

  spec.add_development_dependency 'equivalent-xml', '~> 0.6.0'
  spec.add_development_dependency 'rake', '~> 10.4'
  spec.add_development_dependency 'rspec', '~> 3.2'
  spec.add_development_dependency 'rubocop', '~> 0.29.1'
  spec.add_development_dependency 'simplecov', '~> 0.9.2'
  spec.add_development_dependency 'simplecov-console', '~> 0.2.0'
  spec.add_development_dependency 'yard', '~> 0.8'
end