resync 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +23 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE.md +22 -0
  8. data/README.md +92 -0
  9. data/Rakefile +56 -0
  10. data/example.rb +100 -0
  11. data/lib/resync/capability_list.rb +85 -0
  12. data/lib/resync/change_dump.rb +15 -0
  13. data/lib/resync/change_dump_manifest.rb +15 -0
  14. data/lib/resync/change_list.rb +15 -0
  15. data/lib/resync/change_list_index.rb +26 -0
  16. data/lib/resync/link.rb +87 -0
  17. data/lib/resync/metadata.rb +112 -0
  18. data/lib/resync/resource.rb +72 -0
  19. data/lib/resync/resource_dump.rb +15 -0
  20. data/lib/resync/resource_dump_manifest.rb +15 -0
  21. data/lib/resync/resource_list.rb +15 -0
  22. data/lib/resync/resource_list_index.rb +15 -0
  23. data/lib/resync/shared/augmented.rb +76 -0
  24. data/lib/resync/shared/base_resource_list.rb +117 -0
  25. data/lib/resync/shared/descriptor.rb +135 -0
  26. data/lib/resync/shared/sitemap_index.rb +32 -0
  27. data/lib/resync/shared/sorted_resource_list.rb +60 -0
  28. data/lib/resync/source_description.rb +14 -0
  29. data/lib/resync/types/change.rb +14 -0
  30. data/lib/resync/types/change_frequency.rb +18 -0
  31. data/lib/resync/types.rb +6 -0
  32. data/lib/resync/version.rb +4 -0
  33. data/lib/resync/xml.rb +216 -0
  34. data/lib/resync/xml_parser.rb +65 -0
  35. data/lib/resync.rb +4 -0
  36. data/resync.gemspec +36 -0
  37. data/spec/acceptance/xml_parser_spec.rb +1049 -0
  38. data/spec/data/examples/README.md +1 -0
  39. data/spec/data/examples/example-1.xml +12 -0
  40. data/spec/data/examples/example-12.xml +25 -0
  41. data/spec/data/examples/example-13.xml +25 -0
  42. data/spec/data/examples/example-14.xml +23 -0
  43. data/spec/data/examples/example-15.xml +21 -0
  44. data/spec/data/examples/example-16.xml +24 -0
  45. data/spec/data/examples/example-17.xml +39 -0
  46. data/spec/data/examples/example-18.xml +25 -0
  47. data/spec/data/examples/example-19.xml +28 -0
  48. data/spec/data/examples/example-2.xml +18 -0
  49. data/spec/data/examples/example-20.xml +22 -0
  50. data/spec/data/examples/example-21.xml +31 -0
  51. data/spec/data/examples/example-22.xml +41 -0
  52. data/spec/data/examples/example-23.xml +41 -0
  53. data/spec/data/examples/example-24.xml +28 -0
  54. data/spec/data/examples/example-25.xml +21 -0
  55. data/spec/data/examples/example-26.xml +18 -0
  56. data/spec/data/examples/example-27.xml +36 -0
  57. data/spec/data/examples/example-28.xml +34 -0
  58. data/spec/data/examples/example-29.xml +27 -0
  59. data/spec/data/examples/example-3.xml +17 -0
  60. data/spec/data/examples/example-30.xml +18 -0
  61. data/spec/data/examples/example-31.xml +16 -0
  62. data/spec/data/examples/example-32.xml +22 -0
  63. data/spec/data/examples/example-33.xml +22 -0
  64. data/spec/data/examples/example-4.xml +10 -0
  65. data/spec/data/examples/example-5.xml +18 -0
  66. data/spec/data/examples/example-6.xml +21 -0
  67. data/spec/data/examples/example-7.xml +13 -0
  68. data/spec/data/examples/example-8.xml +12 -0
  69. data/spec/data/resourcesync.xsd +148 -0
  70. data/spec/data/siteindex.xsd +75 -0
  71. data/spec/data/sitemap.xsd +116 -0
  72. data/spec/rspec_custom_matchers.rb +89 -0
  73. data/spec/spec_helper.rb +31 -0
  74. data/spec/todo.rb +11 -0
  75. data/spec/unit/resync/capability_list_spec.rb +138 -0
  76. data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
  77. data/spec/unit/resync/change_dump_spec.rb +61 -0
  78. data/spec/unit/resync/change_list_index_spec.rb +49 -0
  79. data/spec/unit/resync/change_list_spec.rb +75 -0
  80. data/spec/unit/resync/link_spec.rb +93 -0
  81. data/spec/unit/resync/metadata_spec.rb +169 -0
  82. data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
  83. data/spec/unit/resync/resource_dump_spec.rb +62 -0
  84. data/spec/unit/resync/resource_list_index_spec.rb +53 -0
  85. data/spec/unit/resync/resource_list_spec.rb +60 -0
  86. data/spec/unit/resync/resource_spec.rb +176 -0
  87. data/spec/unit/resync/shared/augmented_examples.rb +58 -0
  88. data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
  89. data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
  90. data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
  91. data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
  92. data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
  93. data/spec/unit/resync/source_description_spec.rb +55 -0
  94. data/spec/unit/resync/xml/timenode_spec.rb +48 -0
  95. data/spec/unit/resync/xml/xml_spec.rb +40 -0
  96. data/spec/unit/resync/xml_parser_spec.rb +82 -0
  97. metadata +340 -0
@@ -0,0 +1,14 @@
1
+ require 'ruby-enum'
2
+
3
module Resync
  module Types
    # Enumeration of the kinds of change that a reference in a +ChangeList+,
    # +ChangeDump+, etc. can indicate.
    class Change
      include Ruby::Enum

      # Each enum key is paired with its lowercase string value.
      [
        [:CREATED, 'created'],
        [:UPDATED, 'updated'],
        [:DELETED, 'deleted']
      ].each { |key, value| define key, value }
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'ruby-enum'
2
+
3
module Resync
  module Types
    # Enumeration of how often a resource changes.
    class ChangeFrequency
      include Ruby::Enum

      # Each enum key is paired with its lowercase string value.
      [
        [:ALWAYS, 'always'],
        [:HOURLY, 'hourly'],
        [:DAILY, 'daily'],
        [:WEEKLY, 'weekly'],
        [:MONTHLY, 'monthly'],
        [:YEARLY, 'yearly'],
        [:NEVER, 'never']
      ].each { |key, value| define key, value }
    end
  end
end
@@ -0,0 +1,6 @@
1
module Resync
  # Enumerated types for controlled vocabularies.
  module Types
    # Require every file in the sibling +types/+ directory. The glob result
    # is sorted first because Dir.glob does not guarantee any ordering on
    # older Rubies, and a sorted list keeps the load order identical across
    # platforms.
    Dir.glob(File.expand_path('../types/*.rb', __FILE__)).sort.each(&method(:require))
  end
end
@@ -0,0 +1,4 @@
1
module Resync
  # The version of this gem. Frozen so the shared constant cannot be
  # mutated in place by accident.
  VERSION = '0.1.0'.freeze
end
data/lib/resync/xml.rb ADDED
@@ -0,0 +1,216 @@
1
+ require 'uri'
2
+ require 'time'
3
+ require 'xml/mapping'
4
+
5
+ module Resync
6
+ # Helper methods and modules related to reading and writing XML.
7
+ module XML
8
+
9
+ # ------------------------------------------------------
10
+ # Module shared methods
11
+
12
# Coerces the argument to a +URI+.
#
# @param url [URI, String, nil] a URI, or a string to be parsed as one.
# @return [URI, nil] +url+ itself if it is already a +URI+, the result of
#   +URI.parse+ otherwise, or +nil+ if +url+ is +nil+ or +false+.
# @raise [URI::InvalidURIError] if +url+ cannot be converted to a URI.
def self.to_uri(url)
  return nil unless url
  return url if url.is_a?(URI)
  URI.parse(url)
end
20
+
21
# Resolves the argument to a +REXML::Element+.
#
# @param xml [String, IO, REXML::Document, REXML::Element] a string or
#   IO-like object containing an XML document (with or without XML
#   declaration), or an XML document, or an XML element.
# @return [REXML::Element] the document's root element, or +xml+ itself
#   if it is already an element.
# @raise [ArgumentError] if +xml+ is none of the accepted types.
def self.element(xml)
  # REXML::Document must be tested before REXML::Element, as in the
  # original case statement, so that a document yields its root.
  return xml.root if xml.is_a?(REXML::Document)
  return xml if xml.is_a?(REXML::Element)

  unless can_parse(xml)
    raise ArgumentError, "Unexpected argument type; expected XML document, String, or IO source, was #{xml.class}"
  end
  REXML::Document.new(xml).root
end
39
+
40
+ # ------------------------------------------------------------
41
+ # Private class methods
42
+
43
# Tells whether the argument looks like something +REXML::Document.new+
# can read: either a +String+, or an object responding to all of
# +read+, +readline+, +nil?+ and +eof?+.
#
# @param arg [Object] the candidate XML source.
# @return [Boolean] true if +arg+ is a String or responds to the four
#   methods above, false otherwise.
def self.can_parse(arg)
  return true if arg.is_a?(String)
  [:read, :readline, :nil?, :eof?].all? { |name| arg.respond_to?(name) }
end
54
+ private_class_method :can_parse
55
+
56
+ # ------------------------------------------------------------
57
+ # Time
58
+
59
# XML mapping node that converts between +Time+ values and their
# ISO 8601 text representation.
class TimeNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass initializer as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [Time, nil] the text at this node's path parsed as ISO 8601
  #   and converted to UTC, or +nil+ if the text is +nil+.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    return nil unless text
    Time.iso8601(text).utc
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.iso8601+ as the text at this node's path, creating the
  # path if necessary.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.iso8601
  end
end
78
+
79
+ ::XML::Mapping.add_node_class TimeNode
80
+
81
+ # ------------------------------------------------------------
82
+ # URI
83
+
84
# XML mapping node that converts between +URI+ values and their string
# representation.
class UriNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass initializer as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Passes the text at this node's path to +Kernel#URI+.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    URI(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ as the text at this node's path, creating the
  # path if necessary.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end
102
+
103
+ ::XML::Mapping.add_node_class UriNode
104
+
105
+ # ------------------------------------------------------------
106
+ # Resync::Types::Change
107
+
108
# XML mapping node that converts between +Resync::Types::Change+ values
# and their string representation.
class ChangeNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass initializer as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Hands the text at this node's path to +Resync::Types::Change.parse+.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::Change.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ as the text at this node's path, creating the
  # path if necessary.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end
126
+
127
+ ::XML::Mapping.add_node_class ChangeNode
128
+
129
+ # ------------------------------------------------------------
130
+ # Resync::Types::ChangeFrequency
131
+
132
# XML mapping node that converts between +Resync::Types::ChangeFrequency+
# values and their string representation.
class ChangefreqNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass initializer as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  # Hands the text at this node's path to
  # +Resync::Types::ChangeFrequency.parse+.
  def extract_attr_value(xml)
    text = default_when_xpath_err { @path.first(xml).text }
    Resync::Types::ChangeFrequency.parse(text)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ as the text at this node's path, creating the
  # path if necessary.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end
150
+
151
+ ::XML::Mapping.add_node_class ChangefreqNode
152
+
153
+ # ------------------------------------------------------------
154
+ # MIME::Type
155
+
156
# XML mapping node that converts between +MIME::Type+ values and their
# string representation.
class MimeTypeNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass initializer as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    xpath, *remaining = super(*args)
    @path = ::XML::XXPath.new(xpath)
    remaining
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # Returns +nil+ when there is no text, and the value unchanged if it is
  # already a +MIME::Type+. Otherwise returns the first match from
  # +MIME::Types+, falling back to a newly constructed +MIME::Type+ when
  # there is no match.
  def extract_attr_value(xml)
    raw = default_when_xpath_err { @path.first(xml).text }
    return nil unless raw
    return raw if raw.is_a?(MIME::Type)

    MIME::Types[raw].first || MIME::Type.new(raw)
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes +value.to_s+ as the text at this node's path, creating the
  # path if necessary.
  def set_attr_value(xml, value)
    target = @path.first(xml, ensure_created: true)
    target.text = value.to_s
  end
end
181
+
182
+ ::XML::Mapping.add_node_class MimeTypeNode
183
+
184
+ # ------------------------------------------------------------
185
+ # Whitespace-separated hashcode list
186
+
187
# Maps the whitespace-separated list of hash codes in a +<rs:ln>+
# or +<rs:md>+ tag to a hash of digest values keyed by hash algorithm.
# (See {Resync::Descriptor#hashes}.)
class HashCodesNode < ::XML::Mapping::SingleAttributeNode
  # Takes the first value returned by the superclass initializer as the
  # XPath expression for this node and returns the remaining values.
  def initialize(*args)
    path, *args = super(*args)
    @path = ::XML::XXPath.new(path)
    args
  end

  # Implements +::XML::Mapping::SingleAttributeNode#extract_attr_value+.
  #
  # @return [Hash<String, String>] digest values keyed by algorithm name;
  #   an empty hash if there is no text at this node's path.
  # @raise [ArgumentError] if an entry contains no +:+ separator.
  def extract_attr_value(xml)
    hashes = default_when_xpath_err { @path.first(xml).text }
    return {} unless hashes
    return hashes if hashes.is_a?(Hash)

    # Splitting on a regexp keeps an empty leading token when the text
    # starts with whitespace, so drop empties before pairing. Split each
    # entry on the first colon only, so a digest value that itself
    # contains ':' stays intact instead of breaking +to_h+.
    entries = hashes.split(/[[:space:]]+/).reject(&:empty?)
    entries.map { |entry| entry.split(':', 2) }.to_h
  end

  # Implements +::XML::Mapping::SingleAttributeNode#set_attr_value+.
  # Writes the hash as space-separated +algorithm:digest+ entries; writes
  # nothing at all when the hash is empty.
  def set_attr_value(xml, value)
    return if value.empty?
    hash_str = value.map { |k, v| "#{k}:#{v}" }.join(' ')
    @path.first(xml, ensure_created: true).text = hash_str
  end
end
212
+
213
+ ::XML::Mapping.add_node_class HashCodesNode
214
+
215
+ end
216
+ end
@@ -0,0 +1,65 @@
1
module Resync

  # Parses ResourceSync XML documents and returns appropriate objects.
  module XMLParser

    # The list of parseable types, organized by XML mapping.
    ROOT_TYPES = {
      _default: [
        CapabilityList,
        ChangeDump,
        ChangeDumpManifest,
        ChangeList,
        ResourceDump,
        ResourceDumpManifest,
        ResourceList,
        SourceDescription
      ].freeze,
      sitemapindex: [
        ChangeListIndex,
        ResourceListIndex
      ].freeze
    }.freeze
    private_constant :ROOT_TYPES

    # XPath locating the +capability+ attribute of the +md+ element (in the
    # ResourceSync terms namespace) directly under the root element.
    CAPABILITY_ATTRIBUTE = "/*/[namespace-uri() = 'http://www.openarchives.org/rs/terms/' and local-name() = 'md']/@capability".freeze
    private_constant :CAPABILITY_ATTRIBUTE

    # Parses the specified ResourceSync document and returns the appropriate object
    # based on the +capability+ attribute of the root element's metadata (i.e. +<rs:md>+).
    #
    # @param xml [String, REXML::Document, REXML::Element] a ResourceSync XML document
    #   (or its root element)
    # @return [Object] the result of +load_from_xml+ on the type whose
    #   +CAPABILITY+ matches the document's capability.
    # @raise [ArgumentError] if the document has no root element, if the
    #   capability cannot be identified, or if no type is mapped to it.
    def self.parse(xml)
      root_element = XML.element(xml)
      # An empty document has no root; fail clearly here rather than with a
      # NoMethodError on nil below.
      fail ArgumentError, 'unable to find a root element in the provided XML' unless root_element

      mapping = root_element.name == 'sitemapindex' ? :sitemapindex : :_default
      root_type = find_root_type(ROOT_TYPES[mapping], root_element)
      root_type.load_from_xml(root_element, mapping: mapping)
    end

    # Finds the entry in +types+ whose +CAPABILITY+ constant equals the
    # capability declared by +root_element+.
    def self.find_root_type(types, root_element)
      capability = capability_for(root_element)
      root_type = types.find { |t| t::CAPABILITY == capability }
      fail ArgumentError, "no mapped type for capability '#{capability}'" unless root_type
      root_type
    end

    private_class_method :find_root_type

    # Returns the value of the capability attribute of +root_element+.
    def self.capability_for(root_element)
      capability = capability_attribute_for(root_element).value
      fail ArgumentError, "unable to identify capability of root element in #{root_element}" unless capability
      capability
    end

    private_class_method :capability_for

    # Returns the first node matching +CAPABILITY_ATTRIBUTE+ under
    # +root_element+, failing if there is none.
    def self.capability_attribute_for(root_element)
      capability_attr = REXML::XPath.first(root_element, CAPABILITY_ATTRIBUTE)
      fail ArgumentError, "unable to identify capability of root element in #{root_element}" unless capability_attr
      capability_attr
    end

    private_class_method :capability_attribute_for
  end
end
data/lib/resync.rb ADDED
@@ -0,0 +1,4 @@
1
# A Ruby gem for working with the {http://www.openarchives.org/rs/1.0/resourcesync ResourceSync} web synchronization framework.
module Resync
  # Require every file in the sibling +resync/+ directory in sorted order.
  # Dir.glob does not guarantee any ordering on older Rubies, and files
  # loaded here may refer to constants defined by others at load time, so
  # a platform-dependent order can cause intermittent NameErrors.
  Dir.glob(File.expand_path('../resync/*.rb', __FILE__)).sort.each(&method(:require))
end
data/resync.gemspec ADDED
@@ -0,0 +1,36 @@
1
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'resync/version'
require 'uri'

Gem::Specification.new do |spec|
  spec.name          = 'resync'
  spec.version       = Resync::VERSION
  spec.authors       = ['David Moles']
  spec.email         = ['david.moles@ucop.edu']
  spec.summary       = 'Utility library for ResourceSync'
  spec.description   = 'A Ruby gem for working with the ResourceSync web synchronization framework'
  spec.license       = 'MIT'

  # The homepage is derived from the git remote named "origin". Git accepts
  # both URL-style remotes (https://host/path.git, ssh://user@host/path.git)
  # and scp-style remotes (user@host:path.git). The latter is not a valid
  # URI and would make URI() raise, so it is taken apart by hand.
  origin_url = `git config --get remote.origin.url`.chomp
  scp_style = origin_url.match(%r{\A(?:[^@/\s]+@)?([^:/\s]+):(?!//)(.+)\z})
  if scp_style
    origin_host = scp_style[1]
    origin_path = "/#{scp_style[2]}"
  else
    origin_uri = URI(origin_url)
    origin_host = origin_uri.host
    origin_path = origin_uri.path
  end
  spec.homepage = URI::HTTP.build(host: origin_host, path: origin_path.chomp('.git')).to_s

  # Package exactly the files tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'mime-types', '~> 2.5'
  spec.add_dependency 'ruby-enum', '~> 0.4'
  spec.add_dependency 'xml-mapping', '~> 0.10'

  spec.add_development_dependency 'equivalent-xml', '~> 0.6.0'
  spec.add_development_dependency 'rake', '~> 10.4'
  spec.add_development_dependency 'rspec', '~> 3.2'
  spec.add_development_dependency 'rubocop', '~> 0.29.1'
  spec.add_development_dependency 'simplecov', '~> 0.9.2'
  spec.add_development_dependency 'simplecov-console', '~> 0.2.0'
  spec.add_development_dependency 'yard', '~> 0.8'
end