resync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +23 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE.md +22 -0
  8. data/README.md +92 -0
  9. data/Rakefile +56 -0
  10. data/example.rb +100 -0
  11. data/lib/resync/capability_list.rb +85 -0
  12. data/lib/resync/change_dump.rb +15 -0
  13. data/lib/resync/change_dump_manifest.rb +15 -0
  14. data/lib/resync/change_list.rb +15 -0
  15. data/lib/resync/change_list_index.rb +26 -0
  16. data/lib/resync/link.rb +87 -0
  17. data/lib/resync/metadata.rb +112 -0
  18. data/lib/resync/resource.rb +72 -0
  19. data/lib/resync/resource_dump.rb +15 -0
  20. data/lib/resync/resource_dump_manifest.rb +15 -0
  21. data/lib/resync/resource_list.rb +15 -0
  22. data/lib/resync/resource_list_index.rb +15 -0
  23. data/lib/resync/shared/augmented.rb +76 -0
  24. data/lib/resync/shared/base_resource_list.rb +117 -0
  25. data/lib/resync/shared/descriptor.rb +135 -0
  26. data/lib/resync/shared/sitemap_index.rb +32 -0
  27. data/lib/resync/shared/sorted_resource_list.rb +60 -0
  28. data/lib/resync/source_description.rb +14 -0
  29. data/lib/resync/types/change.rb +14 -0
  30. data/lib/resync/types/change_frequency.rb +18 -0
  31. data/lib/resync/types.rb +6 -0
  32. data/lib/resync/version.rb +4 -0
  33. data/lib/resync/xml.rb +216 -0
  34. data/lib/resync/xml_parser.rb +65 -0
  35. data/lib/resync.rb +4 -0
  36. data/resync.gemspec +36 -0
  37. data/spec/acceptance/xml_parser_spec.rb +1049 -0
  38. data/spec/data/examples/README.md +1 -0
  39. data/spec/data/examples/example-1.xml +12 -0
  40. data/spec/data/examples/example-12.xml +25 -0
  41. data/spec/data/examples/example-13.xml +25 -0
  42. data/spec/data/examples/example-14.xml +23 -0
  43. data/spec/data/examples/example-15.xml +21 -0
  44. data/spec/data/examples/example-16.xml +24 -0
  45. data/spec/data/examples/example-17.xml +39 -0
  46. data/spec/data/examples/example-18.xml +25 -0
  47. data/spec/data/examples/example-19.xml +28 -0
  48. data/spec/data/examples/example-2.xml +18 -0
  49. data/spec/data/examples/example-20.xml +22 -0
  50. data/spec/data/examples/example-21.xml +31 -0
  51. data/spec/data/examples/example-22.xml +41 -0
  52. data/spec/data/examples/example-23.xml +41 -0
  53. data/spec/data/examples/example-24.xml +28 -0
  54. data/spec/data/examples/example-25.xml +21 -0
  55. data/spec/data/examples/example-26.xml +18 -0
  56. data/spec/data/examples/example-27.xml +36 -0
  57. data/spec/data/examples/example-28.xml +34 -0
  58. data/spec/data/examples/example-29.xml +27 -0
  59. data/spec/data/examples/example-3.xml +17 -0
  60. data/spec/data/examples/example-30.xml +18 -0
  61. data/spec/data/examples/example-31.xml +16 -0
  62. data/spec/data/examples/example-32.xml +22 -0
  63. data/spec/data/examples/example-33.xml +22 -0
  64. data/spec/data/examples/example-4.xml +10 -0
  65. data/spec/data/examples/example-5.xml +18 -0
  66. data/spec/data/examples/example-6.xml +21 -0
  67. data/spec/data/examples/example-7.xml +13 -0
  68. data/spec/data/examples/example-8.xml +12 -0
  69. data/spec/data/resourcesync.xsd +148 -0
  70. data/spec/data/siteindex.xsd +75 -0
  71. data/spec/data/sitemap.xsd +116 -0
  72. data/spec/rspec_custom_matchers.rb +89 -0
  73. data/spec/spec_helper.rb +31 -0
  74. data/spec/todo.rb +11 -0
  75. data/spec/unit/resync/capability_list_spec.rb +138 -0
  76. data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
  77. data/spec/unit/resync/change_dump_spec.rb +61 -0
  78. data/spec/unit/resync/change_list_index_spec.rb +49 -0
  79. data/spec/unit/resync/change_list_spec.rb +75 -0
  80. data/spec/unit/resync/link_spec.rb +93 -0
  81. data/spec/unit/resync/metadata_spec.rb +169 -0
  82. data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
  83. data/spec/unit/resync/resource_dump_spec.rb +62 -0
  84. data/spec/unit/resync/resource_list_index_spec.rb +53 -0
  85. data/spec/unit/resync/resource_list_spec.rb +60 -0
  86. data/spec/unit/resync/resource_spec.rb +176 -0
  87. data/spec/unit/resync/shared/augmented_examples.rb +58 -0
  88. data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
  89. data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
  90. data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
  91. data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
  92. data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
  93. data/spec/unit/resync/source_description_spec.rb +55 -0
  94. data/spec/unit/resync/xml/timenode_spec.rb +48 -0
  95. data/spec/unit/resync/xml/xml_spec.rb +40 -0
  96. data/spec/unit/resync/xml_parser_spec.rb +82 -0
  97. metadata +340 -0
@@ -0,0 +1,112 @@
1
+ require_relative 'shared/descriptor'
2
+ require_relative 'xml'
3
+
4
+ module Resync
5
+
6
+ # Metadata about a resource or ResourceSync document. See section 7,
7
+ # {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
8
+ # in the ResourceSync specification.
9
+ #
10
+ # @!attribute [rw] at_time
11
+ # @return [Time] the datetime at which assembling a resource list
12
+ # began (including resource list indices, resource dumps, etc.)
13
+ # @!attribute [rw] from_time
14
+ # @return [Time] the beginning of the time range represented by
15
+ # a change list (including change list indices, change dumps, etc.)
16
+ # @!attribute [rw] until_time
17
+ # @return [Time] the end of the time range represented by
18
+ # a change list (including change list indices, change dumps, etc.)
19
+ # @!attribute [rw] completed_time
20
+ # @return [Time] the datetime at which assembling a resource list
21
+ # ended (including resource list indices, resource dumps, etc.)
22
+ # @!attribute [rw] change
23
+ # @return [Change] the type of change to a resource reported in
24
+ # a change list (including change list indices, change dumps, etc.)
25
+ # @!attribute [rw] capability
26
+ # @return [String] identifies the type of a ResourceSync document.
27
class Metadata < Descriptor
  include ::XML::Mapping

  # ------------------------------------------------------------
  # XML mapping: the <md> element and its attributes

  root_element_name 'md'

  time_node :at_time, '@at', default_value: nil
  time_node :from_time, '@from', default_value: nil
  time_node :until_time, '@until', default_value: nil
  time_node :completed_time, '@completed', default_value: nil
  change_node :change, '@change', default_value: nil
  text_node :capability, '@capability', default_value: nil

  # ------------------------------------------------------------
  # Construction

  # Creates a new +Metadata+. The descriptor fields (+modified_time+,
  # +length+, +mime_type+, +encoding+, +hashes+, +path+) are forwarded to
  # the superclass initializer; the remaining fields are assigned here
  # through their setters.
  #
  # @param at_time [Time] the datetime at which assembling a resource list
  #   began (including resource list indices, resource dumps, etc.)
  # @param from_time [Time] the beginning of the time range represented by
  #   a change list (including change list indices, change dumps, etc.)
  # @param until_time [Time] the end of the time range represented by
  #   a change list (including change list indices, change dumps, etc.)
  # @param completed_time [Time] the datetime at which assembling a resource
  #   list ended (including resource list indices, resource dumps, etc.)
  # @param modified_time [Time] the date and time when the referenced resource
  #   was last modified.
  # @param length [Integer] the content length of the referenced resource.
  # @param mime_type [MIME::Type] the media type of the referenced resource.
  # @param encoding [String] the content encoding (if any) applied to the data
  #   in the referenced resource (e.g. for compression)
  # @param hashes [Hash<String, String>] fixity information for the referenced
  #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
  #   to hex-encoded digest values.
  # @param change [Change] the type of change to a resource reported in
  #   a change list (including change list indices, change dumps, etc.)
  # @param capability [String] identifies the type of a ResourceSync document.
  # @param path [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
  #   the path to the referenced resource within the dump ZIP file.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    at_time: nil,
    from_time: nil,
    until_time: nil,
    completed_time: nil,
    modified_time: nil,

    length: nil,
    mime_type: nil,
    encoding: nil,
    hashes: {},

    change: nil,
    capability: nil,
    path: nil
  )
    super(
      modified_time: modified_time,
      length: length,
      mime_type: mime_type,
      encoding: encoding,
      hashes: hashes,
      path: path
    )

    # Time fields go through the validating setters defined below.
    self.at_time = at_time
    self.from_time = from_time
    self.until_time = until_time
    self.completed_time = completed_time

    self.change = change
    self.capability = capability
  end

  # ------------------------------------------------------------
  # Validating setters (each delegates the check to +time_or_nil+)

  # @param value [Time, nil] the new +at_time+.
  def at_time=(value)
    checked = time_or_nil(value)
    @at_time = checked
  end

  # @param value [Time, nil] the new +from_time+.
  def from_time=(value)
    checked = time_or_nil(value)
    @from_time = checked
  end

  # @param value [Time, nil] the new +until_time+.
  def until_time=(value)
    checked = time_or_nil(value)
    @until_time = checked
  end

  # @param value [Time, nil] the new +completed_time+.
  def completed_time=(value)
    checked = time_or_nil(value)
    @completed_time = checked
  end
end
112
+ end
@@ -0,0 +1,72 @@
1
+ require_relative 'shared/augmented'
2
+ require_relative 'xml'
3
+ require_relative 'metadata'
4
+
5
+ module Resync
6
+ # A resource (i.e., +<url>+ or +<sitemap>+). See section 7,
7
+ # {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
8
+ # in the ResourceSync specification.
9
class Resource < Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # XML mapping: the <url> element and its children

  root_element_name 'url'

  uri_node :uri, 'loc', default_value: nil
  time_node :modified_time, 'lastmod', default_value: nil
  changefreq_node :changefreq, 'changefreq', default_value: nil
  numeric_node :priority, 'priority', default_value: nil

  # ------------------------------------------------------------
  # Construction

  # Creates a new +Resource+.
  #
  # @param uri the location of the resource (required); it is converted
  #   with +XML.to_uri+ by the +uri=+ setter.
  # @param modified_time [Time] the date and time when the referenced resource was last modified.
  # @param changefreq [ChangeFrequency] how frequently the referenced resource is likely to change.
  # @param priority [Numeric] the priority of this resource relative to other resources from the
  #   same provider. Allows robots to decide which resources to crawl or harvest.
  #   Values should be in the range 0-1.0 (inclusive), where 0 is the lowest priority
  #   and 1.0 is the highest.
  # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
  # @param metadata [Metadata] metadata about this resource.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    uri:,
    modified_time: nil,
    changefreq: nil,
    priority: nil,
    links: [],
    metadata: nil
  )
    # Only the links are handed to the superclass; metadata is assigned last.
    super(links: links)

    self.uri = uri
    self.modified_time = modified_time
    self.changefreq = changefreq
    self.priority = priority
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom setters

  # Stores the result of passing +value+ through +XML.to_uri+.
  def uri=(value)
    converted = XML.to_uri(value)
    @uri = converted
  end

  # ------------------------------------------------------------
  # Public methods

  # @return the +capability+ of this resource's metadata, or +nil+ when
  #   the resource has no metadata.
  def capability
    return nil unless metadata

    metadata.capability
  end

  # ------------------------------------------------------------
  # Overrides

  # Lists this class's own mapping nodes before those of the superclass.
  # ResourceSync schema requires '##other' elements to appear last.
  def self.all_xml_mapping_nodes(options = { mapping: nil, create: true })
    own_nodes = xml_mapping_nodes(options)
    inherited_nodes = superclass.all_xml_mapping_nodes(options)
    own_nodes + inherited_nodes
  end

end
72
+ end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'xml'
3
+
4
+ module Resync
5
+ # A resource dump. See section 11.1,
6
+ # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDump Resource Dump}",
7
+ # in the ResourceSync specification.
8
+ class ResourceDump < BaseResourceList
9
+ include ::XML::Mapping
10
+
11
+ # The +capability+ value that metadata attached to a resource dump must carry.
12
+ CAPABILITY = 'resourcedump'
13
+
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'xml'
3
+
4
+ module Resync
5
+ # A resource dump manifest. See section 11.2,
6
+ # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDumpManifest Resource Dump Manifest}",
7
+ # in the ResourceSync specification.
8
+ class ResourceDumpManifest < BaseResourceList
9
+ include ::XML::Mapping
10
+
11
+ # The +capability+ value that metadata attached to a resource dump manifest must carry.
12
+ CAPABILITY = 'resourcedump-manifest'
13
+
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'xml'
3
+
4
+ module Resync
5
+ # A resource list. See section 10.1,
6
+ # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceList Resource List}",
7
+ # in the ResourceSync specification.
8
+ class ResourceList < BaseResourceList
9
+ include ::XML::Mapping
10
+
11
+ # The +capability+ value that metadata attached to a resource list must carry.
12
+ CAPABILITY = 'resourcelist'
13
+
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'shared/sitemap_index'
3
+
4
+ module Resync
5
+ # A resource list index. See section 10.2,
6
+ # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceListIndex Resource List Index}",
7
+ # in the ResourceSync specification.
8
+ class ResourceListIndex < BaseResourceList
9
+ include ::XML::Mapping
10
+ include SitemapIndex
11
+
12
+ # The +capability+ value that metadata attached to a resource list index must carry (the same value as for a plain resource list).
13
+ CAPABILITY = 'resourcelist'
14
+ end
15
+ end
@@ -0,0 +1,76 @@
1
+ require_relative '../link'
2
+ require_relative '../metadata'
3
+
4
+ module Resync
5
+
6
+ # Base class for elements augmented with ResourceSync-specific child elements.
7
+ #
8
+ # @!attribute [rw] links
9
+ # @return [Array<Link>] related links.
10
+ # @!attribute [rw] metadata
11
+ # @return [Metadata] metadata about this object.
12
class Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Class methods

  # ResourceSync-specific tags needing to be prefixed with +rs+ on output.
  # A frozen array is used rather than a +Set+ so that loading this class
  # does not depend on +set+ having been required somewhere else; only
  # +include?+ is ever called on it.
  RS_TAGS = %w(ln md).freeze
  private_constant :RS_TAGS

  # Writer hook that adds the +rs+ namespace prefix: runs the default
  # writer, then renames every element yielded by +xml.each_element+
  # whose name is listed in +RS_TAGS+.
  #
  # @param obj the object being written.
  # @param xml the XML element being written to.
  # @param default_obj_to_xml the default writer, invoked first.
  def self.prefix_rewriter(obj, xml, default_obj_to_xml)
    default_obj_to_xml.call(obj, xml)
    xml.each_element do |e|
      e.name = "rs:#{e.name}" if RS_TAGS.include?(e.name)
    end
  end
  private_class_method :prefix_rewriter

  # ------------------------------------------------------------
  # Attributes

  array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter)
  object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter)

  # The same two nodes are declared again for the +:sitemapindex+ mapping,
  # with the children themselves mapped using +:_default+.
  use_mapping :sitemapindex
  array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter), sub_mapping: :_default
  object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter), sub_mapping: :_default

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +Augmented+ instance with the specified links and metadata.
  #
  # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
  # @param metadata [Metadata] metadata about this resource.
  def initialize(links: [], metadata: nil)
    self.links = links
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom accessors

  # Sets the +links+ list. +nil+ is treated as an empty list.
  def links=(value)
    @links = value || []
  end

  # Finds links with the specified relation.
  # @param rel [String] the relation.
  # @return [Array<Link>] those links having that relation, or an empty array if none exist.
  def links_for(rel:)
    links.select { |l| l.rel == rel }
  end

  # Shortcut to find the first link with the specified relation (in ResourceSync there often
  # should be only one link with a particular relation)
  # @param rel [String] the relation.
  # @return [Link] the first link having that relation, or nil if none exists.
  def link_for(rel:)
    links.find { |l| l.rel == rel }
  end
end
76
+ end
@@ -0,0 +1,117 @@
1
+ require_relative 'augmented'
2
+ require_relative '../resource'
3
+ require_relative '../metadata'
4
+
5
+ module Resync
6
+ # Base class for root elements containing a list of resources
7
+ # (i.e., +<urlset>+ and +<sitemapindex>+ elements). Subclasses
8
+ # must define a +CAPABILITY+ constant identifying the capability
9
+ # they represent (e.g. +resourcelist+, +changelist+).
10
+ #
11
+ # @!attribute [rw] resources
12
+ # @return [Array<Resource>] the +<url>+ or +<sitemap>+ elements contained in this list.
13
+ class BaseResourceList < Augmented
14
+ include ::XML::Mapping
15
+
16
+ # ------------------------------------------------------------
17
+ # Attributes
18
+
19
+ root_element_name 'urlset'
20
+ array_node :resources, 'url', class: Resource, default_value: []
21
+
22
+ # ------------------------------------------------------------
23
+ # Initializer
24
+
25
# Builds a new +BaseResourceList+. Links are handed to the superclass
# initializer; resources and metadata are then assigned through this
# class's own setters.
#
# @param resources [Array<Resource>] the +<url>+ or +<sitemap>+ elements contained in this list.
# @param links [Array<Link>] related links (+<rs:ln>+).
# @param metadata [Metadata] metadata about this list. Its +capability+ must match the
#   implementation class's +CAPABILITY+ constant.
# @raise [ArgumentError] if the specified metadata does not have the correct +capability+
#   attribute for this list type.
def initialize(resources: [], links: [], metadata: nil)
  super(links: links)

  self.resources = resources
  self.metadata = metadata
end
37
+
38
+ # ------------------------------------------------------------
39
+ # Custom setters
40
+
41
# Replaces the +resources+ list; a +nil+ (or otherwise falsy) value
# is stored as an empty list.
def resources=(value)
  @resources = value ? value : []
end
45
+
46
# Assigns the metadata after passing it through
# +metadata_with_correct_capability+.
#
# @raise [ArgumentError] if the specified metadata does not have the correct
#   +capability+ attribute for this list type.
def metadata=(value)
  checked = metadata_with_correct_capability(value)
  @metadata = checked
end
53
+
54
+ # ------------------------------------------------------------
55
+ # Custom getters
56
+
57
# @return the +capability+ reported by this list's metadata.
def capability
  own_metadata = @metadata
  own_metadata.capability
end
60
+
61
# Selects every resource whose +capability+ equals the one given.
# @param capability [String] the capability.
# @return [Array<Resource>] the matching resources, or an empty array if none exist.
def resources_for(capability:)
  wanted = capability
  resources.select { |resource| resource.capability == wanted }
end
67
+
68
# Returns the first resource whose +capability+ equals the one given
# (in ResourceSync there often should be only one such resource).
# @param capability [String] the capability.
# @return [Resource] the first matching resource, or nil if none exists.
def resource_for(capability:)
  wanted = capability
  resources.detect { |resource| resource.capability == wanted }
end
75
+
76
+ # ------------------------------------------------------------
77
+ # Overrides
78
+
79
# Overrides +::XML::Mapping.pre_save+ (used for writing): takes the element
# produced by the superclass and declares the Sitemap namespace and the
# +rs+-prefixed ResourceSync namespace on it.
#
# @return the element returned by +super+, with both namespaces added.
def pre_save(options = { mapping: :_default })
  super(options).tap do |root|
    root.add_namespace('http://www.sitemaps.org/schemas/sitemap/0.9')
    root.add_namespace('rs', 'http://www.openarchives.org/rs/terms/')
  end
end
87
+
88
# Subclass hook: for each of the +:_default+ and +:sitemapindex+ mappings
# (in that order), selects the mapping on the new subclass and sets its
# root element name (+<urlset>+ and +<sitemapindex>+ respectively).
# +:sitemapindex+ is therefore the mapping left selected afterwards.
def self.inherited(base)
  { _default: 'urlset', sitemapindex: 'sitemapindex' }.each do |mapping, root_name|
    base.use_mapping mapping
    base.root_element_name root_name
  end
end
96
+
97
+ # ------------------------------------------------------------
98
+ # Private methods
99
+
100
+ private
101
+
102
+ # ------------------------------
103
+ # Parameter validators
104
+
105
# Validates the +capability+ attribute in the specified metadata.
#
# The class's +CAPABILITY+ constant is looked up with +const_defined?+ first:
# referencing a missing constant directly would raise +NameError+ before the
# intended +ArgumentError+ could be reported.
#
# @param metadata [Metadata, nil] the metadata to check.
# @return [Metadata] the given metadata, or a new +Metadata+ carrying only the
#   expected capability when +metadata+ is +nil+.
# @raise [ArgumentError] if the class defines no (or a falsy) +CAPABILITY+, if
#   +metadata+ does not respond to +capability+, or if its capability is not
#   the one expected for this list type.
def metadata_with_correct_capability(metadata)
  list_class = self.class
  capability = list_class.const_defined?(:CAPABILITY) ? list_class::CAPABILITY : nil
  fail ArgumentError, "Missing constant #{list_class}::CAPABILITY" unless capability

  return Metadata.new(capability: capability) unless metadata
  fail ArgumentError, "#{metadata} does not appear to be metadata" unless metadata.respond_to?('capability')

  actual = metadata.capability
  return metadata if actual == capability

  fail ArgumentError, "Wrong capability for #{list_class.name} metadata; expected '#{capability}', was '#{actual}'"
end
115
+
116
+ end
117
+ end
@@ -0,0 +1,135 @@
1
+ require 'mime/types'
2
+ require_relative '../xml'
3
+
4
+ module Resync
5
+ # Base class for ResourceSync-specific elements describing a
6
+ # resource or link.
7
+ #
8
+ # @!attribute [rw] modified_time
9
+ # @return [Time] the date and time when the referenced resource was last modified.
10
+ # @!attribute [rw] length
11
+ # @return [Integer] the content length of the referenced resource.
12
+ # @!attribute [rw] mime_type
13
+ # @return [MIME::Type] the media type of the referenced resource.
14
+ # @!attribute [rw] encoding
15
+ # @return [String] the content encoding (if any) applied to the data in the
16
+ # referenced resource (e.g. for compression)
17
+ # @!attribute [rw] hashes
18
+ # @return [Hash<String, String>] fixity information for the referenced
19
+ # resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
20
+ # to hex-encoded digest values.
21
+ # @!attribute [rw] path
22
+ # @return [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
23
+ # the path to the referenced resource within the dump ZIP file.
24
+ class Descriptor
25
+ include ::XML::Mapping
26
+
27
+ # ------------------------------------------------------------
28
+ # Attributes
29
+
30
+ time_node :modified_time, '@modified', default_value: nil
31
+ numeric_node :length, '@length', default_value: nil
32
+ mime_type_node :mime_type, '@type', default_value: nil
33
+ text_node :encoding, '@encoding', default_value: nil
34
+ hash_codes_node :hashes, '@hash', default_value: nil
35
+ text_node :path, '@path', default_value: nil
36
+
37
+ # ------------------------------------------------------------
38
+ # Initializer
39
+
40
# Builds a new +Descriptor+, assigning every field through its setter
# (in the order the parameters are listed) so the validating setters run.
#
# @param modified_time [Time] the date and time when the referenced resource was last modified.
# @param length [Integer] the content length of the referenced resource.
# @param mime_type [MIME::Type] the media type of the referenced resource.
# @param encoding [String] the content encoding (if any) applied to the data in the
#   referenced resource (e.g. for compression)
# @param hashes [Hash<String, String>] fixity information for the referenced
#   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
#   to hex-encoded digest values.
# @param path [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
#   the path to the referenced resource within the dump ZIP file.
def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
  modified_time: nil,
  length: nil,
  mime_type: nil,
  encoding: nil,
  hashes: nil,
  path: nil
)
  fields = {
    modified_time: modified_time,
    length: length,
    mime_type: mime_type,
    encoding: encoding,
    hashes: hashes,
    path: path
  }
  # Hash iteration follows insertion order, so setters run in the order above.
  fields.each { |field, value| send("#{field}=", value) }
end
67
+
68
+ # ------------------------------------------------------------
69
+ # Custom setters
70
+
71
# Sets +modified_time+ to the result of +time_or_nil+ for +value+.
def modified_time=(value)
  checked = time_or_nil(value)
  @modified_time = checked
end
74
+
75
# Sets +length+ to the result of +natural_number_or_nil+ for +value+.
def length=(value)
  checked = natural_number_or_nil(value)
  @length = checked
end
78
+
79
# Sets +mime_type+ to the result of +mime_type_or_nil+ for +value+.
def mime_type=(value)
  converted = mime_type_or_nil(value)
  @mime_type = converted
end
82
+
83
# Sets +hashes+ to the result of +Descriptor.hash_of_hashcodes+ for +value+.
def hashes=(value)
  converted = Descriptor.hash_of_hashcodes(value)
  @hashes = converted
end
86
+
87
+ # ------------------------------------------------------------
88
+ # Public methods
89
+
90
# Gets the hash value for the specified algorithm.
#
# This method shares its name with +Object#hash+, which Ruby calls with no
# arguments (e.g. when an object is used as a +Hash+ key). To keep that
# contract working, a call without an algorithm (or with +nil+) is delegated
# to the inherited implementation instead of raising +ArgumentError+.
#
# @param algorithm [String, nil] The token (e.g. +md5+, +sha-256+) for the hash algorithm.
# @return [String, nil] The hex-encoded digest value for +algorithm+, or +nil+
#   if there is none.
# @return [Integer] The inherited object hash code when no algorithm is given.
def hash(algorithm = nil)
  return super() if algorithm.nil?

  hashes[algorithm]
end
97
+
98
+ # ------------------------------------------------------------
99
+ # Private methods
100
+
101
+ private
102
+
103
+ # ------------------------------
104
+ # Parameter validators
105
+
106
# Passes +time+ through unchanged when it is falsy or a +Time+.
#
# @param time [Time, nil] the value to check.
# @return [Time, nil] +time+ itself.
# @raise [ArgumentError] if +time+ is truthy but not a +Time+.
def time_or_nil(time)
  return time if !time || time.is_a?(Time)

  raise ArgumentError, "time #{time} is not a Time"
end
110
+
111
# Passes +value+ through unchanged when it is falsy or a non-negative +Integer+.
#
# @param value [Integer, nil] the value to check.
# @return [Integer, nil] +value+ itself.
# @raise [ArgumentError] if +value+ is truthy but not a non-negative +Integer+.
def natural_number_or_nil(value)
  return value unless value
  return value if value.is_a?(Integer) && value >= 0

  raise ArgumentError, "value #{value} must be a non-negative integer"
end
115
+
116
# Converts +mime_type+ to a +MIME::Type+.
#
# @param mime_type [MIME::Type, String, nil] the value to convert.
# @return [MIME::Type, nil] +nil+ for a falsy argument; the argument itself if
#   it already is a +MIME::Type+; otherwise the first entry returned by
#   +MIME::Types[]+ for it, or a new +MIME::Type+ built from it when that
#   lookup yields nothing.
def mime_type_or_nil(mime_type)
  return nil unless mime_type
  return mime_type if mime_type.is_a?(MIME::Type)

  MIME::Types[mime_type].first || MIME::Type.new(mime_type)
end
125
+
126
+ # ------------------------------
127
+ # Conversions
128
+
129
# Converts a whitespace-separated list of +algorithm:digest+ tokens into a
# hash. (Although it follows the +private+ keyword in this class, that keyword
# does not affect +def self.+ methods; +hashes=+ calls this one as
# +Descriptor.hash_of_hashcodes+.)
#
# Empty tokens, which appear when the string starts with whitespace, are
# dropped; otherwise they would make the +to_h+ conversion fail.
#
# @param hashes [Hash, String, nil] the value to convert.
# @return [Hash<String, String>] an empty hash for a falsy argument, the
#   argument itself if it is already a +Hash+, otherwise a map from algorithm
#   token to digest value.
def self.hash_of_hashcodes(hashes)
  return {} unless hashes
  return hashes if hashes.is_a?(Hash)

  tokens = hashes.split(/[[:space:]]+/).reject(&:empty?)
  tokens.map { |token| token.split(':') }.to_h
end
134
+ end
135
+ end
@@ -0,0 +1,32 @@
1
+ require 'xml/mapping'
2
+ require_relative '../resource'
3
+
4
+ module Resync
5
# Mixin for list types that are read and written using the +:sitemapindex+
# mapping (root element +<sitemapindex>+, children +<sitemap>+).
module SitemapIndex
  include ::XML::Mapping

  # Hook run when the module is included: extends +base+ with
  # +ClassMethods+, then selects the +:sitemapindex+ mapping on it, names
  # its root element and maps +<sitemap>+ children to +resources+.
  def self.included(base)
    base.extend ClassMethods

    base.use_mapping :sitemapindex
    base.root_element_name 'sitemapindex'
    base.array_node :resources, 'sitemap', class: Resource, default_value: [], sub_mapping: :_default
  end

  # Ensures that an index is always written as a +<sitemapindex>+ by forcing
  # the +:mapping+ option before delegating.
  # Overrides +::XML::Mapping.save_to_xml+.
  def save_to_xml(options = { mapping: :_default })
    super(options.merge(mapping: :sitemapindex))
  end

  # Ensures that an index is always read as a +<sitemapindex>+ by forcing
  # the +:mapping+ option before delegating.
  # Overrides +::XML::Mapping::ClassMethods.load_from_xml+.
  module ClassMethods
    def load_from_xml(xml, options = { mapping: :_default })
      super(xml, options.merge(mapping: :sitemapindex))
    end
  end
end
32
+ end
@@ -0,0 +1,60 @@
1
+ require_relative 'base_resource_list'
2
+
3
+ module Resync
4
+ # An extension to +BaseResourceList+ for resource lists that
5
+ # should be sorted by modification time.
6
+ class SortedResourceList < BaseResourceList
7
+
8
+ # ------------------------------------------------------------
9
+ # Custom setters
10
+
11
# Replaces the +resources+ list with the result of +sorted+ for +value+
# (ordered by modification time; +nil+ is treated as an empty list and
# resources without modification times are sorted to the end), and
# rebuilds the by-URI index from that same sorted list.
def resources=(value)
  ordered = sorted(value)
  @resources = ordered
  @resources_by_uri = by_uri(ordered)
end
18
+
19
+ # ------------------------------------------------------------
20
+ # Custom accessors
21
+
22
+ attr_reader :resources_by_uri
23
+
24
# Finds the last entry recorded for a URI in the by-URI index.
#
# @param uri the URI to look up; it is converted with +XML.to_uri+ first.
# @return [Resource, nil] the last resource listed for that URI, or +nil+ if
#   the index has no entry for it (previously this raised +NoMethodError+).
def latest_for(uri:)
  versions = @resources_by_uri[XML.to_uri(uri)]
  versions ? versions.last : nil
end
28
+
29
# @return [Array] the keys of the by-URI index, i.e. every distinct URI
#   among the resources in this list.
def all_uris
  index = resources_by_uri
  index.keys
end
32
+
33
+ # ------------------------------------------------------------
34
+ # Private methods
35
+
36
+ private
37
+
38
+ # ------------------------------
39
+ # Conversions
40
+
41
# Returns the given resources ordered by ascending +modified_time+, with
# resources lacking a modification time placed after all the others.
#
# The sort is stable: resources with equal (or missing) modification times
# keep their original relative order. The previous comparator returned -1
# when both times were +nil+, which is not a consistent ordering.
#
# @param value [Enumerable<Resource>, nil] the resources to sort.
# @return [Array<Resource>] a new sorted array; empty when +value+ is falsy.
def sorted(value)
  return [] unless value

  keyed = value.each_with_index.sort_by do |resource, index|
    time = resource.modified_time
    # The leading 0/1 decides timed-vs-untimed, so a Time is never compared
    # with the placeholder 0; the index breaks ties to keep the sort stable.
    time ? [0, time, index] : [1, 0, index]
  end
  keyed.map(&:first)
end
51
+
52
# Groups the given resources by their +uri+.
#
# @param resources [Array<Resource>] the resources to index.
# @return [Hash] a map from each URI to the array of resources having that
#   URI, in the order they appear in +resources+.
def by_uri(resources)
  resources.group_by { |resource| resource.uri }
end
59
+ end
60
+ end
@@ -0,0 +1,14 @@
1
+ require_relative 'shared/base_resource_list'
2
+
3
+ module Resync
4
+ # A source description. See section 8,
5
+ # "{http://www.openarchives.org/rs/1.0/resourcesync#SourceDesc Describing the Source}",
6
+ # in the ResourceSync specification.
7
+ class SourceDescription < BaseResourceList
8
+ include ::XML::Mapping
9
+
10
+ # The +capability+ value that metadata attached to a source description must carry.
11
+ CAPABILITY = 'description'
12
+
13
+ end
14
+ end