resync 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +42 -0
  3. data/.rubocop.yml +23 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +2 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE.md +22 -0
  8. data/README.md +92 -0
  9. data/Rakefile +56 -0
  10. data/example.rb +100 -0
  11. data/lib/resync/capability_list.rb +85 -0
  12. data/lib/resync/change_dump.rb +15 -0
  13. data/lib/resync/change_dump_manifest.rb +15 -0
  14. data/lib/resync/change_list.rb +15 -0
  15. data/lib/resync/change_list_index.rb +26 -0
  16. data/lib/resync/link.rb +87 -0
  17. data/lib/resync/metadata.rb +112 -0
  18. data/lib/resync/resource.rb +72 -0
  19. data/lib/resync/resource_dump.rb +15 -0
  20. data/lib/resync/resource_dump_manifest.rb +15 -0
  21. data/lib/resync/resource_list.rb +15 -0
  22. data/lib/resync/resource_list_index.rb +15 -0
  23. data/lib/resync/shared/augmented.rb +76 -0
  24. data/lib/resync/shared/base_resource_list.rb +117 -0
  25. data/lib/resync/shared/descriptor.rb +135 -0
  26. data/lib/resync/shared/sitemap_index.rb +32 -0
  27. data/lib/resync/shared/sorted_resource_list.rb +60 -0
  28. data/lib/resync/source_description.rb +14 -0
  29. data/lib/resync/types/change.rb +14 -0
  30. data/lib/resync/types/change_frequency.rb +18 -0
  31. data/lib/resync/types.rb +6 -0
  32. data/lib/resync/version.rb +4 -0
  33. data/lib/resync/xml.rb +216 -0
  34. data/lib/resync/xml_parser.rb +65 -0
  35. data/lib/resync.rb +4 -0
  36. data/resync.gemspec +36 -0
  37. data/spec/acceptance/xml_parser_spec.rb +1049 -0
  38. data/spec/data/examples/README.md +1 -0
  39. data/spec/data/examples/example-1.xml +12 -0
  40. data/spec/data/examples/example-12.xml +25 -0
  41. data/spec/data/examples/example-13.xml +25 -0
  42. data/spec/data/examples/example-14.xml +23 -0
  43. data/spec/data/examples/example-15.xml +21 -0
  44. data/spec/data/examples/example-16.xml +24 -0
  45. data/spec/data/examples/example-17.xml +39 -0
  46. data/spec/data/examples/example-18.xml +25 -0
  47. data/spec/data/examples/example-19.xml +28 -0
  48. data/spec/data/examples/example-2.xml +18 -0
  49. data/spec/data/examples/example-20.xml +22 -0
  50. data/spec/data/examples/example-21.xml +31 -0
  51. data/spec/data/examples/example-22.xml +41 -0
  52. data/spec/data/examples/example-23.xml +41 -0
  53. data/spec/data/examples/example-24.xml +28 -0
  54. data/spec/data/examples/example-25.xml +21 -0
  55. data/spec/data/examples/example-26.xml +18 -0
  56. data/spec/data/examples/example-27.xml +36 -0
  57. data/spec/data/examples/example-28.xml +34 -0
  58. data/spec/data/examples/example-29.xml +27 -0
  59. data/spec/data/examples/example-3.xml +17 -0
  60. data/spec/data/examples/example-30.xml +18 -0
  61. data/spec/data/examples/example-31.xml +16 -0
  62. data/spec/data/examples/example-32.xml +22 -0
  63. data/spec/data/examples/example-33.xml +22 -0
  64. data/spec/data/examples/example-4.xml +10 -0
  65. data/spec/data/examples/example-5.xml +18 -0
  66. data/spec/data/examples/example-6.xml +21 -0
  67. data/spec/data/examples/example-7.xml +13 -0
  68. data/spec/data/examples/example-8.xml +12 -0
  69. data/spec/data/resourcesync.xsd +148 -0
  70. data/spec/data/siteindex.xsd +75 -0
  71. data/spec/data/sitemap.xsd +116 -0
  72. data/spec/rspec_custom_matchers.rb +89 -0
  73. data/spec/spec_helper.rb +31 -0
  74. data/spec/todo.rb +11 -0
  75. data/spec/unit/resync/capability_list_spec.rb +138 -0
  76. data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
  77. data/spec/unit/resync/change_dump_spec.rb +61 -0
  78. data/spec/unit/resync/change_list_index_spec.rb +49 -0
  79. data/spec/unit/resync/change_list_spec.rb +75 -0
  80. data/spec/unit/resync/link_spec.rb +93 -0
  81. data/spec/unit/resync/metadata_spec.rb +169 -0
  82. data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
  83. data/spec/unit/resync/resource_dump_spec.rb +62 -0
  84. data/spec/unit/resync/resource_list_index_spec.rb +53 -0
  85. data/spec/unit/resync/resource_list_spec.rb +60 -0
  86. data/spec/unit/resync/resource_spec.rb +176 -0
  87. data/spec/unit/resync/shared/augmented_examples.rb +58 -0
  88. data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
  89. data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
  90. data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
  91. data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
  92. data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
  93. data/spec/unit/resync/source_description_spec.rb +55 -0
  94. data/spec/unit/resync/xml/timenode_spec.rb +48 -0
  95. data/spec/unit/resync/xml/xml_spec.rb +40 -0
  96. data/spec/unit/resync/xml_parser_spec.rb +82 -0
  97. metadata +340 -0
@@ -0,0 +1,112 @@
1
+ require_relative 'shared/descriptor'
2
+ require_relative 'xml'
3
+
4
module Resync

  # Describes a resource or a ResourceSync document (mapped to the +md+ element).
  # See section 7,
  # {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
  # in the ResourceSync specification.
  #
  # @!attribute [rw] at_time
  #   @return [Time] the datetime at which assembling a resource list
  #     began (including resource list indices, resource dumps, etc.)
  # @!attribute [rw] from_time
  #   @return [Time] the beginning of the time range represented by
  #     a change list (including change list indices, change dumps, etc.)
  # @!attribute [rw] until_time
  #   @return [Time] the end of the time range represented by
  #     a change list (including change list indices, change dumps, etc.)
  # @!attribute [rw] completed_time
  #   @return [Time] the datetime at which assembling a resource list
  #     ended (including resource list indices, resource dumps, etc.)
  # @!attribute [rw] change
  #   @return [Change] the type of change to a resource reported in
  #     a change list (including change list indices, change dumps, etc.)
  # @!attribute [rw] capability
  #   @return [String] identifies the type of a ResourceSync document.
  class Metadata < Descriptor
    include ::XML::Mapping

    # ------------------------------------------------------------
    # XML mapping

    root_element_name 'md'

    time_node :at_time, '@at', default_value: nil
    time_node :from_time, '@from', default_value: nil
    time_node :until_time, '@until', default_value: nil
    time_node :completed_time, '@completed', default_value: nil
    change_node :change, '@change', default_value: nil
    text_node :capability, '@capability', default_value: nil

    # ------------------------------------------------------------
    # Construction

    # Creates a new +Metadata+. The descriptor fields (+modified_time+, +length+,
    # +mime_type+, +encoding+, +hashes+, +path+) are handed to +Descriptor+; the
    # remaining fields are assigned through this class's own setters.
    #
    # @param at_time [Time] the datetime at which assembling a resource list
    #   began (including resource list indices, resource dumps, etc.)
    # @param from_time [Time] the beginning of the time range represented by
    #   a change list (including change list indices, change dumps, etc.)
    # @param until_time [Time] the end of the time range represented by
    #   a change list (including change list indices, change dumps, etc.)
    # @param completed_time [Time] the datetime at which assembling a resource list
    #   ended (including resource list indices, resource dumps, etc.)
    # @param modified_time [Time] the date and time when the referenced resource was last modified.
    # @param length [Integer] the content length of the referenced resource.
    # @param mime_type [MIME::Type] the media type of the referenced resource.
    # @param encoding [String] the content encoding, if any, applied to the data in the
    #   referenced resource (e.g. for compression)
    # @param hashes [Hash<String, String>] fixity information for the referenced
    #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
    #   to hex-encoded digest values.
    # @param change [Change] the type of change to a resource reported in
    #   a change list (including change list indices, change dumps, etc.)
    # @param capability [String] identifies the type of a ResourceSync document.
    # @param path [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
    #   the path to the referenced resource within the dump ZIP file.
    def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
      at_time: nil,
      from_time: nil,
      until_time: nil,
      completed_time: nil,
      modified_time: nil,

      length: nil,
      mime_type: nil,
      encoding: nil,
      hashes: {},

      change: nil,
      capability: nil,
      path: nil
    )
      super(
        modified_time: modified_time,
        length: length,
        mime_type: mime_type,
        encoding: encoding,
        hashes: hashes,
        path: path
      )

      # Timestamps go through the validating setters below.
      self.at_time = at_time
      self.from_time = from_time
      self.until_time = until_time
      self.completed_time = completed_time

      self.change = change
      self.capability = capability
    end

    # ------------------------------------------------------------
    # Validating setters (each accepts a +Time+ or +nil+)

    # @param value [Time, nil] the new +at+ time
    # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+
    def at_time=(value)
      @at_time = time_or_nil(value)
    end

    # @param value [Time, nil] the new +from+ time
    # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+
    def from_time=(value)
      @from_time = time_or_nil(value)
    end

    # @param value [Time, nil] the new +until+ time
    # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+
    def until_time=(value)
      @until_time = time_or_nil(value)
    end

    # @param value [Time, nil] the new +completed+ time
    # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+
    def completed_time=(value)
      @completed_time = time_or_nil(value)
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require_relative 'shared/augmented'
2
+ require_relative 'xml'
3
+ require_relative 'metadata'
4
+
5
module Resync
  # A single resource entry (i.e., +<url>+ or +<sitemap>+). See section 7,
  # {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
  # in the ResourceSync specification.
  class Resource < Augmented
    include ::XML::Mapping

    # ------------------------------------------------------------
    # XML mapping

    root_element_name 'url'

    uri_node :uri, 'loc', default_value: nil
    time_node :modified_time, 'lastmod', default_value: nil
    changefreq_node :changefreq, 'changefreq', default_value: nil
    numeric_node :priority, 'priority', default_value: nil

    # ------------------------------------------------------------
    # Construction

    # Creates a new +Resource+.
    #
    # @param uri the location of the resource (required); it is converted with +XML.to_uri+.
    # @param modified_time [Time] the date and time when the referenced resource was last modified.
    # @param changefreq [ChangeFrequency] how frequently the referenced resource is likely to change.
    # @param priority [Number] the priority of this resource relative to other resources from the
    #   same provider. Allows robots to decide which resources to crawl or harvest.
    #   Values should be in the range 0-1.0 (inclusive), where 0 is the lowest priority
    #   and 1.0 is the highest.
    # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
    # @param metadata [Metadata] metadata about this resource.
    def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
      uri:,
      modified_time: nil,
      changefreq: nil,
      priority: nil,
      links: [],
      metadata: nil
    )
      super(links: links)

      self.uri = uri
      self.modified_time = modified_time
      self.changefreq = changefreq
      self.priority = priority
      self.metadata = metadata
    end

    # ------------------------------------------------------------
    # Setters

    # Stores the resource location after passing it through +XML.to_uri+.
    def uri=(value)
      @uri = XML.to_uri(value)
    end

    # ------------------------------------------------------------
    # Queries

    # @return the +capability+ of this resource's metadata, or +nil+ when
    #   the resource has no metadata.
    def capability
      return nil unless metadata

      metadata.capability
    end

    # ------------------------------------------------------------
    # XML::Mapping overrides

    # Lists this class's own mapping nodes ahead of the superclass's:
    # the ResourceSync schema requires '##other' elements to appear last.
    def self.all_xml_mapping_nodes(options = { mapping: nil, create: true })
      own_nodes = xml_mapping_nodes(options)
      inherited_nodes = superclass.all_xml_mapping_nodes(options)
      own_nodes + inherited_nodes
    end

  end
end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'xml'
3
+
4
module Resync
  # Root list type for a resource dump document. See section 11.1,
  # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDump Resource Dump}",
  # in the ResourceSync specification.
  class ResourceDump < BaseResourceList
    include ::XML::Mapping

    # Capability token for this list type; +BaseResourceList+ requires the
    # list's metadata to carry this same +capability+ value.
    CAPABILITY = 'resourcedump'

  end
end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'xml'
3
+
4
module Resync
  # A resource dump manifest. See section 11.2,
  # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDumpManifest Resource Dump Manifest}",
  # in the ResourceSync specification.
  class ResourceDumpManifest < BaseResourceList
    include ::XML::Mapping

    # Capability token for this list type; +BaseResourceList+ requires the
    # list's metadata to carry this same +capability+ value.
    CAPABILITY = 'resourcedump-manifest'

  end
end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'xml'
3
+
4
module Resync
  # Root list type for a resource list document. See section 10.1,
  # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceList Resource List}",
  # in the ResourceSync specification.
  class ResourceList < BaseResourceList
    include ::XML::Mapping

    # Capability token for this list type; +BaseResourceList+ requires the
    # list's metadata to carry this same +capability+ value.
    CAPABILITY = 'resourcelist'

  end
end
@@ -0,0 +1,15 @@
1
+ require_relative 'shared/base_resource_list'
2
+ require_relative 'shared/sitemap_index'
3
+
4
module Resync
  # An index over resource lists. See section 10.2,
  # "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceListIndex Resource List Index}",
  # in the ResourceSync specification.
  #
  # Mixing in +SitemapIndex+ makes instances read and write with the
  # +:sitemapindex+ mapping (root element +<sitemapindex>+).
  class ResourceListIndex < BaseResourceList
    include ::XML::Mapping
    include SitemapIndex

    # Capability token for this list type (the same value +ResourceList+ uses);
    # +BaseResourceList+ requires the list's metadata to carry it.
    CAPABILITY = 'resourcelist'
  end
end
@@ -0,0 +1,76 @@
1
+ require_relative '../link'
2
+ require_relative '../metadata'
3
+
4
module Resync

  # Common superclass for elements that carry the ResourceSync-specific
  # child elements +ln+ (links) and +md+ (metadata).
  #
  # @!attribute [rw] links
  #   @return [Array<Link>] related links.
  # @!attribute [rw] metadata
  #   @return [Metadata] metadata about this object.
  class Augmented
    include ::XML::Mapping

    # ------------------------------------------------------------
    # Class-level helpers

    # Names of the child elements that get the +rs+ prefix when written.
    RS_TAGS = Set['ln', 'md']
    private_constant :RS_TAGS

    # Node writer: runs the default writer, then renames every child element
    # of +xml+ whose name is in +RS_TAGS+ so that it carries the +rs:+ prefix.
    #
    # @param obj the object being written
    # @param xml the XML element being populated
    # @param default_obj_to_xml the default writer, invoked first with +obj+ and +xml+
    def self.prefix_rewriter(obj, xml, default_obj_to_xml)
      default_obj_to_xml.call(obj, xml)
      xml.each_element do |element|
        next unless RS_TAGS.include?(element.name)

        element.name = "rs:#{element.name}"
      end
    end
    private_class_method :prefix_rewriter

    # ------------------------------------------------------------
    # XML mapping

    # Default mapping
    array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter)
    object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter)

    # Same nodes under the :sitemapindex mapping; children still use the :_default mapping
    use_mapping :sitemapindex
    array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter), sub_mapping: :_default
    object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter), sub_mapping: :_default

    # ------------------------------------------------------------
    # Construction

    # Builds an +Augmented+ instance from the given links and metadata.
    #
    # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
    # @param metadata [Metadata] metadata about this resource.
    def initialize(links: [], metadata: nil)
      self.links = links
      self.metadata = metadata
    end

    # ------------------------------------------------------------
    # Link accessors

    # Replaces the +links+ list; a +nil+ (or +false+) value becomes an empty list.
    def links=(value)
      @links = value || []
    end

    # Collects every link whose +rel+ equals the given relation.
    # @param rel [String] the relation.
    # @return [Array<Link>] those links having that relation, or an empty array if none exist.
    def links_for(rel:)
      links.select { |link| link.rel == rel }
    end

    # Convenience lookup for the first link with the given relation (in ResourceSync
    # there often should be only one link with a particular relation).
    # @param rel [String] the relation.
    # @return [Link] the first link having that relation, or nil if none exists.
    def link_for(rel:)
      links.find { |link| link.rel == rel }
    end
  end
end
@@ -0,0 +1,117 @@
1
+ require_relative 'augmented'
2
+ require_relative '../resource'
3
+ require_relative '../metadata'
4
+
5
module Resync
  # Base class for root elements containing a list of resources
  # (i.e., +<urlset>+ and +<sitemapindex>+ elements). Subclasses
  # must define a +CAPABILITY+ constant identifying the capability
  # they represent (e.g. +resourcelist+, +changelist+).
  #
  # @!attribute [rw] resources
  #   @return [Array<Resource>] the +<url>+ or +<sitemap>+ elements contained in this list.
  class BaseResourceList < Augmented
    include ::XML::Mapping

    # ------------------------------------------------------------
    # XML mapping

    root_element_name 'urlset'
    array_node :resources, 'url', class: Resource, default_value: []

    # ------------------------------------------------------------
    # Construction

    # Creates a new +BaseResourceList+.
    #
    # @param resources [Array<Resource>] The +<url>+ or +<sitemap>+ elements contained in this list.
    # @param links [Array<Link>] Related links (+<rs:ln>+).
    # @param metadata [Metadata] Metadata about this list. The +capability+ of the metadata must match this
    #   implementation class' +CAPABILITY+ constant. When +nil+, a +Metadata+ carrying only that
    #   capability is created.
    # @raise [ArgumentError] if the specified metadata does not have the correct +capability+ attribute
    #   for this list type, or if the class defines no +CAPABILITY+.
    def initialize(resources: [], links: [], metadata: nil)
      super(links: links)
      self.resources = resources
      self.metadata = metadata
    end

    # ------------------------------------------------------------
    # Setters

    # Sets the +resources+ list. +nil+ is treated as an empty list.
    def resources=(value)
      @resources = value || []
    end

    # Sets the metadata after validating its capability. A +nil+ value is
    # replaced by a new +Metadata+ carrying this class's +CAPABILITY+.
    #
    # @raise [ArgumentError] if the specified metadata does not have the correct +capability+ attribute for
    #   this list type.
    def metadata=(value)
      @metadata = metadata_with_correct_capability(value)
    end

    # ------------------------------------------------------------
    # Queries

    # @return the +capability+ of this list's metadata.
    def capability
      @metadata.capability
    end

    # Finds resources with the specified capability.
    # @param capability [String] the capability.
    # @return [Array<Resource>] those resources having that capability, or an empty array if none exist.
    def resources_for(capability:)
      resources.select { |resource| resource.capability == capability }
    end

    # Shortcut to find the first resource with the specified capability (in ResourceSync there often
    # should be only one resource with a particular capability)
    # @param capability [String] the capability.
    # @return [Resource] the first resource having that capability, or nil if none exists.
    def resource_for(capability:)
      resources.find { |resource| resource.capability == capability }
    end

    # ------------------------------------------------------------
    # Overrides

    # Overrides +::XML::Mapping.pre_save+ to declare the Sitemap and ResourceSync namespaces.
    # Used for writing.
    def pre_save(options = { mapping: :_default })
      xml = super(options)
      xml.add_namespace('http://www.sitemaps.org/schemas/sitemap/0.9')
      xml.add_namespace('rs', 'http://www.openarchives.org/rs/terms/')
      xml
    end

    # Initializes the +:_default+ and +:sitemapindex+ mappings on all subclasses, and sets the corresponding
    # root element names (+<urlset>+ and +<sitemapindex>+)
    def self.inherited(base)
      base.use_mapping :_default
      base.root_element_name 'urlset'
      base.use_mapping :sitemapindex
      base.root_element_name 'sitemapindex'
    end

    # ------------------------------------------------------------
    # Private methods

    private

    # ------------------------------
    # Parameter validators

    # Validates the +capability+ attribute in the specified metadata.
    #
    # @param metadata [Metadata, nil] the candidate metadata
    # @return [Metadata] +metadata+ itself, or a new +Metadata+ with this class's
    #   capability when +metadata+ is +nil+
    # @raise [ArgumentError] if this class has no (or a +nil+) +CAPABILITY+ constant, if +metadata+
    #   does not respond to +capability+, or if its capability differs from +CAPABILITY+.
    def metadata_with_correct_capability(metadata)
      list_class = self.class
      # Check for the constant before reading it: a direct +self.class::CAPABILITY+ raises
      # NameError when the constant is absent, so the ArgumentError below was never reached.
      capability = list_class.const_defined?(:CAPABILITY) ? list_class.const_get(:CAPABILITY) : nil
      fail ArgumentError, "Missing constant #{list_class}::CAPABILITY" unless capability
      return Metadata.new(capability: capability) unless metadata
      fail ArgumentError, "#{metadata} does not appear to be metadata" unless metadata.respond_to?('capability')
      fail ArgumentError, "Wrong capability for #{list_class.name} metadata; expected '#{capability}', was '#{metadata.capability}'" unless metadata.capability == capability
      metadata
    end

  end
end
@@ -0,0 +1,135 @@
1
+ require 'mime/types'
2
+ require_relative '../xml'
3
+
4
module Resync
  # Base class for ResourceSync-specific elements describing a
  # resource or link.
  #
  # @!attribute [rw] modified_time
  #   @return [Time] the date and time when the referenced resource was last modified.
  # @!attribute [rw] length
  #   @return [Integer] the content length of the referenced resource.
  # @!attribute [rw] mime_type
  #   @return [MIME::Type] the media type of the referenced resource.
  # @!attribute [rw] encoding
  #   @return [String] the content encoding (if any) applied to the data in the
  #     referenced resource (e.g. for compression)
  # @!attribute [rw] hashes
  #   @return [Hash<String, String>] fixity information for the referenced
  #     resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
  #     to hex-encoded digest values.
  # @!attribute [rw] path
  #   @return [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
  #     the path to the referenced resource within the dump ZIP file.
  class Descriptor
    include ::XML::Mapping

    # Sentinel default for +#hash+, so that "no argument" can be told apart
    # from an explicit +nil+ algorithm.
    NO_ALGORITHM = Object.new.freeze
    private_constant :NO_ALGORITHM

    # ------------------------------------------------------------
    # XML mapping

    time_node :modified_time, '@modified', default_value: nil
    numeric_node :length, '@length', default_value: nil
    mime_type_node :mime_type, '@type', default_value: nil
    text_node :encoding, '@encoding', default_value: nil
    hash_codes_node :hashes, '@hash', default_value: nil
    text_node :path, '@path', default_value: nil

    # ------------------------------------------------------------
    # Construction

    # Creates a new +Descriptor+ instance with the specified fields.
    #
    # @param modified_time [Time] The date and time when the referenced resource was last modified.
    # @param length [Integer] The content length of the referenced resource.
    # @param mime_type [MIME::Type] The media type of the referenced resource.
    # @param encoding [String] The content encoding, if any, applied to the data in the
    #   referenced resource (e.g. for compression)
    # @param hashes [Hash<String, String>] Fixity information for the referenced
    #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
    #   to hex-encoded digest values.
    # @param path [String] For +ResourceDumpManifests+ and +ChangeDumpManifests+,
    #   the path to the referenced resource within the dump ZIP file.
    # @raise [ArgumentError] if +modified_time+ is not a +Time+, or +length+ is not a
    #   non-negative integer.
    def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
      modified_time: nil,
      length: nil,
      mime_type: nil,
      encoding: nil,
      hashes: nil,
      path: nil
    )
      self.modified_time = modified_time
      self.length = length
      self.mime_type = mime_type
      self.encoding = encoding
      self.hashes = hashes
      self.path = path
    end

    # ------------------------------------------------------------
    # Conversions

    # Converts a hash-codes value into a map from algorithm token to digest.
    # Public because the +hashes=+ setter calls it through the class.
    #
    # @param hashes [Hash, String, nil] an existing map (returned unchanged), a
    #   whitespace-separated string of +algorithm:digest+ tokens, or +nil+
    # @return [Hash<String, String>] the map; empty when +hashes+ is +nil+
    def self.hash_of_hashcodes(hashes)
      return {} unless hashes
      return hashes if hashes.is_a?(Hash)

      hashes.split(/[[:space:]]+/)
        .reject(&:empty?) # leading whitespace yields an empty first token
        .map { |token| token.split(':', 2) } # split on the first colon only
        .to_h
    end

    # ------------------------------------------------------------
    # Validating setters

    # @param value [Time, nil] the last-modified time
    # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+
    def modified_time=(value)
      @modified_time = time_or_nil(value)
    end

    # @param value [Integer, nil] the content length
    # @raise [ArgumentError] if +value+ is neither +nil+ nor a non-negative +Integer+
    def length=(value)
      @length = natural_number_or_nil(value)
    end

    # @param value [MIME::Type, String, nil] the media type; non-+MIME::Type+ values are
    #   looked up in +MIME::Types+, falling back to a new +MIME::Type+
    def mime_type=(value)
      @mime_type = mime_type_or_nil(value)
    end

    # @param value [Hash, String, nil] fixity information; see +Descriptor.hash_of_hashcodes+
    def hashes=(value)
      @hashes = Descriptor.hash_of_hashcodes(value)
    end

    # ------------------------------------------------------------
    # Public methods

    # Gets the hash value for the specified algorithm.
    #
    # Called with no argument, this behaves as the standard +Object#hash+, so
    # instances can still be used as Hash keys, in Sets and with +uniq+.
    #
    # @param algorithm [String] The token (e.g. +md5+, +sha-256+) for the hash algorithm.
    # @return [String] The hex-encoded digest value (or the object's Integer
    #   hash code when no algorithm is given).
    def hash(algorithm = NO_ALGORITHM)
      return super() if NO_ALGORITHM.equal?(algorithm)

      hashes[algorithm]
    end

    # ------------------------------------------------------------
    # Private methods

    private

    # ------------------------------
    # Parameter validators

    # @return [Time, nil] +time+ unchanged
    # @raise [ArgumentError] if +time+ is truthy but not a +Time+
    def time_or_nil(time)
      fail ArgumentError, "time #{time} is not a Time" if time && !time.is_a?(Time)
      time
    end

    # @return [Integer, nil] +value+ unchanged
    # @raise [ArgumentError] if +value+ is truthy but not a non-negative +Integer+
    def natural_number_or_nil(value)
      fail ArgumentError, "value #{value} must be a non-negative integer" if value && (!value.is_a?(Integer) || value < 0)
      value
    end

    # @return [MIME::Type, nil] +nil+ for a falsy argument, the argument itself when it
    #   already is a +MIME::Type+, otherwise the first registered match or a new +MIME::Type+
    def mime_type_or_nil(mime_type)
      return nil unless mime_type
      return mime_type if mime_type.is_a?(MIME::Type)

      MIME::Types[mime_type].first || MIME::Type.new(mime_type)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'xml/mapping'
2
+ require_relative '../resource'
3
+
4
module Resync
  # Mixin for list types that are read and written as +<sitemapindex>+
  # documents, using the +:sitemapindex+ mapping.
  module SitemapIndex
    include ::XML::Mapping

    # Forces the +:sitemapindex+ mapping when loading.
    # Overrides +::XML::Mapping::ClassMethods.load_from_xml+.
    module ClassMethods
      # Ensures that an index is always read as a +<sitemapindex>+:
      # whatever mapping the caller passed is replaced by +:sitemapindex+.
      def load_from_xml(xml, options = { mapping: :_default })
        super(xml, options.merge(mapping: :sitemapindex))
      end
    end

    # Hook run when a class includes this module: extends it with +ClassMethods+
    # and, under the +:sitemapindex+ mapping, declares the root element
    # +sitemapindex+ and a +resources+ array read from +sitemap+ children
    # (the children themselves use the +:_default+ mapping).
    def self.included(base)
      base.extend(ClassMethods)

      base.use_mapping :sitemapindex
      base.root_element_name 'sitemapindex'
      base.array_node :resources, 'sitemap', class: Resource, default_value: [], sub_mapping: :_default
    end

    # Ensures that an index is always written as a +<sitemapindex>+:
    # whatever mapping the caller passed is replaced by +:sitemapindex+.
    # Overrides +::XML::Mapping.save_to_xml+.
    def save_to_xml(options = { mapping: :_default })
      super(options.merge(mapping: :sitemapindex))
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require_relative 'base_resource_list'
2
+
3
module Resync
  # An extension to +BaseResourceList+ for resource lists that
  # should be sorted by modification time.
  #
  # @!attribute [r] resources_by_uri
  #   @return [Hash] the resources grouped by their +uri+; each value is the list of
  #     resources with that URI, in sorted order.
  class SortedResourceList < BaseResourceList

    # ------------------------------------------------------------
    # Custom setters

    # Sets the +resources+ list, sorting the resources by modification
    # time. (+nil+ is treated as an empty list.) Resources without
    # modification times will be sorted to the end. Also rebuilds
    # the +resources_by_uri+ index.
    def resources=(value)
      @resources = sorted(value)
      @resources_by_uri = by_uri(@resources)
    end

    # ------------------------------------------------------------
    # Custom accessors

    attr_reader :resources_by_uri

    # Finds the last resource, in sorted order, recorded for a URI.
    #
    # @param uri the URI to look up; converted with +XML.to_uri+
    # @return [Resource, nil] the last resource with that URI, or +nil+ when the
    #   list contains no resource for it
    def latest_for(uri:)
      matches = @resources_by_uri[XML.to_uri(uri)]
      # An unknown URI has no entry; return nil instead of calling +last+ on nil.
      matches && matches.last
    end

    # @return [Array] every distinct resource URI in this list.
    def all_uris
      @resources_by_uri.keys
    end

    # ------------------------------------------------------------
    # Private methods

    private

    # ------------------------------
    # Conversions

    # Returns a copy of +value+ ordered by ascending +modified_time+, with
    # resources lacking a modification time placed last.
    #
    # @param value [Array<Resource>, nil] the resources to sort
    # @return [Array<Resource>] the sorted list (empty for +nil+)
    def sorted(value)
      return [] unless value

      value.sort do |left, right|
        left_time = left.modified_time
        right_time = right.modified_time
        if left_time && right_time
          left_time <=> right_time
        elsif left_time
          -1 # only the right-hand resource is undated: it goes after
        elsif right_time
          1 # only the left-hand resource is undated: it goes after
        else
          0 # both undated: equal, keeping the comparator consistent
        end
      end
    end

    # Groups resources by URI, preserving their incoming order within each group.
    #
    # @param resources [Array<Resource>] the (already sorted) resources
    # @return [Hash] map from URI to the resources having that URI
    def by_uri(resources)
      resources.group_by(&:uri)
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'shared/base_resource_list'
2
+
3
module Resync
  # A source description. See section 8,
  # "{http://www.openarchives.org/rs/1.0/resourcesync#SourceDesc Describing the Source}",
  # in the ResourceSync specification.
  class SourceDescription < BaseResourceList
    include ::XML::Mapping

    # Capability token for this list type; +BaseResourceList+ requires the
    # list's metadata to carry this same +capability+ value.
    CAPABILITY = 'description'

  end
end