resync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +92 -0
- data/Rakefile +56 -0
- data/example.rb +100 -0
- data/lib/resync/capability_list.rb +85 -0
- data/lib/resync/change_dump.rb +15 -0
- data/lib/resync/change_dump_manifest.rb +15 -0
- data/lib/resync/change_list.rb +15 -0
- data/lib/resync/change_list_index.rb +26 -0
- data/lib/resync/link.rb +87 -0
- data/lib/resync/metadata.rb +112 -0
- data/lib/resync/resource.rb +72 -0
- data/lib/resync/resource_dump.rb +15 -0
- data/lib/resync/resource_dump_manifest.rb +15 -0
- data/lib/resync/resource_list.rb +15 -0
- data/lib/resync/resource_list_index.rb +15 -0
- data/lib/resync/shared/augmented.rb +76 -0
- data/lib/resync/shared/base_resource_list.rb +117 -0
- data/lib/resync/shared/descriptor.rb +135 -0
- data/lib/resync/shared/sitemap_index.rb +32 -0
- data/lib/resync/shared/sorted_resource_list.rb +60 -0
- data/lib/resync/source_description.rb +14 -0
- data/lib/resync/types/change.rb +14 -0
- data/lib/resync/types/change_frequency.rb +18 -0
- data/lib/resync/types.rb +6 -0
- data/lib/resync/version.rb +4 -0
- data/lib/resync/xml.rb +216 -0
- data/lib/resync/xml_parser.rb +65 -0
- data/lib/resync.rb +4 -0
- data/resync.gemspec +36 -0
- data/spec/acceptance/xml_parser_spec.rb +1049 -0
- data/spec/data/examples/README.md +1 -0
- data/spec/data/examples/example-1.xml +12 -0
- data/spec/data/examples/example-12.xml +25 -0
- data/spec/data/examples/example-13.xml +25 -0
- data/spec/data/examples/example-14.xml +23 -0
- data/spec/data/examples/example-15.xml +21 -0
- data/spec/data/examples/example-16.xml +24 -0
- data/spec/data/examples/example-17.xml +39 -0
- data/spec/data/examples/example-18.xml +25 -0
- data/spec/data/examples/example-19.xml +28 -0
- data/spec/data/examples/example-2.xml +18 -0
- data/spec/data/examples/example-20.xml +22 -0
- data/spec/data/examples/example-21.xml +31 -0
- data/spec/data/examples/example-22.xml +41 -0
- data/spec/data/examples/example-23.xml +41 -0
- data/spec/data/examples/example-24.xml +28 -0
- data/spec/data/examples/example-25.xml +21 -0
- data/spec/data/examples/example-26.xml +18 -0
- data/spec/data/examples/example-27.xml +36 -0
- data/spec/data/examples/example-28.xml +34 -0
- data/spec/data/examples/example-29.xml +27 -0
- data/spec/data/examples/example-3.xml +17 -0
- data/spec/data/examples/example-30.xml +18 -0
- data/spec/data/examples/example-31.xml +16 -0
- data/spec/data/examples/example-32.xml +22 -0
- data/spec/data/examples/example-33.xml +22 -0
- data/spec/data/examples/example-4.xml +10 -0
- data/spec/data/examples/example-5.xml +18 -0
- data/spec/data/examples/example-6.xml +21 -0
- data/spec/data/examples/example-7.xml +13 -0
- data/spec/data/examples/example-8.xml +12 -0
- data/spec/data/resourcesync.xsd +148 -0
- data/spec/data/siteindex.xsd +75 -0
- data/spec/data/sitemap.xsd +116 -0
- data/spec/rspec_custom_matchers.rb +89 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/todo.rb +11 -0
- data/spec/unit/resync/capability_list_spec.rb +138 -0
- data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
- data/spec/unit/resync/change_dump_spec.rb +61 -0
- data/spec/unit/resync/change_list_index_spec.rb +49 -0
- data/spec/unit/resync/change_list_spec.rb +75 -0
- data/spec/unit/resync/link_spec.rb +93 -0
- data/spec/unit/resync/metadata_spec.rb +169 -0
- data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
- data/spec/unit/resync/resource_dump_spec.rb +62 -0
- data/spec/unit/resync/resource_list_index_spec.rb +53 -0
- data/spec/unit/resync/resource_list_spec.rb +60 -0
- data/spec/unit/resync/resource_spec.rb +176 -0
- data/spec/unit/resync/shared/augmented_examples.rb +58 -0
- data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
- data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
- data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
- data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
- data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
- data/spec/unit/resync/source_description_spec.rb +55 -0
- data/spec/unit/resync/xml/timenode_spec.rb +48 -0
- data/spec/unit/resync/xml/xml_spec.rb +40 -0
- data/spec/unit/resync/xml_parser_spec.rb +82 -0
- metadata +340 -0
@@ -0,0 +1,112 @@
|
|
1
|
+
require_relative 'shared/descriptor'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
|
6
|
+
# Metadata about a resource or ResourceSync document. See section 7,
|
7
|
+
# {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
|
8
|
+
# in the ResourceSync specification.
|
9
|
+
#
|
10
|
+
# @!attribute [rw] at_time
|
11
|
+
# @return [Time] the datetime at which assembling a resource list
|
12
|
+
# began (including resource list indices, resource dumps, etc.)
|
13
|
+
# @!attribute [rw] from_time
|
14
|
+
# @return [Time] the beginning of the time range represented by
|
15
|
+
# a change list (including change list indices, change dumps, etc.)
|
16
|
+
# @!attribute [rw] until_time
|
17
|
+
# @return [Time] the end of the time range represented by
|
18
|
+
# a change list (including change list indices, change dumps, etc.)
|
19
|
+
# @!attribute [rw] completed_time
|
20
|
+
# @return [Time] the datetime at which assembling a resource list
|
21
|
+
# ended (including resource list indices, resource dumps, etc.)
|
22
|
+
# @!attribute [rw] change
|
23
|
+
# @return [Change] the type of change to a resource reported in
|
24
|
+
# a change list (including change list indices, change dumps, etc.)
|
25
|
+
# @!attribute [rw] capability
|
26
|
+
# @return [String] identifies the type of a ResourceSync document.
|
27
|
+
class Metadata < Descriptor
  include ::XML::Mapping

  # ------------------------------------------------------------
  # XML mapping

  # Mapped to an +<md>+ element; each field below is read from / written
  # to the named attribute of that element and defaults to +nil+.
  root_element_name 'md'

  time_node :at_time, '@at', default_value: nil
  time_node :from_time, '@from', default_value: nil
  time_node :until_time, '@until', default_value: nil
  time_node :completed_time, '@completed', default_value: nil
  change_node :change, '@change', default_value: nil
  text_node :capability, '@capability', default_value: nil

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +Metadata+ instance. All fields are optional; the
  # descriptor fields (+modified_time+, +length+, +mime_type+, +encoding+,
  # +hashes+, +path+) are handed to the +Descriptor+ initializer.
  #
  # @param at_time [Time] the datetime at which assembling a resource list
  #   began (including resource list indices, resource dumps, etc.)
  # @param from_time [Time] the beginning of the time range represented by
  #   a change list (including change list indices, change dumps, etc.)
  # @param until_time [Time] the end of the time range represented by
  #   a change list (including change list indices, change dumps, etc.)
  # @param completed_time [Time] the datetime at which assembling a resource list
  #   ended (including resource list indices, resource dumps, etc.)
  # @param modified_time [Time] The date and time when the referenced resource was last modified.
  # @param length [Integer] The content length of the referenced resource.
  # @param mime_type [MIME::Type] The media type of the referenced resource.
  # @param encoding [String] Any content encoding (if any) applied to the data in the
  #   referenced resource (e.g. for compression)
  # @param hashes [Hash<String, String>] Fixity information for the referenced
  #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
  #   to hex-encoded digest values.
  # @param change [Change] the type of change to a resource reported in
  #   a change list (including change list indices, change dumps, etc.)
  # @param capability [String] identifies the type of a ResourceSync document.
  # @param path [String] For +ResourceDumpManifests+ and +ChangeDumpManifests+,
  #   the path to the referenced resource within the dump ZIP file.
  # @raise [ArgumentError] if any of the time arguments is neither +nil+ nor a +Time+.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    at_time: nil,
    from_time: nil,
    until_time: nil,
    completed_time: nil,
    modified_time: nil,

    length: nil,
    mime_type: nil,
    encoding: nil,
    hashes: {},

    change: nil,
    capability: nil,
    path: nil
  )
    super(
      modified_time: modified_time,
      length: length,
      mime_type: mime_type,
      encoding: encoding,
      hashes: hashes,
      path: path
    )

    self.at_time = at_time
    self.from_time = from_time
    self.until_time = until_time
    self.completed_time = completed_time

    self.change = change
    self.capability = capability
  end

  # ------------------------------------------------------------
  # Custom setters

  # Defines +at_time=+, +from_time=+, +until_time=+ and +completed_time=+.
  # Each writer passes its argument through the inherited +time_or_nil+
  # validator before storing it in the matching instance variable.
  %i(at_time from_time until_time completed_time).each do |field|
    define_method(:"#{field}=") do |value|
      instance_variable_set(:"@#{field}", time_or_nil(value))
    end
  end
end
|
112
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative 'shared/augmented'
|
2
|
+
require_relative 'xml'
|
3
|
+
require_relative 'metadata'
|
4
|
+
|
5
|
+
module Resync
|
6
|
+
# A resource (i.e., +<url>+ or +<sitemap>+). See section 7,
|
7
|
+
# {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
|
8
|
+
# in the ResourceSync specification.
|
9
|
+
class Resource < Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # XML mapping

  # Mapped to a +<url>+ element by default, with the sitemap fields
  # read from / written to the child elements named below.
  root_element_name 'url'

  uri_node :uri, 'loc', default_value: nil
  time_node :modified_time, 'lastmod', default_value: nil
  changefreq_node :changefreq, 'changefreq', default_value: nil
  numeric_node :priority, 'priority', default_value: nil

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +Resource+. Only +uri+ is mandatory.
  #
  # @param uri [URI, String] the location of the resource; converted with +XML.to_uri+.
  # @param modified_time [Time] The date and time when the referenced resource was last modified.
  # @param changefreq [ChangeFrequency] how frequently the referenced resource is likely to change.
  # @param priority [Number] the priority of this resource relative to other resources from the
  #   same provider. Allows robots to decide which resources to crawl or harvest.
  #   Values should be in the range 0-1.0 (inclusive), where 0 is the lowest priority
  #   and 1.0 is the highest. (The range is not checked here.)
  # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
  # @param metadata [Metadata] metadata about this resource.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    uri:,
    modified_time: nil,
    changefreq: nil,
    priority: nil,
    links: [],
    metadata: nil
  )
    super(links: links)
    self.uri = uri
    self.modified_time = modified_time
    self.changefreq = changefreq
    self.priority = priority
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom setters

  # Sets the resource location, converting the argument with +XML.to_uri+.
  def uri=(value)
    @uri = XML.to_uri(value)
  end

  # ------------------------------------------------------------
  # Public methods

  # @return [String, nil] the +capability+ of this resource's metadata,
  #   or +nil+ when the resource has no metadata.
  def capability
    return nil unless metadata
    metadata.capability
  end

  # ------------------------------------------------------------
  # Overrides

  # Lists this class's own mapping nodes ahead of those of the superclass,
  # because the ResourceSync schema requires '##other' elements to appear last.
  def self.all_xml_mapping_nodes(options = { mapping: nil, create: true })
    own_nodes = xml_mapping_nodes(options)
    inherited_nodes = superclass.all_xml_mapping_nodes(options)
    own_nodes + inherited_nodes
  end
end
|
72
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource dump. See section 11.1,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDump Resource Dump}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceDump < BaseResourceList
  include ::XML::Mapping

  # Capability token for this list type; +BaseResourceList+ checks the
  # +capability+ of any metadata assigned to the list against it.
  CAPABILITY = 'resourcedump'
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource dump manifest. See section 11.2,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDumpManifest Resource Dump Manifest}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceDumpManifest < BaseResourceList
  include ::XML::Mapping

  # Capability token for this list type; +BaseResourceList+ checks the
  # +capability+ of any metadata assigned to the list against it.
  CAPABILITY = 'resourcedump-manifest'
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource list. See section 10.1,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceList Resource List}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceList < BaseResourceList
  include ::XML::Mapping

  # Capability token for this list type; +BaseResourceList+ checks the
  # +capability+ of any metadata assigned to the list against it.
  CAPABILITY = 'resourcelist'
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'shared/sitemap_index'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource list index. See section 10.2,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceListIndex Resource List Index}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceListIndex < BaseResourceList
  include ::XML::Mapping
  # Switches reading and writing to the +:sitemapindex+ mapping.
  include SitemapIndex

  # Capability token for this list type. An index carries the same token
  # as the +ResourceList+ class in this library.
  CAPABILITY = 'resourcelist'
end
|
15
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative '../link'
|
2
|
+
require_relative '../metadata'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
|
6
|
+
# Base class for elements augmented with ResourceSync-specific child elements.
|
7
|
+
#
|
8
|
+
# @!attribute [rw] links
|
9
|
+
# @return [Array<Link>] related links.
|
10
|
+
# @!attribute [rw] metadata
|
11
|
+
# @return [Metadata] metadata about this object.
|
12
|
+
class Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Class methods

  # Names of the ResourceSync-specific elements that must carry the
  # +rs+ prefix when written.
  RS_TAGS = Set['ln', 'md']
  private_constant :RS_TAGS

  # Custom node writer: runs the default writer, then renames every
  # child element of +xml+ whose name is in +RS_TAGS+ to +rs:<name>+.
  #
  # @param obj the object being written.
  # @param xml the element being written to.
  # @param default_obj_to_xml the default writer, invoked first with +obj+ and +xml+.
  def self.prefix_rewriter(obj, xml, default_obj_to_xml)
    default_obj_to_xml.call(obj, xml)
    xml.each_element do |child|
      next unless RS_TAGS.include?(child.name)
      child.name = "rs:#{child.name}"
    end
  end
  private_class_method :prefix_rewriter

  # ------------------------------------------------------------
  # XML mapping

  # The same two nodes are declared once for the default mapping and once
  # for the +:sitemapindex+ mapping; in the latter, the nested +Link+ and
  # +Metadata+ objects are still handled with their +:_default+ mapping.
  array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter)
  object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter)

  use_mapping :sitemapindex
  array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter), sub_mapping: :_default
  object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter), sub_mapping: :_default

  # ------------------------------------------------------------
  # Initializer

  # Builds an +Augmented+ instance from the given links and metadata.
  #
  # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
  # @param metadata [Metadata] metadata about this resource.
  def initialize(links: [], metadata: nil)
    self.links = links
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom accessors

  # Replaces the +links+ list; a +nil+ argument is stored as an empty list.
  def links=(value)
    @links = value || []
  end

  # Returns every link whose +rel+ equals the given relation.
  # @param rel [String] the relation.
  # @return [Array<Link>] the matching links; empty if there are none.
  def links_for(rel:)
    links.select { |link| link.rel == rel }
  end

  # Returns only the first link whose +rel+ equals the given relation
  # (in ResourceSync there often should be only one link with a particular relation).
  # @param rel [String] the relation.
  # @return [Link] the first matching link, or nil if there is none.
  def link_for(rel:)
    links.find { |link| link.rel == rel }
  end
end
|
76
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require_relative 'augmented'
|
2
|
+
require_relative '../resource'
|
3
|
+
require_relative '../metadata'
|
4
|
+
|
5
|
+
module Resync
|
6
|
+
# Base class for root elements containing a list of resources
|
7
|
+
# (i.e., +<urlset>+ and +<sitemapindex>+ elements). Subclasses
|
8
|
+
# must define a +CAPABILITY+ constant identifying the capability
|
9
|
+
# they represent (e.g. +resourcelist+, +changelist+).
|
10
|
+
#
|
11
|
+
# @!attribute [rw] resources
|
12
|
+
# @return [Array<Resource>] the +<url>+ or +<sitemap>+ elements contained in this list.
|
13
|
+
class BaseResourceList < Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # XML mapping

  root_element_name 'urlset'
  array_node :resources, 'url', class: Resource, default_value: []

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +BaseResourceList+.
  #
  # @param resources [Array<Resource>] The +<url>+ or +<sitemap>+ elements contained in this list.
  # @param links [Array<Link>] Related links (+<rs:ln>+).
  # @param metadata [Metadata] Metadata about this list. The +capability+ of the metadata must match this
  #   implementation class' +CAPABILITY+ constant. When +nil+, metadata carrying just that capability
  #   is created.
  # @raise [ArgumentError] if the specified metadata does not have the correct +capability+ attribute
  #   for this list type, or if the implementation class defines no +CAPABILITY+.
  def initialize(resources: [], links: [], metadata: nil)
    super(links: links)
    self.resources = resources
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom setters

  # Sets the +resources+ list. +nil+ is treated as an empty list.
  def resources=(value)
    @resources = value || []
  end

  # Sets the metadata. +nil+ is replaced by new metadata carrying only
  # this list type's capability.
  #
  # @raise [ArgumentError] if the specified metadata does not have the correct +capability+ attribute for
  #   this list type.
  def metadata=(value)
    @metadata = metadata_with_correct_capability(value)
  end

  # ------------------------------------------------------------
  # Custom getters

  # @return [String] the capability recorded in this list's metadata.
  def capability
    @metadata.capability
  end

  # Finds resources with the specified capability.
  # @param capability [String] the capability.
  # @return [Array<Resource>] those resources having that capability, or an empty array if none exist.
  def resources_for(capability:)
    resources.select { |resource| resource.capability == capability }
  end

  # Shortcut to find the first resource with the specified capability (in ResourceSync there often
  # should be only one resource with a particular capability)
  # @param capability [String] the capability.
  # @return [Resource] the first resource having that capability, or nil if none exists.
  def resource_for(capability:)
    resources.find { |resource| resource.capability == capability }
  end

  # ------------------------------------------------------------
  # Overrides

  # Overrides +::XML::Mapping.pre_save+ to declare the Sitemap and ResourceSync namespaces.
  # Used for writing.
  def pre_save(options = { mapping: :_default })
    xml = super(options)
    xml.add_namespace('http://www.sitemaps.org/schemas/sitemap/0.9')
    xml.add_namespace('rs', 'http://www.openarchives.org/rs/terms/')
    xml
  end

  # Initializes the +:_default+ and +:sitemapindex+ mappings on all subclasses, and sets the corresponding
  # root element names (+<urlset>+ and +<sitemapindex>+). Note that +:sitemapindex+ is the mapping
  # selected last on the subclass.
  def self.inherited(base)
    base.use_mapping :_default
    base.root_element_name 'urlset'
    base.use_mapping :sitemapindex
    base.root_element_name 'sitemapindex'
  end

  # ------------------------------------------------------------
  # Private methods

  private

  # ------------------------------
  # Parameter validators

  # Validates the +capability+ attribute in the specified metadata.
  #
  # @param metadata [Metadata, nil] the metadata to check.
  # @return [Metadata] the given metadata, or new metadata with this list type's
  #   capability when +nil+ was given.
  # @raise [ArgumentError] if the implementation class has no +CAPABILITY+ constant, if the
  #   argument does not respond to +capability+, or if its capability differs from +CAPABILITY+.
  def metadata_with_correct_capability(metadata)
    list_class = self.class
    # Look the constant up defensively: referencing a missing constant directly
    # raises NameError before the intended ArgumentError can be reported.
    capability = list_class::CAPABILITY if list_class.const_defined?(:CAPABILITY)
    fail ArgumentError, "Missing constant #{list_class}::CAPABILITY" unless capability
    return Metadata.new(capability: capability) unless metadata
    fail ArgumentError, "#{metadata} does not appear to be metadata" unless metadata.respond_to?(:capability)
    fail ArgumentError, "Wrong capability for #{self.class.name} metadata; expected '#{capability}', was '#{metadata.capability}'" unless metadata.capability == capability
    metadata
  end
end
|
117
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'mime/types'
|
2
|
+
require_relative '../xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# Base class for ResourceSync-specific elements describing a
|
6
|
+
# resource or link.
|
7
|
+
#
|
8
|
+
# @!attribute [rw] modified_time
|
9
|
+
# @return [Time] the date and time when the referenced resource was last modified.
|
10
|
+
# @!attribute [rw] length
|
11
|
+
# @return [Integer] the content length of the referenced resource.
|
12
|
+
# @!attribute [rw] mime_type
|
13
|
+
# @return [MIME::Type] the media type of the referenced resource.
|
14
|
+
# @!attribute [rw] encoding
|
15
|
+
# @return [String] the content encoding (if any) applied to the data in the
|
16
|
+
# referenced resource (e.g. for compression)
|
17
|
+
# @!attribute [rw] hashes
|
18
|
+
# @return [Hash<String, String>] fixity information for the referenced
|
19
|
+
# resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
|
20
|
+
# to hex-encoded digest values.
|
21
|
+
# @!attribute [rw] path
|
22
|
+
# @return [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
|
23
|
+
# the path to the referenced resource within the dump ZIP file.
|
24
|
+
class Descriptor
  include ::XML::Mapping

  # ------------------------------------------------------------
  # XML mapping

  # Every field is read from / written to the named XML attribute.
  time_node :modified_time, '@modified', default_value: nil
  numeric_node :length, '@length', default_value: nil
  mime_type_node :mime_type, '@type', default_value: nil
  text_node :encoding, '@encoding', default_value: nil
  hash_codes_node :hashes, '@hash', default_value: nil
  text_node :path, '@path', default_value: nil

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +Descriptor+ instance with the specified fields.
  #
  # @param modified_time [Time] The date and time when the referenced resource was last modified.
  # @param length [Integer] The content length of the referenced resource.
  # @param mime_type [MIME::Type, String] The media type of the referenced resource.
  # @param encoding [String] Any content encoding (if any) applied to the data in the
  #   referenced resource (e.g. for compression)
  # @param hashes [Hash<String, String>, String] Fixity information for the referenced
  #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
  #   to hex-encoded digest values, or as a whitespace-separated string of
  #   +algorithm:digest+ pairs.
  # @param path [String] For +ResourceDumpManifests+ and +ChangeDumpManifests+,
  #   the path to the referenced resource within the dump ZIP file.
  # @raise [ArgumentError] if +modified_time+ is not a +Time+ or +length+ is not a
  #   non-negative integer.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    modified_time: nil,
    length: nil,
    mime_type: nil,
    encoding: nil,
    hashes: nil,
    path: nil
  )
    self.modified_time = modified_time
    self.length = length
    self.mime_type = mime_type
    self.encoding = encoding
    self.hashes = hashes
    self.path = path
  end

  # ------------------------------------------------------------
  # Custom setters

  # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+.
  def modified_time=(value)
    @modified_time = time_or_nil(value)
  end

  # @raise [ArgumentError] if +value+ is neither +nil+ nor a non-negative +Integer+.
  def length=(value)
    @length = natural_number_or_nil(value)
  end

  # Accepts a +MIME::Type+, or a string that is looked up in the MIME type registry.
  def mime_type=(value)
    @mime_type = mime_type_or_nil(value)
  end

  # Accepts a +Hash+, or a string of +algorithm:digest+ pairs; +nil+ becomes an empty hash.
  def hashes=(value)
    @hashes = Descriptor.hash_of_hashcodes(value)
  end

  # ------------------------------------------------------------
  # Public methods

  # Gets the hash value for the specified algorithm.
  #
  # This method shares its name with +Object#hash+, which Ruby calls without
  # arguments whenever an object is used as a +Hash+ key, stored in a +Set+ or
  # passed through +Array#uniq+. When no algorithm is given, the call is
  # therefore forwarded to the default implementation instead of raising
  # +ArgumentError+.
  #
  # @param algorithm [String] The token (e.g. +md5+, +sha-256+) for the hash algorithm.
  # @return [String] The hex-encoded digest value; or, when called without an
  #   argument, the object's Integer hash code.
  def hash(algorithm = (no_algorithm = true))
    # +no_algorithm+ is only assigned when the default expression is evaluated,
    # i.e. when the caller passed nothing; an explicit +nil+ is still looked up.
    return super() if no_algorithm
    hashes[algorithm]
  end

  # ------------------------------------------------------------
  # Private methods

  private

  # ------------------------------
  # Parameter validators

  # @return [Time, nil] the argument unchanged.
  # @raise [ArgumentError] if the argument is neither +nil+/+false+ nor a +Time+.
  def time_or_nil(time)
    fail ArgumentError, "time #{time} is not a Time" if time && !time.is_a?(Time)
    time
  end

  # @return [Integer, nil] the argument unchanged.
  # @raise [ArgumentError] if the argument is neither +nil+/+false+ nor a non-negative +Integer+.
  def natural_number_or_nil(value)
    fail ArgumentError, "value #{value} must be a non-negative integer" if value && (!value.is_a?(Integer) || value < 0)
    value
  end

  # Converts the argument to a +MIME::Type+: an existing +MIME::Type+ is returned
  # as is, otherwise the first registered type matching the argument is used, and
  # failing that a new +MIME::Type+ is built from it.
  def mime_type_or_nil(mime_type)
    return nil unless mime_type
    return mime_type if mime_type.is_a?(MIME::Type)

    registered = MIME::Types[mime_type].first
    return registered if registered

    MIME::Type.new(mime_type)
  end

  # ------------------------------
  # Conversions

  # Converts fixity information to a +Hash+. (As a singleton method this is not
  # affected by +private+ above; it is called with an explicit receiver from +hashes=+.)
  #
  # @param hashes [Hash, String, nil] a ready-made hash (returned unchanged), or a
  #   whitespace-separated string of +algorithm:digest+ pairs.
  # @return [Hash<String, String>] algorithm tokens mapped to digests; empty for +nil+.
  def self.hash_of_hashcodes(hashes)
    return {} unless hashes
    return hashes if hashes.is_a?(Hash)
    # Splitting on a pattern keeps an empty first token when the string starts
    # with whitespace; drop it so that padded attribute values still parse.
    tokens = hashes.split(/[[:space:]]+/).reject(&:empty?)
    tokens.map { |token| token.split(':') }.to_h
  end
end
|
135
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'xml/mapping'
|
2
|
+
require_relative '../resource'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
module SitemapIndex
  include ::XML::Mapping

  # Hook run when this module is included: adds the +ClassMethods+ overrides to
  # the including class and, under its +:sitemapindex+ mapping, sets the root
  # element to +<sitemapindex>+ and maps +resources+ to +<sitemap>+ children
  # (each of which is itself handled with the +:_default+ mapping).
  def self.included(base)
    base.extend(ClassMethods)

    base.use_mapping :sitemapindex
    base.root_element_name 'sitemapindex'
    base.array_node :resources, 'sitemap', class: Resource, default_value: [], sub_mapping: :_default
  end

  # Ensures that an index is always written as a +<sitemapindex>+, whatever
  # mapping the caller asked for.
  # Overrides +::XML::Mapping.save_to_xml+.
  def save_to_xml(options = { mapping: :_default })
    super(options.merge(mapping: :sitemapindex))
  end

  # Class-level counterpart: ensures that an index is always read as a
  # +<sitemapindex>+.
  # Overrides +::XML::Mapping::ClassMethods.load_from_xml+.
  module ClassMethods
    def load_from_xml(xml, options = { mapping: :_default })
      super(xml, options.merge(mapping: :sitemapindex))
    end
  end
end
|
32
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require_relative 'base_resource_list'
|
2
|
+
|
3
|
+
module Resync
|
4
|
+
# An extension to +BaseResourceList+ for resource lists that
|
5
|
+
# should be sorted by modification time.
|
6
|
+
class SortedResourceList < BaseResourceList

  # ------------------------------------------------------------
  # Custom setters

  # Sets the +resources+ list, sorting the resources by modification
  # time. (+nil+ is treated as an empty list.) Resources without
  # modification times will be sorted to the end. Resources that tie
  # keep the relative order in which they were supplied.
  def resources=(value)
    @resources = sorted(value)
    @resources_by_uri = by_uri(@resources)
  end

  # ------------------------------------------------------------
  # Custom accessors

  # @return [Hash<URI, Array<Resource>>] the resources grouped by URI; each
  #   group is in the same order as the sorted +resources+ list.
  attr_reader :resources_by_uri

  # Finds the last resource for the given URI in sorted order, i.e. the most
  # recently modified one. NOTE(review): since resources without a modification
  # time sort to the end, such a resource is returned in preference to dated ones.
  #
  # @param uri [URI, String] the URI; converted with +XML.to_uri+.
  # @return [Resource, nil] the last resource for that URI, or +nil+ if the
  #   list contains no resource with that URI.
  def latest_for(uri:)
    uri = XML.to_uri(uri)
    (@resources_by_uri[uri] || []).last
  end

  # @return [Array<URI>] the distinct URIs of the resources in this list.
  def all_uris
    @resources_by_uri.keys
  end

  # ------------------------------------------------------------
  # Private methods

  private

  # ------------------------------
  # Conversions

  # Returns a new array sorted by ascending modification time, undated
  # resources last.
  #
  # The sort key starts with 0 for dated and 1 for undated resources, so a
  # +Time+ is never compared with +nil+; the original position is the final
  # tie-breaker, which makes the ordering consistent and stable.
  def sorted(value)
    return [] unless value
    keyed = value.each_with_index.sort_by do |resource, position|
      time = resource.modified_time
      time ? [0, time, position] : [1, position]
    end
    keyed.map(&:first)
  end

  # Groups the given resources by their +uri+, preserving order within each group.
  def by_uri(resources)
    resources.group_by(&:uri)
  end
end
|
60
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
|
3
|
+
module Resync
|
4
|
+
# A source description. See section 8,
|
5
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#SourceDesc Describing the Source}",
|
6
|
+
# in the ResourceSync specification.
|
7
|
+
class SourceDescription < BaseResourceList
  include ::XML::Mapping

  # Capability token for this list type; +BaseResourceList+ checks the
  # +capability+ of any metadata assigned to the list against it.
  CAPABILITY = 'description'
end
|
14
|
+
end
|