resync 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +42 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +92 -0
- data/Rakefile +56 -0
- data/example.rb +100 -0
- data/lib/resync/capability_list.rb +85 -0
- data/lib/resync/change_dump.rb +15 -0
- data/lib/resync/change_dump_manifest.rb +15 -0
- data/lib/resync/change_list.rb +15 -0
- data/lib/resync/change_list_index.rb +26 -0
- data/lib/resync/link.rb +87 -0
- data/lib/resync/metadata.rb +112 -0
- data/lib/resync/resource.rb +72 -0
- data/lib/resync/resource_dump.rb +15 -0
- data/lib/resync/resource_dump_manifest.rb +15 -0
- data/lib/resync/resource_list.rb +15 -0
- data/lib/resync/resource_list_index.rb +15 -0
- data/lib/resync/shared/augmented.rb +76 -0
- data/lib/resync/shared/base_resource_list.rb +117 -0
- data/lib/resync/shared/descriptor.rb +135 -0
- data/lib/resync/shared/sitemap_index.rb +32 -0
- data/lib/resync/shared/sorted_resource_list.rb +60 -0
- data/lib/resync/source_description.rb +14 -0
- data/lib/resync/types/change.rb +14 -0
- data/lib/resync/types/change_frequency.rb +18 -0
- data/lib/resync/types.rb +6 -0
- data/lib/resync/version.rb +4 -0
- data/lib/resync/xml.rb +216 -0
- data/lib/resync/xml_parser.rb +65 -0
- data/lib/resync.rb +4 -0
- data/resync.gemspec +36 -0
- data/spec/acceptance/xml_parser_spec.rb +1049 -0
- data/spec/data/examples/README.md +1 -0
- data/spec/data/examples/example-1.xml +12 -0
- data/spec/data/examples/example-12.xml +25 -0
- data/spec/data/examples/example-13.xml +25 -0
- data/spec/data/examples/example-14.xml +23 -0
- data/spec/data/examples/example-15.xml +21 -0
- data/spec/data/examples/example-16.xml +24 -0
- data/spec/data/examples/example-17.xml +39 -0
- data/spec/data/examples/example-18.xml +25 -0
- data/spec/data/examples/example-19.xml +28 -0
- data/spec/data/examples/example-2.xml +18 -0
- data/spec/data/examples/example-20.xml +22 -0
- data/spec/data/examples/example-21.xml +31 -0
- data/spec/data/examples/example-22.xml +41 -0
- data/spec/data/examples/example-23.xml +41 -0
- data/spec/data/examples/example-24.xml +28 -0
- data/spec/data/examples/example-25.xml +21 -0
- data/spec/data/examples/example-26.xml +18 -0
- data/spec/data/examples/example-27.xml +36 -0
- data/spec/data/examples/example-28.xml +34 -0
- data/spec/data/examples/example-29.xml +27 -0
- data/spec/data/examples/example-3.xml +17 -0
- data/spec/data/examples/example-30.xml +18 -0
- data/spec/data/examples/example-31.xml +16 -0
- data/spec/data/examples/example-32.xml +22 -0
- data/spec/data/examples/example-33.xml +22 -0
- data/spec/data/examples/example-4.xml +10 -0
- data/spec/data/examples/example-5.xml +18 -0
- data/spec/data/examples/example-6.xml +21 -0
- data/spec/data/examples/example-7.xml +13 -0
- data/spec/data/examples/example-8.xml +12 -0
- data/spec/data/resourcesync.xsd +148 -0
- data/spec/data/siteindex.xsd +75 -0
- data/spec/data/sitemap.xsd +116 -0
- data/spec/rspec_custom_matchers.rb +89 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/todo.rb +11 -0
- data/spec/unit/resync/capability_list_spec.rb +138 -0
- data/spec/unit/resync/change_dump_manifest_spec.rb +75 -0
- data/spec/unit/resync/change_dump_spec.rb +61 -0
- data/spec/unit/resync/change_list_index_spec.rb +49 -0
- data/spec/unit/resync/change_list_spec.rb +75 -0
- data/spec/unit/resync/link_spec.rb +93 -0
- data/spec/unit/resync/metadata_spec.rb +169 -0
- data/spec/unit/resync/resource_dump_manifest_spec.rb +59 -0
- data/spec/unit/resync/resource_dump_spec.rb +62 -0
- data/spec/unit/resync/resource_list_index_spec.rb +53 -0
- data/spec/unit/resync/resource_list_spec.rb +60 -0
- data/spec/unit/resync/resource_spec.rb +176 -0
- data/spec/unit/resync/shared/augmented_examples.rb +58 -0
- data/spec/unit/resync/shared/base_resource_list_examples.rb +103 -0
- data/spec/unit/resync/shared/descriptor_examples.rb +122 -0
- data/spec/unit/resync/shared/descriptor_spec.rb +33 -0
- data/spec/unit/resync/shared/sorted_list_examples.rb +134 -0
- data/spec/unit/resync/shared/uri_field_examples.rb +36 -0
- data/spec/unit/resync/source_description_spec.rb +55 -0
- data/spec/unit/resync/xml/timenode_spec.rb +48 -0
- data/spec/unit/resync/xml/xml_spec.rb +40 -0
- data/spec/unit/resync/xml_parser_spec.rb +82 -0
- metadata +340 -0
@@ -0,0 +1,112 @@
|
|
1
|
+
require_relative 'shared/descriptor'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
|
6
|
+
# Describes a resource or a whole ResourceSync document; mapped to an
# +md+ element. See section 7,
# {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
# in the ResourceSync specification.
#
# @!attribute [rw] at_time
#   @return [Time] the datetime at which assembling a resource list
#     began (including resource list indices, resource dumps, etc.)
# @!attribute [rw] from_time
#   @return [Time] the beginning of the time range represented by
#     a change list (including change list indices, change dumps, etc.)
# @!attribute [rw] until_time
#   @return [Time] the end of the time range represented by
#     a change list (including change list indices, change dumps, etc.)
# @!attribute [rw] completed_time
#   @return [Time] the datetime at which assembling a resource list
#     ended (including resource list indices, resource dumps, etc.)
# @!attribute [rw] change
#   @return [Change] the type of change to a resource reported in
#     a change list (including change list indices, change dumps, etc.)
# @!attribute [rw] capability
#   @return [String] identifies the type of a ResourceSync document.
class Metadata < Descriptor
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Attributes

  root_element_name 'md'

  time_node :at_time, '@at', default_value: nil
  time_node :from_time, '@from', default_value: nil
  time_node :until_time, '@until', default_value: nil
  time_node :completed_time, '@completed', default_value: nil
  change_node :change, '@change', default_value: nil
  text_node :capability, '@capability', default_value: nil

  # ------------------------------------------------------------
  # Initializer

  # Builds a +Metadata+ instance. The descriptor fields (+modified_time+,
  # +length+, +mime_type+, +encoding+, +hashes+, +path+) are handed to the
  # superclass initializer; the remaining fields are assigned through this
  # class's own setters.
  #
  # @param at_time [Time] the datetime at which assembling a resource list
  #   began (including resource list indices, resource dumps, etc.)
  # @param from_time [Time] the beginning of the time range represented by
  #   a change list (including change list indices, change dumps, etc.)
  # @param until_time [Time] the end of the time range represented by
  #   a change list (including change list indices, change dumps, etc.)
  # @param completed_time [Time] the datetime at which assembling a resource list
  #   ended (including resource list indices, resource dumps, etc.)
  # @param modified_time [Time] The date and time when the referenced resource was last modified.
  # @param length [Integer] The content length of the referenced resource.
  # @param mime_type [MIME::Type] The media type of the referenced resource.
  # @param encoding [String] Any content encoding (if any) applied to the data in the
  #   referenced resource (e.g. for compression)
  # @param hashes [Hash<String, String>] Fixity information for the referenced
  #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
  #   to hex-encoded digest values.
  # @param change [Change] the type of change to a resource reported in
  #   a change list (including change list indices, change dumps, etc.)
  # @param capability [String] identifies the type of a ResourceSync document.
  # @param path [String] For +ResourceDumpManifests+ and +ChangeDumpManifests+,
  #   the path to the referenced resource within the dump ZIP file.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    at_time: nil,
    from_time: nil,
    until_time: nil,
    completed_time: nil,
    modified_time: nil,

    length: nil,
    mime_type: nil,
    encoding: nil,
    hashes: {},

    change: nil,
    capability: nil,
    path: nil
  )
    super(
      modified_time: modified_time,
      length: length,
      mime_type: mime_type,
      encoding: encoding,
      hashes: hashes,
      path: path
    )

    self.at_time = at_time
    self.from_time = from_time
    self.until_time = until_time
    self.completed_time = completed_time

    self.change = change
    self.capability = capability
  end

  # ------------------------------------------------------------
  # Custom setters

  # Defines +at_time=+, +from_time=+, +until_time=+ and +completed_time=+.
  # Each one passes the value through the inherited +time_or_nil+ check
  # before storing it in the matching instance variable.
  %i(at_time from_time until_time completed_time).each do |attr|
    define_method(:"#{attr}=") do |value|
      instance_variable_set(:"@#{attr}", time_or_nil(value))
    end
  end
end
|
112
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative 'shared/augmented'
|
2
|
+
require_relative 'xml'
|
3
|
+
require_relative 'metadata'
|
4
|
+
|
5
|
+
module Resync
|
6
|
+
# A single resource entry (i.e., a +<url>+ or +<sitemap>+ element). See section 7,
# {http://www.openarchives.org/rs/1.0/resourcesync#DocumentFormats Sitemap Document Formats},
# in the ResourceSync specification.
class Resource < Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Attributes

  root_element_name 'url'

  uri_node :uri, 'loc', default_value: nil
  time_node :modified_time, 'lastmod', default_value: nil
  changefreq_node :changefreq, 'changefreq', default_value: nil
  numeric_node :priority, 'priority', default_value: nil

  # ------------------------------------------------------------
  # Initializer

  # Builds a +Resource+. Links and metadata are handed to the superclass
  # initializer; the remaining fields are assigned through their setters.
  #
  # @param uri the location of the resource (required); it is passed
  #   through +XML.to_uri+ before being stored.
  # @param modified_time [Time] The date and time when the referenced resource was last modified.
  # @param changefreq [ChangeFrequency] how frequently the referenced resource is likely to change.
  # @param priority [Number] the priority of this resource relative to other resources from the
  #   same provider. Allows robots to decide which resources to crawl or harvest.
  #   Values should be in the range 0-1.0 (inclusive), where 0 is the lowest priority
  #   and 1.0 is the highest.
  # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
  # @param metadata [Metadata] metadata about this resource.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    uri:,
    modified_time: nil,
    changefreq: nil,
    priority: nil,
    links: [],
    metadata: nil
  )
    super(links: links, metadata: metadata)

    self.uri = uri
    self.modified_time = modified_time
    self.changefreq = changefreq
    self.priority = priority
  end

  # ------------------------------------------------------------
  # Custom setters

  # Stores the resource location after converting it with +XML.to_uri+.
  def uri=(value)
    @uri = XML.to_uri(value)
  end

  # ------------------------------------------------------------
  # Public methods

  # @return the +capability+ of this resource's metadata, or +nil+
  #   when the resource has no metadata.
  def capability
    metadata.capability if metadata
  end

  # ------------------------------------------------------------
  # Overrides

  # Lists this class's own mapping nodes ahead of the superclass's nodes,
  # because the ResourceSync schema requires '##other' elements to appear last.
  def self.all_xml_mapping_nodes(options = { mapping: nil, create: true })
    own_nodes = xml_mapping_nodes(options)
    inherited_nodes = superclass.all_xml_mapping_nodes(options)
    own_nodes + inherited_nodes
  end
end
|
72
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource dump. See section 11.1,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDump Resource Dump}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceDump < BaseResourceList
  include ::XML::Mapping

  # Capability string identifying this document type; +BaseResourceList+
  # checks metadata against it.
  CAPABILITY = 'resourcedump'
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource dump. See section 11.2,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceDumpManifest Resource Dump Manifest}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceDumpManifest < BaseResourceList
  include ::XML::Mapping

  # Capability string identifying this document type; +BaseResourceList+
  # checks metadata against it.
  CAPABILITY = 'resourcedump-manifest'
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource list. See section 10.1,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceList Resource List}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceList < BaseResourceList
  include ::XML::Mapping

  # Capability string identifying this document type; +BaseResourceList+
  # checks metadata against it.
  CAPABILITY = 'resourcelist'
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
require_relative 'shared/sitemap_index'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# A resource list index. See section 10.2,
|
6
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#ResourceListIndex Resource List Index}",
|
7
|
+
# in the ResourceSync specification.
|
8
|
+
class ResourceListIndex < BaseResourceList
  include ::XML::Mapping
  # Mixing in SitemapIndex switches reading and writing to the
  # +:sitemapindex+ mapping.
  include SitemapIndex

  # Capability string identifying this document type; it is the same
  # value used by +ResourceList+.
  CAPABILITY = 'resourcelist'
end
|
15
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative '../link'
|
2
|
+
require_relative '../metadata'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
|
6
|
+
# Base class for elements augmented with ResourceSync-specific child elements.
#
# @!attribute [rw] links
#   @return [Array<Link>] related links.
# @!attribute [rw] metadata
#   @return [Metadata] metadata about this object.
class Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Class methods

  # ResourceSync-specific tags needing to be prefixed with +rs+ on output.
  # A frozen array is used rather than a +Set+ so that this file does not
  # depend on 'set' having been loaded; only +include?+ is ever called on it.
  RS_TAGS = %w(ln md).freeze
  private_constant :RS_TAGS

  # Writer hook for the +ln+ / +md+ nodes: runs the default writer, then
  # renames any child element of +xml+ whose name is in +RS_TAGS+ so that
  # it carries the +rs:+ prefix.
  #
  # @param obj the object being written.
  # @param xml the element being written into; its child elements are renamed in place.
  # @param default_obj_to_xml a callable invoked as +call(obj, xml)+ to do the default write.
  def self.prefix_rewriter(obj, xml, default_obj_to_xml)
    default_obj_to_xml.call(obj, xml)
    xml.each_element do |e|
      e.name = "rs:#{e.name}" if RS_TAGS.include?(e.name)
    end
  end
  private_class_method :prefix_rewriter

  # ------------------------------------------------------------
  # Attributes

  array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter)
  object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter)

  # The same two nodes are declared again for the +:sitemapindex+ mapping,
  # with their contents using the +:_default+ mapping.
  use_mapping :sitemapindex
  array_node :links, 'ln', class: Link, default_value: [], writer: method(:prefix_rewriter), sub_mapping: :_default
  object_node :metadata, 'md', class: Metadata, default_value: nil, writer: method(:prefix_rewriter), sub_mapping: :_default

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +Augmented+ instance with the specified links and metadata.
  #
  # @param links [Array<Link>] related links (i.e. +<rs:ln>+).
  # @param metadata [Metadata] metadata about this resource.
  def initialize(links: [], metadata: nil)
    self.links = links
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom accessors

  # Sets the +links+ list. +nil+ is treated as an empty list.
  def links=(value)
    @links = value || []
  end

  # Finds links with the specified relation.
  # @param rel [String] the relation.
  # @return [Array<Link>] those links having that relation, or an empty array if none exist.
  def links_for(rel:)
    links.select { |l| l.rel == rel }
  end

  # Shortcut to find the first link with the specified relation (in ResourceSync there often
  # should be only one link with a particular relation)
  # @param rel [String] the relation.
  # @return [Link] the first link having that relation, or nil if none exists.
  def link_for(rel:)
    links.find { |l| l.rel == rel }
  end
end
|
76
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require_relative 'augmented'
|
2
|
+
require_relative '../resource'
|
3
|
+
require_relative '../metadata'
|
4
|
+
|
5
|
+
module Resync
|
6
|
+
# Base class for root elements containing a list of resources
# (i.e., +<urlset>+ and +<sitemapindex>+ elements). Subclasses
# must define a +CAPABILITY+ constant identifying the capability
# they represent (e.g. +resourcelist+, +changelist+).
#
# @!attribute [rw] resources
#   @return [Array<Resource>] the +<url>+ or +<sitemap>+ elements contained in this list.
class BaseResourceList < Augmented
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Attributes

  root_element_name 'urlset'
  array_node :resources, 'url', class: Resource, default_value: []

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +BaseResourceList+.
  #
  # @param resources [Array<Resource>] The +<url>+ or +<sitemap>+ elements contained in this list.
  # @param links [Array<Link>] Related links (+<rs:ln>+).
  # @param metadata [Metadata] Metadata about this list. The +capability+ of the metadata must match this
  #   implementation class' +CAPABILITY+ constant.
  # @raise [ArgumentError] if the specified metadata does not have the correct +capability+ attribute for this list type.
  def initialize(resources: [], links: [], metadata: nil)
    super(links: links)
    self.resources = resources
    self.metadata = metadata
  end

  # ------------------------------------------------------------
  # Custom setters

  # Sets the +resources+ list. +nil+ is treated as an empty list.
  def resources=(value)
    @resources = value || []
  end

  # Sets the metadata. A +nil+ value is replaced by a fresh +Metadata+
  # carrying this class's +CAPABILITY+.
  #
  # @raise [ArgumentError] if the specified metadata does not have the correct +capability+ attribute for
  #   this list type.
  def metadata=(value)
    @metadata = metadata_with_correct_capability(value)
  end

  # ------------------------------------------------------------
  # Custom getters

  # @return the +capability+ of this list's metadata.
  def capability
    @metadata.capability
  end

  # Finds resources with the specified capability.
  # @param capability [String] the capability.
  # @return [Array<Resource>] those resources having that capability, or an empty array if none exist.
  def resources_for(capability:)
    resources.select { |l| l.capability == capability }
  end

  # Shortcut to find the first resource with the specified capability (in ResourceSync there often
  # should be only one resource with a particular capability)
  # @param capability [String] the capability.
  # @return [Resource] the first resource having that capability, or nil if none exists.
  def resource_for(capability:)
    resources.find { |l| l.capability == capability }
  end

  # ------------------------------------------------------------
  # Overrides

  # Overrides +::XML::Mapping.pre_save+ to declare the Sitemap and ResourceSync namespaces.
  # Used for writing.
  def pre_save(options = { mapping: :_default })
    xml = super(options)
    xml.add_namespace('http://www.sitemaps.org/schemas/sitemap/0.9')
    xml.add_namespace('rs', 'http://www.openarchives.org/rs/terms/')
    xml
  end

  # Initializes the +:_default+ and +:sitemapindex+ mappings on all subclasses, and sets the corresponding
  # root element names (+<urlset>+ and +<sitemapindex>+)
  def self.inherited(base)
    base.use_mapping :_default
    base.root_element_name 'urlset'
    base.use_mapping :sitemapindex
    base.root_element_name 'sitemapindex'
  end

  # ------------------------------------------------------------
  # Private methods

  private

  # ------------------------------
  # Parameter validators

  # Validates the +capability+ attribute in the specified metadata.
  #
  # @param metadata [Metadata, nil] the metadata to validate.
  # @return [Metadata] the given metadata, or a new +Metadata+ with this class's
  #   +CAPABILITY+ when +metadata+ is +nil+.
  # @raise [ArgumentError] if the class has no (or a +nil+) +CAPABILITY+ constant, if +metadata+
  #   does not respond to +capability+, or if the specified metadata does not have the correct
  #   +capability+ attribute for this list type.
  def metadata_with_correct_capability(metadata)
    # Probe for the constant first: referencing a missing constant directly
    # raises NameError, which would bypass the ArgumentError below.
    capability = self.class.const_defined?(:CAPABILITY) ? self.class::CAPABILITY : nil
    fail ArgumentError, "Missing constant #{self.class}::CAPABILITY" unless capability
    return Metadata.new(capability: capability) unless metadata
    fail ArgumentError, "#{metadata} does not appear to be metadata" unless metadata.respond_to?('capability')
    fail ArgumentError, "Wrong capability for #{self.class.name} metadata; expected '#{capability}', was '#{metadata.capability}'" unless metadata.capability == capability
    metadata
  end
end
|
117
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'mime/types'
|
2
|
+
require_relative '../xml'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# Base class for ResourceSync-specific elements describing a
# resource or link.
#
# @!attribute [rw] modified_time
#   @return [Time] the date and time when the referenced resource was last modified.
# @!attribute [rw] length
#   @return [Integer] the content length of the referenced resource.
# @!attribute [rw] mime_type
#   @return [MIME::Type] the media type of the referenced resource.
# @!attribute [rw] encoding
#   @return [String] the content encoding (if any) applied to the data in the
#     referenced resource (e.g. for compression)
# @!attribute [rw] hashes
#   @return [Hash<String, String>] fixity information for the referenced
#     resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
#     to hex-encoded digest values.
# @!attribute [rw] path
#   @return [String] for +ResourceDumpManifests+ and +ChangeDumpManifests+,
#     the path to the referenced resource within the dump ZIP file.
class Descriptor
  include ::XML::Mapping

  # ------------------------------------------------------------
  # Attributes

  time_node :modified_time, '@modified', default_value: nil
  numeric_node :length, '@length', default_value: nil
  mime_type_node :mime_type, '@type', default_value: nil
  text_node :encoding, '@encoding', default_value: nil
  hash_codes_node :hashes, '@hash', default_value: nil
  text_node :path, '@path', default_value: nil

  # ------------------------------------------------------------
  # Initializer

  # Creates a new +Descriptor+ instance with the specified fields.
  #
  # @param modified_time [Time] The date and time when the referenced resource was last modified.
  # @param length [Integer] The content length of the referenced resource.
  # @param mime_type [MIME::Type] The media type of the referenced resource.
  # @param encoding [String] Any content encoding (if any) applied to the data in the
  #   referenced resource (e.g. for compression)
  # @param hashes [Hash<String, String>] Fixity information for the referenced
  #   resource, as a map from hash algorithm tokens (e.g. +md5+, +sha-256+)
  #   to hex-encoded digest values.
  # @param path [String] For +ResourceDumpManifests+ and +ChangeDumpManifests+,
  #   the path to the referenced resource within the dump ZIP file.
  # @raise [ArgumentError] if +modified_time+ is not a +Time+ or +length+ is not a
  #   non-negative integer.
  def initialize( # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
    modified_time: nil,
    length: nil,
    mime_type: nil,
    encoding: nil,
    hashes: nil,
    path: nil
  )
    self.modified_time = modified_time
    self.length = length
    self.mime_type = mime_type
    self.encoding = encoding
    self.hashes = hashes
    self.path = path
  end

  # ------------------------------------------------------------
  # Custom setters

  # @raise [ArgumentError] if +value+ is neither +nil+ nor a +Time+.
  def modified_time=(value)
    @modified_time = time_or_nil(value)
  end

  # @raise [ArgumentError] if +value+ is neither +nil+ nor a non-negative +Integer+.
  def length=(value)
    @length = natural_number_or_nil(value)
  end

  # Accepts a +MIME::Type+, or a value that is looked up in / converted to one.
  def mime_type=(value)
    @mime_type = mime_type_or_nil(value)
  end

  # Accepts a +Hash+, a whitespace-separated string of +algorithm:digest+
  # pairs, or +nil+ (stored as an empty hash).
  def hashes=(value)
    @hashes = Descriptor.hash_of_hashcodes(value)
  end

  # ------------------------------------------------------------
  # Public methods

  # Gets the hash value for the specified algorithm.
  #
  # This method shares its name with +Object#hash+, which Ruby calls with no
  # arguments (e.g. when an object is used as a +Hash+ key). When no algorithm
  # (or +nil+) is given, the call is therefore forwarded to the inherited
  # implementation instead of raising +ArgumentError+.
  #
  # @param algorithm [String, nil] The token (e.g. +md5+, +sha-256+) for the hash algorithm.
  # @return [String, Integer] The hex-encoded digest value (+nil+ if there is none for
  #   that algorithm), or the inherited object hash code when +algorithm+ is omitted.
  def hash(algorithm = nil)
    return super() if algorithm.nil?
    hashes[algorithm]
  end

  # ------------------------------------------------------------
  # Private methods

  private

  # ------------------------------
  # Parameter validators

  # @return the given value unchanged (+nil+ is allowed).
  # @raise [ArgumentError] if +time+ is non-nil and not a +Time+.
  def time_or_nil(time)
    fail ArgumentError, "time #{time} is not a Time" if time && !time.is_a?(Time)
    time
  end

  # @return the given value unchanged (+nil+ is allowed).
  # @raise [ArgumentError] if +value+ is non-nil and not a non-negative +Integer+.
  def natural_number_or_nil(value)
    fail ArgumentError, "value #{value} must be a non-negative integer" if value && (!value.is_a?(Integer) || value < 0)
    value
  end

  # Converts the argument to a +MIME::Type+: +nil+ and +MIME::Type+ instances
  # pass through; anything else is looked up in +MIME::Types+, and if nothing
  # matches a new +MIME::Type+ is constructed from it.
  def mime_type_or_nil(mime_type)
    return nil unless mime_type
    return mime_type if mime_type.is_a?(MIME::Type)

    mt = MIME::Types[mime_type].first
    return mt if mt

    MIME::Type.new(mime_type)
  end

  # ------------------------------
  # Conversions

  # Converts the argument to a hash of algorithm tokens to digest values.
  # (Defined with +def self.+, so the +private+ marker above does not apply.)
  #
  # @param hashes [Hash, String, nil] a ready-made hash (returned as-is), a
  #   whitespace-separated list of +algorithm:digest+ pairs, or +nil+.
  # @return [Hash<String, String>] the parsed map; empty when +hashes+ is +nil+.
  def self.hash_of_hashcodes(hashes)
    return {} unless hashes
    return hashes if hashes.is_a?(Hash)
    # Drop the empty token produced by leading whitespace, and split each
    # pair on the first colon only.
    pairs = hashes.split(/[[:space:]]+/).reject(&:empty?)
    pairs.map { |pair| pair.split(':', 2) }.to_h
  end
end
|
135
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'xml/mapping'
|
2
|
+
require_relative '../resource'
|
3
|
+
|
4
|
+
module Resync
|
5
|
+
# Mixin for list types that are read and written as +<sitemapindex>+
# documents whose entries are +<sitemap>+ elements.
module SitemapIndex
  include ::XML::Mapping

  # Hook run when this module is included: adds +ClassMethods+ to the
  # including class and, under the +:sitemapindex+ mapping, sets its root
  # element name and declares its +resources+ node.
  def self.included(base)
    base.extend(ClassMethods)

    base.class_eval do
      use_mapping :sitemapindex
      root_element_name 'sitemapindex'
      array_node :resources, 'sitemap', class: Resource, default_value: [], sub_mapping: :_default
    end
  end

  # Ensures that an index is always written as a +<sitemapindex>+.
  # Overrides +::XML::Mapping.save_to_xml+; any +:mapping+ option supplied
  # by the caller is replaced with +:sitemapindex+.
  def save_to_xml(options = { mapping: :_default })
    super(options.merge(mapping: :sitemapindex))
  end

  # Ensures that an index is always read as a +<sitemapindex>+.
  # Overrides +::XML::Mapping::ClassMethods.load_from_xml+.
  module ClassMethods
    # Loads from +xml+, forcing the +:sitemapindex+ mapping regardless of
    # the +:mapping+ option supplied by the caller.
    def load_from_xml(xml, options = { mapping: :_default })
      super(xml, options.merge(mapping: :sitemapindex))
    end
  end
end
|
32
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require_relative 'base_resource_list'
|
2
|
+
|
3
|
+
module Resync
|
4
|
+
# An extension to +BaseResourceList+ for resource lists that
# should be sorted by modification time.
class SortedResourceList < BaseResourceList

  # ------------------------------------------------------------
  # Custom setters

  # Sets the +resources+ list, sorting the resources by modification
  # time. (+nil+ is treated as an empty list.) Resources without
  # modification times will be sorted to the end. Also rebuilds the
  # by-URI index used by +latest_for+ and +all_uris+.
  def resources=(value)
    @resources = sorted(value)
    @resources_by_uri = by_uri(@resources)
  end

  # ------------------------------------------------------------
  # Custom accessors

  # @return [Hash] the resources grouped by URI; each value is an array
  #   in the same order as the sorted +resources+ list.
  attr_reader :resources_by_uri

  # Finds the last resource in sorted order for the given URI.
  #
  # @param uri the URI to look up; it is passed through +XML.to_uri+ first.
  # @return [Resource, nil] the last resource with that URI, or +nil+
  #   if the list contains no resource with that URI.
  def latest_for(uri:)
    uri = XML.to_uri(uri)
    matches = @resources_by_uri[uri]
    matches.last if matches
  end

  # @return [Array] every distinct URI in this list.
  def all_uris
    @resources_by_uri.keys
  end

  # ------------------------------------------------------------
  # Private methods

  private

  # ------------------------------
  # Conversions

  # Returns a new array ordered by ascending +modified_time+, with
  # resources lacking a modification time placed last.
  #
  # @param value [Array<Resource>, nil] the resources to sort.
  # @return [Array<Resource>] the sorted copy, or an empty array for +nil+.
  def sorted(value)
    return [] unless value
    value.sort do |left, right|
      left_time = left.modified_time
      right_time = right.modified_time
      if left_time && right_time
        left_time <=> right_time
      elsif left_time
        -1 # only the right one lacks a time, so it goes after
      elsif right_time
        1 # only the left one lacks a time, so it goes after
      else
        0 # neither has a time: they compare equal, keeping the ordering consistent
      end
    end
  end

  # Groups resources by their +uri+, preserving the incoming order
  # within each group.
  #
  # @param resources [Array<Resource>] the (already sorted) resources.
  # @return [Hash] a map from URI to the array of resources with that URI.
  def by_uri(resources)
    resources.group_by(&:uri)
  end
end
|
60
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require_relative 'shared/base_resource_list'
|
2
|
+
|
3
|
+
module Resync
|
4
|
+
# A change list. See section 8,
|
5
|
+
# "{http://www.openarchives.org/rs/1.0/resourcesync#SourceDesc Describing the Source}",
|
6
|
+
# in the ResourceSync specification.
|
7
|
+
class SourceDescription < BaseResourceList
  include ::XML::Mapping

  # Capability string identifying this document type; +BaseResourceList+
  # checks metadata against it.
  CAPABILITY = 'description'
end
|
14
|
+
end
|