rof 1.0.7 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +9 -7
- data/LICENSE +201 -16
- data/Rakefile +46 -0
- data/bin/csv_to_rof +1 -2
- data/bin/fedora_to_rof +7 -1
- data/bin/jsonld_to_rof +26 -0
- data/bin/osf_to_rof +6 -2
- data/bin/rof +5 -19
- data/lib/rof.rb +2 -6
- data/lib/rof/access.rb +1 -1
- data/lib/rof/cli.rb +104 -67
- data/lib/rof/compare_rof.rb +68 -39
- data/lib/rof/filter.rb +21 -0
- data/lib/rof/filters.rb +38 -0
- data/lib/rof/filters/bendo.rb +15 -17
- data/lib/rof/filters/date_stamp.rb +5 -4
- data/lib/rof/filters/file_to_url.rb +5 -3
- data/lib/rof/filters/label.rb +9 -7
- data/lib/rof/filters/work.rb +7 -5
- data/lib/rof/ingest.rb +5 -0
- data/lib/rof/osf_context.rb +2 -2
- data/lib/rof/rdf_context.rb +2 -0
- data/lib/rof/translator.rb +18 -0
- data/lib/rof/translators.rb +23 -0
- data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
- data/lib/rof/translators/fedora_to_rof.rb +244 -0
- data/lib/rof/translators/jsonld_to_rof.rb +112 -0
- data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
- data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
- data/lib/rof/translators/osf_to_rof.rb +191 -0
- data/lib/rof/utility.rb +44 -1
- data/lib/rof/version.rb +1 -1
- data/rof.gemspec +10 -2
- data/spec/coverage_helper.rb +17 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
- data/spec/fixtures/osf/phz6b.tar.gz +0 -0
- data/spec/lib/rof/access_spec.rb +30 -23
- data/spec/lib/rof/cli_spec.rb +83 -60
- data/spec/lib/rof/compare_rof_spec.rb +35 -24
- data/spec/lib/rof/filter_spec.rb +10 -0
- data/spec/lib/rof/filters/bendo_spec.rb +42 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
- data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
- data/spec/lib/rof/filters/label_spec.rb +121 -77
- data/spec/lib/rof/filters/work_spec.rb +7 -4
- data/spec/lib/rof/filters_spec.rb +14 -0
- data/spec/lib/rof/translator_spec.rb +15 -0
- data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
- data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
- data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
- data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
- data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
- data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
- data/spec/lib/rof/translators_spec.rb +14 -0
- data/spec/lib/rof/utility_spec.rb +47 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/support/an_rof_filter.rb +10 -0
- metadata +186 -15
- data/lib/rof/get_from_fedora.rb +0 -211
- data/lib/rof/osf_to_rof.rb +0 -123
- data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
require 'active_support/core_ext/array/wrap'
|
|
2
|
+
|
|
3
|
+
module ROF
|
|
4
|
+
module Translators
|
|
5
|
+
module JsonldToRof
|
|
6
|
+
# Responsible for dealing with registered predicates and how those are handled.
|
|
7
|
+
module PredicateHandler
|
|
8
|
+
class UnhandledPredicateError < RuntimeError
|
|
9
|
+
def initialize(predicate, urls)
|
|
10
|
+
super(%(Unable to handle predicate "#{predicate}". The following predicate URLs were registered #{urls.inspect}))
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# @api public
|
|
15
|
+
#
|
|
16
|
+
# Parse the RDF predicate and RDF object and add its contents to the accumulator
|
|
17
|
+
#
|
|
18
|
+
# @example
|
|
19
|
+
# Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
|
|
20
|
+
# The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
|
|
21
|
+
# _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
|
|
22
|
+
# _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
|
|
23
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
|
|
24
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
|
|
25
|
+
# For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
|
|
26
|
+
#
|
|
27
|
+
# @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
|
|
28
|
+
#
|
|
29
|
+
# @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
|
|
30
|
+
# @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
|
|
31
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
|
32
|
+
# @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
|
|
33
|
+
# @raise [ROF::Translators::JsonldToRof::PredicateHandler::UnhandledPredicateError] when we are unable to handle the given predicate
|
|
34
|
+
def self.call(predicate, object, accumulator, blank_node = false)
|
|
35
|
+
handler = registry.handler_for(predicate)
|
|
36
|
+
handler.handle(object, accumulator, blank_node)
|
|
37
|
+
accumulator
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# @api public
|
|
41
|
+
# @param [String] url - The URL that we want to match against
|
|
42
|
+
# @yield The block to configure how we handle RDF Predicates that match the given URL
|
|
43
|
+
# @yieldparam [ROF::Translators::JsonldToRof::PredicateHandler::UrlHandler]
|
|
44
|
+
# @see ./spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb for details and usage
|
|
45
|
+
def self.register(url, &block)
|
|
46
|
+
registry << UrlHandler.new(url, &block)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# @api private
|
|
50
|
+
def self.registry
|
|
51
|
+
@registry ||= RegistrySet.new
|
|
52
|
+
end
|
|
53
|
+
private_class_method :registry
|
|
54
|
+
|
|
55
|
+
def self.clear_registry!(set_with = RegistrySet.new)
|
|
56
|
+
@registry = set_with
|
|
57
|
+
end
|
|
58
|
+
private_class_method :clear_registry!
|
|
59
|
+
|
|
60
|
+
class RegistrySet
|
|
61
|
+
def initialize
|
|
62
|
+
@set = []
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def <<(value)
|
|
66
|
+
@set << value
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Ask each registered handler, in registration order, for a location extractor
# and hand back the first one that is provided.
#
# @param predicate - forwarded unchanged to each handler's #location_extractor_for
# @return the first truthy result of #location_extractor_for
# @raise [UnhandledPredicateError] when no registered handler yields a result
def handler_for(predicate)
  extractor = nil
  # Enumerable#find stops at the first truthy block value, so `extractor`
  # is left holding the result of the last handler that was consulted.
  @set.find { |url_handler| extractor = url_handler.location_extractor_for(predicate) }
  return extractor unless extractor.nil?

  raise UnhandledPredicateError.new(predicate, @set.map(&:url))
end
|
|
78
|
+
end
|
|
79
|
+
private_constant :RegistrySet
|
|
80
|
+
|
|
81
|
+
# For a given URL map all of the predicates; Some predicates require explicit mapping, while others
|
|
82
|
+
# may use implicit mapping.
|
|
83
|
+
class UrlHandler
|
|
84
|
+
def initialize(url)
|
|
85
|
+
@url = url
|
|
86
|
+
@within = []
|
|
87
|
+
@namespace_prefix = ''
|
|
88
|
+
@slug_handlers = {}
|
|
89
|
+
yield(self) if block_given?
|
|
90
|
+
end
|
|
91
|
+
attr_reader :url
|
|
92
|
+
|
|
93
|
+
# The final key in the location array should be prefixed with the namespace_prefix; By default this is ""
|
|
94
|
+
# @param [String, nil] prefix - what is the namespace prefix to apply to the last location in the array.
|
|
95
|
+
# @return [String]
|
|
96
|
+
def namespace_prefix(prefix = nil)
|
|
97
|
+
return @namespace_prefix if prefix.nil?
|
|
98
|
+
@namespace_prefix = prefix
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Prepend the within array to the location array
|
|
102
|
+
# @param [Array<String>, nil] location - where in the ROF document are we putting the value
|
|
103
|
+
# @return [Array<String>]
|
|
104
|
+
def within(location = nil)
|
|
105
|
+
return @within if location.nil?
|
|
106
|
+
@within = Array.wrap(location)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# @param [#to_s] predicate
|
|
110
|
+
# @return [nil, LocationExtractor] if the given predicate does not match the url, return nil; Otherwise return a LocationExtractor
|
|
111
|
+
# @see LocationExtractor
|
|
112
|
+
# Build a LocationExtractor when the predicate begins with this handler's URL.
# Whatever follows the URL (up to the end of the line) is treated as the slug
# and is used to look up the handlers.
#
# @param [#to_s] predicate
# @return [nil, LocationExtractor] nil when the predicate does not start with the url
def location_extractor_for(predicate)
  # \A rather than ^: ^ also matches after any newline, which would accept a
  # predicate that merely contains the URL on a later line.
  match = /\A#{Regexp.escape(@url)}(.*)/.match(predicate.to_s)
  return nil unless match

  LocationExtractor.new(predicate, handlers_for(match[1]))
end
|
|
118
|
+
|
|
119
|
+
private
|
|
120
|
+
|
|
121
|
+
# @param [String] slug - a slug that may or may not have been registered
|
|
122
|
+
# @return [Array<#call>] an array of handlers that each respond to #call
|
|
123
|
+
# @see ImplicitLocationHandler
|
|
124
|
+
# @see ExplicitLocationSlugHandler
|
|
125
|
+
# @see BlockSlugHandler
|
|
126
|
+
def handlers_for(slug)
|
|
127
|
+
Array.wrap(@slug_handlers.fetch(slug) { ImplicitLocationHandler.new(self, slug) })
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
public
|
|
131
|
+
|
|
132
|
+
# @param [String] slug - the part of the predicate that follows the registered URL
|
|
133
|
+
# @param [Hash] options (with symbol keys)
|
|
134
|
+
# @option options [Boolean] :force - don't apply the within nor namespace prefix
|
|
135
|
+
# @option options [Array] :to - an array that will be nested Hash keys
|
|
136
|
+
# @yield If a block is given, call the block (and skip all other configuration)
|
|
137
|
+
# @yieldparam [String] object
|
|
138
|
+
# @see BlockSlugHandler for details concerning a mapping via a block
|
|
139
|
+
# @see ExplicitLocationSlugHandler for details concerning a mapping via a to: option
|
|
140
|
+
# Register one more handler for the given slug. A block produces a
# BlockSlugHandler; otherwise the options produce an ExplicitLocationSlugHandler.
# Handlers for the same slug accumulate in registration order.
#
# @param [String] slug
# @param [Hash] options - passed through to the handler that is built
# @return [Array] every handler registered so far for the slug
def map(slug, options = {}, &block)
  @slug_handlers ||= {}
  registered = (@slug_handlers[slug] ||= [])
  handler =
    if block_given?
      BlockSlugHandler.new(self, slug, options, block)
    else
      ExplicitLocationSlugHandler.new(self, slug, options)
    end
  registered << handler
end
|
|
149
|
+
|
|
150
|
+
# Responsible for coordinating the handlers that add a predicate's object to the accumulator
|
|
151
|
+
class LocationExtractor
|
|
152
|
+
def initialize(predicate, handlers)
|
|
153
|
+
@predicate = predicate
|
|
154
|
+
@handlers = Array.wrap(handlers)
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def handle(object, accumulator, blank_node)
|
|
158
|
+
@handlers.each do |handler|
|
|
159
|
+
handler.call(object, accumulator, blank_node)
|
|
160
|
+
end
|
|
161
|
+
accumulator
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
private_constant :LocationExtractor
|
|
165
|
+
|
|
166
|
+
class ImplicitLocationHandler
|
|
167
|
+
def initialize(url_handler, slug)
|
|
168
|
+
@url_handler = url_handler
|
|
169
|
+
@slug = slug
|
|
170
|
+
end
|
|
171
|
+
attr_reader :slug
|
|
172
|
+
def call(object, accumulator, blank_node)
|
|
173
|
+
to = @url_handler.within + Array.wrap(slug)
|
|
174
|
+
to[-1] = "#{@url_handler.namespace_prefix}#{to[-1]}"
|
|
175
|
+
accumulator.add_predicate_location_and_value(to, object, blank_node)
|
|
176
|
+
end
|
|
177
|
+
end
|
|
178
|
+
private_constant :ImplicitLocationHandler
|
|
179
|
+
|
|
180
|
+
class BlockSlugHandler
|
|
181
|
+
def initialize(url_handler, slug, options, block)
|
|
182
|
+
@url_handler = url_handler
|
|
183
|
+
@slug = slug
|
|
184
|
+
@options = options
|
|
185
|
+
@block = block
|
|
186
|
+
end
|
|
187
|
+
attr_reader :slug
|
|
188
|
+
|
|
189
|
+
# @todo Are there differences that need to be handled for the blank_node?
|
|
190
|
+
def call(object, accumulator, _blank_node)
|
|
191
|
+
@block.call(object, accumulator)
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
private_constant :BlockSlugHandler
|
|
195
|
+
|
|
196
|
+
class ExplicitLocationSlugHandler
|
|
197
|
+
def initialize(url_handler, slug, options)
|
|
198
|
+
@url_handler = url_handler
|
|
199
|
+
@slug = slug
|
|
200
|
+
@options = options
|
|
201
|
+
end
|
|
202
|
+
attr_reader :slug
|
|
203
|
+
|
|
204
|
+
def call(object, accumulator, blank_node)
|
|
205
|
+
to = @options.fetch(:to)
|
|
206
|
+
unless force?
|
|
207
|
+
to = @url_handler.within + Array.wrap(to)
|
|
208
|
+
to[-1] = "#{@url_handler.namespace_prefix}#{to[-1]}"
|
|
209
|
+
end
|
|
210
|
+
accumulator.add_predicate_location_and_value(to, object, blank_node)
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
def force?
|
|
214
|
+
@options.fetch(:force, false)
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
private_constant :ExplicitLocationSlugHandler
|
|
218
|
+
end
|
|
219
|
+
private_constant :UrlHandler
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
require 'rdf'
|
|
2
|
+
require 'rof/translators/jsonld_to_rof/predicate_handler'
|
|
3
|
+
|
|
4
|
+
module ROF
|
|
5
|
+
module Translators
|
|
6
|
+
module JsonldToRof
|
|
7
|
+
# We need to handle the Predicate / Object pair as one (thank you RDF blank nodes for this nuance)
|
|
8
|
+
module PredicateObjectHandler
|
|
9
|
+
# @api public
|
|
10
|
+
#
|
|
11
|
+
# Parse the RDF::Predicate and RDF::Object and add the relevant data to the accumulator
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# Given the following 4 RDF N-Triples (subject, predicate, object). The first and second RDF objects are RDF::Literal. The 3rd triple's object is
|
|
15
|
+
# an RDF::Node. And the last is an RDF::URI. Each requires a different handler as they have nuanced differences.
|
|
16
|
+
# _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
|
|
17
|
+
# _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
|
|
18
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
|
|
19
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
|
|
20
|
+
#
|
|
21
|
+
# @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
|
|
22
|
+
#
|
|
23
|
+
# @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
|
|
24
|
+
# @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
|
|
25
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
|
26
|
+
# @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
|
|
27
|
+
# @raise [ROF::Translators::JsonldToRof::PredicateObjectHandler::UnknownRdfObjectTypeError] when the RDF object is not a supported type
|
|
28
|
+
def self.call(predicate, object, accumulator, options = {})
|
|
29
|
+
new(predicate, object, accumulator, options).call
|
|
30
|
+
accumulator
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# @api private
|
|
34
|
+
#
|
|
35
|
+
# @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
|
|
36
|
+
# @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
|
|
37
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
|
38
|
+
# @return [#call]
|
|
39
|
+
def self.new(predicate, object, accumulator, options)
|
|
40
|
+
klass_for(object).new(predicate, object, accumulator, options)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
class UnknownRdfObjectTypeError < RuntimeError
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# @api private
|
|
47
|
+
def self.klass_for(object)
|
|
48
|
+
case object
|
|
49
|
+
when RDF::URI
|
|
50
|
+
UriPredicateObjectHandler
|
|
51
|
+
when RDF::Node
|
|
52
|
+
NodePredicateObjectHandler
|
|
53
|
+
when RDF::Literal
|
|
54
|
+
LiteralPredicateObjectHandler
|
|
55
|
+
else
|
|
56
|
+
raise UnknownRdfObjectTypeError, "Unable to determine object handler for #{object.inspect}"
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# @api private
|
|
61
|
+
class UriPredicateObjectHandler
|
|
62
|
+
def initialize(predicate, object, accumulator, options)
|
|
63
|
+
@predicate = predicate
|
|
64
|
+
@object = object
|
|
65
|
+
@accumulator = accumulator
|
|
66
|
+
@options = options
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def call
|
|
70
|
+
PredicateHandler.call(predicate, object, accumulator, options[:blank_node])
|
|
71
|
+
accumulator
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
private
|
|
75
|
+
attr_reader :predicate, :object, :accumulator, :options
|
|
76
|
+
end
|
|
77
|
+
private_constant :UriPredicateObjectHandler
|
|
78
|
+
|
|
79
|
+
# @api private
|
|
80
|
+
# Blank Nodes; Oh how we love thee. Let me count the ways
|
|
81
|
+
class NodePredicateObjectHandler
|
|
82
|
+
def initialize(predicate, object, accumulator, options)
|
|
83
|
+
@predicate = predicate
|
|
84
|
+
@object = object
|
|
85
|
+
@accumulator = accumulator
|
|
86
|
+
@options = options
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def call
|
|
90
|
+
blank_node = accumulator.fetch_blank_node(object)
|
|
91
|
+
blank_node.each_pair do |blank_node_predicate, blank_node_objects|
|
|
92
|
+
blank_node_objects.each do |blank_node_object|
|
|
93
|
+
PredicateObjectHandler.call(blank_node_predicate, blank_node_object, accumulator, blank_node: object)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
accumulator
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
private
|
|
100
|
+
attr_reader :predicate, :object, :accumulator, :options
|
|
101
|
+
end
|
|
102
|
+
private_constant :NodePredicateObjectHandler
|
|
103
|
+
|
|
104
|
+
# @api private
|
|
105
|
+
class LiteralPredicateObjectHandler
|
|
106
|
+
def initialize(predicate, object, accumulator, options)
|
|
107
|
+
@predicate = predicate
|
|
108
|
+
@object = object
|
|
109
|
+
@accumulator = accumulator
|
|
110
|
+
@options = options
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def call
|
|
114
|
+
PredicateHandler.call(predicate, object, accumulator, options[:blank_node])
|
|
115
|
+
accumulator
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
private
|
|
119
|
+
attr_reader :predicate, :object, :accumulator, :options
|
|
120
|
+
end
|
|
121
|
+
private_constant :LiteralPredicateObjectHandler
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
require 'rof/translators/jsonld_to_rof/predicate_object_handler'
|
|
2
|
+
|
|
3
|
+
module ROF
|
|
4
|
+
module Translators
|
|
5
|
+
module JsonldToRof
|
|
6
|
+
# Responsible for parsing an RDF statement and adding to the accumulator.
|
|
7
|
+
module StatementHandler
|
|
8
|
+
# @api public
|
|
9
|
+
#
|
|
10
|
+
# Parse the RDF statement and add its contents to the accumulator
|
|
11
|
+
#
|
|
12
|
+
# @example
|
|
13
|
+
# Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
|
|
14
|
+
# The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
|
|
15
|
+
# _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
|
|
16
|
+
# _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
|
|
17
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
|
|
18
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
|
|
19
|
+
# For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
|
|
20
|
+
#
|
|
21
|
+
# @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
|
|
22
|
+
#
|
|
23
|
+
# @param [RDF::Statement] statement - the RDF statement that we will parse and add to the appropriate spot in the accumulator
|
|
24
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
|
25
|
+
# @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
|
|
26
|
+
# @raise [ROF::Translators::JsonldToRof::StatementHandler::UnhandledRdfSubjectError] when the RDF::Statement's subject is not a valid type
|
|
27
|
+
def self.call(statement, accumulator)
|
|
28
|
+
new(statement, accumulator).call
|
|
29
|
+
accumulator
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
class UnhandledRdfSubjectError < RuntimeError
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# @api private
|
|
36
|
+
def self.new(statement, accumulator)
|
|
37
|
+
case statement.subject
|
|
38
|
+
when RDF::URI
|
|
39
|
+
UriSubjectHandler.new(statement, accumulator)
|
|
40
|
+
when RDF::Node
|
|
41
|
+
BlankNodeHandler.new(statement, accumulator)
|
|
42
|
+
else
|
|
43
|
+
raise UnhandledRdfSubjectError, "Unable to determine subject handler for #{statement.inspect}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Responsible for accumulating the ROF data for a URI based resource
|
|
48
|
+
class UriSubjectHandler
|
|
49
|
+
def initialize(statement, accumulator)
|
|
50
|
+
@accumulator = accumulator
|
|
51
|
+
@statement = statement
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def call
|
|
55
|
+
handle_subject
|
|
56
|
+
handle_predicate_and_object
|
|
57
|
+
@accumulator
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
def handle_predicate_and_object
|
|
63
|
+
PredicateObjectHandler.call(@statement.predicate, @statement.object, @accumulator)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Derive the "und:"-prefixed pid from a CurateND show URL and record it on
# the accumulator. Subjects that are not CurateND show URLs are ignored.
#
# @return [nil, Object] nil when the subject is not a show URL; otherwise
#   whatever the accumulator's #add_pid returns
def handle_subject
  # [^/]+ captures only the identifier segment. The previous class, [^\\]+,
  # excluded backslashes instead of slashes, so a trailing "/" (or any deeper
  # path) leaked into the pid. The dots in the host are escaped so they match
  # literally.
  match = %r{https://curate\.nd\.edu/show/([^/]+)}.match(@statement.subject.to_s)
  return nil unless match

  @accumulator.add_pid("und:#{match[1]}")
end
|
|
71
|
+
end
|
|
72
|
+
private_constant :UriSubjectHandler
|
|
73
|
+
|
|
74
|
+
# Responsible for handling blank nodes in the RDF graph; Examples include ETD degree information
|
|
75
|
+
# Blank node subjects behave differently from UriSubjectHandler
|
|
76
|
+
class BlankNodeHandler
|
|
77
|
+
def initialize(statement, accumulator)
|
|
78
|
+
@accumulator = accumulator
|
|
79
|
+
@statement = statement
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def call
|
|
83
|
+
@accumulator.add_blank_node(@statement)
|
|
84
|
+
@accumulator
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
private_constant :BlankNodeHandler
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'zlib'
|
|
3
|
+
require 'rubygems/package'
|
|
4
|
+
require 'rdf/turtle'
|
|
5
|
+
require 'rof/osf_context'
|
|
6
|
+
require 'rof/rdf_context'
|
|
7
|
+
require 'rof/utility'
|
|
8
|
+
require 'rof/translator'
|
|
9
|
+
|
|
10
|
+
module ROF::Translators
|
|
11
|
+
# Class for managing OSF Archive data transformations
|
|
12
|
+
# It is called after the get-from-osf task, and before the work-xlat task
|
|
13
|
+
class OsfToRof < ROF::Translator
|
|
14
|
+
# @todo Set this to be something more meaningful than an empty lambda
|
|
15
|
+
# @return [#call]
|
|
16
|
+
# Placeholder finder: a two-argument lambda that ignores both arguments and
# returns nil.
#
# @return [#call]
def self.default_previously_archived_pid_finder
  lambda { |_archive_type, _osf_project_identifier| }
end
|
|
19
|
+
|
|
20
|
+
# Convert Osf Archive tar.gz to ROF
|
|
21
|
+
def self.call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder)
|
|
22
|
+
new(project, config, previously_archived_pid_finder).call
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def initialize(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder)
|
|
26
|
+
@config = config
|
|
27
|
+
@project = project
|
|
28
|
+
@previously_archived_pid_finder = previously_archived_pid_finder
|
|
29
|
+
@osf_map = ROF::OsfToNDMap
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Translate the project into ROF.
#
# With no project there is nothing to translate and an empty Hash is
# returned. Otherwise the "<source_slug>.ttl" data is loaded into @ttl_data
# and a one-element Array holding the archive record is returned.
#
# @return [Hash, Array<Hash>]
def call
  return {} if project.nil?

  @ttl_data = ttl_from_targz(source_slug + '.ttl')
  [build_archive_record]
end
|
|
39
|
+
|
|
40
|
+
# @api private
|
|
41
|
+
# @see https://github.com/ndlib/curate_nd/blob/677c05c836ff913c01dcbbfc5e5d21366b87d500/app/repository_models/osf_archive.rb#L62
|
|
42
|
+
def archive_type
|
|
43
|
+
project.fetch('package_type')
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# @api private
|
|
47
|
+
# This is a bit of a misnomer; As used it represents the path to the project or registration
|
|
48
|
+
# that we have ingested (e.g. https://osf.io/:source_slug)
|
|
49
|
+
#
|
|
50
|
+
# It was previously named :project_identifier in this class, but that gets conflated with the
|
|
51
|
+
# underlying object's osf_project_identifier (e.g. what OSF Project was this archive originally based on)
|
|
52
|
+
#
|
|
53
|
+
# @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L96
|
|
54
|
+
def source_slug
|
|
55
|
+
project.fetch('project_identifier')
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# @api private
|
|
59
|
+
# @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L106
|
|
60
|
+
def osf_project_identifier
|
|
61
|
+
return source_slug if project['package_type'] == 'OSF Project'
|
|
62
|
+
osf_url_from_filename(ttl_data[0][@osf_map['registeredFrom']][0]['@id'])
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
attr_reader :config, :project
|
|
68
|
+
|
|
69
|
+
# A function responsible for finding the previously archived pid.
|
|
70
|
+
# @return [#call]
|
|
71
|
+
# @see #default_previously_archived_pid_finder for interface
|
|
72
|
+
attr_reader :previously_archived_pid_finder
|
|
73
|
+
|
|
74
|
+
# this is an array - the additional elements are the contributor(s)
|
|
75
|
+
# @return [Array<Hash>]
|
|
76
|
+
# @see #ttl_from_targz
|
|
77
|
+
attr_reader :ttl_data
|
|
78
|
+
|
|
79
|
+
# reads a ttl file and makes it a JSON-LD file that we can parse
|
|
80
|
+
def fetch_from_ttl(ttl_file)
|
|
81
|
+
graph = RDF::Turtle::Reader.open(ttl_file, prefixes: ROF::OsfPrefixList.dup)
|
|
82
|
+
JSON::LD::API.fromRdf(graph)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# extracts given ttl file from JHU tar.gz package
|
|
86
|
+
# - assumed to live under data/obj/root
|
|
87
|
+
# @return [Array<Hash>] the first element is the "work" and the additional elements, if any, are the contributor(s)
|
|
88
|
+
def ttl_from_targz(ttl_filename)
|
|
89
|
+
package_dir = config.fetch('package_dir')
|
|
90
|
+
ttl_path = File.join(source_slug, 'data/obj/root', ttl_filename)
|
|
91
|
+
ROF::Utility.file_from_targz(File.join(package_dir, source_slug + '.tar.gz'), ttl_path)
|
|
92
|
+
fetch_from_ttl(File.join(package_dir, ttl_path))
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# Maps RELS-EXT
|
|
96
|
+
def map_rels_ext
|
|
97
|
+
rels_ext = {}
|
|
98
|
+
rels_ext['@context'] = ROF::RelsExtRefContext.dup
|
|
99
|
+
apply_previous_archived_version_if_applicable(rels_ext)
|
|
100
|
+
rels_ext
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# sets metadata
|
|
104
|
+
def map_metadata
|
|
105
|
+
metadata = {}
|
|
106
|
+
metadata['@context'] = ROF::RdfContext.dup
|
|
107
|
+
# metadata derived from project ttl file
|
|
108
|
+
metadata['dc:created'] = Time.iso8601(ttl_data[0][@osf_map['dc:created']][0]['@value']).to_date.iso8601 + 'Z'
|
|
109
|
+
metadata['dc:title'] = ttl_data[0][@osf_map['dc:title']][0]['@value']
|
|
110
|
+
metadata['dc:description'] = ttl_data[0][@osf_map['dc:description']][0]['@value']
|
|
111
|
+
metadata['dc:subject'] = map_subject
|
|
112
|
+
# metadata derived from osf_projects data, passed from UI
|
|
113
|
+
metadata['dc:source'] = 'https://osf.io/' + source_slug
|
|
114
|
+
metadata['dc:creator#adminstrative_unit'] = project['administrative_unit']
|
|
115
|
+
metadata['dc:creator#affiliation'] = project['affiliation']
|
|
116
|
+
metadata['nd:osfProjectIdentifier'] = osf_project_identifier
|
|
117
|
+
metadata['dc:creator'] = map_creator
|
|
118
|
+
metadata['dc:type'] = project['package_type']
|
|
119
|
+
metadata
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# For reference to the assumed RELS-EXT see the following spec in CurateND
|
|
123
|
+
# @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/spec/repository_models/osf_archive_spec.rb#L97
|
|
124
|
+
def apply_previous_archived_version_if_applicable(rels_ext)
|
|
125
|
+
# If a previously archived pid was passed in, use it to set pav:previousVersion
|
|
126
|
+
# If not, check SOLR for one.
|
|
127
|
+
pid = previously_archived_pid_finder.call(archive_type, osf_project_identifier)
|
|
128
|
+
pid = ROF::Utility.check_solr_for_previous(config, osf_project_identifier) if pid.nil?
|
|
129
|
+
rels_ext['pav:previousVersion'] = pid if pid
|
|
130
|
+
rels_ext
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Constructs OsfArchive Record from ttl_data, data from the UI form,
|
|
134
|
+
# and task config data
|
|
135
|
+
def build_archive_record
|
|
136
|
+
this_rof = {}
|
|
137
|
+
this_rof['owner'] = project['owner']
|
|
138
|
+
this_rof['type'] = 'OsfArchive'
|
|
139
|
+
this_rof['rights'] = map_rights
|
|
140
|
+
this_rof['rels-ext'] = map_rels_ext
|
|
141
|
+
this_rof['metadata'] = map_metadata
|
|
142
|
+
this_rof['files'] = [source_slug + '.tar.gz']
|
|
143
|
+
this_rof
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# sets subject
|
|
147
|
+
# Read the subject from the first ttl_data entry.
#
# @return [Object] the '@value' of the first subject entry, or '' when the
#   first ttl_data entry has no subject key
def map_subject
  subject_key = @osf_map['dc:subject']
  work = ttl_data[0]
  work.key?(subject_key) ? work[subject_key][0]['@value'] : ''
end
|
|
153
|
+
|
|
154
|
+
# make osf url from bagfile name
|
|
155
|
+
# Reduce a path-like string to the text between its last '/' and the last
# '.' that follows it (e.g. "a/b/phz6b.ttl" => "phz6b"). When that final
# segment has no '.', the result is an empty string.
#
# @param [String] ttl_file
# @return [String]
def osf_url_from_filename(ttl_file)
  _directory, _slash, basename = ttl_file.rpartition('/')
  stem, _dot, _extension = basename.rpartition('.')
  stem
end
|
|
159
|
+
|
|
160
|
+
# figures out the rights
|
|
161
|
+
# Build the rights Hash from the isPublic value of the first ttl_data entry.
#
# @return [Hash] { 'read-groups' => ['public'] } when the value is the string
#   'true'; otherwise an empty Hash
def map_rights
  publicly_readable = ttl_data[0][@osf_map['isPublic']][0]['@value'] == 'true'
  publicly_readable ? { 'read-groups' => ['public'] } : {}
end
|
|
168
|
+
|
|
169
|
+
# sets the creator- needs to read another ttl for the User data
|
|
170
|
+
# only contributors with isBibliographic true are considered
|
|
171
|
+
# Collect the creators. For each contributor reference on the first ttl_data
# entry, every ttl_data entry with the same '@id' is inspected; when its
# isBibliographic value is the string 'true', its hasUser '@id' is resolved
# through #map_user_from_ttl and appended.
#
# @return [Array] resolved creators, in contributor order
def map_creator
  ttl_data[0][@osf_map['hasContributor']].each_with_object([]) do |contributor, creators|
    # Scan the primary document and the contributor entries alike
    ttl_data.each do |record|
      next unless record['@id'] == contributor['@id']
      next unless record[@osf_map['isBibliographic']][0]['@value'] == 'true'

      creators.push map_user_from_ttl(record[@osf_map['hasUser']][0]['@id'])
    end
  end
end
|
|
184
|
+
|
|
185
|
+
# read user ttl file, extract User's full name
|
|
186
|
+
def map_user_from_ttl(file_subpath)
|
|
187
|
+
user_ttl_data = ttl_from_targz(File.basename(file_subpath))
|
|
188
|
+
user_ttl_data[0][@osf_map['hasFullName']][0]['@value']
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|