rof 1.0.7 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +9 -7
- data/LICENSE +201 -16
- data/Rakefile +46 -0
- data/bin/csv_to_rof +1 -2
- data/bin/fedora_to_rof +7 -1
- data/bin/jsonld_to_rof +26 -0
- data/bin/osf_to_rof +6 -2
- data/bin/rof +5 -19
- data/lib/rof.rb +2 -6
- data/lib/rof/access.rb +1 -1
- data/lib/rof/cli.rb +104 -67
- data/lib/rof/compare_rof.rb +68 -39
- data/lib/rof/filter.rb +21 -0
- data/lib/rof/filters.rb +38 -0
- data/lib/rof/filters/bendo.rb +15 -17
- data/lib/rof/filters/date_stamp.rb +5 -4
- data/lib/rof/filters/file_to_url.rb +5 -3
- data/lib/rof/filters/label.rb +9 -7
- data/lib/rof/filters/work.rb +7 -5
- data/lib/rof/ingest.rb +5 -0
- data/lib/rof/osf_context.rb +2 -2
- data/lib/rof/rdf_context.rb +2 -0
- data/lib/rof/translator.rb +18 -0
- data/lib/rof/translators.rb +23 -0
- data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
- data/lib/rof/translators/fedora_to_rof.rb +244 -0
- data/lib/rof/translators/jsonld_to_rof.rb +112 -0
- data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
- data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
- data/lib/rof/translators/osf_to_rof.rb +191 -0
- data/lib/rof/utility.rb +44 -1
- data/lib/rof/version.rb +1 -1
- data/rof.gemspec +10 -2
- data/spec/coverage_helper.rb +17 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
- data/spec/fixtures/osf/phz6b.tar.gz +0 -0
- data/spec/lib/rof/access_spec.rb +30 -23
- data/spec/lib/rof/cli_spec.rb +83 -60
- data/spec/lib/rof/compare_rof_spec.rb +35 -24
- data/spec/lib/rof/filter_spec.rb +10 -0
- data/spec/lib/rof/filters/bendo_spec.rb +42 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
- data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
- data/spec/lib/rof/filters/label_spec.rb +121 -77
- data/spec/lib/rof/filters/work_spec.rb +7 -4
- data/spec/lib/rof/filters_spec.rb +14 -0
- data/spec/lib/rof/translator_spec.rb +15 -0
- data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
- data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
- data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
- data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
- data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
- data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
- data/spec/lib/rof/translators_spec.rb +14 -0
- data/spec/lib/rof/utility_spec.rb +47 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/support/an_rof_filter.rb +10 -0
- metadata +186 -15
- data/lib/rof/get_from_fedora.rb +0 -211
- data/lib/rof/osf_to_rof.rb +0 -123
- data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
@@ -0,0 +1,223 @@
|
|
1
|
+
require 'active_support/core_ext/array/wrap'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Translators
|
5
|
+
module JsonldToRof
|
6
|
+
# Responsible for dealing with registered predicates and how those are handled.
|
7
|
+
module PredicateHandler
|
8
|
+
# Raised by the registry lookup when none of the registered URL handlers
# produced a location extractor for a predicate.
class UnhandledPredicateError < RuntimeError
  # @param [#to_s] predicate - the predicate that could not be handled
  # @param [#inspect] urls - the URLs that were registered at lookup time
  def initialize(predicate, urls)
    message = "Unable to handle predicate \"#{predicate}\". " \
              "The following predicate URLs were registered #{urls.inspect}"
    super(message)
  end
end
|
13
|
+
|
14
|
+
# @api public
|
15
|
+
#
|
16
|
+
# Parse the RDF predicate and RDF object and add its contents to the accumulator
|
17
|
+
#
|
18
|
+
# @example
|
19
|
+
# Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
|
20
|
+
# The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
|
21
|
+
# _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
|
22
|
+
# _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
|
23
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
|
24
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
|
25
|
+
# For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
|
26
|
+
#
|
27
|
+
# @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
|
28
|
+
#
|
29
|
+
# @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
|
30
|
+
# @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
|
31
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
32
|
+
# @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
|
33
|
+
# @raise [ROF::Translators::JsonldToRof::PredicateHandler::UnhandledPredicateError] when we are unable to handle the given predicate
|
34
|
+
def self.call(predicate, object, accumulator, blank_node = false)
  # The registry lookup raises when no registered URL matches the predicate,
  # so the chained #handle only runs for a resolved handler.
  registry.handler_for(predicate).handle(object, accumulator, blank_node)
  accumulator
end
|
39
|
+
|
40
|
+
# @api public
|
41
|
+
# @param [String] url - The URL that we want to match against
|
42
|
+
# @yield The block to configure how we handle RDF Predicates that match the given URL
|
43
|
+
# @yieldparam [ROF::Translators::JsonldToRof::PredicateHandler::UrlHandler]
|
44
|
+
# @see ./spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb for details and usage
|
45
|
+
def self.register(url, &block)
  # The block, when given, is forwarded to the new handler for configuration.
  url_handler = UrlHandler.new(url, &block)
  registry << url_handler
end
|
48
|
+
|
49
|
+
# @api private
|
50
|
+
# @return [RegistrySet] the memoized set of registered URL handlers; it is
#   created on first access.
def self.registry
  return @registry if @registry
  @registry = RegistrySet.new
end
|
53
|
+
private_class_method :registry
|
54
|
+
|
55
|
+
# Replaces the memoized registry.
#
# @param set_with - the object to use as the registry from now on; defaults
#   to a fresh, empty RegistrySet
# @return the object that is now the registry
def self.clear_registry!(set_with = RegistrySet.new)
  @registry = set_with
end
|
58
|
+
private_class_method :clear_registry!
|
59
|
+
|
60
|
+
# An ordered list of handlers. Lookup asks each handler in registration order
# and stops at the first one that yields a location extractor.
class RegistrySet
  def initialize
    @set = []
  end

  # @param value - the handler to append; it is expected to respond to
  #   #location_extractor_for and #url
  def <<(value)
    @set << value
  end

  # @param predicate - passed to each handler's #location_extractor_for
  # @return the first truthy result produced by a registered handler
  # @raise [UnhandledPredicateError] when the lookup ends with nil
  def handler_for(predicate)
    last_result = nil
    @set.each do |handler|
      last_result = handler.location_extractor_for(predicate)
      return last_result if last_result
    end
    raise UnhandledPredicateError.new(predicate, @set.map(&:url)) if last_result.nil?
    last_result
  end
end
|
79
|
+
private_constant :RegistrySet
|
80
|
+
|
81
|
+
# For a given URL map all of the predicates; Some predicates require explicit mapping, while others
|
82
|
+
# may use implicit mapping.
|
83
|
+
class UrlHandler
|
84
|
+
# @param [String] url - the URL prefix this handler is registered for
# @yield [self] lets the caller configure the handler (see #within,
#   #namespace_prefix and #map)
def initialize(url)
  @url = url
  @slug_handlers = {}
  @namespace_prefix = ''
  @within = []
  yield self if block_given?
end
|
91
|
+
attr_reader :url
|
92
|
+
|
93
|
+
# The final key in the location array should be prefixed with the namespace_prefix; By default this is ""
|
94
|
+
# @param [String, nil] prefix - what is the namespace prefix to apply to the last location in the array.
|
95
|
+
# @return [String]
|
96
|
+
def namespace_prefix(prefix = nil)
  # Without an argument this reads the current prefix; with one it stores
  # and returns the new prefix.
  if prefix.nil?
    @namespace_prefix
  else
    @namespace_prefix = prefix
  end
end
|
100
|
+
|
101
|
+
# Prepend the within array to the location array
|
102
|
+
# @param [Array<String>, nil] location - where in the ROF document are we putting the value
|
103
|
+
# @return [Array<String>]
|
104
|
+
def within(location = nil)
  # Without an argument this reads the current location; with one it stores
  # the argument coerced to an Array.
  if location.nil?
    @within
  else
    @within = Array.wrap(location)
  end
end
|
108
|
+
|
109
|
+
# @param [#to_s] predicate
|
110
|
+
# @return [nil, LocationExtractor] if the given predicate does not match the url, return nil; Otherwise return a LocationExtractor
|
111
|
+
# @see LocationExtractor
|
112
|
+
def location_extractor_for(predicate)
  matched = %r{^#{Regexp.escape(@url)}(.*)}.match(predicate.to_s)
  return nil unless matched
  # Whatever follows the registered URL is the slug used to pick handlers.
  LocationExtractor.new(predicate, handlers_for(matched[1]))
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
# @param [String] slug - a slug that may or may not have been registered
|
122
|
+
# @return [Array<#call>] an array of handlers that each respond to #call
|
123
|
+
# @see ImplicitLocationHandler
|
124
|
+
# @see ExplicitLocationSlugHandler
|
125
|
+
# @see BlockSlugHandler
|
126
|
+
def handlers_for(slug)
  # Slugs that were never mapped fall back to an implicit handler.
  registered = @slug_handlers.fetch(slug) { ImplicitLocationHandler.new(self, slug) }
  Array.wrap(registered)
end
|
129
|
+
|
130
|
+
public
|
131
|
+
|
132
|
+
# @param [String] slug - the part of the predicate that follows the registered URL
|
133
|
+
# @param [Hash] options (with symbol keys)
|
134
|
+
# @option options [Boolean] :force - don't apply the within nor namespace prefix
|
135
|
+
# @option options [Array] :to - an array that will be nested Hash keys
|
136
|
+
# @yield If a block is given, call the block (and skip all other configuration)
|
137
|
+
# @yieldparam [String] object
|
138
|
+
# @see BlockSlugHandler for details concerning a mapping via a block
|
139
|
+
# @see ExplicitLocationSlugHandler for details concerning a mapping via a to: option
|
140
|
+
def map(slug, options = {}, &block)
  @slug_handlers ||= {}
  # A slug may be mapped more than once; every mapping is kept, in order.
  bucket = (@slug_handlers[slug] ||= [])
  handler =
    if block_given?
      BlockSlugHandler.new(self, slug, options, block)
    else
      ExplicitLocationSlugHandler.new(self, slug, options)
    end
  bucket << handler
end
|
149
|
+
|
150
|
+
# Responsible for coordinating the handlers that place a predicate's object into the accumulator
|
151
|
+
class LocationExtractor
  # @param predicate - the predicate the handlers were resolved for
  # @param handlers - one handler or an Array of handlers responding to #call
  def initialize(predicate, handlers)
    @handlers = Array.wrap(handlers)
    @predicate = predicate
  end

  # Invokes every handler, in order, with the same three arguments.
  #
  # @return the given accumulator
  def handle(object, accumulator, blank_node)
    @handlers.each { |handler| handler.call(object, accumulator, blank_node) }
    accumulator
  end
end
|
164
|
+
private_constant :LocationExtractor
|
165
|
+
|
166
|
+
# Handles a slug that has no explicit mapping: the slug itself becomes the
# final key of the location.
class ImplicitLocationHandler
  attr_reader :slug

  def initialize(url_handler, slug)
    @slug = slug
    @url_handler = url_handler
  end

  # Builds the location from the URL handler's `within` path plus the slug,
  # prefixes the final key with the namespace prefix, and records the value.
  def call(object, accumulator, blank_node)
    location = @url_handler.within + Array.wrap(slug)
    final_key = location[-1]
    location[-1] = "#{@url_handler.namespace_prefix}#{final_key}"
    accumulator.add_predicate_location_and_value(location, object, blank_node)
  end
end
|
178
|
+
private_constant :ImplicitLocationHandler
|
179
|
+
|
180
|
+
# Handles a slug by delegating to the block that was given to UrlHandler#map.
class BlockSlugHandler
  attr_reader :slug

  def initialize(url_handler, slug, options, block)
    @url_handler, @slug, @options, @block = url_handler, slug, options, block
  end

  # The block only receives the object and the accumulator; the blank node
  # is not forwarded.
  # @todo Are there differences that need to be handled for the blank_node?
  def call(object, accumulator, _blank_node)
    @block.call(object, accumulator)
  end
end
|
194
|
+
private_constant :BlockSlugHandler
|
195
|
+
|
196
|
+
# Handles a slug that was mapped with an explicit `to:` location.
class ExplicitLocationSlugHandler
  attr_reader :slug

  def initialize(url_handler, slug, options)
    @url_handler, @slug, @options = url_handler, slug, options
  end

  # Records the object at the configured location. Unless the mapping is
  # forced, the location is nested under the URL handler's `within` path and
  # its final key receives the namespace prefix.
  #
  # @raise [KeyError] when the options have no :to entry
  def call(object, accumulator, blank_node)
    explicit = @options.fetch(:to)
    location = force? ? explicit : contextualize(explicit)
    accumulator.add_predicate_location_and_value(location, object, blank_node)
  end

  # @return the :force option, or false when it was not given
  def force?
    @options.fetch(:force, false)
  end

  private

  # Prepends the `within` path and prefixes the final key.
  def contextualize(explicit)
    location = @url_handler.within + Array.wrap(explicit)
    location[-1] = "#{@url_handler.namespace_prefix}#{location[-1]}"
    location
  end
end
|
217
|
+
private_constant :ExplicitLocationSlugHandler
|
218
|
+
end
|
219
|
+
private_constant :UrlHandler
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
223
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
require 'rof/translators/jsonld_to_rof/predicate_handler'
|
3
|
+
|
4
|
+
module ROF
|
5
|
+
module Translators
|
6
|
+
module JsonldToRof
|
7
|
+
# We need to handle the Predicate / Object pair as one (thank you RDF blank nodes for this nuance)
|
8
|
+
module PredicateObjectHandler
|
9
|
+
# @api public
|
10
|
+
#
|
11
|
+
# Parse the RDF::Predicate and RDF::Object and add the relevant data to the accumulator
|
12
|
+
#
|
13
|
+
# @example
|
14
|
+
# Given the following 4 RDF N-Triples (subject, predicate, object). The first and second RDF objects are RDF::Literal. The 3rd triple's object is
|
15
|
+
# an RDF::Node. And the last is an RDF::URI. Each requires a different handler as they have nuanced differences.
|
16
|
+
# _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
|
17
|
+
# _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
|
18
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
|
19
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
|
20
|
+
#
|
21
|
+
# @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
|
22
|
+
#
|
23
|
+
# @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
|
24
|
+
# @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
|
25
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
26
|
+
# @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
|
27
|
+
# @raise [ROF::Translators::JsonldToRof::PredicateObjectHandler::UnknownRdfObjectTypeError] when the RDF::Object is not a handled type
|
28
|
+
def self.call(predicate, object, accumulator, options = {})
  # `new` picks the handler that matches the object's type.
  handler = new(predicate, object, accumulator, options)
  handler.call
  accumulator
end
|
32
|
+
|
33
|
+
# @api private
|
34
|
+
#
|
35
|
+
# @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
|
36
|
+
# @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
|
37
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
38
|
+
# @return [#call]
|
39
|
+
def self.new(predicate, object, accumulator, options)
  handler_class = klass_for(object)
  handler_class.new(predicate, object, accumulator, options)
end
|
42
|
+
|
43
|
+
# Raised when no predicate/object handler exists for an RDF object's type.
UnknownRdfObjectTypeError = Class.new(RuntimeError)
|
45
|
+
|
46
|
+
# @api private
|
47
|
+
# @param object - the RDF object whose type selects the handler
# @return [Class] the handler class for the object's type
# @raise [UnknownRdfObjectTypeError] when the object is not an RDF::URI,
#   RDF::Node or RDF::Literal
def self.klass_for(object)
  case object
  when RDF::URI then UriPredicateObjectHandler
  when RDF::Node then NodePredicateObjectHandler
  when RDF::Literal then LiteralPredicateObjectHandler
  else
    raise UnknownRdfObjectTypeError, "Unable to determine object handler for #{object.inspect}"
  end
end
|
59
|
+
|
60
|
+
# @api private
|
61
|
+
# Handles a predicate whose object is a URI by passing it straight to the
# PredicateHandler.
class UriPredicateObjectHandler
  def initialize(predicate, object, accumulator, options)
    @predicate, @object, @accumulator, @options = predicate, object, accumulator, options
  end

  # @return the accumulator given at construction
  def call
    blank_node = options[:blank_node]
    PredicateHandler.call(predicate, object, accumulator, blank_node)
    accumulator
  end

  private

  attr_reader :predicate, :object, :accumulator, :options
end
|
77
|
+
private_constant :UriPredicateObjectHandler
|
78
|
+
|
79
|
+
# @api private
|
80
|
+
# Blank Nodes; Oh how we love thee. Let me count the ways
|
81
|
+
class NodePredicateObjectHandler
  def initialize(predicate, object, accumulator, options)
    @predicate, @object, @accumulator, @options = predicate, object, accumulator, options
  end

  # Looks up the blank node the object refers to and feeds each of its
  # predicate/object pairs back through PredicateObjectHandler, tagged with
  # the blank node they came from.
  #
  # @return the accumulator given at construction
  def call
    accumulator.fetch_blank_node(object).each_pair do |node_predicate, node_objects|
      node_objects.each do |node_object|
        PredicateObjectHandler.call(node_predicate, node_object, accumulator, blank_node: object)
      end
    end
    accumulator
  end

  private

  attr_reader :predicate, :object, :accumulator, :options
end
|
102
|
+
private_constant :NodePredicateObjectHandler
|
103
|
+
|
104
|
+
# @api private
|
105
|
+
# Handles a predicate whose object is a literal by passing it straight to the
# PredicateHandler.
class LiteralPredicateObjectHandler
  def initialize(predicate, object, accumulator, options)
    @predicate, @object, @accumulator, @options = predicate, object, accumulator, options
  end

  # @return the accumulator given at construction
  def call
    blank_node = options[:blank_node]
    PredicateHandler.call(predicate, object, accumulator, blank_node)
    accumulator
  end

  private

  attr_reader :predicate, :object, :accumulator, :options
end
|
121
|
+
private_constant :LiteralPredicateObjectHandler
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'rof/translators/jsonld_to_rof/predicate_object_handler'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Translators
|
5
|
+
module JsonldToRof
|
6
|
+
# Responsible for parsing an RDF statement and adding to the accumulator.
|
7
|
+
module StatementHandler
|
8
|
+
# @api public
|
9
|
+
#
|
10
|
+
# Parse the RDF statement and add its contents to the accumulator
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
|
14
|
+
# The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
|
15
|
+
# _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
|
16
|
+
# _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
|
17
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
|
18
|
+
# <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
|
19
|
+
# For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
|
20
|
+
#
|
21
|
+
# @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
|
22
|
+
#
|
23
|
+
# @param [RDF::Statement] statement - the RDF statement that we will parse and add to the appropriate spot in the accumulator
|
24
|
+
# @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
|
25
|
+
# @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
|
26
|
+
# @raise [ROF::Translators::JsonldToRof::StatementHandler::UnhandledRdfSubjectError] when the RDF::Statement's subject is not a valid type
|
27
|
+
def self.call(statement, accumulator)
  # `new` picks the handler that matches the statement's subject type.
  handler = new(statement, accumulator)
  handler.call
  accumulator
end
|
31
|
+
|
32
|
+
# Raised when a statement's subject is neither an RDF::URI nor an RDF::Node.
UnhandledRdfSubjectError = Class.new(RuntimeError)
|
34
|
+
|
35
|
+
# @api private
|
36
|
+
# @param statement - responds to #subject; the subject's type picks the handler
# @param accumulator - handed to the chosen handler
# @return [#call] a handler for the statement
# @raise [UnhandledRdfSubjectError] when the subject is neither an RDF::URI
#   nor an RDF::Node
def self.new(statement, accumulator)
  handler_class =
    case statement.subject
    when RDF::URI then UriSubjectHandler
    when RDF::Node then BlankNodeHandler
    else
      raise UnhandledRdfSubjectError, "Unable to determine subject handler for #{statement.inspect}"
    end
  handler_class.new(statement, accumulator)
end
|
46
|
+
|
47
|
+
# Responsible for accumulating the ROF data for a URI based resource
|
48
|
+
class UriSubjectHandler
  # @param statement - responds to #subject, #predicate and #object
  # @param accumulator - receives the PID and the predicate/object data
  def initialize(statement, accumulator)
    @accumulator = accumulator
    @statement = statement
  end

  # Registers the subject's PID (when the subject is a CurateND show URL) and
  # then hands the predicate/object pair to PredicateObjectHandler.
  #
  # @return the accumulator given at construction
  def call
    handle_subject
    handle_predicate_and_object
    @accumulator
  end

  private

  def handle_predicate_and_object
    PredicateObjectHandler.call(@statement.predicate, @statement.object, @accumulator)
  end

  # Derives "und:<id>" from a subject such as
  # "https://curate.nd.edu/show/<id>" and adds it to the accumulator.
  #
  # The identifier capture stops at the first "/" so that a trailing slash
  # (or any further path segment) is not swallowed into the PID; the previous
  # character class excluded backslashes rather than slashes. The dots of the
  # host name are escaped so they only match literal dots.
  #
  # @return [nil] when the subject is not a CurateND show URL
  def handle_subject
    matched = %r{https://curate\.nd\.edu/show/([^/]+)/?}.match(@statement.subject.to_s)
    return nil unless matched
    @accumulator.add_pid("und:#{matched[1]}")
  end
end
|
72
|
+
private_constant :UriSubjectHandler
|
73
|
+
|
74
|
+
# Responsible for handling blank nodes in the RDF graph; Examples include ETD degree information
|
75
|
+
# Blank node subjects behave differently from UriSubjectHandler
|
76
|
+
class BlankNodeHandler
  def initialize(statement, accumulator)
    @statement = statement
    @accumulator = accumulator
  end

  # Stores the whole statement on the accumulator as a blank node.
  #
  # @return the accumulator given at construction
  def call
    @accumulator.tap { |accumulator| accumulator.add_blank_node(@statement) }
  end
end
|
87
|
+
private_constant :BlankNodeHandler
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'zlib'
|
3
|
+
require 'rubygems/package'
|
4
|
+
require 'rdf/turtle'
|
5
|
+
require 'rof/osf_context'
|
6
|
+
require 'rof/rdf_context'
|
7
|
+
require 'rof/utility'
|
8
|
+
require 'rof/translator'
|
9
|
+
|
10
|
+
module ROF::Translators
|
11
|
+
# Class for managing OSF Archive data transformations
|
12
|
+
# It is called after the get-from-osf task, and before the work-xlat task
|
13
|
+
class OsfToRof < ROF::Translator
|
14
|
+
# @todo Set this to be something more meaningful than an empty lambda
|
15
|
+
# @return [#call]
|
16
|
+
def self.default_previously_archived_pid_finder
  # A two-argument lambda with an empty body: it always answers nil.
  lambda { |_archive_type, _osf_project_identifier| }
end
|
19
|
+
|
20
|
+
# Convert Osf Archive tar.gz to ROF
|
21
|
+
def self.call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder)
  translator = new(project, config, previously_archived_pid_finder)
  translator.call
end
|
24
|
+
|
25
|
+
# @param project - the project data; #call returns early when this is nil
# @param config - the task configuration (read for 'package_dir' elsewhere in this class)
# @param [#call] previously_archived_pid_finder - see .default_previously_archived_pid_finder
def initialize(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder)
  @project = project
  @config = config
  @previously_archived_pid_finder = previously_archived_pid_finder
  @osf_map = ROF::OsfToNDMap
end
|
31
|
+
|
32
|
+
# Reads the project's ttl file from the package and builds the archive record.
#
# NOTE(review): a nil project yields an empty Hash while the normal path
# yields an Array - confirm callers rely on this before unifying.
#
# @return [Hash, Array<Hash>] {} when there is no project; otherwise a
#   one-element Array holding the archive record
def call
  return {} if project.nil?
  @ttl_data = ttl_from_targz(source_slug + '.ttl')
  [build_archive_record]
end
|
39
|
+
|
40
|
+
# @api private
|
41
|
+
# @see https://github.com/ndlib/curate_nd/blob/677c05c836ff913c01dcbbfc5e5d21366b87d500/app/repository_models/osf_archive.rb#L62
|
42
|
+
def archive_type
  # fetch (rather than []) so a project without a package type fails loudly;
  # presumably project is a Hash, in which case this is a KeyError - confirm.
  project.fetch('package_type')
end
|
45
|
+
|
46
|
+
# @api private
|
47
|
+
# This is a bit of a misnomer; As used it represents the path to the project or registration
|
48
|
+
# that we have ingested (e.g. https://osf.io/:source_slug)
|
49
|
+
#
|
50
|
+
# It was previously named :project_identifier in this class, but that gets conflated with the
|
51
|
+
# underlying object's osf_project_identifier (e.g. what OSF Project was this archive originally based on)
|
52
|
+
#
|
53
|
+
# @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L96
|
54
|
+
def source_slug
  # fetch (rather than []) so a project without an identifier fails loudly;
  # presumably project is a Hash, in which case this is a KeyError - confirm.
  project.fetch('project_identifier')
end
|
57
|
+
|
58
|
+
# @api private
|
59
|
+
# @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L106
|
60
|
+
def osf_project_identifier
  if project['package_type'] == 'OSF Project'
    # A project archive is identified by its own slug.
    source_slug
  else
    # Otherwise derive it from the first "registeredFrom" reference in the ttl data.
    registered_from = ttl_data[0][@osf_map['registeredFrom']]
    osf_url_from_filename(registered_from[0]['@id'])
  end
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
attr_reader :config, :project
|
68
|
+
|
69
|
+
# A function responsible for finding the previously archived pid.
|
70
|
+
# @return [#call]
|
71
|
+
# @see #default_previously_archived_pid_finder for interface
|
72
|
+
attr_reader :previously_archived_pid_finder
|
73
|
+
|
74
|
+
# this is an array - the first element is the work; the additional elements are the contributor(s)
|
75
|
+
# @return [Array<Hash>]
|
76
|
+
# @see #ttl_from_targz
|
77
|
+
attr_reader :ttl_data
|
78
|
+
|
79
|
+
# reads a ttl file and makes it a JSON-LD file that we can parse
|
80
|
+
# @param ttl_file - path handed to the Turtle reader
# @return whatever JSON::LD::API.fromRdf produces for the opened reader
def fetch_from_ttl(ttl_file)
  # The prefix list is duplicated so the reader never receives the shared constant.
  reader = RDF::Turtle::Reader.open(ttl_file, prefixes: ROF::OsfPrefixList.dup)
  JSON::LD::API.fromRdf(reader)
end
|
84
|
+
|
85
|
+
# extracts given ttl file from JHU tar.gz package
|
86
|
+
# - assumed to live under data/obj/root
|
87
|
+
# @return [Array<Hash>] the first element is the "work" and the additional elements, if any, are the contributor(s)
|
88
|
+
def ttl_from_targz(ttl_filename)
  package_dir = config.fetch('package_dir')
  archive_path = File.join(package_dir, source_slug + '.tar.gz')
  member_path = File.join(source_slug, 'data/obj/root', ttl_filename)
  # presumably file_from_targz unpacks the member beneath package_dir, since
  # that is where the file is read from next - confirm in ROF::Utility
  ROF::Utility.file_from_targz(archive_path, member_path)
  fetch_from_ttl(File.join(package_dir, member_path))
end
|
94
|
+
|
95
|
+
# Maps RELS-EXT
|
96
|
+
# @return [Hash] a copy of the RELS-EXT context under '@context', plus
#   whatever the previous-version step adds
def map_rels_ext
  rels_ext = { '@context' => ROF::RelsExtRefContext.dup }
  apply_previous_archived_version_if_applicable(rels_ext)
  rels_ext
end
|
102
|
+
|
103
|
+
# sets metadata
|
104
|
+
# @return [Hash] the descriptive metadata, keyed in the order listed below
def map_metadata
  work = ttl_data[0]
  created_at = Time.iso8601(work[@osf_map['dc:created']][0]['@value'])
  {
    '@context' => ROF::RdfContext.dup,
    # metadata derived from the project ttl file
    'dc:created' => created_at.to_date.iso8601 + 'Z',
    'dc:title' => work[@osf_map['dc:title']][0]['@value'],
    'dc:description' => work[@osf_map['dc:description']][0]['@value'],
    'dc:subject' => map_subject,
    # metadata derived from osf_projects data, passed from UI
    'dc:source' => 'https://osf.io/' + source_slug,
    'dc:creator#adminstrative_unit' => project['administrative_unit'],
    'dc:creator#affiliation' => project['affiliation'],
    'nd:osfProjectIdentifier' => osf_project_identifier,
    'dc:creator' => map_creator,
    'dc:type' => project['package_type']
  }
end
|
121
|
+
|
122
|
+
# For reference to the assumed RELS-EXT see the following spec in CurateND
|
123
|
+
# @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/spec/repository_models/osf_archive_spec.rb#L97
|
124
|
+
def apply_previous_archived_version_if_applicable(rels_ext)
  previous_pid = previously_archived_pid_finder.call(archive_type, osf_project_identifier)
  # Only a nil answer from the finder triggers the SOLR lookup.
  if previous_pid.nil?
    previous_pid = ROF::Utility.check_solr_for_previous(config, osf_project_identifier)
  end
  rels_ext['pav:previousVersion'] = previous_pid if previous_pid
  rels_ext
end
|
132
|
+
|
133
|
+
# Constructs OsfArchive Record from ttl_data, data from the UI form,
|
134
|
+
# and task config data
|
135
|
+
# @return [Hash] the OsfArchive record, keyed in the order listed below
def build_archive_record
  {
    'owner' => project['owner'],
    'type' => 'OsfArchive',
    'rights' => map_rights,
    'rels-ext' => map_rels_ext,
    'metadata' => map_metadata,
    'files' => [source_slug + '.tar.gz']
  }
end
|
145
|
+
|
146
|
+
# sets subject
|
147
|
+
# @return the first subject value of the work, or '' when the work has no
#   subject entry
def map_subject
  work = ttl_data[0]
  subject_key = @osf_map['dc:subject']
  return '' unless work.key?(subject_key)
  work[subject_key][0]['@value']
end
|
153
|
+
|
154
|
+
# make osf url from bagfile name
|
155
|
+
# Despite the name, this returns an identifier rather than a URL: the last
# path segment with its final extension removed.
#
# @param [String] ttl_file - e.g. "some/path/abc12.ttl"
# @return [String] e.g. "abc12"; '' when the last segment contains no "."
def osf_url_from_filename(ttl_file)
  file_name = ttl_file.rpartition('/').last
  file_name.rpartition('.').first
end
|
159
|
+
|
160
|
+
# figures out the rights
|
161
|
+
# @return [Hash] public read access when the work's isPublic value is the
#   string 'true'; otherwise an empty Hash
def map_rights
  public_flag = ttl_data[0][@osf_map['isPublic']][0]['@value']
  public_flag == 'true' ? { 'read-groups' => ['public'] } : {}
end
|
168
|
+
|
169
|
+
# sets the creator- needs to read another ttl for the User data
|
170
|
+
# only contributors with isBibliographic true are considered
|
171
|
+
# @return [Array] one entry (from #map_user_from_ttl) per bibliographic
#   contributor, in the order the work lists its contributors
def map_creator
  ttl_data[0][@osf_map['hasContributor']].each_with_object([]) do |contributor, creators|
    # Scan the primary document and the contributors for the matching record.
    ttl_data.each do |item|
      next unless item['@id'] == contributor['@id']
      next unless item[@osf_map['isBibliographic']][0]['@value'] == 'true'
      creators << map_user_from_ttl(item[@osf_map['hasUser']][0]['@id'])
    end
  end
end
|
184
|
+
|
185
|
+
# read user ttl file, extract User's full name
|
186
|
+
# @param [String] file_subpath - only its base name is used to locate the ttl file
# @return the first full-name value of the user's first record
def map_user_from_ttl(file_subpath)
  user_record = ttl_from_targz(File.basename(file_subpath)).first
  user_record[@osf_map['hasFullName']][0]['@value']
end
|
190
|
+
end
|
191
|
+
end
|