rof 1.0.7 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +9 -7
- data/LICENSE +201 -16
- data/Rakefile +46 -0
- data/bin/csv_to_rof +1 -2
- data/bin/fedora_to_rof +7 -1
- data/bin/jsonld_to_rof +26 -0
- data/bin/osf_to_rof +6 -2
- data/bin/rof +5 -19
- data/lib/rof.rb +2 -6
- data/lib/rof/access.rb +1 -1
- data/lib/rof/cli.rb +104 -67
- data/lib/rof/compare_rof.rb +68 -39
- data/lib/rof/filter.rb +21 -0
- data/lib/rof/filters.rb +38 -0
- data/lib/rof/filters/bendo.rb +15 -17
- data/lib/rof/filters/date_stamp.rb +5 -4
- data/lib/rof/filters/file_to_url.rb +5 -3
- data/lib/rof/filters/label.rb +9 -7
- data/lib/rof/filters/work.rb +7 -5
- data/lib/rof/ingest.rb +5 -0
- data/lib/rof/osf_context.rb +2 -2
- data/lib/rof/rdf_context.rb +2 -0
- data/lib/rof/translator.rb +18 -0
- data/lib/rof/translators.rb +23 -0
- data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
- data/lib/rof/translators/fedora_to_rof.rb +244 -0
- data/lib/rof/translators/jsonld_to_rof.rb +112 -0
- data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
- data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
- data/lib/rof/translators/osf_to_rof.rb +191 -0
- data/lib/rof/utility.rb +44 -1
- data/lib/rof/version.rb +1 -1
- data/rof.gemspec +10 -2
- data/spec/coverage_helper.rb +17 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
- data/spec/fixtures/osf/phz6b.tar.gz +0 -0
- data/spec/lib/rof/access_spec.rb +30 -23
- data/spec/lib/rof/cli_spec.rb +83 -60
- data/spec/lib/rof/compare_rof_spec.rb +35 -24
- data/spec/lib/rof/filter_spec.rb +10 -0
- data/spec/lib/rof/filters/bendo_spec.rb +42 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
- data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
- data/spec/lib/rof/filters/label_spec.rb +121 -77
- data/spec/lib/rof/filters/work_spec.rb +7 -4
- data/spec/lib/rof/filters_spec.rb +14 -0
- data/spec/lib/rof/translator_spec.rb +15 -0
- data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
- data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
- data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
- data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
- data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
- data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
- data/spec/lib/rof/translators_spec.rb +14 -0
- data/spec/lib/rof/utility_spec.rb +47 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/support/an_rof_filter.rb +10 -0
- metadata +186 -15
- data/lib/rof/get_from_fedora.rb +0 -211
- data/lib/rof/osf_to_rof.rb +0 -123
- data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
@@ -0,0 +1,23 @@
|
|
1
|
+
Dir.glob(File.expand_path('../translators/*.rb', __FILE__)).each do |filename|
|
2
|
+
require filename
|
3
|
+
end
|
4
|
+
|
5
|
+
module ROF
|
6
|
+
# A namespace for organizing translating classes. A translating class is responsible for
|
7
|
+
# converting from one format to another format (e.g. CSV to ROF).
|
8
|
+
#
|
9
|
+
# @see ROF::Translator
|
10
|
+
# @see ROF::Translators::CsvToRof
|
11
|
+
# @see ROF::Translators::FedoraToRof
|
12
|
+
# @see ROF::Translators::OsfToRof
|
13
|
+
module Translators
|
14
|
+
# @api public
|
15
|
+
# @param [String] csv_contents - in the form of a CSV
|
16
|
+
# @param [Hash] config - Hash with symbols for keys
|
17
|
+
# @return [Hash] in ROF format
|
18
|
+
# @see ROF::Translators::CsvToRof for full details
|
19
|
+
def self.csv_to_rof(csv_contents, config = {})
|
20
|
+
CsvToRof.call(csv_contents, config)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -1,7 +1,8 @@
|
|
1
|
+
require 'rof/translator'
|
1
2
|
require('csv')
|
2
3
|
require('json')
|
3
4
|
|
4
|
-
module ROF
|
5
|
+
module ROF::Translators
|
5
6
|
# Turn a CSV file into a ROF file.
|
6
7
|
#
|
7
8
|
# pass in the contents of the CSV file.
|
@@ -31,7 +32,7 @@ module ROF
|
|
31
32
|
# with the previous work translated into ROF. This will allow a work to have
|
32
33
|
# attached files with different access permissions, owners, etc...
|
33
34
|
# Any extra files are appended to the file list for the work.
|
34
|
-
class
|
35
|
+
class CsvToRof < ROF::Translator
|
35
36
|
class MissingOwnerOrType < RuntimeError
|
36
37
|
end
|
37
38
|
|
@@ -45,7 +46,7 @@ module ROF
|
|
45
46
|
class NoPriorWork < RuntimeError
|
46
47
|
end
|
47
48
|
|
48
|
-
def self.
|
49
|
+
def self.call(csv_contents, config = {})
|
49
50
|
first_line = nil
|
50
51
|
rof_contents = []
|
51
52
|
previous_work = nil
|
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'rdf/ntriples'
|
4
|
+
require 'rdf/rdfxml'
|
5
|
+
require 'rubydora'
|
6
|
+
require 'rof/translator'
|
7
|
+
|
8
|
+
module ROF
|
9
|
+
module Translators
|
10
|
+
# Responsible for translating Fedora PIDs to ROF objects
|
11
|
+
class FedoraToRof < ROF::Translator
|
12
|
+
# @param [Array] pids - Fedora PIDs
|
13
|
+
# @param [Hash] config - Hash with symbol keys
|
14
|
+
# @option config [Hash] :fedora_connection_information - The Hash that contains the connection information for Fedora
|
15
|
+
# @return [Array<Hash>] The ROF representation of the Fedora objects, one Hash per PID
|
16
|
+
# @see Rubydora.connect
|
17
|
+
def self.call(pids, config = {})
|
18
|
+
new(pids, config).to_rof
|
19
|
+
end
|
20
|
+
|
21
|
+
def initialize(pids, config = {})
|
22
|
+
@pids = pids
|
23
|
+
@fedora_connection_information = config.fetch(:fedora_connection_information)
|
24
|
+
@config = config
|
25
|
+
connect_to_fedora!
|
26
|
+
end
|
27
|
+
attr_reader :pids, :fedora_connection_information, :config, :connection
|
28
|
+
|
29
|
+
private
|
30
|
+
def connect_to_fedora!
|
31
|
+
@connection = Rubydora.connect(fedora_connection_information)
|
32
|
+
end
|
33
|
+
public
|
34
|
+
|
35
|
+
def to_rof
|
36
|
+
# wrap the objects inside a JSON list
|
37
|
+
pids.map do |pid|
|
38
|
+
PidToRofElement.new(pid, connection, config).convert
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Responsible for converting a single PID to an ROF Element
|
43
|
+
class PidToRofElement
|
44
|
+
def initialize(pid, connection, config)
|
45
|
+
@pid = pid
|
46
|
+
@connection = connection
|
47
|
+
@config = config
|
48
|
+
@fedora_info = { 'pid' => pid, 'type' => 'fobject' }
|
49
|
+
@fedora_object = connection.find(pid)
|
50
|
+
end
|
51
|
+
attr_reader :pid, :config, :fedora_object
|
52
|
+
|
53
|
+
# Given a rubydora object, extract what we need
|
54
|
+
# to create our ROF object in an associative array
|
55
|
+
#
|
56
|
+
def convert
|
57
|
+
@fedora_info['af-model'] = setModel
|
58
|
+
# iterate through the data streams that are present.
|
59
|
+
# use reflection to call appropriate method for each
|
60
|
+
fedora_object.datastreams.each do |dsname, ds|
|
61
|
+
method_name = DATASTREAM_NAME_TO_METHOD_MAP.fetch(dsname) { :default_datastream_conversion }
|
62
|
+
send(method_name, dsname, ds)
|
63
|
+
end
|
64
|
+
@fedora_info
|
65
|
+
end
|
66
|
+
|
67
|
+
DATASTREAM_NAME_TO_METHOD_MAP = {
|
68
|
+
'DC' => :skip_datastream,
|
69
|
+
'RELS-EXT' => :convert_rels_ext,
|
70
|
+
'rightsMetadata' => :convert_rights_metadata,
|
71
|
+
'properties' => :default_datastream_conversion,
|
72
|
+
'content' => :default_datastream_conversion,
|
73
|
+
'descMetadata' => :convert_desc_metadata,
|
74
|
+
'bendo-item' => :default_datastream_conversion,
|
75
|
+
'characterization' => :default_datastream_conversion,
|
76
|
+
'thumbnail' => :default_datastream_conversion
|
77
|
+
}
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def default_datastream_conversion(dsname, ds)
|
82
|
+
# dump generic datastream
|
83
|
+
meta = create_meta(ds)
|
84
|
+
@fedora_info["#{dsname}-meta"] = meta unless meta.empty?
|
85
|
+
|
86
|
+
# if content is short (length <= 1024) or the 'inline' option is set, and it is valid utf-8, save as string
|
87
|
+
# otherwise (longer content, or not valid utf-8), save as a file only if the 'download' config option is given
|
88
|
+
content = ds.datastream_content
|
89
|
+
if content.length <= 1024 || config['inline']
|
90
|
+
# this downloads the contents of the datastream into memory
|
91
|
+
content_string = content.to_s.force_encoding('UTF-8')
|
92
|
+
if content_string.valid_encoding?
|
93
|
+
@fedora_info[dsname] = content_string
|
94
|
+
return # we're done! move on to next datastream
|
95
|
+
end
|
96
|
+
# not utf-8, so keep going and see if download option was given
|
97
|
+
end
|
98
|
+
return unless config['download']
|
99
|
+
# download option was given, so save this datastream as a file
|
100
|
+
fname = "#{@fedora_info['pid']}-#{dsname}"
|
101
|
+
abspath = File.join(config['download_path'], fname)
|
102
|
+
@fedora_info["#{dsname}-file"] = fname
|
103
|
+
if File.file?(config['download_path'])
|
104
|
+
$stderr.puts "Error: --download directory #{config['download_path']} specified is an existing file."
|
105
|
+
exit 1
|
106
|
+
end
|
107
|
+
FileUtils.mkdir_p(config['download_path'])
|
108
|
+
File.open(abspath, 'w') do |f|
|
109
|
+
f.write(content)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def create_meta(ds)
|
114
|
+
result = {}
|
115
|
+
|
116
|
+
label = ds.profile['dsLabel']
|
117
|
+
result['label'] = label unless label.nil? || label == ''
|
118
|
+
result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
|
119
|
+
# TODO(dbrower): make sure this is working as intended
|
120
|
+
if %w(R E).include?(ds.profile['dsControlGroup'])
|
121
|
+
s = result['URL'] = ds.profile['dsLocation']
|
122
|
+
s = s.sub(config['bendo'], 'bendo:') if config['bendo']
|
123
|
+
result['URL'] = s
|
124
|
+
end
|
125
|
+
result
|
126
|
+
end
|
127
|
+
|
128
|
+
# determine the value for fedora_info['af-model']
|
129
|
+
#
|
130
|
+
def setModel
|
131
|
+
# only keep info:fedora/afmodel:XXXXX
|
132
|
+
models = fedora_object.profile['objModels'].map do |model|
|
133
|
+
Regexp.last_match(1) if model =~ /^info:fedora\/afmodel:(.*)/
|
134
|
+
end.compact
|
135
|
+
models[0]
|
136
|
+
end
|
137
|
+
|
138
|
+
# The methods below are called if the like-named datastream exists in fedora
|
139
|
+
|
140
|
+
def skip_datastream(*)
|
141
|
+
end
|
142
|
+
|
143
|
+
# set metadata
|
144
|
+
#
|
145
|
+
def convert_desc_metadata(_dsname, ds)
|
146
|
+
# descMetadata is encoded in ntriples, convert to JSON-LD using our special context
|
147
|
+
graph = RDF::Graph.new
|
148
|
+
data = ds.datastream_content
|
149
|
+
# force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
|
150
|
+
# see https://github.com/ruby-rdf/rdf/issues/142
|
151
|
+
data.force_encoding('utf-8')
|
152
|
+
graph.from_ntriples(data, format: :ntriples)
|
153
|
+
JSON::LD::API.fromRdf(graph) do |expanded|
|
154
|
+
result = JSON::LD::API.compact(expanded, RdfContext)
|
155
|
+
@fedora_info['metadata'] = result
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
# set rights
|
160
|
+
#
|
161
|
+
def convert_rights_metadata(_dsname, ds)
  # rights is an XML document
  # the access array may have read or edit elements
  # each of these elements may contain group or person elements
  #
  # Stores the result under @fedora_info['rights'] as a Hash that may hold
  # 'embargo-date', 'read', 'edit', 'read-groups' and 'edit-groups'.
  root = REXML::Document.new(ds.datastream_content).root

  rights_array = {}

  # check for optional embargo date - set if present
  this_embargo = root.elements['embargo']
  rights_array['embargo-date'] = this_embargo.elements['machine'].elements['date'][0] if Utility.has_embargo_date?(this_embargo)

  %w(read edit).each do |access|
    this_access = root.elements["//access[@type=\'#{access}\']"]
    next if this_access.nil?

    # an access element without a machine element carries no rights to record
    machine = this_access.elements['machine']
    next if machine.nil?

    # elements['group'] would return only the FIRST <group>; get_elements
    # returns all of them, so no group (or person) is silently dropped.
    # The child nodes of each element are collected, as before.
    groups = machine.get_elements('group')
    rights_array["#{access}-groups"] = groups.flat_map(&:children) unless groups.empty?

    people = machine.get_elements('person')
    rights_array[access.to_s] = people.flat_map(&:children) unless people.empty?
  end

  @fedora_info['rights'] = rights_array
end
|
199
|
+
|
200
|
+
def convert_rels_ext(_dsname, ds)
|
201
|
+
# RELS-EXT is RDF-XML - parse it
|
202
|
+
ctx = ROF::RelsExtRefContext.dup
|
203
|
+
ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
|
204
|
+
graph = RDF::Graph.new
|
205
|
+
graph.from_rdfxml(ds.datastream_content)
|
206
|
+
result = nil
|
207
|
+
JSON::LD::API.fromRdf(graph) do |expanded|
|
208
|
+
result = JSON::LD::API.compact(expanded, ctx)
|
209
|
+
end
|
210
|
+
# now strip the info:fedora/ prefix from the URIs
|
211
|
+
strip_info_fedora(result)
|
212
|
+
# remove extra items
|
213
|
+
result.delete('hasModel')
|
214
|
+
@fedora_info['rels-ext'] = result
|
215
|
+
end
|
216
|
+
|
217
|
+
private
|
218
|
+
|
219
|
+
def strip_info_fedora(rels_ext)
|
220
|
+
rels_ext.each do |relation, targets|
|
221
|
+
next if relation == '@context'
|
222
|
+
if targets.is_a?(Hash)
|
223
|
+
strip_info_fedora(targets)
|
224
|
+
next
|
225
|
+
end
|
226
|
+
targets = [targets] if targets.is_a?(String)
|
227
|
+
targets.map! do |target|
|
228
|
+
if target.is_a?(Hash)
|
229
|
+
strip_info_fedora(target)
|
230
|
+
else
|
231
|
+
target.sub('info:fedora/', '')
|
232
|
+
end
|
233
|
+
end
|
234
|
+
# some single strings cannot be arrays in json-ld, so convert back
|
235
|
+
# this shouldn't cause any problems with items that began as arrays
|
236
|
+
targets = targets[0] if targets.length == 1
|
237
|
+
rels_ext[relation] = targets
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
private_constant :PidToRofElement
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'rof/rdf_context'
|
2
|
+
require 'active_support/core_ext/array/wrap'
|
3
|
+
require 'rof/translators/jsonld_to_rof/statement_handler'
|
4
|
+
require 'rof/translators/jsonld_to_rof/predicate_handler'
|
5
|
+
require 'rof/translators/jsonld_to_rof/accumulator'
|
6
|
+
|
7
|
+
module ROF
|
8
|
+
module Translators
|
9
|
+
# @api public
|
10
|
+
#
|
11
|
+
# Responsible for converting JSON LD into an ROF Hash via registered URI maps.
|
12
|
+
#
|
13
|
+
# @note Some predicates require explicit mapping whereas others have an assumed mapping. At present all URLs for @context of JSON-LD documents must be registered.
|
14
|
+
#
|
15
|
+
# @see ROF::Translators::JsonldToRof.call for details on how the JSON-LD is converted
|
16
|
+
# @see ROF::Translators::JsonldToRof::PredicateHandler.register for details on how Predicate URIs are mapped to nodes in the ROF document.
|
17
|
+
# @see ROF::Translators::JsonldToRof::PredicateHandler
|
18
|
+
# @see ROF::Translators::JsonldToRof::StatementHandler
|
19
|
+
module JsonldToRof
|
20
|
+
PredicateHandler.register('http://purl.org/ontology/bibo/') do |handler|
|
21
|
+
handler.namespace_prefix('bibo:')
|
22
|
+
handler.within(['metadata'])
|
23
|
+
end
|
24
|
+
PredicateHandler.register('info:fedora/fedora-system:def/relations-external') do |handler|
|
25
|
+
handler.map('#isMemberOfCollection', to: ['rels-ext', 'isMemberOfCollection'])
|
26
|
+
end
|
27
|
+
PredicateHandler.register('http://id.loc.gov/vocabulary/relators/') do |handler|
|
28
|
+
handler.namespace_prefix('mrel:')
|
29
|
+
handler.within(['metadata'])
|
30
|
+
end
|
31
|
+
PredicateHandler.register('http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#') do |handler|
|
32
|
+
handler.namespace_prefix('ebucore:')
|
33
|
+
handler.within(['metadata'])
|
34
|
+
end
|
35
|
+
|
36
|
+
PredicateHandler.register('https://library.nd.edu/ns/terms/') do |handler|
|
37
|
+
handler.map('accessEdit', to: ['rights', 'edit'])
|
38
|
+
handler.map('accessRead', to: ['rights', 'read'])
|
39
|
+
handler.map('accessEditGroup', to: ['rights', 'edit-groups'])
|
40
|
+
handler.map('accessReadGroup', to: ['rights', 'read-groups'])
|
41
|
+
handler.map('accessEmbargoDate', to: ['rights', 'embargo-date'])
|
42
|
+
handler.map('afmodel', to: ["af-model"])
|
43
|
+
handler.map('bendoitem', to: ["bendo-item"])
|
44
|
+
handler.map('depositor') do |object, accumulator|
|
45
|
+
accumulator.register_properties('depositor', object)
|
46
|
+
end
|
47
|
+
handler.map('owner') do |object, accumulator|
|
48
|
+
accumulator.register_properties('owner', object)
|
49
|
+
end
|
50
|
+
handler.map('representativeFile') do |object, accumulator|
|
51
|
+
accumulator.register_properties('representative', object)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
PredicateHandler.register('http://purl.org/dc/terms/') do |handler|
|
56
|
+
handler.namespace_prefix('dc:')
|
57
|
+
handler.within(['metadata'])
|
58
|
+
handler.map('contributor', to: ['metadata', 'dc:contributor', 'dc:contributor'], force: true)
|
59
|
+
end
|
60
|
+
|
61
|
+
PredicateHandler.register('http://projecthydra.org/ns/relations#') do |handler|
|
62
|
+
handler.map('hasEditor', to: ['rels-ext', 'hydramata-rel:hasEditor'])
|
63
|
+
# We need to map the hasEditorGroup predicate to two different locations in the ROF
|
64
|
+
handler.map('hasEditorGroup', to: ['rels-ext', 'hydramata-rel:hasEditorGroup'], force: true)
|
65
|
+
handler.map('hasEditorGroup', to: ['rights', 'edit-groups'], force: true)
|
66
|
+
end
|
67
|
+
|
68
|
+
PredicateHandler.register('http://www.ndltd.org/standards/metadata/etdms/1.1/') do |handler|
|
69
|
+
handler.within(['metadata', 'ms:degree'])
|
70
|
+
handler.namespace_prefix('ms:')
|
71
|
+
handler.map('role', to: ['metadata', 'dc:contributor', 'ms:role'], force: true)
|
72
|
+
end
|
73
|
+
|
74
|
+
# @api public
|
75
|
+
#
|
76
|
+
# Converts the given JSON-LD into an ROF document that can be used to batch ingest into Fedora.
|
77
|
+
#
|
78
|
+
# @param [Array<Hash>, Hash] jsonld - a Hash of JSON-LD data or an Array of JSON-LD Hashes
|
79
|
+
# @param [Hash] config (included to conform to the loose interface of translators)
|
80
|
+
# @return [Array<Hash>] An ROF document
|
81
|
+
def self.call(jsonld, config = {})
  # config is accepted only to conform to the translator interface; it is
  # defaulted like the other translators' .call methods so it may be omitted.
  #
  # Array.wrap hands back the caller's own Array when given one, so use the
  # non-destructive #map to avoid overwriting the caller's JSON-LD input.
  Array.wrap(jsonld).map do |element|
    Element.new(element).to_rof
  end
end
|
86
|
+
|
87
|
+
# A single top-level element of a JSON-LD document
|
88
|
+
class Element
|
89
|
+
def initialize(element)
|
90
|
+
@element = element
|
91
|
+
end
|
92
|
+
|
93
|
+
def to_rof
|
94
|
+
@accumulator = Accumulator.new(base_rof)
|
95
|
+
JSON::LD::API.toRdf(element) do |statement|
|
96
|
+
StatementHandler.call(statement, accumulator)
|
97
|
+
end
|
98
|
+
@accumulator.to_rof
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
attr_reader :element, :accumulator
|
104
|
+
|
105
|
+
def base_rof
|
106
|
+
{ "type" => "fobject", "metadata" => { "@context" => ROF::RdfContext }, "rels-ext" => { "@context" => ROF::RelsExtRefContext } }
|
107
|
+
end
|
108
|
+
end
|
109
|
+
private_constant :Element
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'active_support/core_ext/array/wrap'
|
2
|
+
require 'active_support/core_ext/module/delegation'
|
3
|
+
require 'active_support/core_ext/object/deep_dup'
|
4
|
+
|
5
|
+
module ROF
|
6
|
+
module Translators
|
7
|
+
module JsonldToRof
|
8
|
+
# @api public
|
9
|
+
# The accumulator is a "passive" object. Things happen to it. All in the name of building the
|
10
|
+
# hash that is ROF.
|
11
|
+
#
|
12
|
+
# @note The accumulator is only for one PID. See [ROF::Translators::JsonldToRof::Accumulator#add_pid]
|
13
|
+
class Accumulator
|
14
|
+
# @param [Hash] initial_rof - The base ROF document to which we will be adding elements.
|
15
|
+
def initialize(initial_rof = {})
|
16
|
+
@rof = initial_rof
|
17
|
+
@blank_nodes = {}
|
18
|
+
@blank_node_locations = {}
|
19
|
+
end
|
20
|
+
|
21
|
+
# @api public
|
22
|
+
# @return [Hash]
|
23
|
+
def to_rof
|
24
|
+
rof = @rof.deep_dup
|
25
|
+
expand_blank_node_locations(rof)
|
26
|
+
append_properties_to(rof)
|
27
|
+
rof
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
# The antics of the blank node! See the specs for blank nodes to see the expected behavior.
|
33
|
+
def expand_blank_node_locations(rof)
|
34
|
+
@blank_node_locations.each_pair do |node, locations|
|
35
|
+
locations.each_pair do |location, key_value_pairs|
|
36
|
+
data = rof
|
37
|
+
location[0..-2].each do |slug|
|
38
|
+
data[slug] ||= {}
|
39
|
+
data = data[slug]
|
40
|
+
end
|
41
|
+
|
42
|
+
# We may encounter a shallow map, if so we need for it to behave differently
|
43
|
+
slug = location[-1]
|
44
|
+
if slug
|
45
|
+
data[slug] ||= []
|
46
|
+
hash = {}
|
47
|
+
else
|
48
|
+
hash = data
|
49
|
+
end
|
50
|
+
Array.wrap(key_value_pairs).each do |key_value|
|
51
|
+
key_value.each_pair do |key, value|
|
52
|
+
hash[key] ||= []
|
53
|
+
hash[key] += Array.wrap(value)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
data[slug] << hash if slug
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Serialize the registered properties into the 'properties' XML datastream.
#
# Does nothing when no properties have been registered. Otherwise sets the
# 'properties-meta' and 'properties' keys on the given hash.
#
# @param [Hash] rof - the ROF hash to add the properties to (modified in place)
# @return [Hash] the given rof
def append_properties_to(rof)
  return rof unless @properties
  rof['properties-meta'] = { "mime-type" => "text/xml" }
  # Escape &, < and > so a value containing markup characters cannot
  # produce malformed XML.
  fields = @properties.map do |node_name, object|
    "<#{node_name}>#{object.to_s.encode(xml: :text)}</#{node_name}>"
  end
  rof['properties'] = "<fields>#{fields.join}</fields>"
  rof
end
|
72
|
+
|
73
|
+
public
|
74
|
+
|
75
|
+
# @api public
|
76
|
+
# @param [String] node_name - the XML node's name (e.g. <node_name>node_value</node_name>)
|
77
|
+
# @param [String] node_value - the XML element's value
|
78
|
+
# @return [Array] of given node_name and node_value
|
79
|
+
def register_properties(node_name, node_value)
|
80
|
+
@properties ||= []
|
81
|
+
@properties << [node_name, coerce_object_to_string(node_value)]
|
82
|
+
[node_name, node_value]
|
83
|
+
end
|
84
|
+
|
85
|
+
class PidAlreadySetError < RuntimeError
|
86
|
+
end
|
87
|
+
|
88
|
+
# @api public
|
89
|
+
# @param [RDF::Statement] statement
|
90
|
+
# @return [RDF::Statement]
|
91
|
+
def add_blank_node(statement)
|
92
|
+
@blank_nodes[statement.subject] ||= {}
|
93
|
+
@blank_nodes[statement.subject][statement.predicate] ||= []
|
94
|
+
@blank_nodes[statement.subject][statement.predicate] << statement.object
|
95
|
+
statement
|
96
|
+
end
|
97
|
+
|
98
|
+
# @api public
|
99
|
+
# @param [RDF::Subject] subject - Fetch the corresponding blank node that was added
|
100
|
+
# @return [Hash] the predicates recorded for the subject, each mapped to an Array of its objects
|
101
|
+
# @raise [KeyError] when the subject has not previously been added
|
102
|
+
# @see #add_blank_node
|
103
|
+
def fetch_blank_node(subject)
|
104
|
+
@blank_nodes.fetch(subject)
|
105
|
+
end
|
106
|
+
|
107
|
+
# @api public
|
108
|
+
# @param [String] pid - an identifier
|
109
|
+
# @return [String] pid
|
110
|
+
# @raise [PidAlreadySetError] if you attempt to set a PID different from the one already set
|
111
|
+
def add_pid(pid)
|
112
|
+
pid = coerce_object_to_string(pid)
|
113
|
+
if @rof.key?('pid')
|
114
|
+
if @rof['pid'] != pid
|
115
|
+
raise PidAlreadySetError, "Attempted to set pid=#{pid}, but it is already set to #{@rof['pid']}"
|
116
|
+
end
|
117
|
+
else
|
118
|
+
@rof['pid'] = pid
|
119
|
+
end
|
120
|
+
pid
|
121
|
+
end
|
122
|
+
|
123
|
+
# @api public
|
124
|
+
# @param [Array<String>, String] location - a list of nested hash keys (or a single string)
|
125
|
+
# @param [String] value - a translated value for the original RDF Statement
|
126
|
+
# @param [false, RDF::Node] blank_node
|
127
|
+
# @return [Array] location, value
|
128
|
+
def add_predicate_location_and_value(location, value, blank_node = false)
|
129
|
+
# Normalize the location to an Array so that a single String key is treated as a one-element path.
|
130
|
+
location = Array.wrap(location)
|
131
|
+
if location == ['pid']
|
132
|
+
return add_pid(value)
|
133
|
+
end
|
134
|
+
if blank_node
|
135
|
+
add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
|
136
|
+
else
|
137
|
+
add_predicate_location_and_value_direct_for_non_blank_node(location, value)
|
138
|
+
end
|
139
|
+
[location, value]
|
140
|
+
end
|
141
|
+
|
142
|
+
def add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
|
143
|
+
fetch_blank_node(blank_node) # Ensure the node exists
|
144
|
+
@blank_node_locations[blank_node] ||= {}
|
145
|
+
@blank_node_locations[blank_node][location[0..-2]] ||= []
|
146
|
+
@blank_node_locations[blank_node][location[0..-2]] << { location[-1] => Array.wrap(coerce_object_to_string(value)) }
|
147
|
+
end
|
148
|
+
|
149
|
+
def add_predicate_location_and_value_direct_for_non_blank_node(location, value)
|
150
|
+
data = @rof
|
151
|
+
location[0..-2].each do |slug|
|
152
|
+
data[slug] ||= {}
|
153
|
+
data = data[slug]
|
154
|
+
end
|
155
|
+
slug = location[-1]
|
156
|
+
data[slug] ||= []
|
157
|
+
data[slug] << coerce_object_to_string(value)
|
158
|
+
end
|
159
|
+
|
160
|
+
private
|
161
|
+
|
162
|
+
# Normalize an RDF object (or plain value) into the value stored in the ROF.
#
# * nil is returned unchanged.
# * Anything whose string form contains a curate.nd.edu "show" URL is
#   reduced to its "und:<id>" form.
# * Objects that respond to #value yield that value.
# * Everything else is returned as given.
#
# @param [Object] object
# @return [Object, nil]
def coerce_object_to_string(object)
  return object if object.nil?
  # The id is the single path segment following /show/; stop at the next
  # slash so a trailing "/" (or a deeper path) is not folded into the PID.
  match = %r{https?://curate\.nd\.edu/show/([^/\\]+)}.match(object.to_s)
  return "und:#{match[1]}" if match
  return object.value if object.respond_to?(:value)
  object
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|