rof 1.0.7 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +9 -7
- data/LICENSE +201 -16
- data/Rakefile +46 -0
- data/bin/csv_to_rof +1 -2
- data/bin/fedora_to_rof +7 -1
- data/bin/jsonld_to_rof +26 -0
- data/bin/osf_to_rof +6 -2
- data/bin/rof +5 -19
- data/lib/rof.rb +2 -6
- data/lib/rof/access.rb +1 -1
- data/lib/rof/cli.rb +104 -67
- data/lib/rof/compare_rof.rb +68 -39
- data/lib/rof/filter.rb +21 -0
- data/lib/rof/filters.rb +38 -0
- data/lib/rof/filters/bendo.rb +15 -17
- data/lib/rof/filters/date_stamp.rb +5 -4
- data/lib/rof/filters/file_to_url.rb +5 -3
- data/lib/rof/filters/label.rb +9 -7
- data/lib/rof/filters/work.rb +7 -5
- data/lib/rof/ingest.rb +5 -0
- data/lib/rof/osf_context.rb +2 -2
- data/lib/rof/rdf_context.rb +2 -0
- data/lib/rof/translator.rb +18 -0
- data/lib/rof/translators.rb +23 -0
- data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
- data/lib/rof/translators/fedora_to_rof.rb +244 -0
- data/lib/rof/translators/jsonld_to_rof.rb +112 -0
- data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
- data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
- data/lib/rof/translators/osf_to_rof.rb +191 -0
- data/lib/rof/utility.rb +44 -1
- data/lib/rof/version.rb +1 -1
- data/rof.gemspec +10 -2
- data/spec/coverage_helper.rb +17 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
- data/spec/fixtures/osf/phz6b.tar.gz +0 -0
- data/spec/lib/rof/access_spec.rb +30 -23
- data/spec/lib/rof/cli_spec.rb +83 -60
- data/spec/lib/rof/compare_rof_spec.rb +35 -24
- data/spec/lib/rof/filter_spec.rb +10 -0
- data/spec/lib/rof/filters/bendo_spec.rb +42 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
- data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
- data/spec/lib/rof/filters/label_spec.rb +121 -77
- data/spec/lib/rof/filters/work_spec.rb +7 -4
- data/spec/lib/rof/filters_spec.rb +14 -0
- data/spec/lib/rof/translator_spec.rb +15 -0
- data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
- data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
- data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
- data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
- data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
- data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
- data/spec/lib/rof/translators_spec.rb +14 -0
- data/spec/lib/rof/utility_spec.rb +47 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/support/an_rof_filter.rb +10 -0
- metadata +186 -15
- data/lib/rof/get_from_fedora.rb +0 -211
- data/lib/rof/osf_to_rof.rb +0 -123
- data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Dir.glob(File.expand_path('../translators/*.rb', __FILE__)).each do |filename|
|
|
2
|
+
require filename
|
|
3
|
+
end
|
|
4
|
+
|
|
5
|
+
module ROF
|
|
6
|
+
# A namespace for organizing translating classes. A translating class is responsible for
|
|
7
|
+
# converting from one format to another format (e.g. CSV to ROF).
|
|
8
|
+
#
|
|
9
|
+
# @see ROF::Translator
|
|
10
|
+
# @see ROF::Translators::CsvToRof
|
|
11
|
+
# @see ROF::Translators::FedoraToRof
|
|
12
|
+
# @see ROF::Translators::OsfToRof
|
|
13
|
+
module Translators
|
|
14
|
+
# @api public
|
|
15
|
+
# @param [String] csv_contents - in the form of a CSV
|
|
16
|
+
# @param [Hash] config - Hash with symbols for keys
|
|
17
|
+
# @return [Hash] in ROF format
|
|
18
|
+
# @see ROF::Translators::CsvToRof for full details
|
|
19
|
+
def self.csv_to_rof(csv_contents, config = {})
|
|
20
|
+
CsvToRof.call(csv_contents, config)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
require 'rof/translator'
|
|
1
2
|
require('csv')
|
|
2
3
|
require('json')
|
|
3
4
|
|
|
4
|
-
module ROF
|
|
5
|
+
module ROF::Translators
|
|
5
6
|
# Turn a CSV file into a ROF file.
|
|
6
7
|
#
|
|
7
8
|
# pass in the contents of the CSV file.
|
|
@@ -31,7 +32,7 @@ module ROF
|
|
|
31
32
|
# with the previous work translated into ROF. This will allow a work to have
|
|
32
33
|
# attached files with different access permissions, owners, etc...
|
|
33
34
|
# Any extra files are appended to the file list for the work.
|
|
34
|
-
class
|
|
35
|
+
class CsvToRof < ROF::Translator
|
|
35
36
|
class MissingOwnerOrType < RuntimeError
|
|
36
37
|
end
|
|
37
38
|
|
|
@@ -45,7 +46,7 @@ module ROF
|
|
|
45
46
|
class NoPriorWork < RuntimeError
|
|
46
47
|
end
|
|
47
48
|
|
|
48
|
-
def self.
|
|
49
|
+
def self.call(csv_contents, config = {})
|
|
49
50
|
first_line = nil
|
|
50
51
|
rof_contents = []
|
|
51
52
|
previous_work = nil
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'rexml/document'
|
|
3
|
+
require 'rdf/ntriples'
|
|
4
|
+
require 'rdf/rdfxml'
|
|
5
|
+
require 'rubydora'
|
|
6
|
+
require 'rof/translator'
|
|
7
|
+
|
|
8
|
+
module ROF
|
|
9
|
+
module Translators
|
|
10
|
+
# Responsible for translating Fedora PIDs to ROF objects
|
|
11
|
+
class FedoraToRof < ROF::Translator
|
|
12
|
+
# @param [Array] pids - Fedora PIDs
|
|
13
|
+
# @param [Hash] config - Hash with symbol keys
|
|
14
|
+
# @option config [Hash] :fedora_connection_information - The Hash that contains the connection information for Fedora
|
|
15
|
+
# @return [Array<Hash>] The ROF representation of the Fedora objects (one Hash per PID)
|
|
16
|
+
# @see Rubydora.connect
|
|
17
|
+
def self.call(pids, config = {})
|
|
18
|
+
new(pids, config).to_rof
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def initialize(pids, config = {})
|
|
22
|
+
@pids = pids
|
|
23
|
+
@fedora_connection_information = config.fetch(:fedora_connection_information)
|
|
24
|
+
@config = config
|
|
25
|
+
connect_to_fedora!
|
|
26
|
+
end
|
|
27
|
+
attr_reader :pids, :fedora_connection_information, :config, :connection
|
|
28
|
+
|
|
29
|
+
private
|
|
30
|
+
def connect_to_fedora!
|
|
31
|
+
@connection = Rubydora.connect(fedora_connection_information)
|
|
32
|
+
end
|
|
33
|
+
public
|
|
34
|
+
|
|
35
|
+
def to_rof
|
|
36
|
+
# wrap the objects inside a JSON list
|
|
37
|
+
pids.map do |pid|
|
|
38
|
+
PidToRofElement.new(pid, connection, config).convert
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Responsible for converting a single PID to an ROF Element
|
|
43
|
+
class PidToRofElement
|
|
44
|
+
def initialize(pid, connection, config)
|
|
45
|
+
@pid = pid
|
|
46
|
+
@connection = connection
|
|
47
|
+
@config = config
|
|
48
|
+
@fedora_info = { 'pid' => pid, 'type' => 'fobject' }
|
|
49
|
+
@fedora_object = connection.find(pid)
|
|
50
|
+
end
|
|
51
|
+
attr_reader :pid, :config, :fedora_object
|
|
52
|
+
|
|
53
|
+
# Given a rubydora object, extract what we need
|
|
54
|
+
# to create our ROF object in an associative array
|
|
55
|
+
#
|
|
56
|
+
def convert
|
|
57
|
+
@fedora_info['af-model'] = setModel
|
|
58
|
+
# iterate through the data streams that are present.
|
|
59
|
+
# use reflection to call appropriate method for each
|
|
60
|
+
fedora_object.datastreams.each do |dsname, ds|
|
|
61
|
+
method_name = DATASTREAM_NAME_TO_METHOD_MAP.fetch(dsname) { :default_datastream_conversion }
|
|
62
|
+
send(method_name, dsname, ds)
|
|
63
|
+
end
|
|
64
|
+
@fedora_info
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
DATASTREAM_NAME_TO_METHOD_MAP = {
|
|
68
|
+
'DC' => :skip_datastream,
|
|
69
|
+
'RELS-EXT' => :convert_rels_ext,
|
|
70
|
+
'rightsMetadata' => :convert_rights_metadata,
|
|
71
|
+
'properties' => :default_datastream_conversion,
|
|
72
|
+
'content' => :default_datastream_conversion,
|
|
73
|
+
'descMetadata' => :convert_desc_metadata,
|
|
74
|
+
'bendo-item' => :default_datastream_conversion,
|
|
75
|
+
'characterization' => :default_datastream_conversion,
|
|
76
|
+
'thumbnail' => :default_datastream_conversion
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
def default_datastream_conversion(dsname, ds)
|
|
82
|
+
# dump generic datastream
|
|
83
|
+
meta = create_meta(ds)
|
|
84
|
+
@fedora_info["#{dsname}-meta"] = meta unless meta.empty?
|
|
85
|
+
|
|
86
|
+
# if content is short < X bytes and valid utf-8, save as string
|
|
87
|
+
# if content is > X bytes or is not utf-8, save as file only if config option is given
|
|
88
|
+
content = ds.datastream_content
|
|
89
|
+
if content.length <= 1024 || config['inline']
|
|
90
|
+
# this downloads the contents of the datastream into memory
|
|
91
|
+
content_string = content.to_s.force_encoding('UTF-8')
|
|
92
|
+
if content_string.valid_encoding?
|
|
93
|
+
@fedora_info[dsname] = content_string
|
|
94
|
+
return # we're done! move on to next datastream
|
|
95
|
+
end
|
|
96
|
+
# not utf-8, so keep going and see if download option was given
|
|
97
|
+
end
|
|
98
|
+
return unless config['download']
|
|
99
|
+
# download option was given, so save this datastream as a file
|
|
100
|
+
fname = "#{@fedora_info['pid']}-#{dsname}"
|
|
101
|
+
abspath = File.join(config['download_path'], fname)
|
|
102
|
+
@fedora_info["#{dsname}-file"] = fname
|
|
103
|
+
if File.file?(config['download_path'])
|
|
104
|
+
$stderr.puts "Error: --download directory #{config['download_path']} specified is an existing file."
|
|
105
|
+
exit 1
|
|
106
|
+
end
|
|
107
|
+
FileUtils.mkdir_p(config['download_path'])
|
|
108
|
+
File.open(abspath, 'w') do |f|
|
|
109
|
+
f.write(content)
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def create_meta(ds)
|
|
114
|
+
result = {}
|
|
115
|
+
|
|
116
|
+
label = ds.profile['dsLabel']
|
|
117
|
+
result['label'] = label unless label.nil? || label == ''
|
|
118
|
+
result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
|
|
119
|
+
# TODO(dbrower): make sure this is working as intended
|
|
120
|
+
if %w(R E).include?(ds.profile['dsControlGroup'])
|
|
121
|
+
s = result['URL'] = ds.profile['dsLocation']
|
|
122
|
+
s = s.sub(config['bendo'], 'bendo:') if config['bendo']
|
|
123
|
+
result['URL'] = s
|
|
124
|
+
end
|
|
125
|
+
result
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# set fedora_info['af-model']
|
|
129
|
+
#
|
|
130
|
+
def setModel
|
|
131
|
+
# only keep info:fedora/afmodel:XXXXX
|
|
132
|
+
models = fedora_object.profile['objModels'].map do |model|
|
|
133
|
+
Regexp.last_match(1) if model =~ /^info:fedora\/afmodel:(.*)/
|
|
134
|
+
end.compact
|
|
135
|
+
models[0]
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# The methods below are called if the like-named datastream exists in fedora
|
|
139
|
+
|
|
140
|
+
def skip_datastream(*)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# set metadata
|
|
144
|
+
#
|
|
145
|
+
def convert_desc_metadata(_dsname, ds)
|
|
146
|
+
# descMetadata is encoded in ntriples, convert to JSON-LD using our special context
|
|
147
|
+
graph = RDF::Graph.new
|
|
148
|
+
data = ds.datastream_content
|
|
149
|
+
# force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
|
|
150
|
+
# see https://github.com/ruby-rdf/rdf/issues/142
|
|
151
|
+
data.force_encoding('utf-8')
|
|
152
|
+
graph.from_ntriples(data, format: :ntriples)
|
|
153
|
+
JSON::LD::API.fromRdf(graph) do |expanded|
|
|
154
|
+
result = JSON::LD::API.compact(expanded, RdfContext)
|
|
155
|
+
@fedora_info['metadata'] = result
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# set rights
|
|
160
|
+
#
|
|
161
|
+
def convert_rights_metadata(_dsname, ds)
|
|
162
|
+
# rights is an XML document
|
|
163
|
+
# the access array may have read or edit elements
|
|
164
|
+
# each of these elements may contain group or person elements
|
|
165
|
+
xml_doc = REXML::Document.new(ds.datastream_content)
|
|
166
|
+
|
|
167
|
+
rights_array = {}
|
|
168
|
+
|
|
169
|
+
root = xml_doc.root
|
|
170
|
+
|
|
171
|
+
# check for optional embargo date - set if present
|
|
172
|
+
this_embargo = root.elements['embargo']
|
|
173
|
+
rights_array['embargo-date'] = this_embargo.elements['machine'].elements['date'][0] if Utility.has_embargo_date?(this_embargo)
|
|
174
|
+
|
|
175
|
+
%w(read edit).each do |access|
|
|
176
|
+
this_access = root.elements["//access[@type=\'#{access}\']"]
|
|
177
|
+
|
|
178
|
+
next if this_access.nil?
|
|
179
|
+
|
|
180
|
+
unless this_access.elements['machine'].elements['group'].nil?
|
|
181
|
+
group_array = []
|
|
182
|
+
this_access.elements['machine'].elements['group'].each do |this_group|
|
|
183
|
+
group_array << this_group
|
|
184
|
+
end
|
|
185
|
+
rights_array["#{access}-groups"] = group_array
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
next if this_access.elements['machine'].elements['person'].nil?
|
|
189
|
+
person_array = []
|
|
190
|
+
|
|
191
|
+
this_access.elements['machine'].elements['person'].each do |this_person|
|
|
192
|
+
person_array << this_person
|
|
193
|
+
end
|
|
194
|
+
rights_array[access.to_s] = person_array
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
@fedora_info['rights'] = rights_array
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def convert_rels_ext(_dsname, ds)
|
|
201
|
+
# RELS-EXT is RDF-XML - parse it
|
|
202
|
+
ctx = ROF::RelsExtRefContext.dup
|
|
203
|
+
ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
|
|
204
|
+
graph = RDF::Graph.new
|
|
205
|
+
graph.from_rdfxml(ds.datastream_content)
|
|
206
|
+
result = nil
|
|
207
|
+
JSON::LD::API.fromRdf(graph) do |expanded|
|
|
208
|
+
result = JSON::LD::API.compact(expanded, ctx)
|
|
209
|
+
end
|
|
210
|
+
# now strip the info:fedora/ prefix from the URIs
|
|
211
|
+
strip_info_fedora(result)
|
|
212
|
+
# remove extra items
|
|
213
|
+
result.delete('hasModel')
|
|
214
|
+
@fedora_info['rels-ext'] = result
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
private
|
|
218
|
+
|
|
219
|
+
def strip_info_fedora(rels_ext)
|
|
220
|
+
rels_ext.each do |relation, targets|
|
|
221
|
+
next if relation == '@context'
|
|
222
|
+
if targets.is_a?(Hash)
|
|
223
|
+
strip_info_fedora(targets)
|
|
224
|
+
next
|
|
225
|
+
end
|
|
226
|
+
targets = [targets] if targets.is_a?(String)
|
|
227
|
+
targets.map! do |target|
|
|
228
|
+
if target.is_a?(Hash)
|
|
229
|
+
strip_info_fedora(target)
|
|
230
|
+
else
|
|
231
|
+
target.sub('info:fedora/', '')
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
# some single strings cannot be arrays in json-ld, so convert back
|
|
235
|
+
# this shouldn't cause any problems with items that began as arrays
|
|
236
|
+
targets = targets[0] if targets.length == 1
|
|
237
|
+
rels_ext[relation] = targets
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
private_constant :PidToRofElement
|
|
242
|
+
end
|
|
243
|
+
end
|
|
244
|
+
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
require 'rof/rdf_context'
|
|
2
|
+
require 'active_support/core_ext/array/wrap'
|
|
3
|
+
require 'rof/translators/jsonld_to_rof/statement_handler'
|
|
4
|
+
require 'rof/translators/jsonld_to_rof/predicate_handler'
|
|
5
|
+
require 'rof/translators/jsonld_to_rof/accumulator'
|
|
6
|
+
|
|
7
|
+
module ROF
|
|
8
|
+
module Translators
|
|
9
|
+
# @api public
|
|
10
|
+
#
|
|
11
|
+
# Responsible for converting JSON LD into an ROF Hash via registered URI maps.
|
|
12
|
+
#
|
|
13
|
+
# @note Some predicates require explicit mapping whereas others have an assumed mapping. At present all URLs for @context of JSON-LD documents must be registered.
|
|
14
|
+
#
|
|
15
|
+
# @see ROF::Translators::JsonldToRof.call for details on how the JSON-LD is converted
|
|
16
|
+
# @see ROF::Translators::JsonldToRof::PredicateHandler.register for details on how Predicate URIs are mapped to nodes in the ROF document.
|
|
17
|
+
# @see ROF::Translators::JsonldToRof::PredicateHandler
|
|
18
|
+
# @see ROF::Translators::JsonldToRof::StatementHandler
|
|
19
|
+
module JsonldToRof
|
|
20
|
+
PredicateHandler.register('http://purl.org/ontology/bibo/') do |handler|
|
|
21
|
+
handler.namespace_prefix('bibo:')
|
|
22
|
+
handler.within(['metadata'])
|
|
23
|
+
end
|
|
24
|
+
PredicateHandler.register('info:fedora/fedora-system:def/relations-external') do |handler|
|
|
25
|
+
handler.map('#isMemberOfCollection', to: ['rels-ext', 'isMemberOfCollection'])
|
|
26
|
+
end
|
|
27
|
+
PredicateHandler.register('http://id.loc.gov/vocabulary/relators/') do |handler|
|
|
28
|
+
handler.namespace_prefix('mrel:')
|
|
29
|
+
handler.within(['metadata'])
|
|
30
|
+
end
|
|
31
|
+
PredicateHandler.register('http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#') do |handler|
|
|
32
|
+
handler.namespace_prefix('ebucore:')
|
|
33
|
+
handler.within(['metadata'])
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
PredicateHandler.register('https://library.nd.edu/ns/terms/') do |handler|
|
|
37
|
+
handler.map('accessEdit', to: ['rights', 'edit'])
|
|
38
|
+
handler.map('accessRead', to: ['rights', 'read'])
|
|
39
|
+
handler.map('accessEditGroup', to: ['rights', 'edit-groups'])
|
|
40
|
+
handler.map('accessReadGroup', to: ['rights', 'read-groups'])
|
|
41
|
+
handler.map('accessEmbargoDate', to: ['rights', 'embargo-date'])
|
|
42
|
+
handler.map('afmodel', to: ["af-model"])
|
|
43
|
+
handler.map('bendoitem', to: ["bendo-item"])
|
|
44
|
+
handler.map('depositor') do |object, accumulator|
|
|
45
|
+
accumulator.register_properties('depositor', object)
|
|
46
|
+
end
|
|
47
|
+
handler.map('owner') do |object, accumulator|
|
|
48
|
+
accumulator.register_properties('owner', object)
|
|
49
|
+
end
|
|
50
|
+
handler.map('representativeFile') do |object, accumulator|
|
|
51
|
+
accumulator.register_properties('representative', object)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
PredicateHandler.register('http://purl.org/dc/terms/') do |handler|
|
|
56
|
+
handler.namespace_prefix('dc:')
|
|
57
|
+
handler.within(['metadata'])
|
|
58
|
+
handler.map('contributor', to: ['metadata', 'dc:contributor', 'dc:contributor'], force: true)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
PredicateHandler.register('http://projecthydra.org/ns/relations#') do |handler|
|
|
62
|
+
handler.map('hasEditor', to: ['rels-ext', 'hydramata-rel:hasEditor'])
|
|
63
|
+
# We need to map the hasEditorGroup predicate to two different locations in the ROF
|
|
64
|
+
handler.map('hasEditorGroup', to: ['rels-ext', 'hydramata-rel:hasEditorGroup'], force: true)
|
|
65
|
+
handler.map('hasEditorGroup', to: ['rights', 'edit-groups'], force: true)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
PredicateHandler.register('http://www.ndltd.org/standards/metadata/etdms/1.1/') do |handler|
|
|
69
|
+
handler.within(['metadata', 'ms:degree'])
|
|
70
|
+
handler.namespace_prefix('ms:')
|
|
71
|
+
handler.map('role', to: ['metadata', 'dc:contributor', 'ms:role'], force: true)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# @api public
|
|
75
|
+
#
|
|
76
|
+
# Converts the given JSON-LD into an ROF document that can be used to batch ingest into Fedora.
|
|
77
|
+
#
|
|
78
|
+
# @param [Array<Hash>, Hash] jsonld - a Hash of JSON-LD data or an Array of JSON-LD Hashes
|
|
79
|
+
# @param [Hash] config (included to conform to the loose interface of translators)
|
|
80
|
+
# @return [Array<Hash>] An ROF document
|
|
81
|
+
def self.call(jsonld, config)
|
|
82
|
+
Array.wrap(jsonld).map! do |element|
|
|
83
|
+
Element.new(element).to_rof
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# A single top-level element of a JSON-LD document
|
|
88
|
+
class Element
|
|
89
|
+
def initialize(element)
|
|
90
|
+
@element = element
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def to_rof
|
|
94
|
+
@accumulator = Accumulator.new(base_rof)
|
|
95
|
+
JSON::LD::API.toRdf(element) do |statement|
|
|
96
|
+
StatementHandler.call(statement, accumulator)
|
|
97
|
+
end
|
|
98
|
+
@accumulator.to_rof
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
private
|
|
102
|
+
|
|
103
|
+
attr_reader :element, :accumulator
|
|
104
|
+
|
|
105
|
+
def base_rof
|
|
106
|
+
{ "type" => "fobject", "metadata" => { "@context" => ROF::RdfContext }, "rels-ext" => { "@context" => ROF::RelsExtRefContext } }
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
private_constant :Element
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
require 'active_support/core_ext/array/wrap'
|
|
2
|
+
require 'active_support/core_ext/module/delegation'
|
|
3
|
+
require 'active_support/core_ext/object/deep_dup'
|
|
4
|
+
|
|
5
|
+
module ROF
|
|
6
|
+
module Translators
|
|
7
|
+
module JsonldToRof
|
|
8
|
+
# @api public
|
|
9
|
+
# The accumulator is a "passive" object. Things happen to it. All in the name of building the
|
|
10
|
+
# hash that is ROF.
|
|
11
|
+
#
|
|
12
|
+
# @note The accumulator is only for one PID. See [ROF::Translators::JsonldToRof::Accumulator#add_pid]
|
|
13
|
+
class Accumulator
|
|
14
|
+
# @param [Hash] initial_rof - The base ROF document to which we will be adding elements.
|
|
15
|
+
def initialize(initial_rof = {})
|
|
16
|
+
@rof = initial_rof
|
|
17
|
+
@blank_nodes = {}
|
|
18
|
+
@blank_node_locations = {}
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# @api public
|
|
22
|
+
# @return [Hash]
|
|
23
|
+
def to_rof
|
|
24
|
+
rof = @rof.deep_dup
|
|
25
|
+
expand_blank_node_locations(rof)
|
|
26
|
+
append_properties_to(rof)
|
|
27
|
+
rof
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
# The antics of the blank node! See the specs for blank nodes to see the expected behavior.
|
|
33
|
+
def expand_blank_node_locations(rof)
|
|
34
|
+
@blank_node_locations.each_pair do |node, locations|
|
|
35
|
+
locations.each_pair do |location, key_value_pairs|
|
|
36
|
+
data = rof
|
|
37
|
+
location[0..-2].each do |slug|
|
|
38
|
+
data[slug] ||= {}
|
|
39
|
+
data = data[slug]
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# We may encounter a shallow map, if so we need for it to behave differently
|
|
43
|
+
slug = location[-1]
|
|
44
|
+
if slug
|
|
45
|
+
data[slug] ||= []
|
|
46
|
+
hash = {}
|
|
47
|
+
else
|
|
48
|
+
hash = data
|
|
49
|
+
end
|
|
50
|
+
Array.wrap(key_value_pairs).each do |key_value|
|
|
51
|
+
key_value.each_pair do |key, value|
|
|
52
|
+
hash[key] ||= []
|
|
53
|
+
hash[key] += Array.wrap(value)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
data[slug] << hash if slug
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def append_properties_to(rof)
|
|
62
|
+
return rof unless @properties
|
|
63
|
+
rof['properties-meta'] = { "mime-type" => "text/xml" }
|
|
64
|
+
xml = '<fields>'
|
|
65
|
+
@properties.each do |node_name, object|
|
|
66
|
+
xml += "<#{node_name}>#{object}</#{node_name}>"
|
|
67
|
+
end
|
|
68
|
+
xml += '</fields>'
|
|
69
|
+
rof['properties'] = xml
|
|
70
|
+
rof
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
public
|
|
74
|
+
|
|
75
|
+
# @api public
|
|
76
|
+
# @param [String] node_name - the XML node's name (e.g. <node_name>node_value</node_name>)
|
|
77
|
+
# @param [String] node_value - the XML element's value
|
|
78
|
+
# @return [Array] of given node_name and node_value
|
|
79
|
+
def register_properties(node_name, node_value)
|
|
80
|
+
@properties ||= []
|
|
81
|
+
@properties << [node_name, coerce_object_to_string(node_value)]
|
|
82
|
+
[node_name, node_value]
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
class PidAlreadySetError < RuntimeError
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# @api public
|
|
89
|
+
# @param [RDF::Statement] statement
|
|
90
|
+
# @return [RDF::Statement]
|
|
91
|
+
def add_blank_node(statement)
|
|
92
|
+
@blank_nodes[statement.subject] ||= {}
|
|
93
|
+
@blank_nodes[statement.subject][statement.predicate] ||= []
|
|
94
|
+
@blank_nodes[statement.subject][statement.predicate] << statement.object
|
|
95
|
+
statement
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# @api public
|
|
99
|
+
# @param [RDF::Subject] subject - Fetch the corresponding blank node that was added
|
|
100
|
+
# @return [RDF::Statement]
|
|
101
|
+
# @raise [KeyError] when the subject has not previously been added
|
|
102
|
+
# @see #add_blank_node
|
|
103
|
+
def fetch_blank_node(subject)
|
|
104
|
+
@blank_nodes.fetch(subject)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
# @api public
|
|
108
|
+
# @param [String] pid - an identifier
|
|
109
|
+
# @return [String] pid
|
|
110
|
+
# @raise [PidAlreadySetError] if you attempted to set a different PID than the one already set
|
|
111
|
+
def add_pid(pid)
|
|
112
|
+
pid = coerce_object_to_string(pid)
|
|
113
|
+
if @rof.key?('pid')
|
|
114
|
+
if @rof['pid'] != pid
|
|
115
|
+
raise PidAlreadySetError, "Attempted to set pid=#{pid}, but it is already set to #{@rof['pid']}"
|
|
116
|
+
end
|
|
117
|
+
else
|
|
118
|
+
@rof['pid'] = pid
|
|
119
|
+
end
|
|
120
|
+
pid
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# @api public
|
|
124
|
+
# @param [Array<String>, String] location - a list of nested hash keys (or a single string)
|
|
125
|
+
# @param [String] value - a translated value for the original RDF Statement
|
|
126
|
+
# @param [false, RDF::Node] blank_node
|
|
127
|
+
# @return [Array] location, value
|
|
128
|
+
def add_predicate_location_and_value(location, value, blank_node = false)
|
|
129
|
+
# Because I am making transformation on the location via #shift method, I need a duplication.
|
|
130
|
+
location = Array.wrap(location)
|
|
131
|
+
if location == ['pid']
|
|
132
|
+
return add_pid(value)
|
|
133
|
+
end
|
|
134
|
+
if blank_node
|
|
135
|
+
add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
|
|
136
|
+
else
|
|
137
|
+
add_predicate_location_and_value_direct_for_non_blank_node(location, value)
|
|
138
|
+
end
|
|
139
|
+
[location, value]
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
|
|
143
|
+
fetch_blank_node(blank_node) # Ensure the node exists
|
|
144
|
+
@blank_node_locations[blank_node] ||= {}
|
|
145
|
+
@blank_node_locations[blank_node][location[0..-2]] ||= []
|
|
146
|
+
@blank_node_locations[blank_node][location[0..-2]] << { location[-1] => Array.wrap(coerce_object_to_string(value)) }
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def add_predicate_location_and_value_direct_for_non_blank_node(location, value)
|
|
150
|
+
data = @rof
|
|
151
|
+
location[0..-2].each do |slug|
|
|
152
|
+
data[slug] ||= {}
|
|
153
|
+
data = data[slug]
|
|
154
|
+
end
|
|
155
|
+
slug = location[-1]
|
|
156
|
+
data[slug] ||= []
|
|
157
|
+
data[slug] << coerce_object_to_string(value)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
private
|
|
161
|
+
|
|
162
|
+
def coerce_object_to_string(object)
|
|
163
|
+
return object if object.nil?
|
|
164
|
+
if object.to_s =~ %r{https?://curate.nd.edu/show/([^\\]+)/?}
|
|
165
|
+
return "und:#{$1}"
|
|
166
|
+
elsif object.respond_to?(:value)
|
|
167
|
+
return object.value
|
|
168
|
+
else
|
|
169
|
+
object
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|