rof 0.0.1.pre → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +12 -2
- data/Gemfile +1 -0
- data/README.md +87 -0
- data/bin/.ruby-version +1 -0
- data/bin/csv_to_rof +26 -0
- data/bin/fedora_to_rof +57 -0
- data/bin/osf_to_rof +40 -0
- data/bin/rof +78 -0
- data/bulk-ingest.md +242 -0
- data/labels.md +111 -0
- data/lib/rof.rb +20 -1
- data/lib/rof/access.rb +57 -0
- data/lib/rof/cli.rb +122 -0
- data/lib/rof/collection.rb +109 -0
- data/lib/rof/compare_rof.rb +92 -0
- data/lib/rof/filters/bendo.rb +33 -0
- data/lib/rof/filters/date_stamp.rb +36 -0
- data/lib/rof/filters/file_to_url.rb +27 -0
- data/lib/rof/filters/label.rb +153 -0
- data/lib/rof/filters/work.rb +111 -0
- data/lib/rof/get_from_fedora.rb +196 -0
- data/lib/rof/ingest.rb +204 -0
- data/lib/rof/ingesters/rels_ext_ingester.rb +78 -0
- data/lib/rof/ingesters/rights_metadata_ingester.rb +68 -0
- data/lib/rof/osf_context.rb +19 -0
- data/lib/rof/osf_to_rof.rb +122 -0
- data/lib/rof/rdf_context.rb +36 -0
- data/lib/rof/translate_csv.rb +112 -0
- data/lib/rof/utility.rb +84 -0
- data/lib/rof/version.rb +2 -2
- data/rof.gemspec +17 -0
- data/spec/fixtures/a.json +4 -0
- data/spec/fixtures/label.json +20 -0
- data/spec/fixtures/osf/b6psa.tar.gz +0 -0
- data/spec/fixtures/rof/dev0012829m.rof +45 -0
- data/spec/fixtures/vcr_tests/fedora_to_rof1.yml +5274 -0
- data/spec/fixtures/vecnet-citation.json +73 -0
- data/spec/lib/rof/access_spec.rb +36 -0
- data/spec/lib/rof/cli_spec.rb +66 -0
- data/spec/lib/rof/collection_spec.rb +90 -0
- data/spec/lib/rof/compare_rof_spec.rb +263 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +90 -0
- data/spec/lib/rof/filters/file_to_url_spec.rb +70 -0
- data/spec/lib/rof/filters/label_spec.rb +94 -0
- data/spec/lib/rof/filters/work_spec.rb +87 -0
- data/spec/lib/rof/ingest_spec.rb +117 -0
- data/spec/lib/rof/ingesters/rels_ext_ingester_spec.rb +62 -0
- data/spec/lib/rof/ingesters/rights_metadata_ingester_spec.rb +114 -0
- data/spec/lib/rof/osf_to_rof_spec.rb +76 -0
- data/spec/lib/rof/translate_csv_spec.rb +109 -0
- data/spec/lib/rof/utility_spec.rb +64 -0
- data/spec/lib/rof_spec.rb +14 -0
- data/spec/spec_helper.rb +11 -11
- metadata +283 -18
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
|
6
|
+
# If a bendo server is set, substitute it into every datastream metadata
# entry whose "URL" references bendo through the "bendo:" prefix.
class Bendo
  # Matches datastream metadata keys, i.e. names that END in "-meta".
  # Anchored so that keys such as "foo-metadata" are not treated as metadata.
  META_KEY = /.+-meta\z/

  # bendo is the string substituted for the "bendo:" prefix.
  # When it is nil (the default) #process changes nothing.
  def initialize(bendo = nil)
    @bendo = bendo
  end

  # For each object in obj_list, rewrite the "URL" entry of every "*-meta"
  # hash by replacing the first occurrence of "bendo:" with the configured
  # bendo string. Objects are mutated in place.
  #
  # The second argument (the file name) is not used.
  # Returns obj_list.
  def process(obj_list, _fname)
    return obj_list unless @bendo

    obj_list.each do |obj|
      obj.each do |name, meta|
        # skip anything that is not a "*-meta" hash (e.g. nil or string values)
        next unless (name =~ META_KEY) && meta.is_a?(Hash)

        url = meta['URL']
        next unless url.is_a?(String)

        # block form: the replacement is inserted literally, so backslashes
        # in the bendo string are not interpreted as back-references
        meta['URL'] = url.sub('bendo:') { @bendo }
      end
    end
  end
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
# Set the upload date ("dc:dateSubmitted") to the date given, provided the
# object does not already have one, and always set the modification date
# ("dc:modified") to the date given.
# If no date is given, Date.today is used.
class DateStamp
  # date may be a Date (formatted as "YYYY-MM-DDZ") or any other object,
  # in which case its #to_s value is used verbatim as the stamp.
  def initialize(date = nil)
    @today = date || Date.today
    @today_s = @today.is_a?(Date) ? @today.strftime('%FZ') : @today.to_s
  end

  # Stamp every object in obj_list in place. An object lacking a "metadata"
  # hash gets one containing only the "@context" entry before the dates
  # are added. The second argument (the file name) is not used.
  # Returns obj_list.
  def process(obj_list, _fname)
    obj_list.each do |obj|
      metadata = (obj['metadata'] ||= { '@context' => ROF::RdfContext })
      # only save the date submitted if it is not already present
      metadata['dc:dateSubmitted'] = @today_s if metadata['dc:dateSubmitted'].nil?
      # always update the date modified
      metadata['dc:modified'] = @today_s
    end
  end
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ROF
|
2
|
+
module Filters
|
3
|
+
# Convert any content datastream file into a bendo URL, and alter the rof
# to use the URL instead of uploading the file to fedora directly. The bendo
# URL is only generated for items having both a bendo-item id and a
# content-file. The URL supposes the file keeps the same relative path the
# item originally had in the rof file.
class FileToUrl
  # For every object with both 'bendo-item' and 'content-file' set:
  # store "bendo:/item/<bendo-item>/<content-file>" under
  # obj['content-meta']['URL'] (creating 'content-meta' when it is missing
  # or nil) and remove the 'content-file' entry.
  # Objects are mutated in place; the second argument is not used.
  # Returns obj_list.
  def process(obj_list, _fname)
    obj_list.each do |obj|
      bendo_item = obj['bendo-item']
      content_file = obj['content-file']
      next unless bendo_item && content_file

      # `|| {}` rather than fetch: a key that is present but nil must also
      # be replaced by a fresh hash
      meta = obj['content-meta'] || {}
      meta['URL'] = "bendo:/item/#{bendo_item}/#{content_file}"
      obj['content-meta'] = meta
      obj.delete('content-file')
    end
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'noids_client'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
# Class Label locates in-place labels of the form "$(label_name)" in the
# ROF file, assigns each label a pid, then replaces the label with that pid.
class Label
  # Raised when a label that must resolve has no pid assigned to it.
  class MissingLabel < RuntimeError
  end

  # Raised when no source of identifiers was configured.
  class NoPool < RuntimeError
  end

  # Raised when the identifier source reports it is empty.
  class OutOfIdentifiers < RuntimeError
  end

  # Create a new label assigner and resolver. The source of identifiers
  # is given using options.
  # Use :noid_server and :pool_name to connect to an external noid server.
  # Use :id_list to pass in a ruby object responding to #shift and #empty?
  # to generate ids. This is usually a list, to facilitate testing.
  # :id_list takes precedence when both are given.
  #
  # If prefix is not nil, then "#{prefix}:" is prepended to
  # every identifier.
  #
  # Raises NoPool when neither :id_list nor :noid_server is given.
  def initialize(prefix, options)
    @id_list =
      if options[:id_list]
        options[:id_list]
      elsif options[:noid_server]
        NoidsPool.new(options[:noid_server], options[:pool_name])
      else
        raise NoPool, 'either :id_list or :noid_server must be given'
      end
    @prefix = "#{prefix}:" if prefix
    # The first match group in the RE provides the label name
    @label_re = /\$\(([^)]+)\)/
  end

  # Mutate obj_list by assigning pids and resolving labels where needed.
  # Every object additionally gets a 'bendo-item' unless it already has a
  # non-empty one. The second argument (the file name) is not used.
  # Returns obj_list.
  def process(obj_list, _fname)
    labels = {}

    # Two passes are used because labels can be referenced before being
    # defined. First pass: assign pids to each fobject, recording a
    # label => pid mapping for every label found in a pid field.
    obj_list.each { |obj| assign_pid(obj, labels) }

    # Second pass: replace label references with the pids assigned above.
    obj_list.each { |obj| replace_labels_in_obj(obj, labels) }

    # Finally assign bendo ids. For now the first pid found, stripped of
    # any namespace, is used as the bendo item id for the whole list.
    bendo_item = nil
    obj_list.each do |obj|
      if bendo_item.nil?
        # drop everything up to and including the last ':'
        bendo_item = obj['pid'].to_s.sub(/\A.*:/, '') unless obj['pid'].nil?
        next if bendo_item.nil?
      end
      # don't touch if a bendo item has already been assigned
      obj['bendo-item'] = bendo_item if obj['bendo-item'].nil? || obj['bendo-item'] == ''
    end

    obj_list
  end

  # Assign a pid to obj, recording any label found in its pid field.
  # Only objects of type 'fobject' are considered. An object whose pid is
  # present but is not a label keeps its pid. obj and labels are mutated.
  def assign_pid(obj, labels)
    return if obj['type'] != 'fobject'

    label = nil
    unless obj['pid'].nil?
      label = find_label(obj['pid'])
      # skip if the "pid" is not a label
      return if label.nil?
    end
    pid = "#{@prefix}#{next_id}"
    obj['pid'] = pid
    labels[label] = pid unless label.nil?
  end

  # Replace any label references found in the values of obj.
  # Only objects of type 'fobject' are considered. obj is mutated.
  def replace_labels_in_obj(obj, labels)
    return if obj['type'] != 'fobject'

    obj.each do |k, v|
      # only force labels to exist if we are looking in the rels-ext
      obj[k] = replace_labels(v, labels, k == 'rels-ext')
    end
  end

  # Recurse through obj replacing any labels in strings with the id found
  # in labels, which is a hash. Arrays and hashes are updated in place;
  # strings are returned as new strings. Hash keys are not touched (only
  # hash values). Any other kind of value is returned unchanged.
  # If force is true, labels which don't resolve raise MissingLabel.
  def replace_labels(obj, labels, force = false)
    case obj
    when Array
      obj.map! { |x| replace_labels(x, labels, force) }
    when Hash
      obj.each { |k, v| obj[k] = replace_labels(v, labels, force) }
      obj
    when String
      replace_match(obj, labels, force)
    else
      obj
    end
  end

  # Return a copy of the string obj with every "$(name)" replaced by
  # labels[name]. Unknown labels are left as they are, unless force is
  # true, in which case MissingLabel is raised.
  def replace_match(obj, labels, force)
    obj.gsub(@label_re) do |match|
      name = Regexp.last_match(1)
      pid = labels[name]
      raise MissingLabel, "no pid has been assigned to label \"#{name}\"" if pid.nil? && force
      pid.nil? ? match : pid
    end
  end

  # Return the name of the first label in s, or nil when s contains no
  # label (or is not a string at all).
  def find_label(s)
    return nil unless s.is_a?(String)
    s[@label_re, 1]
  end

  # Take the next identifier from the id source.
  # Raises OutOfIdentifiers when the source is empty.
  def next_id
    raise OutOfIdentifiers if @id_list.empty?
    @id_list.shift
  end

  # Encapsulates connection to Noids Server, exposing the #shift and
  # #empty? interface Label expects from an id source.
  class NoidsPool
    def initialize(noids_server, pool_name)
      @pool = NoidsClient::Connection.new(noids_server).get_pool(pool_name)
    end

    # mint from the pool and return the first result
    def shift
      @pool.mint.first
    end

    def empty?
      @pool.closed?
    end
  end
end
|
152
|
+
end
|
153
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'mime-types'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
# Expand objects of type "Work(-(.+))?" into a constellation of "fobjects".
#  * makes a fobject/GenericFile for each file
#  * adds a depositor
#  * turns the original object into an fobject/$1
#  * copies the access (rights) to each file fobject
class Work
  # Raised when a file entry does not name any file.
  class NoFile < RuntimeError
  end

  def initialize
    @utility = ROF::Utility.new
  end

  # Expand every work in obj_list, replacing each object by the list of
  # objects produced for it. obj_list is mutated in place.
  # Returns obj_list (also when it is empty).
  def process(obj_list, filename)
    @utility.set_workdir(filename)
    obj_list.map! { |obj| process_one_work(obj) }
    # Array#flatten! returns nil when it changed nothing (e.g. for an empty
    # list), so its result must not be used as the return value.
    obj_list.flatten!
    obj_list
  end

  # Given a single object, return a list of new objects to replace the one
  # given. Objects that do not decode to a work type are returned as they
  # are; collections are delegated to ROF::Collection.
  def process_one_work(input_obj)
    model = @utility.decode_work_type(input_obj)
    return [input_obj] if model.nil?
    return [ROF::Collection.process_one_collection(input_obj, @utility)] if model == 'Collection'

    main_obj = set_main_obj(input_obj, model)

    result = [main_obj]
    result = make_thumbnail(result, main_obj, input_obj) unless input_obj['files'].nil?
    result
  end

  # Append one GenericFile fobject to result for each entry of
  # input_obj['files'], and make the first file the representative
  # thumbnail of main_obj (giving that file a label as pid if it has none).
  # Returns result.
  # Raises NoFile when a file entry does not name a file.
  def make_thumbnail(result, main_obj, input_obj)
    thumb_rep = nil
    input_obj['files'].each do |finfo|
      f_obj = file_object(finfo, main_obj, input_obj)
      if thumb_rep.nil?
        thumb_rep = f_obj['pid']
        if thumb_rep.nil?
          thumb_rep = @utility.next_label
          f_obj['pid'] = thumb_rep
        end
        main_obj['properties'] = ROF::Utility.prop_ds(input_obj['owner'], thumb_rep)
      end
      result << f_obj
    end
    result
  end

  # Build the fobject that replaces the work itself.
  def set_main_obj(input_obj, model)
    {
      'type' => 'fobject',
      'af-model' => model,
      # NOTE: the default is evaluated eagerly, so a label is consumed from
      # @utility even when input_obj already has a pid. Kept as is because
      # changing it would renumber the labels given to the files.
      'pid' => input_obj.fetch('pid', @utility.next_label),
      'bendo-item' => input_obj['bendo-item'],
      'rights' => input_obj['rights'],
      'properties' => ROF::Utility.prop_ds(input_obj['owner']),
      'properties-meta' => { 'mime-type' => 'text/xml' },
      'rels-ext' => input_obj.fetch('rels-ext', {}),
      'metadata' => input_obj['metadata']
    }
  end

  private

  # Build the GenericFile fobject for one entry of the work's file list.
  # finfo is either a file name or a hash whose 'files' list names the file;
  # a hash entry is filled in (mutated) with defaults taken from input_obj.
  # Entries whose value would be nil are left out of the returned hash.
  def file_object(finfo, main_obj, input_obj)
    if finfo.is_a?(String)
      fname = finfo
      finfo = { 'files' => [fname] }
    else
      # Array() so that a missing 'files' key raises NoFile as well
      fname = Array(finfo['files']).first
      raise NoFile if fname.nil?
    end
    finfo['rights'] ||= input_obj['rights']
    finfo['owner'] ||= input_obj['owner']
    finfo['bendo-item'] ||= input_obj['bendo-item']
    finfo['metadata'] ||= { '@context' => ROF::RdfContext }
    finfo['metadata']['dc:title'] ||= fname

    # fall back to a generic binary type when the name gives no hint
    mimetype = MIME::Types.of(fname)
    mimetype = mimetype.empty? ? 'application/octet-stream' : mimetype.first.content_type

    f_obj = {
      'type' => 'fobject',
      'af-model' => 'GenericFile',
      'pid' => finfo['pid'],
      'bendo-item' => finfo['bendo-item'],
      'rights' => finfo['rights'],
      'properties' => ROF::Utility.prop_ds(finfo['owner']),
      'properties-meta' => { 'mime-type' => 'text/xml' },
      'rels-ext' => { 'isPartOf' => [main_obj['pid']] },
      'content-file' => fname,
      'content-meta' => { 'label' => fname, 'mime-type' => mimetype },
      'collections' => finfo['collections'],
      'metadata' => finfo['metadata']
    }
    f_obj.delete_if { |_k, v| v.nil? }
  end
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'fileutils'
require 'json'
require 'rexml/document'
require 'rdf/ntriples'
require 'rdf/rdfxml'
require 'rubydora'
|
6
|
+
|
7
|
+
module ROF
|
8
|
+
# Reads an object out of fedora and translates it into an ROF hash.
#
# All methods are class methods and accumulate their results in the
# class-level @fedora_info hash, which GetFromFedora resets on every call.
class FedoraToRof
  # Datastreams that have a dedicated translation method (named after the
  # datastream with the first '-' removed). Every other datastream is
  # dumped generically. An explicit list is used rather than respond_to?,
  # so that a datastream name can never dispatch to an unrelated method.
  DATASTREAM_HANDLERS = %w(descMetadata rightsMetadata RELSEXT).freeze

  # Connect to fedora and fetch the object with the given pid.
  # fedora is passed to Rubydora.connect; config is a hash of options (the
  # keys 'inline', 'download', 'download_path' and 'bendo' are read).
  # Returns a hash of the object's ROF attributes.
  # If connecting or finding the object fails, the error is printed and
  # the process exits with status 1.
  def self.GetFromFedora(pid, fedora, config)
    @fedora_info = {}

    begin
      fedora = Rubydora.connect(fedora)
      doc = fedora.find(pid)
    rescue StandardError => e
      puts "Error: #{e}"
      exit 1
    end

    # set pid, type
    @fedora_info['pid'] = pid
    @fedora_info['type'] = 'fobject'

    readFedora(doc, config)

    @fedora_info
  end

  # Given a rubydora object, extract what we need to create our ROF object
  # into @fedora_info. The 'DC' datastream is skipped.
  def self.readFedora(rdora_obj, config)
    @fedora_info['af-model'] = setModel(rdora_obj)
    rdora_obj.datastreams.each do |dsname, ds|
      next if dsname == 'DC'
      method_key = dsname.sub('-', '')
      if DATASTREAM_HANDLERS.include?(method_key)
        send(method_key, ds, config)
      else
        dump_datastream(dsname, ds, config)
      end
    end
  end

  # Generic dump of a datastream that has no dedicated handler:
  # its metadata (if any) is stored under "<dsname>-meta", and its content
  # is either stored inline under "<dsname>" or written to a file named
  # "<pid>-<dsname>" that is recorded under "<dsname>-file".
  #
  # TODO(dbrower): change dump algorithm:
  # if content is short < X bytes, save as string
  # if content is > X bytes, save as file only if config option is given
  def self.dump_datastream(dsname, ds, config)
    meta = create_meta(ds, config)
    @fedora_info["#{dsname}-meta"] = meta unless meta.empty?

    # NOTE- Entire datastream being downloaded every time.
    content = ds.datastream_content
    # work on a copy: the datastream's own string is left untouched, and a
    # nil content becomes an (unfrozen) empty string
    content_string = content.to_s.dup.force_encoding('UTF-8')
    if (content_string.length <= 1024 || config['inline']) && content_string.valid_encoding?
      @fedora_info[dsname] = content_string
    elsif config['download']
      fname = "#{@fedora_info['pid']}-#{dsname}"
      abspath = File.join(config['download_path'], fname)
      @fedora_info["#{dsname}-file"] = fname
      if File.file?(config['download_path'])
        puts "Error: --download directory #{config['download_path']} specified is an existing file."
        exit 1
      end
      FileUtils.mkdir_p(config['download_path'])
      # binary mode: the content is written byte for byte
      File.open(abspath, 'wb') { |f| f.write(content) }
    end
  end
  private_class_method :dump_datastream

  # Build the "<dsname>-meta" hash for a datastream from its profile:
  # 'label' when non-empty, 'mime-type' unless it is text/plain, and 'URL'
  # for datastreams in control group R or E. When config['bendo'] is set,
  # its first occurrence in the URL is replaced by "bendo:".
  def self.create_meta(ds, config)
    result = {}

    label = ds.profile['dsLabel']
    result['label'] = label unless label.nil? || label == ''
    result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
    # TODO(dbrower): make sure this is working as intended
    if %w(R E).include?(ds.profile['dsControlGroup'])
      url = ds.profile['dsLocation']
      url = url.sub(config['bendo'], 'bendo:') if url && config['bendo']
      result['URL'] = url
    end
    result
  end

  # Return the af-model of the object: the XXXXX of the first
  # "info:fedora/afmodel:XXXXX" entry in its 'objModels' profile list,
  # or nil when there is none.
  def self.setModel(rdora_obj)
    models = Array(rdora_obj.profile['objModels']).map do |model|
      Regexp.last_match(1) if model =~ %r{^info:fedora/afmodel:(.*)}
    end
    models.compact.first
  end

  # The methods below are called if the like-named datastream exists in fedora

  # Set 'metadata' from the descMetadata datastream.
  def self.descMetadata(ds, _config)
    # descMetadata is encoded in ntriples, convert to JSON-LD using our special context
    graph = RDF::Graph.new
    data = ds.datastream_content
    # force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
    # see https://github.com/ruby-rdf/rdf/issues/142
    data.force_encoding('utf-8')
    graph.from_ntriples(data, format: :ntriples)
    JSON::LD::API.fromRdf(graph) do |expanded|
      result = JSON::LD::API.compact(expanded, ROF::RdfContext)
      @fedora_info['metadata'] = result
    end
  end

  # Set 'rights' from the rightsMetadata datastream, an XML document.
  # For each of the "read" and "edit" access elements, the text of every
  # group element under its machine element is collected under
  # "<access>-groups", and the text of every person element under "<access>".
  # Returns the rights hash.
  def self.rightsMetadata(ds, _config)
    xml_doc = REXML::Document.new(ds.datastream_content)
    rights = {}
    root = xml_doc.root

    unless root.nil?
      %w(read edit).each do |access|
        this_access = root.elements["//access[@type='#{access}']"]
        next if this_access.nil?

        machine = this_access.elements['machine']
        next if machine.nil?

        groups = machine.get_elements('group')
        rights["#{access}-groups"] = groups.map(&:text).compact unless groups.empty?

        people = machine.get_elements('person')
        rights[access.to_s] = people.map(&:text).compact unless people.empty?
      end
    end

    @fedora_info['rights'] = rights
  end

  # Set 'rels-ext' from the RELS-EXT datastream.
  def self.RELSEXT(ds, _config)
    # RELS-EXT is RDF-XML - parse it
    ctx = ROF::RelsExtRefContext.dup
    ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
    graph = RDF::Graph.new
    graph.from_rdfxml(ds.datastream_content)
    result = nil
    JSON::LD::API.fromRdf(graph) do |expanded|
      result = JSON::LD::API.compact(expanded, ctx)
    end
    # now strip the info:fedora/ prefix from the URIs
    strip_info_fedora(result)
    # remove extra items
    result.delete('hasModel')
    @fedora_info['rels-ext'] = result
  end

  # Remove the first "info:fedora/" from every string target in rels_ext,
  # recursing into nested hashes. The '@context' entry is left alone.
  # rels_ext is mutated in place.
  #
  # NOTE: a bare `private` does not apply to `def self.` methods, so this
  # method has always been publicly callable; it is kept that way.
  def self.strip_info_fedora(rels_ext)
    rels_ext.each do |relation, targets|
      next if relation == '@context'
      if targets.is_a?(Hash)
        strip_info_fedora(targets)
        next
      end
      targets = [targets] if targets.is_a?(String)
      # values that are neither strings, hashes nor lists have nothing to strip
      next unless targets.is_a?(Array)

      targets.map! do |target|
        if target.is_a?(Hash)
          strip_info_fedora(target)
        elsif target.is_a?(String)
          target.sub('info:fedora/', '')
        else
          target
        end
      end
      # some single strings cannot be arrays in json-ld, so convert back
      # this shouldn't cause any problems with items that began as arrays
      targets = targets[0] if targets.length == 1
      rels_ext[relation] = targets
    end
  end
end
|
196
|
+
end
|