rof 0.0.1.pre → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +12 -2
- data/Gemfile +1 -0
- data/README.md +87 -0
- data/bin/.ruby-version +1 -0
- data/bin/csv_to_rof +26 -0
- data/bin/fedora_to_rof +57 -0
- data/bin/osf_to_rof +40 -0
- data/bin/rof +78 -0
- data/bulk-ingest.md +242 -0
- data/labels.md +111 -0
- data/lib/rof.rb +20 -1
- data/lib/rof/access.rb +57 -0
- data/lib/rof/cli.rb +122 -0
- data/lib/rof/collection.rb +109 -0
- data/lib/rof/compare_rof.rb +92 -0
- data/lib/rof/filters/bendo.rb +33 -0
- data/lib/rof/filters/date_stamp.rb +36 -0
- data/lib/rof/filters/file_to_url.rb +27 -0
- data/lib/rof/filters/label.rb +153 -0
- data/lib/rof/filters/work.rb +111 -0
- data/lib/rof/get_from_fedora.rb +196 -0
- data/lib/rof/ingest.rb +204 -0
- data/lib/rof/ingesters/rels_ext_ingester.rb +78 -0
- data/lib/rof/ingesters/rights_metadata_ingester.rb +68 -0
- data/lib/rof/osf_context.rb +19 -0
- data/lib/rof/osf_to_rof.rb +122 -0
- data/lib/rof/rdf_context.rb +36 -0
- data/lib/rof/translate_csv.rb +112 -0
- data/lib/rof/utility.rb +84 -0
- data/lib/rof/version.rb +2 -2
- data/rof.gemspec +17 -0
- data/spec/fixtures/a.json +4 -0
- data/spec/fixtures/label.json +20 -0
- data/spec/fixtures/osf/b6psa.tar.gz +0 -0
- data/spec/fixtures/rof/dev0012829m.rof +45 -0
- data/spec/fixtures/vcr_tests/fedora_to_rof1.yml +5274 -0
- data/spec/fixtures/vecnet-citation.json +73 -0
- data/spec/lib/rof/access_spec.rb +36 -0
- data/spec/lib/rof/cli_spec.rb +66 -0
- data/spec/lib/rof/collection_spec.rb +90 -0
- data/spec/lib/rof/compare_rof_spec.rb +263 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +90 -0
- data/spec/lib/rof/filters/file_to_url_spec.rb +70 -0
- data/spec/lib/rof/filters/label_spec.rb +94 -0
- data/spec/lib/rof/filters/work_spec.rb +87 -0
- data/spec/lib/rof/ingest_spec.rb +117 -0
- data/spec/lib/rof/ingesters/rels_ext_ingester_spec.rb +62 -0
- data/spec/lib/rof/ingesters/rights_metadata_ingester_spec.rb +114 -0
- data/spec/lib/rof/osf_to_rof_spec.rb +76 -0
- data/spec/lib/rof/translate_csv_spec.rb +109 -0
- data/spec/lib/rof/utility_spec.rb +64 -0
- data/spec/lib/rof_spec.rb +14 -0
- data/spec/spec_helper.rb +11 -11
- metadata +283 -18
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
|
6
|
+
# If a bendo server is set, substitute it into datastreams that contain a URL referencing bendo
|
7
|
+
#
|
8
|
+
class Bendo
  # Matches the names of datastream metadata entries, i.e. keys which END
  # in "-meta" (so "content-meta" matches, "content-metadata" does not).
  META_KEY = /.+-meta\z/

  # bendo is the text substituted for the "bendo:" marker found in URLs.
  # When it is nil (the default) process leaves every object untouched.
  def initialize(bendo = nil)
    @bendo = bendo
  end

  # For every "*-meta" entry of every object which is a hash holding a
  # "URL", replace the first occurrence of "bendo:" in that URL with the
  # string given to the constructor.
  #
  # obj_list is mutated in place and is also the return value.
  # _fname is accepted for interface compatibility with the other filters
  # and is not used.
  def process(obj_list, _fname)
    return obj_list unless @bendo

    obj_list.each do |obj|
      obj.each do |name, meta|
        next unless name.to_s =~ META_KEY
        # a *-meta entry may be nil or something other than a hash; only a
        # hash can carry a URL to rewrite
        next unless meta.is_a?(Hash) && meta['URL']
        meta['URL'] = meta['URL'].sub('bendo:', @bendo)
      end
    end
    obj_list
  end
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
# Set the upload date to be the date given, provided it doesn't already exist.
|
6
|
+
# Also set the date modified to be the date given.
|
7
|
+
# If not given, the date used defaults to the local time on the computer.
|
8
|
+
class DateStamp
  # date is the value to stamp with; today's date is used when it is
  # omitted. A Date is rendered as YYYY-MM-DD followed by "Z", anything
  # else is rendered with #to_s.
  def initialize(date = nil)
    @today = date ? date : Date.today
    @today_s = case @today
               when Date then @today.strftime('%FZ')
               else @today.to_s
               end
  end

  # Stamp every object in obj_list (in place) and return the list.
  # _fname is not used.
  def process(obj_list, _fname)
    obj_list.map! { |obj| stamp(obj) }
  end

  private

  # Make sure obj has a metadata section, fill in dc:dateSubmitted when it
  # is absent, and always overwrite dc:modified. Returns obj.
  def stamp(obj)
    obj['metadata'] = { '@context' => ROF::RdfContext } if obj['metadata'].nil?
    metadata = obj['metadata']
    # an existing submission date is preserved
    metadata['dc:dateSubmitted'] = @today_s if metadata['dc:dateSubmitted'].nil?
    metadata['dc:modified'] = @today_s
    obj
  end
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ROF
|
2
|
+
module Filters
|
3
|
+
# Convert any content datastream files into a bendo URL, and alter the rof
|
4
|
+
# to use the URL and not upload the file to fedora directly. The bendo URL
|
5
|
+
# will only exist for items having a bendo-item id set. The URL generated
|
6
|
+
# supposes the file keeps the same relative path the item originally had in
|
7
|
+
# the rof file.
|
8
|
+
class FileToUrl
  def initialize; end

  # Rewrite every object in obj_list (in place) and return the list.
  # _fname is not used.
  def process(obj_list, _fname)
    obj_list.map! { |obj| point_at_bendo(obj) }
  end

  private

  # When obj has both a 'bendo-item' and a 'content-file', record the
  # matching bendo URL in 'content-meta' (created if needed) and drop the
  # 'content-file' entry. Any other object is returned unchanged.
  def point_at_bendo(obj)
    item_id = obj['bendo-item']
    path = obj['content-file']
    return obj unless item_id && path

    obj['content-meta'] = obj.fetch('content-meta', {}).tap do |meta|
      meta['URL'] = "bendo:/item/#{item_id}/#{path}"
    end
    obj.delete('content-file')
    obj
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'noids_client'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
# Class Label locates in-place labels of the form
|
6
|
+
# "$(label_name)" in the ROF file, assigns each
|
7
|
+
# label a pid, then replaces the label with that pid.
|
8
|
+
class Label
  # A label had to resolve (it was used inside a rels-ext) but was never defined.
  class MissingLabel < RuntimeError; end

  # Neither :id_list nor :noid_server was supplied.
  class NoPool < RuntimeError; end

  # The identifier source reports that it is empty.
  class OutOfIdentifiers < RuntimeError; end

  # Build a label assigner/resolver.
  #
  # options chooses where identifiers come from:
  #   :id_list                  any object answering #shift and #empty?
  #                             (normally an array; handy in tests)
  #   :noid_server, :pool_name  mint identifiers from an external noid server
  # :id_list wins when both are given; NoPool is raised when neither is.
  #
  # A non-nil prefix causes "#{prefix}:" to be put in front of every
  # identifier handed out.
  def initialize(prefix, options)
    @id_list =
      if options[:id_list]
        options[:id_list]
      elsif options[:noid_server]
        NoidsPool.new(options[:noid_server], options[:pool_name])
      else
        raise NoPool
      end
    @prefix = "#{prefix}:" if prefix
    # capture group 1 is the label name inside "$(...)"
    @label_re = /\$\(([^)]+)\)/
  end

  # Mutates and returns obj_list. Three passes are made:
  #   1. give every fobject needing one a pid, remembering label => pid
  #   2. substitute label references with those pids (a label may be
  #      referenced before the object defining it appears)
  #   3. fill in missing bendo item ids
  # _fname is not used.
  def process(obj_list, _fname)
    labels = {}
    obj_list.each { |obj| assign_pid(obj, labels) }
    obj_list.each { |obj| replace_labels_in_obj(obj, labels) }
    assign_bendo_items(obj_list)
    obj_list
  end

  # Give obj a freshly minted pid when it is an fobject whose pid is either
  # missing or a label. A label found in the pid field is recorded in
  # labels. obj and labels are mutated.
  def assign_pid(obj, labels)
    return unless obj['type'] == 'fobject'

    label = obj['pid'].nil? ? nil : find_label(obj['pid'])
    # an explicit pid which is not a label is left alone
    return if label.nil? && !obj['pid'].nil?

    obj['pid'] = "#{@prefix}#{next_id}"
    labels[label] = obj['pid'] unless label.nil?
  end

  # Resolve label references in every value of an fobject. obj is mutated.
  # Only references inside 'rels-ext' are required to resolve.
  def replace_labels_in_obj(obj, labels)
    return unless obj['type'] == 'fobject'

    obj.each_key do |key|
      obj[key] = replace_labels(obj[key], labels, key == 'rels-ext')
    end
  end

  # Walk obj (arrays and hashes are descended into and updated in place;
  # hash keys are never altered) substituting label references found in
  # strings with the pid stored in the labels hash. With force set, a
  # reference without a pid raises MissingLabel. Returns the resolved value.
  def replace_labels(obj, labels, force = false)
    case obj
    when Array
      obj.map! { |item| replace_labels(item, labels, force) }
    when Hash
      obj.each_key { |key| obj[key] = replace_labels(obj[key], labels, force) }
      obj
    when String
      replace_match(obj, labels, force)
    else
      obj
    end
  end

  # Return a copy of the string obj with each "$(name)" swapped for
  # labels[name]. Unknown names are left as they are, unless force is set,
  # in which case MissingLabel is raised.
  def replace_match(obj, labels, force)
    obj.gsub(@label_re) do |reference|
      resolved = labels[Regexp.last_match(1)]
      raise MissingLabel if force && resolved.nil?
      if resolved.nil?
        reference
      else
        resolved
      end
    end
  end

  # The name of the first label appearing in s, or nil when there is none.
  def find_label(s)
    s.slice(@label_re, 1)
  end

  # Take the next identifier from the source, raising OutOfIdentifiers when
  # the source says it is empty.
  def next_id
    @id_list.empty? ? raise(OutOfIdentifiers) : @id_list.shift
  end

  # Wraps a pool on a noids server so it can stand in for an id list.
  class NoidsPool
    def initialize(noids_server, pool_name)
      connection = NoidsClient::Connection.new(noids_server)
      @pool = connection.get_pool(pool_name)
    end

    # mint from the pool and hand back the first identifier returned
    def shift
      @pool.mint.first
    end

    def empty?
      @pool.closed?
    end
  end

  private

  # The bendo item id is, for now, the pid of the first object having one,
  # with everything up to the last colon removed. Objects ahead of that one
  # are skipped; afterwards every object lacking a bendo item (nil or '')
  # receives it. An id which is already present is never overwritten.
  def assign_bendo_items(obj_list)
    item_id = nil
    obj_list.each do |obj|
      item_id = obj['pid'].gsub(/^.*:/, '') if item_id.nil? && !obj['pid'].nil?
      next if item_id.nil?

      current = obj['bendo-item']
      obj['bendo-item'] = item_id if current.nil? || current == ''
    end
  end
end
|
152
|
+
end
|
153
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'mime-types'
|
2
|
+
|
3
|
+
module ROF
|
4
|
+
module Filters
|
5
|
+
# Expand objects of type "Work(-(.+))?" into a
|
6
|
+
# constellation of "fobjects".
|
7
|
+
# Makes a fobject/generic_file for each file
|
8
|
+
# adds a depositor
|
9
|
+
# turns original object into an fobject/$1
|
10
|
+
# and copies the access to each fobject.
|
11
|
+
class Work
  # Raised when an entry of an object's "files" list names no file.
  class NoFile < RuntimeError
  end

  def initialize
    @utility = ROF::Utility.new
  end

  # Replace each item of obj_list by the objects it expands into.
  # obj_list is mutated in place and is always the return value, also when
  # it is empty. filename is handed to the utility object's set_workdir.
  def process(obj_list, filename)
    @utility.set_workdir(filename)
    obj_list.map! { |x| process_one_work(x) }
    # Array#flatten! answers nil when there was nothing to flatten (an
    # empty list), so it must not be the last expression of this method.
    obj_list.flatten!
    obj_list
  end

  # given a single object, return a list (possibly empty) of new objects
  # to replace the one given.
  # The model comes from the utility's decode_work_type: nil leaves the
  # object as it is, 'Collection' delegates to ROF::Collection, and any
  # other value produces a main fobject followed by one fobject per file.
  def process_one_work(input_obj)
    model = @utility.decode_work_type(input_obj)
    return [input_obj] if model.nil?
    return [ROF::Collection.process_one_collection(input_obj, @utility)] if model == 'Collection'

    main_obj = set_main_obj(input_obj, model)

    result = [main_obj]
    result = make_thumbnail(result, main_obj, input_obj) unless input_obj['files'].nil?
    result
  end

  # Append a GenericFile fobject to result for every entry of
  # input_obj['files'] and make the first file be the representative
  # thumbnail of main_obj. Returns result.
  #
  # An entry is either a file name or a hash with a 'files' list whose
  # first element is the file name; such a hash may also carry 'pid',
  # 'rights', 'owner', 'bendo-item', 'collections' and 'metadata'.
  # Missing rights, owner and bendo-item are copied from input_obj (the
  # entry hash itself is updated). Raises NoFile when an entry hash names
  # no file.
  def make_thumbnail(result, main_obj, input_obj)
    thumb_rep = nil
    input_obj['files'].each do |finfo|
      if finfo.is_a?(String)
        fname = finfo
        finfo = { 'files' => [fname] }
      else
        # Array() copes with a missing 'files' key, which would otherwise
        # surface as a NoMethodError on nil instead of NoFile
        fname = Array(finfo['files']).first
        raise NoFile, 'an entry in "files" does not name a file' if fname.nil?
      end
      finfo['rights'] ||= input_obj['rights']
      finfo['owner'] ||= input_obj['owner']
      finfo['bendo-item'] ||= input_obj['bendo-item']
      finfo['metadata'] ||= {
        '@context' => ROF::RdfContext
      }
      finfo['metadata']['dc:title'] ||= fname
      mimetype = MIME::Types.of(fname)
      mimetype = mimetype.empty? ? 'application/octet-stream' : mimetype.first.content_type
      f_obj = {
        'type' => 'fobject',
        'af-model' => 'GenericFile',
        'pid' => finfo['pid'],
        'bendo-item' => finfo['bendo-item'],
        'rights' => finfo['rights'],
        'properties' => ROF::Utility.prop_ds(finfo['owner']),
        'properties-meta' => {
          'mime-type' => 'text/xml'
        },
        'rels-ext' => {
          'isPartOf' => [main_obj['pid']]
        },
        'content-file' => fname,
        'content-meta' => {
          'label' => fname,
          'mime-type' => mimetype
        },
        'collections' => finfo['collections'],
        'metadata' => finfo['metadata']
      }
      # entries which were not supplied are left out of the fobject
      f_obj.delete_if { |_k, v| v.nil? }
      if thumb_rep.nil?
        # first file: it becomes the representative of the main object and
        # needs a pid (a generated label if none was given) to be referenced
        thumb_rep = f_obj['pid']
        if thumb_rep.nil?
          thumb_rep = @utility.next_label
          f_obj['pid'] = thumb_rep
        end
        main_obj['properties'] = ROF::Utility.prop_ds(input_obj['owner'], thumb_rep)
      end
      result << f_obj
    end
    result
  end

  # Build the main fobject for a work of the given model from input_obj.
  def set_main_obj(input_obj, model)
    result = {}

    result['type'] = 'fobject'
    result['af-model'] = model
    # NOTE: next_label is evaluated even when a pid is present; this is
    # kept so the sequence of generated labels stays as it has been.
    result['pid'] = input_obj.fetch('pid', @utility.next_label)
    result['bendo-item'] = input_obj['bendo-item']
    result['rights'] = input_obj['rights']
    result['properties'] = ROF::Utility.prop_ds(input_obj['owner'])
    result['properties-meta'] = { 'mime-type' => 'text/xml' }
    result['rels-ext'] = input_obj.fetch('rels-ext', {})
    result['metadata'] = input_obj['metadata']
    result
  end
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'fileutils'
require 'json'
require 'rexml/document'
require 'rdf/ntriples'
require 'rdf/rdfxml'
require 'rubydora'
|
6
|
+
|
7
|
+
module ROF
|
8
|
+
class FedoraToRof
  # Datastream ids (with the first hyphen removed) which are converted by
  # the like-named class method below. Every other datastream is dumped
  # generically. Dispatch is limited to this list so that a datastream
  # which happens to be called e.g. "name" or "hash" cannot invoke an
  # unrelated method of this class.
  DATASTREAM_HANDLERS = %w[descMetadata rightsMetadata RELSEXT].freeze

  # connect to fedora and fetch object
  # pid is the id to look up, fedora is passed to Rubydora.connect and
  # config is handed on to the datastream converters.
  # Returns the hash of ROF attributes collected for the object.
  # If connecting or finding the item fails, the error is printed and the
  # process exits with status 1.
  def self.GetFromFedora(pid, fedora, config)
    @fedora_info = {}

    begin
      fedora = Rubydora.connect(fedora)
      doc = fedora.find(pid)
    rescue StandardError => e
      puts "Error: #{e}"
      exit 1
    end

    # set pid, type
    @fedora_info['pid'] = pid
    @fedora_info['type'] = 'fobject'

    readFedora(doc, config)

    @fedora_info
  end

  # Given a rubydora object, extract what we need
  # to create our ROF object in an associative array (@fedora_info).
  # The DC datastream is skipped, datastreams listed in
  # DATASTREAM_HANDLERS go to their converter, all others are dumped.
  def self.readFedora(rdora_obj, config)
    @fedora_info['af-model'] = setModel(rdora_obj)
    rdora_obj.datastreams.each do |dsname, ds|
      next if dsname == 'DC'
      method_key = dsname.sub('-', '')
      if DATASTREAM_HANDLERS.include?(method_key)
        send(method_key, ds, config)
      else
        dump_datastream(dsname, ds, config)
      end
    end
  end

  # Dump a datastream which has no dedicated converter.
  # Its metadata (if any) is stored under "<dsname>-meta". Content that is
  # valid UTF-8 is stored inline under dsname when it is at most 1024
  # characters long or config['inline'] is set. Otherwise, when
  # config['download'] is set, the content is written to a file in
  # config['download_path'] and the file name is stored under
  # "<dsname>-file".
  #
  # TODO(dbrower): change dump algorithm:
  # if content is short < X bytes, save as string
  # if content is > X bytes, save as file only if config option is given
  def self.dump_datastream(dsname, ds, config)
    meta = create_meta(ds, config)
    @fedora_info["#{dsname}-meta"] = meta unless meta.empty?

    # NOTE- Entire datastream being downloaded every time.
    content = ds.datastream_content
    # work on a copy: content may be nil (nil.to_s can be frozen) and the
    # datastream's own string should keep its encoding
    content_string = content.to_s.dup.force_encoding('UTF-8')
    if (content_string.length <= 1024 || config['inline']) && content_string.valid_encoding?
      @fedora_info[dsname] = content_string
    elsif config['download']
      fname = "#{@fedora_info['pid']}-#{dsname}"
      abspath = File.join(config['download_path'], fname)
      @fedora_info["#{dsname}-file"] = fname
      if File.file?(config['download_path'])
        puts "Error: --download directory #{config['download_path']} specified is an existing file."
        exit 1
      end
      FileUtils.mkdir_p(config['download_path'])
      # binary mode: content which ends up here is usually not text
      File.open(abspath, 'wb') do |f|
        f.write(content)
      end
    end
  end
  private_class_method :dump_datastream

  # Collect the metadata of a datastream from its profile: the label when
  # it is not blank, the mime type unless it is text/plain, and the URL
  # for control groups R and E. When config['bendo'] is set, its first
  # occurrence in the URL is replaced by "bendo:".
  def self.create_meta(ds, config)
    result = {}

    label = ds.profile['dsLabel']
    result['label'] = label unless label.nil? || label == ''
    result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
    # TODO(dbrower): make sure this is working as intended
    if %w[R E].include?(ds.profile['dsControlGroup'])
      url = ds.profile['dsLocation']
      url = url.sub(config['bendo'], 'bendo:') if config['bendo']
      result['URL'] = url
    end
    result
  end

  # Returns the value for fedora_info['af-model']: the name of the first
  # model of the form info:fedora/afmodel:XXXXX, or nil if there is none.
  def self.setModel(rdora_obj)
    models = rdora_obj.profile['objModels'].map do |model|
      model[%r{^info:fedora/afmodel:(.*)}, 1]
    end
    models.compact.first
  end

  # The methods below are called if the like-named datastream exists in fedora

  # set metadata
  #
  def self.descMetadata(ds, _config)
    # descMetadata is encoded in ntriples, convert to JSON-LD using our special context
    graph = RDF::Graph.new
    data = ds.datastream_content
    # force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
    # see https://github.com/ruby-rdf/rdf/issues/142
    data.force_encoding('utf-8')
    graph.from_ntriples(data, format: :ntriples)
    JSON::LD::API.fromRdf(graph) do |expanded|
      result = JSON::LD::API.compact(expanded, RdfContext)
      @fedora_info['metadata'] = result
    end
  end

  # set rights
  #
  # rights is an XML document. Its access elements of type read and edit
  # may contain, inside a machine element, any number of group and person
  # elements. The names of ALL of them are collected, as strings, under
  # "read-groups", "read", "edit-groups" and "edit"; a key is only present
  # when at least one name was found.
  def self.rightsMetadata(ds, _config)
    xml_doc = REXML::Document.new(ds.datastream_content)
    root = xml_doc.root

    rights_array = {}

    %w[read edit].each do |access|
      this_access = root && root.elements["//access[@type='#{access}']"]
      next if this_access.nil?

      groups = access_names(this_access, 'group')
      rights_array["#{access}-groups"] = groups unless groups.empty?

      people = access_names(this_access, 'person')
      rights_array[access.to_s] = people unless people.empty?
    end

    @fedora_info['rights'] = rights_array
  end

  # The text of every `kind` element ('group' or 'person') found directly
  # inside the machine element of access_element. Elements without text
  # are ignored; the result is empty if there is no machine element.
  def self.access_names(access_element, kind)
    machine = access_element.elements['machine']
    return [] if machine.nil?

    names = []
    machine.elements.each(kind) do |element|
      names << element.text unless element.text.nil?
    end
    names
  end
  private_class_method :access_names

  def self.RELSEXT(ds, _config)
    # RELS-EXT is RDF-XML - parse it
    ctx = ROF::RelsExtRefContext.dup
    ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
    graph = RDF::Graph.new
    graph.from_rdfxml(ds.datastream_content)
    result = nil
    JSON::LD::API.fromRdf(graph) do |expanded|
      result = JSON::LD::API.compact(expanded, ctx)
    end
    # now strip the info:fedora/ prefix from the URIs
    strip_info_fedora(result)
    # remove extra items
    result.delete('hasModel')
    @fedora_info['rels-ext'] = result
  end

  # Remove the first "info:fedora/" from every target in rels_ext,
  # descending into nested hashes; the '@context' entry is left untouched.
  # rels_ext is changed in place.
  #
  # This helper used to follow a bare `private`, which has no effect on
  # methods defined with `def self.`; it has therefore always been
  # callable from outside and is deliberately left that way.
  def self.strip_info_fedora(rels_ext)
    rels_ext.each do |relation, targets|
      next if relation == '@context'
      if targets.is_a?(Hash)
        strip_info_fedora(targets)
        next
      end
      targets = [targets] if targets.is_a?(String)
      targets.map! do |target|
        if target.is_a?(Hash)
          strip_info_fedora(target)
        else
          target.sub('info:fedora/', '')
        end
      end
      # some single strings cannot be arrays in json-ld, so convert back
      # this shouldn't cause any problems with items that began as arrays
      targets = targets[0] if targets.length == 1
      rels_ext[relation] = targets
    end
  end
end
|
196
|
+
end
|