rof 0.0.1.pre → 1.0.4
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +12 -2
- data/Gemfile +1 -0
- data/README.md +87 -0
- data/bin/.ruby-version +1 -0
- data/bin/csv_to_rof +26 -0
- data/bin/fedora_to_rof +57 -0
- data/bin/osf_to_rof +40 -0
- data/bin/rof +78 -0
- data/bulk-ingest.md +242 -0
- data/labels.md +111 -0
- data/lib/rof.rb +20 -1
- data/lib/rof/access.rb +57 -0
- data/lib/rof/cli.rb +122 -0
- data/lib/rof/collection.rb +109 -0
- data/lib/rof/compare_rof.rb +92 -0
- data/lib/rof/filters/bendo.rb +33 -0
- data/lib/rof/filters/date_stamp.rb +36 -0
- data/lib/rof/filters/file_to_url.rb +27 -0
- data/lib/rof/filters/label.rb +153 -0
- data/lib/rof/filters/work.rb +111 -0
- data/lib/rof/get_from_fedora.rb +196 -0
- data/lib/rof/ingest.rb +204 -0
- data/lib/rof/ingesters/rels_ext_ingester.rb +78 -0
- data/lib/rof/ingesters/rights_metadata_ingester.rb +68 -0
- data/lib/rof/osf_context.rb +19 -0
- data/lib/rof/osf_to_rof.rb +122 -0
- data/lib/rof/rdf_context.rb +36 -0
- data/lib/rof/translate_csv.rb +112 -0
- data/lib/rof/utility.rb +84 -0
- data/lib/rof/version.rb +2 -2
- data/rof.gemspec +17 -0
- data/spec/fixtures/a.json +4 -0
- data/spec/fixtures/label.json +20 -0
- data/spec/fixtures/osf/b6psa.tar.gz +0 -0
- data/spec/fixtures/rof/dev0012829m.rof +45 -0
- data/spec/fixtures/vcr_tests/fedora_to_rof1.yml +5274 -0
- data/spec/fixtures/vecnet-citation.json +73 -0
- data/spec/lib/rof/access_spec.rb +36 -0
- data/spec/lib/rof/cli_spec.rb +66 -0
- data/spec/lib/rof/collection_spec.rb +90 -0
- data/spec/lib/rof/compare_rof_spec.rb +263 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +90 -0
- data/spec/lib/rof/filters/file_to_url_spec.rb +70 -0
- data/spec/lib/rof/filters/label_spec.rb +94 -0
- data/spec/lib/rof/filters/work_spec.rb +87 -0
- data/spec/lib/rof/ingest_spec.rb +117 -0
- data/spec/lib/rof/ingesters/rels_ext_ingester_spec.rb +62 -0
- data/spec/lib/rof/ingesters/rights_metadata_ingester_spec.rb +114 -0
- data/spec/lib/rof/osf_to_rof_spec.rb +76 -0
- data/spec/lib/rof/translate_csv_spec.rb +109 -0
- data/spec/lib/rof/utility_spec.rb +64 -0
- data/spec/lib/rof_spec.rb +14 -0
- data/spec/spec_helper.rb +11 -11
- metadata +283 -18
data/lib/rof/ingest.rb
ADDED
@@ -0,0 +1,204 @@
```ruby
require 'json/ld'
require "rof/ingesters/rels_ext_ingester"
require "rof/ingesters/rights_metadata_ingester"

module ROF
  class NotFobjectError < RuntimeError
  end

  class MissingPidError < RuntimeError
  end

  class TooManyIdentitiesError < RuntimeError
  end

  class SourceError < RuntimeError
  end

  # Ingest or update item in fedora.
  # If fedora is nil, then we only verify that item is in the proper format.
  # Otherwise fedora is a Rubydora::Repository object (for now...).
  # Returns a list of ingested datastreams if everything is okay.
  # Otherwise raises an exception depending on the error.
  def self.Ingest(item, fedora=nil, search_paths=[], bendo=nil)
    raise NotFobjectError if item["type"] != "fobject"
    raise TooManyIdentitiesError if item.key?("id") && item.key?("pid")
    item["pid"] = item["id"] unless item.key?("pid")
    raise MissingPidError unless item["pid"].is_a? String
    models = string_nil_to_array(item["model"])
    models += string_nil_to_array(item["af-model"]).map { |m| af_model_name(m) }
    # does it already exist in fedora? Create it otherwise
    doc = nil
    if fedora
      doc = fedora.find_or_initialize(item["pid"])
      # The addRelationship API is broken in Fedora 3.6.x.
      # Since the `models` method in Rubydora uses that API, it
      # also doesn't work. ActiveFedora is not affected since it
      # serializes to RELS-EXT itself, bypassing the addRelationship endpoint.
      # models.each do |m|
      #   doc.models << m unless doc.models.include?(m)
      # end

      # it seems like we need to save the document before adding datastreams?!?
      doc.save
    end

    ds_touched = []
    # Update rels-ext if there is either a rels-ext present or if there
    # is a model to set. Otherwise, don't touch it!
    if (item.has_key?("rels-ext") || !models.empty?)
      update_rels_ext(models, item, doc)
      ds_touched << "rels-ext"
    end
    # now handle all the other datastreams
    item.each do |key, value|
      case key
      # fields having special treatment
      when "rights"
        self.ingest_rights_metadata(item, doc)
        ds_touched << "rightsMetadata"
      when "metadata"
        self.ingest_ld_metadata(item, doc)
        ds_touched << "descMetadata"

      # ignore these fields
      when "type", "pid", "model", "id", "af-model", "rels-ext", "collections"

      # datastream fields
      when /\A(.+)-file\Z/, /\A(.+)-meta\Z/, /\A(.+)\Z/
        # ingest a datastream
        dsname = $1
        next if ds_touched.include?(dsname)
        self.ingest_datastream(dsname, item, doc, search_paths, bendo)
        ds_touched << dsname
      end
    end
    return ds_touched
  end

  def self.ingest_datastream(dsname, item, fdoc, search_paths, bendo)
    # What kind of content is there?
    ds_content = item[dsname]
    ds_filename = item["#{dsname}-file"]
    ds_meta = item["#{dsname}-meta"]
    if ds_filename && ds_content
      raise SourceError.new("Both #{dsname} and #{dsname}-file are present.")
    end
    if ds_content && !ds_content.is_a?(String)
      raise SourceError.new("Content for #{dsname} is not a string.")
    end
    # A URL, without content or file, is an R datastream.
    # A URL, with content or file, raises an error.
    ds_url = ds_meta["URL"] if ds_meta && ds_meta.is_a?(Hash)
    if ds_url && ds_content
      raise SourceError.new("Both #{ds_url} and #{dsname} are present.")
    end
    if ds_url && ds_filename
      raise SourceError.new("Both #{ds_url} and #{dsname}-file are present.")
    end

    md = {"mime-type" => "text/plain",
          "label" => "",
          "versionable" => true,
          "control-group" => "M",
    }

    if ds_meta
      md.merge!(item["#{dsname}-meta"])
    end

    if ds_url
      md["control-group"] = "R"

      # If the bendo server was passed on the command line, assume that the URL is in
      # the form "bendo:/item/<item#>/<item name>" and substitute "bendo:" with the server name.
      # If no bendo was provided, use whatever is there.
      if bendo
        md["URL"] = md["URL"].sub("bendo:", bendo)
      end
    end

    # NOTE(dbrower): this could be refactored a bit. I was trying to keep the
    # same path for whether fdoc is nil or not as much as possible.
    ds = nil
    if fdoc
      ds = fdoc[dsname]
      # TODO(dbrower): maybe verify these options to be within bounds?
      ds.controlGroup = md["control-group"]
      ds.dsLabel = md["label"]
      ds.versionable = md["versionable"]
      ds.mimeType = md["mime-type"]
      ds.dsLocation = md["URL"] if md["URL"]
    end
    need_close = false
    if ds_filename
      ds_content = self.find_file_and_open(ds_filename, search_paths, "rb")
      need_close = true
    end
    if ds
      ds.content = ds_content if ds_content
      ds.save
    end
  ensure
    ds_content.close if ds_content && need_close
  end

  def self.ingest_rights_metadata(item, fdoc)
    Ingesters::RightsMetadataIngester.call(item: item, fedora_document: fdoc)
  end

  def self.ingest_ld_metadata(item, fdoc)
    input = item['metadata']
    # Sometimes json-ld generates @graph structures when converting from fedora to ROF.
    # In that case, don't provide an id key.
    if !input.has_key?("@graph")
      input["@id"] = "info:fedora/#{item['pid']}" unless input["@id"]
    end
    graph = RDF::Graph.new << JSON::LD::API.toRdf(input)
    content = graph.dump(:ntriples)
    # We read the rof file as UTF-8. The RDF gem seems to convert it back to
    # the default encoding, so fix it.
    content.force_encoding('UTF-8')
    if fdoc
      ds = fdoc['descMetadata']
      ds.mimeType = "text/plain"
      ds.content = content
      ds.save
    end
    content
  end

  def self.update_rels_ext(models, item, fdoc)
    Ingesters::RelsExtIngester.call(models: models, item: item, fedora_document: fdoc)
  end

  # Find fname by looking through the directories in search_path,
  # an array of strings.
  # Will not find any files if search_path is empty.
  # Raises Errno::ENOENT if no file is found; otherwise
  # opens the file and returns a file handle.
  def self.find_file_and_open(fname, search_path, flags)
    # don't search if the file has an absolute path
    if fname[0] == "/"
      return File.open(fname, flags)
    end
    search_path.each do |path|
      begin
        f = File.open(File.join(path, fname), flags)
        return f
      rescue Errno::ENOENT
      end
    end
    raise Errno::ENOENT.new(fname)
  end

  def self.af_model_name(model)
    "info:fedora/afmodel:#{model}"
  end

  def self.string_nil_to_array(x)
    return [] if x.nil?
    return [x] unless x.is_a? Array
    x
  end
end
```
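For orientation, here is a minimal sketch of driving `ROF.Ingest` in verify-only mode. The item hash is illustrative (it is not taken from the gem's fixtures); with `fedora` left as nil nothing is written, and the call simply validates the item and reports the datastreams a real ingest would touch.

```ruby
require 'rof'

# Hypothetical fobject hash; the keys follow the shapes the method checks for.
item = {
  "type"         => "fobject",
  "pid"          => "temp:ab12cd34",
  "af-model"     => "GenericFile",
  "rights"       => { "read-groups" => ["public"], "edit" => ["dbrower"] },
  "metadata"     => { "@context" => { "dc" => "http://purl.org/dc/terms/" },
                      "dc:title" => "Example item" },
  "content"      => "hello world",
  "content-meta" => { "mime-type" => "text/plain", "label" => "greeting" }
}

# With fedora=nil no repository is contacted; the return value lists the
# datastreams an actual ingest would have created or updated, e.g.
# ["rels-ext", "rightsMetadata", "descMetadata", "content"]
ROF.Ingest(item, nil)
```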
data/lib/rof/ingesters/rels_ext_ingester.rb
ADDED
@@ -0,0 +1,78 @@
```ruby
require 'rdf'
require 'json/ld'
require 'rdf/rdfxml'

module ROF
  module Ingesters
    class RelsExtIngester
      def self.call(attributes)
        new(attributes).call
      end

      # :models is a list of fedora content models this item has
      # :item is the hash of the ROF item
      # :fdoc is an optional fedora document to save to
      # :pid is the namespaced identifier of this item
      attr_reader :models, :item, :fdoc, :pid
      def initialize(attributes = {})
        @models = attributes.fetch(:models)
        @item = attributes.fetch(:item)
        @pid = item.fetch('pid')
        @fdoc = attributes.fetch(:fedora_document, nil)
      end

      def call
        content = build_content
        persist(content)
        content
      end

      private

      def rels_ext
        item.fetch('rels-ext', {})
      end

      def build_content
        # this is ugly to work around the addRelationship bug in 3.6.x
        # (See bugs FCREPO-1191 and FCREPO-1187)

        # build up a json-ld object, and then persist that (into XML!)
        input = rels_ext
        context = input.fetch("@context", {}).merge(ROF::RelsExtRefContext)
        input["@context"] = context
        input["@id"] = "info:fedora/#{pid}"

        input["hasModel"] = models

        # RELS-EXT should only contain references to other (internal) fedora
        # objects. Rewrite them to have prefix "info:fedora/".
        # Also need to make sure json-ld interprets each of these object
        # references as an IRI instead of a string.
        # This is kinda hacky. Is there a better way?
        input.each do |relation, targets|
          next if relation == "@context" || relation == "@id" || relation == "hasModel"
          targets = [targets] if targets.is_a? String
          input[relation] = targets.map do |target|
            target.is_a?(String) ? {"@id" => "info:fedora/#{target}"} : target
          end
        end

        graph = RDF::Graph.new << JSON::LD::API.toRdf(input)
        graph.dump(:rdfxml)
      end

      def persist(content)
        if fdoc
          ds = fdoc['RELS-EXT']
          ds.content = content
          ds.mimeType = "application/rdf+xml"
          ds.save
        else
          true
        end
      end
    end
  end
end
```
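A short sketch of calling this ingester directly. Without a `:fedora_document` it only returns the generated RDF/XML; the pid and relation below are made-up values, not part of the gem's test data.

```ruby
require 'rof'

item = {
  "pid"      => "temp:ab12cd34",
  "rels-ext" => { "isPartOf" => "temp:collection1" }
}

# Returns the RELS-EXT serialization as RDF/XML. Passing
# fedora_document: doc would also save it to the RELS-EXT datastream.
xml = ROF::Ingesters::RelsExtIngester.call(
  models: ["info:fedora/afmodel:GenericFile"],
  item:   item
)
```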
data/lib/rof/ingesters/rights_metadata_ingester.rb
ADDED
@@ -0,0 +1,68 @@
```ruby
module ROF
  module Ingesters
    class RightsMetadataIngester
      def self.call(attributes)
        new(attributes).call
      end

      attr_reader :item, :fdoc
      def initialize(attributes = {})
        @item = attributes.fetch(:item)
        @fdoc = attributes.fetch(:fedora_document, nil)
      end

      def call
        rights = item["rights"]
        return if rights.nil?
        #
        # we really should be building this using an xml engine.
        #
        content = %Q{<rightsMetadata xmlns="http://hydra-collab.stanford.edu/schemas/rightsMetadata/v1" version="0.1">\n}
        # TODO(dbrower): Does the copyright need to be exposed in the rof?
        content += %Q{  <copyright>\n    <human type="title"/>\n    <human type="description"/>\n    <machine type="uri"/>\n  </copyright>\n}
        content += format_rights_section("discover", rights["discover"], rights["discover-groups"])
        content += format_rights_section("read", rights["read"], rights["read-groups"])
        content += format_rights_section("edit", rights["edit"], rights["edit-groups"])
        # TODO(dbrower): expose embargo information
        content += %Q{  <embargo>\n    <human/>\n}
        if rights["embargo-date"]
          content += %Q{    <machine>\n}
          content += %Q{      <date>#{rights["embargo-date"]}</date>\n}
          content += %Q{    </machine>\n}
        else
          content += %Q{    <machine/>\n}
        end
        content += %Q{  </embargo>\n}
        content += %Q{</rightsMetadata>\n}

        if fdoc
          ds = fdoc['rightsMetadata']
          ds.mimeType = 'text/xml'
          ds.content = content
          ds.save
        end
        content
      end

      def format_rights_section(section_name, people, groups)
        people = [people] if people.is_a? String
        groups = [groups] if groups.is_a? String
        result = "  <access type=\"#{section_name}\">\n    <human/>\n"
        if people || groups
          result += "    <machine>\n"
          (people || []).each do |person|
            result += "      <person>#{person}</person>\n"
          end
          (groups || []).each do |group|
            result += "      <group>#{group}</group>\n"
          end
          result += "    </machine>\n"
        else
          result += "    <machine/>\n"
        end
        result += "  </access>\n"
        result
      end
    end
  end
end
```
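For illustration, a minimal call that builds the rightsMetadata XML from a rights hash. The identifiers and dates are invented; as with the other ingesters, supplying `fedora_document:` would also persist the result to the object's rightsMetadata datastream.

```ruby
require 'rof'

item = {
  "pid"    => "temp:ab12cd34",
  "rights" => {
    "read-groups"  => ["public"],
    "edit"         => ["dbrower"],
    "embargo-date" => "2017-12-01"
  }
}

# Returns the rightsMetadata document as a string of XML.
xml = ROF::Ingesters::RightsMetadataIngester.call(item: item)
puts xml
```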
data/lib/rof/osf_context.rb
ADDED
@@ -0,0 +1,19 @@
```ruby
module ROF

  OsfPrefixList = {
    'dcterms' => 'http://purl.org/dc/terms/',
    'osf-model' => 'http://www.dataconservancy.org/osf-business-object-model#'
  }.freeze

  OsfToNDMap = {
    'dc:created' => 'http://purl.org/dc/terms/created',
    'dc:description' => 'http://purl.org/dc/terms/description',
    'dc:title' => 'http://purl.org/dc/terms/title',
    'dc:subject' => 'http://www.dataconservancy.org/osf-business-object-model#hasTag',
    'isPublic' => 'http://www.dataconservancy.org/osf-business-object-model#isPublic',
    'hasContributor' => 'http://www.dataconservancy.org/osf-business-object-model#hasContributor',
    'isBibliographic' => 'http://www.dataconservancy.org/osf-business-object-model#isBibliographic',
    'hasFullName' => 'http://www.dataconservancy.org/osf-business-object-model#hasFullName',
    'hasUser' => 'http://www.dataconservancy.org/osf-business-object-model#hasUser'
  }.freeze
end
```
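These constants are plain frozen hashes. `OsfToNDMap` is how `OsfToRof` (next file) translates a compact key into the full predicate URI used as a key in the parsed JSON-LD, roughly:

```ruby
require 'rof'

osf_map = ROF::OsfToNDMap
osf_map['dc:title']
# => "http://purl.org/dc/terms/title"

# A parsed ttl record is keyed by these full URIs, so a title lookup
# takes the form: ttl_data[0][osf_map['dc:title']][0]['@value']
```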
data/lib/rof/osf_to_rof.rb
ADDED
@@ -0,0 +1,122 @@
```ruby
require 'json'
require 'zlib'
require 'rubygems/package'
require 'rdf/turtle'
require 'rof/osf_context'
require 'rof/rdf_context'
require 'rof/utility'

module ROF
  # Class for managing OSF Archive data transformations.
  # It is called after the get-from-osf task, and before the work-xlat task.
  class OsfToRof
    # Convert an OSF Archive tar.gz to ROF
    def self.osf_to_rof(config, osf_projects = nil)
      @osf_map = ROF::OsfToNDMap
      rof_array = []
      return {} if osf_projects.nil?
      this_project = osf_projects
      ttl_data = ttl_from_targz(config, this_project,
                                this_project['project_identifier'] + '.ttl')
      rof_array[0] = build_archive_record(config, this_project, ttl_data)
      rof_array
    end

    # reads a ttl file and makes it a JSON-LD file that we can parse
    def self.fetch_from_ttl(ttl_file)
      graph = RDF::Turtle::Reader.open(ttl_file,
                                       prefixes: ROF::OsfPrefixList.dup)
      JSON::LD::API.fromRdf(graph)
    end

    # extracts the given ttl file from the JHU tar.gz package
    # - assumed to live under data/obj/root
    def self.ttl_from_targz(config, this_project, ttl_filename)
      id = this_project['project_identifier']
      ttl_path = File.join(id,
                           'data/obj/root',
                           ttl_filename)
      ROF::Utility.file_from_targz(File.join(config['package_dir'], id + '.tar.gz'),
                                   ttl_path)
      ttl_data = fetch_from_ttl(File.join(config['package_dir'], ttl_path))
      # this is an array - the additional elements are the contributor(s)
      ttl_data
    end

    # Maps RELS-EXT
    def self.map_rels_ext(_ttl_data)
      rels_ext = {}
      rels_ext['@context'] = ROF::RelsExtRefContext.dup
      rels_ext
    end

    # sets metadata
    def self.map_metadata(config, project, ttl_data)
      metadata = {}
      metadata['@context'] = ROF::RdfContext.dup
      # metadata derived from the project ttl file
      metadata['dc:created'] = Time.iso8601(ttl_data[0][@osf_map['dc:created']][0]['@value']).to_date.iso8601 + "Z"
      metadata['dc:title'] = ttl_data[0][@osf_map['dc:title']][0]['@value']
      metadata['dc:description'] =
        ttl_data[0][@osf_map['dc:description']][0]['@value']
      metadata['dc:subject'] = map_subject(ttl_data[0])
      # metadata derived from osf_projects data, passed from the UI
      metadata['dc:source'] = "https://osf.io/" + project['project_identifier']
      metadata['dc:creator#adminstrative_unit'] = project['administrative_unit']
      metadata['dc:creator#affiliation'] = project['affiliation']
      metadata['dc:creator'] = map_creator(config, project, ttl_data)
      metadata
    end

    # Constructs an OsfArchive record from ttl_data, data from the UI form,
    # and task config data
    def self.build_archive_record(config, this_project, ttl_data)
      this_rof = {}
      this_rof['owner'] = this_project['owner']
      this_rof['type'] = 'OsfArchive'
      this_rof['rights'] = map_rights(ttl_data[0])
      this_rof['rels-ext'] = map_rels_ext(ttl_data[0])
      this_rof['metadata'] = map_metadata(config, this_project, ttl_data)
      this_rof['files'] = [this_project['project_identifier'] + '.tar.gz']
      this_rof
    end

    # sets subject
    def self.map_subject(ttl_data)
      if ttl_data.key?(@osf_map['dc:subject'])
        return ttl_data[@osf_map['dc:subject']][0]['@value']
      end
      ''
    end

    # figures out the rights
    def self.map_rights(ttl_data)
      rights = {}
      if ttl_data[@osf_map['isPublic']][0]['@value'] == 'true'
        rights['read-groups'] = ['public']
      end
      rights
    end

    # Sets the creator. Needs to read another ttl file for the User data;
    # only contributors with isBibliographic true are considered.
    def self.map_creator(config, project, ttl_data)
      creator = ''
      contributor = ttl_data[0][@osf_map['hasContributor']][0]['@id']
      ttl_data.each do |item|
        next unless item['@id'] == contributor
        if item[@osf_map['isBibliographic']][0]['@value'] == 'true'
          creator = map_user_from_ttl(config, project,
                                      item[@osf_map['hasUser']][0]['@id'])
        end
      end
      creator
    end

    # read the user ttl file and extract the User's full name
    def self.map_user_from_ttl(config, project, file_subpath)
      ttl_data = ttl_from_targz(config, project, File.basename(file_subpath))
      ttl_data[0][@osf_map['hasFullName']][0]['@value']
    end
  end
end
```
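A hedged sketch of driving the OSF conversion. The config and project hashes below are guesses at the expected keys based on what the methods read (`package_dir`, `project_identifier`, `owner`, `administrative_unit`, `affiliation`), not a documented interface; the project identifier matches the `b6psa.tar.gz` fixture shipped in the specs, while the other values are invented.

```ruby
require 'rof'

config = { 'package_dir' => '/tmp/osf_packages' }

project = {
  'project_identifier'  => 'b6psa',
  'owner'               => 'someuser',
  'administrative_unit' => 'University Libraries',
  'affiliation'         => 'University of Notre Dame'
}

# Expects /tmp/osf_packages/b6psa.tar.gz to exist (a JHU-style OSF export).
# Returns a one-element array containing the OsfArchive ROF record, which
# can then be handed to the work-xlat step.
rof = ROF::OsfToRof.osf_to_rof(config, project)
```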