rof 1.0.7 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +9 -7
- data/LICENSE +201 -16
- data/Rakefile +46 -0
- data/bin/csv_to_rof +1 -2
- data/bin/fedora_to_rof +7 -1
- data/bin/jsonld_to_rof +26 -0
- data/bin/osf_to_rof +6 -2
- data/bin/rof +5 -19
- data/lib/rof.rb +2 -6
- data/lib/rof/access.rb +1 -1
- data/lib/rof/cli.rb +104 -67
- data/lib/rof/compare_rof.rb +68 -39
- data/lib/rof/filter.rb +21 -0
- data/lib/rof/filters.rb +38 -0
- data/lib/rof/filters/bendo.rb +15 -17
- data/lib/rof/filters/date_stamp.rb +5 -4
- data/lib/rof/filters/file_to_url.rb +5 -3
- data/lib/rof/filters/label.rb +9 -7
- data/lib/rof/filters/work.rb +7 -5
- data/lib/rof/ingest.rb +5 -0
- data/lib/rof/osf_context.rb +2 -2
- data/lib/rof/rdf_context.rb +2 -0
- data/lib/rof/translator.rb +18 -0
- data/lib/rof/translators.rb +23 -0
- data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
- data/lib/rof/translators/fedora_to_rof.rb +244 -0
- data/lib/rof/translators/jsonld_to_rof.rb +112 -0
- data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
- data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
- data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
- data/lib/rof/translators/osf_to_rof.rb +191 -0
- data/lib/rof/utility.rb +44 -1
- data/lib/rof/version.rb +1 -1
- data/rof.gemspec +10 -2
- data/spec/coverage_helper.rb +17 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
- data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
- data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
- data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
- data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
- data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
- data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
- data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
- data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
- data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
- data/spec/fixtures/osf/phz6b.tar.gz +0 -0
- data/spec/lib/rof/access_spec.rb +30 -23
- data/spec/lib/rof/cli_spec.rb +83 -60
- data/spec/lib/rof/compare_rof_spec.rb +35 -24
- data/spec/lib/rof/filter_spec.rb +10 -0
- data/spec/lib/rof/filters/bendo_spec.rb +42 -0
- data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
- data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
- data/spec/lib/rof/filters/label_spec.rb +121 -77
- data/spec/lib/rof/filters/work_spec.rb +7 -4
- data/spec/lib/rof/filters_spec.rb +14 -0
- data/spec/lib/rof/translator_spec.rb +15 -0
- data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
- data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
- data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
- data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
- data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
- data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
- data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
- data/spec/lib/rof/translators_spec.rb +14 -0
- data/spec/lib/rof/utility_spec.rb +47 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/support/an_rof_filter.rb +10 -0
- metadata +186 -15
- data/lib/rof/get_from_fedora.rb +0 -211
- data/lib/rof/osf_to_rof.rb +0 -123
- data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
data/lib/rof/get_from_fedora.rb
DELETED
@@ -1,211 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'rexml/document'
|
3
|
-
require 'rdf/ntriples'
|
4
|
-
require 'rdf/rdfxml'
|
5
|
-
require 'rubydora'
|
6
|
-
|
7
|
-
module ROF
|
8
|
-
class FedoraToRof
|
9
|
-
# connect to fedora and fetch object
|
10
|
-
# returns array of fedora attributes or nil
|
11
|
-
def self.GetFromFedora(pid, fedora, config)
|
12
|
-
@fedora_info = {}
|
13
|
-
|
14
|
-
# Try to connect to fedora, and search for the desired item
|
15
|
-
# If either of these actions fail, handle it, and exit.
|
16
|
-
begin
|
17
|
-
fedora = Rubydora.connect(fedora)
|
18
|
-
doc = fedora.find(pid)
|
19
|
-
rescue StandardError => e
|
20
|
-
puts "Error: #{e}"
|
21
|
-
exit 1
|
22
|
-
end
|
23
|
-
|
24
|
-
# set pid, type
|
25
|
-
@fedora_info['pid'] = pid
|
26
|
-
@fedora_info['type'] = 'fobject'
|
27
|
-
|
28
|
-
readFedora(doc, config)
|
29
|
-
|
30
|
-
@fedora_info
|
31
|
-
end
|
32
|
-
|
33
|
-
# Given a rubydora object, extract what we need
|
34
|
-
# to create our ROF object in an associative array
|
35
|
-
#
|
36
|
-
def self.readFedora(rdora_obj, config)
|
37
|
-
@fedora_info['af-model'] = setModel(rdora_obj)
|
38
|
-
# iterate through the data streams that are present.
|
39
|
-
# use reflection to call appropriate method for each
|
40
|
-
rdora_obj.datastreams.each do |dsname, ds|
|
41
|
-
next if dsname == 'DC'
|
42
|
-
method_key = dsname.sub('-', '')
|
43
|
-
if respond_to?(method_key)
|
44
|
-
send(method_key, ds, config)
|
45
|
-
else
|
46
|
-
# dump generic datastream
|
47
|
-
meta = create_meta(ds, config)
|
48
|
-
@fedora_info["#{dsname}-meta"] = meta unless meta.empty?
|
49
|
-
|
50
|
-
# if content is short < X bytes and valid utf-8, save as string
|
51
|
-
# if content is > X bytes or is not utf-8, save as file only if config option is given
|
52
|
-
content = ds.datastream_content
|
53
|
-
if content.length <= 1024 || config['inline']
|
54
|
-
# this downloads the contents of the datastream into memory
|
55
|
-
content_string = content.to_s.force_encoding('UTF-8')
|
56
|
-
if content_string.valid_encoding?
|
57
|
-
@fedora_info[dsname] = content_string
|
58
|
-
next # we're done! move on to next datastream
|
59
|
-
end
|
60
|
-
# not utf-8, so keep going and see if download option was given
|
61
|
-
end
|
62
|
-
next unless config['download']
|
63
|
-
# download option was given, so save this datastream as a file
|
64
|
-
fname = "#{@fedora_info['pid']}-#{dsname}"
|
65
|
-
abspath = File.join(config['download_path'], fname)
|
66
|
-
@fedora_info["#{dsname}-file"] = fname
|
67
|
-
if File.file?(config['download_path'])
|
68
|
-
puts "Error: --download directory #{config['download_path']} specified is an existing file."
|
69
|
-
exit 1
|
70
|
-
end
|
71
|
-
FileUtils.mkdir_p(config['download_path'])
|
72
|
-
File.open(abspath, 'w') do |f|
|
73
|
-
f.write(content)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def self.create_meta(ds, config)
|
80
|
-
result = {}
|
81
|
-
|
82
|
-
label = ds.profile['dsLabel']
|
83
|
-
result['label'] = label unless label.nil? || label == ''
|
84
|
-
result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
|
85
|
-
# TODO(dbrower): make sure this is working as intended
|
86
|
-
if %w(R E).include?(ds.profile['dsControlGroup'])
|
87
|
-
s = result['URL'] = ds.profile['dsLocation']
|
88
|
-
s = s.sub(config['bendo'], 'bendo:') if config['bendo']
|
89
|
-
result['URL'] = s
|
90
|
-
end
|
91
|
-
result
|
92
|
-
end
|
93
|
-
|
94
|
-
# set fedora_indo['af-model']
|
95
|
-
#
|
96
|
-
def self.setModel(rdora_obj)
|
97
|
-
# only keep info:fedora/afmodel:XXXXX
|
98
|
-
models = rdora_obj.profile['objModels'].map do |model|
|
99
|
-
Regexp.last_match(1) if model =~ /^info:fedora\/afmodel:(.*)/
|
100
|
-
end.compact
|
101
|
-
models[0]
|
102
|
-
end
|
103
|
-
|
104
|
-
# The methods below are called if the like-named datastream exists in fedora
|
105
|
-
|
106
|
-
# set metadata
|
107
|
-
#
|
108
|
-
def self.descMetadata(ds, _config)
|
109
|
-
# desMetadata is encoded in ntriples, convert to JSON-LD using our special context
|
110
|
-
graph = RDF::Graph.new
|
111
|
-
data = ds.datastream_content
|
112
|
-
# force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
|
113
|
-
# see https://github.com/ruby-rdf/rdf/issues/142
|
114
|
-
data.force_encoding('utf-8')
|
115
|
-
graph.from_ntriples(data, format: :ntriples)
|
116
|
-
JSON::LD::API.fromRdf(graph) do |expanded|
|
117
|
-
result = JSON::LD::API.compact(expanded, RdfContext)
|
118
|
-
@fedora_info['metadata'] = result
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
# set rights
|
123
|
-
#
|
124
|
-
def self.rightsMetadata(ds, _config)
|
125
|
-
# rights is an XML document
|
126
|
-
# the access array may have read or edit elements
|
127
|
-
# each of these elements may contain group or person elements
|
128
|
-
xml_doc = REXML::Document.new(ds.datastream_content)
|
129
|
-
|
130
|
-
rights_array = {}
|
131
|
-
|
132
|
-
root = xml_doc.root
|
133
|
-
|
134
|
-
# check for optional embargo date - set if present
|
135
|
-
this_embargo = root.elements['embargo']
|
136
|
-
rights_array['embargo-date'] = this_embargo.elements['machine'].elements['date'][0] if has_embargo_date(this_embargo)
|
137
|
-
|
138
|
-
%w(read edit).each do |access|
|
139
|
-
this_access = root.elements["//access[@type=\'#{access}\']"]
|
140
|
-
|
141
|
-
next if this_access.nil?
|
142
|
-
|
143
|
-
unless this_access.elements['machine'].elements['group'].nil?
|
144
|
-
group_array = []
|
145
|
-
this_access.elements['machine'].elements['group'].each do |this_group|
|
146
|
-
group_array << this_group
|
147
|
-
end
|
148
|
-
rights_array["#{access}-groups"] = group_array
|
149
|
-
end
|
150
|
-
|
151
|
-
next if this_access.elements['machine'].elements['person'].nil?
|
152
|
-
person_array = []
|
153
|
-
|
154
|
-
this_access.elements['machine'].elements['person'].each do |this_person|
|
155
|
-
person_array << this_person
|
156
|
-
end
|
157
|
-
rights_array[access.to_s] = person_array
|
158
|
-
end
|
159
|
-
|
160
|
-
@fedora_info['rights'] = rights_array
|
161
|
-
end
|
162
|
-
|
163
|
-
# test for embargo xml cases
|
164
|
-
def self.has_embargo_date(embargo_xml)
|
165
|
-
return false if embargo_xml == '' || embargo_xml.nil?
|
166
|
-
return false unless embargo_xml.elements['machine'].has_elements? && embargo_xml.elements['machine'].elements['date'].has_text?
|
167
|
-
true
|
168
|
-
end
|
169
|
-
|
170
|
-
def self.RELSEXT(ds, _config)
|
171
|
-
# RELS-EXT is RDF-XML - parse it
|
172
|
-
ctx = ROF::RelsExtRefContext.dup
|
173
|
-
ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
|
174
|
-
graph = RDF::Graph.new
|
175
|
-
graph.from_rdfxml(ds.datastream_content)
|
176
|
-
result = nil
|
177
|
-
JSON::LD::API.fromRdf(graph) do |expanded|
|
178
|
-
result = JSON::LD::API.compact(expanded, ctx)
|
179
|
-
end
|
180
|
-
# now strip the info:fedora/ prefix from the URIs
|
181
|
-
strip_info_fedora(result)
|
182
|
-
# remove extra items
|
183
|
-
result.delete('hasModel')
|
184
|
-
@fedora_info['rels-ext'] = result
|
185
|
-
end
|
186
|
-
|
187
|
-
private
|
188
|
-
|
189
|
-
def self.strip_info_fedora(rels_ext)
|
190
|
-
rels_ext.each do |relation, targets|
|
191
|
-
next if relation == '@context'
|
192
|
-
if targets.is_a?(Hash)
|
193
|
-
strip_info_fedora(targets)
|
194
|
-
next
|
195
|
-
end
|
196
|
-
targets = [targets] if targets.is_a?(String)
|
197
|
-
targets.map! do |target|
|
198
|
-
if target.is_a?(Hash)
|
199
|
-
strip_info_fedora(target)
|
200
|
-
else
|
201
|
-
target.sub('info:fedora/', '')
|
202
|
-
end
|
203
|
-
end
|
204
|
-
# some single strings cannot be arrays in json-ld, so convert back
|
205
|
-
# this shouldn't cause any problems with items that began as arrays
|
206
|
-
targets = targets[0] if targets.length == 1
|
207
|
-
rels_ext[relation] = targets
|
208
|
-
end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
data/lib/rof/osf_to_rof.rb
DELETED
@@ -1,123 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'zlib'
|
3
|
-
require 'rubygems/package'
|
4
|
-
require 'rdf/turtle'
|
5
|
-
require 'rof/osf_context'
|
6
|
-
require 'rof/rdf_context'
|
7
|
-
require 'rof/utility'
|
8
|
-
|
9
|
-
module ROF
|
10
|
-
# Class for managing OSF Archive data transformations
|
11
|
-
# It is called after the get-from-osf task, and before the work-xlat task
|
12
|
-
class OsfToRof
|
13
|
-
# Convert Osf Archive tar.gz to ROF
|
14
|
-
def self.osf_to_rof(config, osf_projects = nil)
|
15
|
-
@osf_map = ROF::OsfToNDMap
|
16
|
-
rof_array = []
|
17
|
-
return {} if osf_projects.nil?
|
18
|
-
this_project = osf_projects
|
19
|
-
ttl_data = ttl_from_targz(config, this_project,
|
20
|
-
this_project['project_identifier'] + '.ttl')
|
21
|
-
rof_array[0] = build_archive_record(config, this_project, ttl_data)
|
22
|
-
rof_array
|
23
|
-
end
|
24
|
-
|
25
|
-
# reads a ttl file and makes it a JSON-LD file that we can parse
|
26
|
-
def self.fetch_from_ttl(ttl_file)
|
27
|
-
graph = RDF::Turtle::Reader.open(ttl_file,
|
28
|
-
prefixes: ROF::OsfPrefixList.dup)
|
29
|
-
JSON::LD::API.fromRdf(graph)
|
30
|
-
end
|
31
|
-
|
32
|
-
# extracts given ttl file from JHU tar.gz package
|
33
|
-
# - assumed to live under data/obj/root
|
34
|
-
def self.ttl_from_targz(config, this_project, ttl_filename)
|
35
|
-
id = this_project['project_identifier']
|
36
|
-
ttl_path = File.join(id,
|
37
|
-
'data/obj/root',
|
38
|
-
ttl_filename)
|
39
|
-
ROF::Utility.file_from_targz(File.join(config['package_dir'], id + '.tar.gz'),
|
40
|
-
ttl_path)
|
41
|
-
ttl_data = fetch_from_ttl(File.join(config['package_dir'], ttl_path))
|
42
|
-
# this is an array- the addition elements are the contributor(s)
|
43
|
-
ttl_data
|
44
|
-
end
|
45
|
-
|
46
|
-
# Maps RELS-EXT
|
47
|
-
def self.map_rels_ext(_ttl_data)
|
48
|
-
rels_ext = {}
|
49
|
-
rels_ext['@context'] = ROF::RelsExtRefContext.dup
|
50
|
-
rels_ext
|
51
|
-
end
|
52
|
-
|
53
|
-
# sets metadata
|
54
|
-
def self.map_metadata(config, project, ttl_data)
|
55
|
-
metadata = {}
|
56
|
-
metadata['@context'] = ROF::RdfContext.dup
|
57
|
-
# metdata derived from project ttl file
|
58
|
-
metadata['dc:created'] = Time.iso8601(ttl_data[0][@osf_map['dc:created']][0]['@value']).to_date.iso8601 + 'Z'
|
59
|
-
metadata['dc:title'] = ttl_data[0][@osf_map['dc:title']][0]['@value']
|
60
|
-
metadata['dc:description'] =
|
61
|
-
ttl_data[0][@osf_map['dc:description']][0]['@value']
|
62
|
-
metadata['dc:subject'] = map_subject(ttl_data[0])
|
63
|
-
# metadata derived from osf_projects data, passed from UI
|
64
|
-
metadata['dc:source'] = 'https://osf.io/' + project['project_identifier']
|
65
|
-
metadata['dc:creator#adminstrative_unit'] = project['administrative_unit']
|
66
|
-
metadata['dc:creator#affiliation'] = project['affiliation']
|
67
|
-
metadata['dc:creator'] = map_creator(config, project, ttl_data)
|
68
|
-
metadata
|
69
|
-
end
|
70
|
-
|
71
|
-
# Constructs OsfArchive Record from ttl_data, data from the UI form,
|
72
|
-
# and task config data
|
73
|
-
def self.build_archive_record(config, this_project, ttl_data)
|
74
|
-
this_rof = {}
|
75
|
-
this_rof['owner'] = this_project['owner']
|
76
|
-
this_rof['type'] = 'OsfArchive'
|
77
|
-
this_rof['rights'] = map_rights(ttl_data[0])
|
78
|
-
this_rof['rels-ext'] = map_rels_ext(ttl_data[0])
|
79
|
-
this_rof['metadata'] = map_metadata(config, this_project, ttl_data)
|
80
|
-
this_rof['files'] = [this_project['project_identifier'] + '.tar.gz']
|
81
|
-
this_rof
|
82
|
-
end
|
83
|
-
|
84
|
-
# sets subject
|
85
|
-
def self.map_subject(ttl_data)
|
86
|
-
if ttl_data.key?(@osf_map['dc:subject'])
|
87
|
-
return ttl_data[@osf_map['dc:subject']][0]['@value']
|
88
|
-
end
|
89
|
-
''
|
90
|
-
end
|
91
|
-
|
92
|
-
# figures out the rights
|
93
|
-
def self.map_rights(ttl_data)
|
94
|
-
rights = {}
|
95
|
-
if ttl_data[@osf_map['isPublic']][0]['@value'] == 'true'
|
96
|
-
rights['read-groups'] = ['public']
|
97
|
-
end
|
98
|
-
rights
|
99
|
-
end
|
100
|
-
|
101
|
-
# sets the creator- needs to read another ttl for the User data
|
102
|
-
# only contrubutors with isBibliographic true are considered
|
103
|
-
def self.map_creator(config, project, ttl_data)
|
104
|
-
creator = []
|
105
|
-
ttl_data[0][@osf_map['hasContributor']].each do |contributor|
|
106
|
-
ttl_data.each do |item|
|
107
|
-
next unless item['@id'] == contributor['@id']
|
108
|
-
if item[@osf_map['isBibliographic']][0]['@value'] == 'true'
|
109
|
-
creator.push map_user_from_ttl(config, project,
|
110
|
-
item[@osf_map['hasUser']][0]['@id'])
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
creator
|
115
|
-
end
|
116
|
-
|
117
|
-
# read user ttl file, extract User's full name
|
118
|
-
def self.map_user_from_ttl(config, project, file_subpath)
|
119
|
-
ttl_data = ttl_from_targz(config, project, File.basename(file_subpath))
|
120
|
-
ttl_data[0][@osf_map['hasFullName']][0]['@value']
|
121
|
-
end
|
122
|
-
end
|
123
|
-
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe ROF::FedoraToRof do
|
4
|
-
it 'handles embargo presence or absence' do
|
5
|
-
|
6
|
-
rights_tests = [
|
7
|
-
['<embargo> <human/> <machine> <date>2017-08-01</date> </machine> </embargo>', true],
|
8
|
-
['<embargo> <human/> <machine> <date></date> </machine> </embargo>', false],
|
9
|
-
['<embargo> <human/> <machine/> </embargo>', false]
|
10
|
-
]
|
11
|
-
|
12
|
-
begin
|
13
|
-
|
14
|
-
rights_tests.each do |this_test|
|
15
|
-
xml_doc = REXML::Document.new(this_test[0])
|
16
|
-
root = xml_doc.root
|
17
|
-
rights = ROF::FedoraToRof.has_embargo_date(root)
|
18
|
-
expect(rights).to eq(this_test[1])
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|