bulkrax 6.0.1 → 8.0.0
- checksums.yaml +4 -4
- data/Rakefile +7 -7
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/datatables.js +139 -0
- data/app/assets/javascripts/bulkrax/exporters.js +4 -4
- data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
- data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
- data/app/controllers/bulkrax/entries_controller.rb +52 -3
- data/app/controllers/bulkrax/exporters_controller.rb +20 -8
- data/app/controllers/bulkrax/importers_controller.rb +31 -12
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/application_helper.rb +7 -3
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +28 -17
- data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
- data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
- data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
- data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
- data/app/jobs/bulkrax/delete_job.rb +8 -3
- data/app/jobs/bulkrax/download_cloud_file_job.rb +17 -4
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +6 -3
- data/app/jobs/bulkrax/import_job.rb +7 -0
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +19 -3
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -3
- data/app/models/bulkrax/csv_entry.rb +9 -7
- data/app/models/bulkrax/entry.rb +9 -11
- data/app/models/bulkrax/exporter.rb +11 -4
- data/app/models/bulkrax/importer.rb +49 -10
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -3
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/status.rb +10 -1
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -3
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +14 -33
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/models/concerns/bulkrax/status_info.rb +8 -0
- data/app/parsers/bulkrax/application_parser.rb +116 -21
- data/app/parsers/bulkrax/bagit_parser.rb +173 -195
- data/app/parsers/bulkrax/csv_parser.rb +15 -57
- data/app/parsers/bulkrax/oai_dc_parser.rb +44 -16
- data/app/parsers/bulkrax/parser_export_record_set.rb +20 -24
- data/app/parsers/bulkrax/xml_parser.rb +18 -23
- data/app/services/bulkrax/factory_class_finder.rb +92 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/_form.html.erb +10 -10
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/index.html.erb +13 -57
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +6 -12
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +8 -2
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
- data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/index.html.erb +20 -64
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +8 -14
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
- data/config/locales/bulkrax.en.yml +7 -0
- data/config/routes.rb +8 -2
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
- data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +107 -19
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +13 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +64 -8
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
data/app/parsers/bulkrax/application_parser.rb

```diff
@@ -1,6 +1,4 @@
 # frozen_string_literal: true
-require 'zip'
-require 'marcel'
 
 module Bulkrax
   # An abstract class that establishes the API for Bulkrax's import and export parsing.
```
```diff
@@ -14,7 +12,7 @@ module Bulkrax
              :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
              :key_without_numbers, :status, :set_status_info, :status_info, :status_at,
              :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
-             :zip?, :file?,
+             :zip?, :file?, :remove_and_rerun,
              to: :importerexporter
 
     # @todo Convert to `class_attribute :parser_fiels, default: {}`
```
```diff
@@ -47,6 +45,10 @@ module Bulkrax
       raise NotImplementedError, 'must be defined'
     end
 
+    def work_entry_class
+      entry_class
+    end
+
     # @api public
     # @abstract Subclass and override {#collection_entry_class} to implement behavior for the parser.
     def collection_entry_class
```
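The new `work_entry_class` hook gives every parser a default mapping from the `work` type to its `entry_class`; only parsers whose work entries use a distinct class need to override it. A minimal sketch of a subclass relying on the inherited default (the parser and entry class here are hypothetical, not part of the gem):

```ruby
module Bulkrax
  # Hypothetical parser, for illustration only.
  class MyParser < ApplicationParser
    def entry_class
      MyEntry # hypothetical entry class
    end
    # work_entry_class is inherited and returns MyEntry, so code like
    # create_entry_and_job(record, 'work') can resolve the class via
    # send("#{type}_entry_class") without any extra wiring here.
  end
end
```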
```diff
@@ -157,6 +159,22 @@ module Bulkrax
       @visibility ||= self.parser_fields['visibility'] || 'open'
     end
 
+    def create_collections
+      create_objects(['collection'])
+    end
+
+    def create_works
+      create_objects(['work'])
+    end
+
+    def create_file_sets
+      create_objects(['file_set'])
+    end
+
+    def create_relationships
+      create_objects(['relationship'])
+    end
+
     # @api public
     #
     # @param types [Array<Symbol>] the types of objects that we'll create.
```
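With these wrappers, the per-type `create_*` entry points become thin aliases over the shared `create_objects` pipeline shown in the next hunk. Roughly, assuming `parser` is any configured parser instance:

```ruby
parser.create_collections # same as parser.create_objects(['collection'])
parser.create_works       # same as parser.create_objects(['work'])
parser.create_file_sets   # same as parser.create_objects(['file_set'])

# With no argument, create_objects walks every type in order:
parser.create_objects     # %w[collection work file_set relationship]
```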
```diff
@@ -166,34 +184,95 @@ module Bulkrax
     # @see #create_works
     # @see #create_file_sets
     # @see #create_relationships
-    def create_objects(types = [])
-      types.each do |object_type|
-        send("create_#{object_type.pluralize}")
+    def create_objects(types_array = nil)
+      index = 0
+      (types_array || %w[collection work file_set relationship]).each do |type|
+        if type.eql?('relationship')
+          ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
+          next
+        end
+        send(type.pluralize).each do |current_record|
+          next unless record_has_source_identifier(current_record, index)
+          break if limit_reached?(limit, index)
+          seen[current_record[source_identifier]] = true
+          create_entry_and_job(current_record, type)
+          increment_counters(index, "#{type}": true)
+          index += 1
+        end
+        importer.record_status
+      end
+      true
+    rescue StandardError => e
+      set_status_info(e)
+    end
+
+    def rebuild_entries(types_array = nil)
+      index = 0
+      (types_array || %w[collection work file_set relationship]).each do |type|
+        # works are not guaranteed to have Work in the type
+
+        importer.entries.where(rebuild_entry_query(type, parser_fields['entry_statuses'])).find_each do |e|
+          seen[e.identifier] = true
+          e.status_info('Pending', importer.current_run)
+          if remove_and_rerun
+            delay = calculate_type_delay(type)
+            "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, e, current_run)
+          else
+            "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, e.id, current_run.id)
+          end
+          increment_counters(index)
+          index += 1
+        end
       end
     end
 
-    # @abstract Subclass and override {#create_collections} to implement behavior for the parser.
-    def create_collections
-      raise NotImplementedError, 'must be defined' if importer?
+    def rebuild_entry_query(type, statuses)
+      type_col = Bulkrax::Entry.arel_table['type']
+      status_col = Bulkrax::Entry.arel_table['status_message']
+
+      query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
+      query.and(status_col.in(statuses))
     end
 
-    # @abstract Subclass and override {#create_works} to implement behavior for the parser.
-    def create_works
-      raise NotImplementedError, 'must be defined' if importer?
+    def calculate_type_delay(type)
+      return 2.minutes if type == 'file_set'
+      return 1.minute if type == 'work'
+      return 0
     end
 
-    # @abstract Subclass and override {#create_file_sets} to implement behavior for the parser.
-    def create_file_sets
-      raise NotImplementedError, 'must be defined' if importer?
+    def record_raw_metadata(record)
+      record.to_h
     end
 
-    # @abstract Subclass and override {#create_relationships} to implement behavior for the parser.
-    def create_relationships
-      raise NotImplementedError, 'must be defined' if importer?
+    def record_deleted?(record)
+      return false unless record.key?(:delete)
+      ActiveModel::Type::Boolean.new.cast(record[:delete])
+    end
+
+    def record_remove_and_rerun?(record)
+      return false unless record.key?(:remove_and_rerun)
+      ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+    end
+
+    def create_entry_and_job(current_record, type, identifier = nil)
+      identifier ||= current_record[source_identifier]
+      new_entry = find_or_create_entry(send("#{type}_entry_class"),
+                                       identifier,
+                                       'Bulkrax::Importer',
+                                       record_raw_metadata(current_record))
+      new_entry.status_info('Pending', importer.current_run)
+      if record_deleted?(current_record)
+        "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
+      elsif record_remove_and_rerun?(current_record) || remove_and_rerun
+        delay = calculate_type_delay(type)
+        "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
+      else
+        "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
+      end
     end
 
     # Optional, define if using browse everything for file upload
-    def retrieve_cloud_files(files); end
+    def retrieve_cloud_files(_files, _importer); end
 
     # @param file [#path, #original_filename] the file object that with the relevant data for the
     # import.
```
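`record_deleted?` and `record_remove_and_rerun?` cast the raw cell value with `ActiveModel::Type::Boolean`, so the usual spreadsheet spellings of true and false are all recognized. A standalone illustration:

```ruby
require 'active_model'

bool = ActiveModel::Type::Boolean.new
bool.cast('TRUE')  # => true
bool.cast('1')     # => true
bool.cast('false') # => false
bool.cast('0')     # => false
bool.cast(nil)     # => nil (falsey), so the record is imported normally
```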
```diff
@@ -305,12 +384,19 @@ module Bulkrax
     end
 
     def find_or_create_entry(entryclass, identifier, type, raw_metadata = nil)
-      entry = entryclass.where(
+      # limit entry search to just this importer or exporter. Don't go moving them
+      entry = importerexporter.entries.where(
+        identifier: identifier
+      ).first
+      entry ||= entryclass.new(
         importerexporter_id: importerexporter.id,
         importerexporter_type: type,
         identifier: identifier
-      ).first_or_create!
+      )
       entry.raw_metadata = raw_metadata
+      # Setting parsed_metadata specifically for the id so we can find the object via the
+      # id in a delete. This is likely to get clobbered in a regular import, which is fine.
+      entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
       entry.save!
       entry
     end
```
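Seeding `parsed_metadata` with the record's `id` lets a later delete job resolve the repository object even though the entry was never fully parsed. A rough sketch of the effect (the row and identifiers are hypothetical, and in practice the method is invoked internally by the parser rather than called like this):

```ruby
# Hypothetical raw CSV row that marks a record for deletion.
raw = { 'id' => 'abc123', 'source_identifier' => 'work-1', 'delete' => 'true' }

entry = parser.find_or_create_entry(Bulkrax::CsvEntry, 'work-1',
                                    'Bulkrax::Importer', raw)
entry.parsed_metadata # => { id: 'abc123' }, enough for a delete job to find the object
```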
|
342
428
|
end
|
343
429
|
|
344
430
|
def unzip(file_to_unzip)
|
431
|
+
return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
|
432
|
+
|
345
433
|
Zip::File.open(file_to_unzip) do |zip_file|
|
346
434
|
zip_file.each do |entry|
|
347
435
|
entry_path = File.join(importer_unzip_path, entry.name)
|
```diff
@@ -351,6 +439,13 @@ module Bulkrax
       end
     end
 
+    def untar(file_to_untar)
+      Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
+      command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
+      result = system(command)
+      raise "Failed to extract #{file_to_untar}" unless result
+    end
+
     def zip
       FileUtils.mkdir_p(exporter_export_zip_path)
 
```
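`unzip` now hands `.tar.gz` archives to `untar`, which shells out to `tar` with both paths escaped. The command it builds looks like this (paths are hypothetical):

```ruby
require 'shellwords'

file_to_untar = '/imports/my bag.tar.gz'   # hypothetical import file
importer_unzip_path = '/tmp/bulkrax/unzip' # hypothetical target directory

command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
# => "tar -xzf /imports/my\ bag.tar.gz -C /tmp/bulkrax/unzip"
system(command) or raise "Failed to extract #{file_to_untar}"
```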
data/app/parsers/bulkrax/bagit_parser.rb

```diff
@@ -1,246 +1,224 @@
 # frozen_string_literal: true
-require 'bagit'
 
-module Bulkrax
-  class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
-    include ExportBehavior
+unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
+  module Bulkrax
+    class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
+      include ExportBehavior
 
-    def self.export_supported?
-      true
-    end
+      def self.export_supported?
+        true
+      end
 
-    def valid_import?
-      return true if import_fields.present?
-    rescue => e
-      set_status_info(e)
-      false
-    end
+      def valid_import?
+        return true if import_fields.present?
+      rescue => e
+        set_status_info(e)
+        false
+      end
 
-    def entry_class
-      rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
-      rdf_format ? RdfEntry : CsvEntry
-    end
+      def entry_class
+        rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
+        rdf_format ? RdfEntry : CsvEntry
+      end
 
-    def path_to_files(filename:)
-      @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
-    end
+      def path_to_files(filename:)
+        @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
+      end
 
-    # Take a random sample of 10 metadata_paths and work out the import fields from that
-    def import_fields
-      raise StandardError, 'No metadata files were found' if metadata_paths.blank?
-      @import_fields ||= metadata_paths.sample(10).map do |path|
-        entry_class.fields_from_data(entry_class.read_data(path))
-      end.flatten.compact.uniq
-    end
+      # Take a random sample of 10 metadata_paths and work out the import fields from that
+      def import_fields
+        raise StandardError, 'No metadata files were found' if metadata_paths.blank?
+        @import_fields ||= metadata_paths.sample(10).map do |path|
+          entry_class.fields_from_data(entry_class.read_data(path))
+        end.flatten.compact.uniq
+      end
 
-    # Create an Array of all metadata records
-    def records(_opts = {})
-      raise StandardError, 'No BagIt records were found' if bags.blank?
-      @records ||= bags.map do |bag|
-        path = metadata_path(bag)
-        raise StandardError, 'No metadata files were found' if path.blank?
-        data = entry_class.read_data(path)
-        get_data(bag, data)
-      end
-
-      @records = @records.flatten
-    end
+      # Create an Array of all metadata records
+      def records(_opts = {})
+        raise StandardError, 'No BagIt records were found' if bags.blank?
+        @records ||= bags.map do |bag|
+          path = metadata_path(bag)
+          raise StandardError, 'No metadata files were found' if path.blank?
+          data = entry_class.read_data(path)
+          get_data(bag, data)
+        end
+
+        @records = @records.flatten
+      end
 
-    def get_data(bag, data)
-      if entry_class == CsvEntry
-        data = data.map do |data_row|
-          record_data = entry_class.data_for_entry(data_row, source_identifier, self)
-          next record_data if importerexporter.metadata_only?
-
-          record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
-          record_data
-        end
-      else
-        data = entry_class.data_for_entry(data, source_identifier, self)
-        data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
-      end
-
-      data
-    end
+      def get_data(bag, data)
+        if entry_class == CsvEntry
+          data = data.map do |data_row|
+            record_data = entry_class.data_for_entry(data_row, source_identifier, self)
+            next record_data if importerexporter.metadata_only?
+
+            record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
+            record_data
+          end
+        else
+          data = entry_class.data_for_entry(data, source_identifier, self)
+          data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
+        end
+
+        data
+      end
 
-
-
-    end
-
-    def create_works
-      records.each_with_index do |record, index|
-        next unless record_has_source_identifier(record, index)
-        break if limit_reached?(limit, index)
-
-        seen[record[source_identifier]] = true
-        new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record)
-        if record[:delete].present?
-          DeleteWorkJob.send(perform_method, new_entry, current_run)
-        else
-          ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
-        end
-        increment_counters(index, work: true)
-      end
-      importer.record_status
-    rescue StandardError => e
-      set_status_info(e)
-    end
-
-    # export methods
+      # export methods
 
-    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
-    def write_files
-      require 'open-uri'
-      require 'socket'
+      # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
+      def write_files
+        require 'open-uri'
+        require 'socket'
 
-      folder_count = 1
-      records_in_folder = 0
-      work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
-      collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
-      file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
+        folder_count = 1
+        records_in_folder = 0
+        work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
+        collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
+        file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
 
-      work_entries[0..limit || total].each do |entry|
-        record = ActiveFedora::Base.find(entry.identifier)
-        next unless record
+        work_entries[0..limit || total].each do |entry|
+          record = Bulkrax.object_factory.find(entry.identifier)
+          next unless record
 
-        bag_entries = [entry]
+          bag_entries = [entry]
 
-        if record.member_of_collection_ids.present?
-          collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
-        end
+          if record.member_of_collection_ids.present?
+            collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
+          end
 
-        if record.file_sets.present?
-          file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
-        end
+          if record.file_sets.present?
+            file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
+          end
 
-        records_in_folder += bag_entries.count
-        if records_in_folder > records_split_count
-          folder_count += 1
-          records_in_folder = bag_entries.count
-        end
+          records_in_folder += bag_entries.count
+          if records_in_folder > records_split_count
+            folder_count += 1
+            records_in_folder = bag_entries.count
+          end
 
-        bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
-
-        record.file_sets.each do |fs|
-          file_name = filename(fs)
-          next if file_name.blank? || fs.original_file.blank?
-
-          io = open(fs.original_file.uri)
-          file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
-          file.write(io.read)
-          file.close
-          begin
-            bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
-          rescue => e
-            entry.set_status_info(e)
-            set_status_info(e)
-          end
-        end
+          bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
+
+          record.file_sets.each do |fs|
+            file_name = filename(fs)
+            next if file_name.blank? || fs.original_file.blank?
+
+            io = open(fs.original_file.uri)
+            file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
+            file.write(io.read)
+            file.close
+            begin
+              bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
+            rescue => e
+              entry.set_status_info(e)
+              set_status_info(e)
+            end
+          end
 
-        CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
-          bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
-        end
+          CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
+            bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
+          end
 
-        write_triples(folder_count, entry)
-        bag.manifest!(algo: 'sha256')
-      end
-    end
-    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
+          write_triples(folder_count, entry)
+          bag.manifest!(algo: 'sha256')
+        end
+      end
+      # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
 
-    def setup_csv_metadata_export_file(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
-
-      File.join(path, id, 'metadata.csv')
-    end
+      def setup_csv_metadata_export_file(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
+
+        File.join(path, id, 'metadata.csv')
+      end
 
-    def key_allowed(key)
-      !Bulkrax.reserved_properties.include?(key) &&
-        new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
-        key != source_identifier.to_s
-    end
+      def key_allowed(key)
+        !Bulkrax.reserved_properties.include?(key) &&
+          new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
+          key != source_identifier.to_s
+      end
 
-    def setup_triple_metadata_export_file(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
-
-      File.join(path, id, 'metadata.nt')
-    end
+      def setup_triple_metadata_export_file(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
+
+        File.join(path, id, 'metadata.nt')
+      end
 
-    def setup_bagit_folder(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
-
-      File.join(path, id)
-    end
+      def setup_bagit_folder(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
+
+        File.join(path, id)
+      end
 
-    # @todo(bjustice) - remove hyrax reference
-    def write_triples(folder_count, e)
-      sd = SolrDocument.find(e.identifier)
-      return if sd.nil?
-
-      req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
-      rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
-      File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
-        triples.write(rdf)
-      end
-    end
+      # @todo(bjustice) - remove hyrax reference
+      def write_triples(folder_count, e)
+        sd = SolrDocument.find(e.identifier)
+        return if sd.nil?
+
+        req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
+        rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
+        File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
+          triples.write(rdf)
+        end
+      end
 
-    # @todo - investigate getting directory structure
-    # @todo - investigate using perform_later, and having the importer check for
-    #   DownloadCloudFileJob before it starts
-    def retrieve_cloud_files(files)
-      # There should only be one zip file for Bagit, take the first
-      return if files['0'].blank?
-      target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
-      # Now because we want the files in place before the importer runs
-      Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
-      return target_file
-    end
+      # @todo - investigate getting directory structure
+      # @todo - investigate using perform_later, and having the importer check for
+      #   DownloadCloudFileJob before it starts
+      def retrieve_cloud_files(files, _importer)
+        # There should only be one zip file for Bagit, take the first
+        return if files['0'].blank?
+        target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
+        # Now because we want the files in place before the importer runs
+        Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
+        return target_file
+      end
 
-    private
+      private
 
-    def bags
-      return @bags if @bags.present?
-      new_bag = bag(import_file_path)
-      @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
-      @bags.delete(nil)
-      raise StandardError, 'No valid bags found' if @bags.blank?
-      return @bags
-    end
+      def bags
+        return @bags if @bags.present?
+        new_bag = bag(import_file_path)
+        @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
+        @bags.delete(nil)
+        raise StandardError, 'No valid bags found' if @bags.blank?
+        return @bags
+      end
 
-    # Gather the paths to all bags; skip any stray files
-    def bag_paths
-      bags.map(&:bag_dir)
-    end
+      # Gather the paths to all bags; skip any stray files
+      def bag_paths
+        bags.map(&:bag_dir)
+      end
 
-    def metadata_file_name
-      raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
-      parser_fields['metadata_file_name']
-    end
+      def metadata_file_name
+        raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
+        parser_fields['metadata_file_name']
+      end
 
-    # Gather the paths to all metadata files matching the metadata_file_name
-    def metadata_paths
-      @metadata_paths ||= bag_paths.map do |b|
-        Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
-      end.flatten.compact
-    end
+      # Gather the paths to all metadata files matching the metadata_file_name
+      def metadata_paths
+        @metadata_paths ||= bag_paths.map do |b|
+          Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
+        end.flatten.compact
+      end
 
-    def metadata_path(bag)
-      Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
-    end
+      def metadata_path(bag)
+        Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
+      end
 
-    def bag(path)
-      return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
-      bag = BagIt::Bag.new(path)
-      return nil unless bag.valid?
-      bag
-    end
+      def bag(path)
+        return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
+        bag = BagIt::Bag.new(path)
+        return nil unless bag.valid?
+        bag
+      end
 
-
-    def real_import_file_path
-      return importer_unzip_path if file? && zip?
-      parser_fields['import_file_path']
-    end
+      # use the version of this method from the application parser instead
+      def real_import_file_path
+        return importer_unzip_path if file? && zip?
+        parser_fields['import_file_path']
+      end
+    end
   end
 end
```