bulkrax 6.0.1 → 8.0.0

Files changed (95)
  1. checksums.yaml +4 -4
  2. data/Rakefile +7 -7
  3. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  4. data/app/assets/javascripts/bulkrax/datatables.js +139 -0
  5. data/app/assets/javascripts/bulkrax/exporters.js +4 -4
  6. data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
  7. data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
  8. data/app/controllers/bulkrax/entries_controller.rb +52 -3
  9. data/app/controllers/bulkrax/exporters_controller.rb +20 -8
  10. data/app/controllers/bulkrax/importers_controller.rb +31 -12
  11. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
  12. data/app/factories/bulkrax/object_factory.rb +135 -163
  13. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  14. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  15. data/app/helpers/bulkrax/application_helper.rb +7 -3
  16. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  17. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  18. data/app/jobs/bulkrax/create_relationships_job.rb +28 -17
  19. data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
  20. data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
  21. data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
  22. data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
  23. data/app/jobs/bulkrax/delete_job.rb +8 -3
  24. data/app/jobs/bulkrax/download_cloud_file_job.rb +17 -4
  25. data/app/jobs/bulkrax/import_collection_job.rb +1 -1
  26. data/app/jobs/bulkrax/import_file_set_job.rb +6 -3
  27. data/app/jobs/bulkrax/import_job.rb +7 -0
  28. data/app/jobs/bulkrax/import_work_job.rb +1 -1
  29. data/app/jobs/bulkrax/importer_job.rb +19 -3
  30. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  31. data/app/models/bulkrax/csv_collection_entry.rb +1 -3
  32. data/app/models/bulkrax/csv_entry.rb +9 -7
  33. data/app/models/bulkrax/entry.rb +9 -11
  34. data/app/models/bulkrax/exporter.rb +11 -4
  35. data/app/models/bulkrax/importer.rb +49 -10
  36. data/app/models/bulkrax/oai_entry.rb +0 -3
  37. data/app/models/bulkrax/oai_set_entry.rb +1 -3
  38. data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
  39. data/app/models/bulkrax/rdf_entry.rb +70 -69
  40. data/app/models/bulkrax/status.rb +10 -1
  41. data/app/models/bulkrax/xml_entry.rb +0 -1
  42. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  43. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  44. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  45. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -3
  46. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  47. data/app/models/concerns/bulkrax/import_behavior.rb +14 -33
  48. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  49. data/app/models/concerns/bulkrax/status_info.rb +8 -0
  50. data/app/parsers/bulkrax/application_parser.rb +116 -21
  51. data/app/parsers/bulkrax/bagit_parser.rb +173 -195
  52. data/app/parsers/bulkrax/csv_parser.rb +15 -57
  53. data/app/parsers/bulkrax/oai_dc_parser.rb +44 -16
  54. data/app/parsers/bulkrax/parser_export_record_set.rb +20 -24
  55. data/app/parsers/bulkrax/xml_parser.rb +18 -23
  56. data/app/services/bulkrax/factory_class_finder.rb +92 -0
  57. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  58. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  59. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  60. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  61. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  62. data/app/views/bulkrax/entries/show.html.erb +9 -8
  63. data/app/views/bulkrax/exporters/_form.html.erb +10 -10
  64. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  65. data/app/views/bulkrax/exporters/index.html.erb +13 -57
  66. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  67. data/app/views/bulkrax/exporters/show.html.erb +6 -12
  68. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  69. data/app/views/bulkrax/importers/_csv_fields.html.erb +8 -2
  70. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
  71. data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
  72. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  73. data/app/views/bulkrax/importers/index.html.erb +20 -64
  74. data/app/views/bulkrax/importers/new.html.erb +1 -1
  75. data/app/views/bulkrax/importers/show.html.erb +8 -14
  76. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  77. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  78. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  79. data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
  80. data/config/locales/bulkrax.en.yml +7 -0
  81. data/config/routes.rb +8 -2
  82. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  83. data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
  84. data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
  85. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  86. data/lib/bulkrax/engine.rb +23 -0
  87. data/lib/bulkrax/version.rb +1 -1
  88. data/lib/bulkrax.rb +107 -19
  89. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  90. data/lib/tasks/bulkrax_tasks.rake +13 -0
  91. data/lib/tasks/reset.rake +4 -4
  92. metadata +64 -8
  93. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
  94. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
  95. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
data/app/parsers/bulkrax/application_parser.rb
@@ -1,6 +1,4 @@
 # frozen_string_literal: true
-require 'zip'
-require 'marcel'
 
 module Bulkrax
   # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -14,7 +12,7 @@ module Bulkrax
              :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
              :key_without_numbers, :status, :set_status_info, :status_info, :status_at,
              :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
-             :zip?, :file?,
+             :zip?, :file?, :remove_and_rerun,
              to: :importerexporter
 
     # @todo Convert to `class_attribute :parser_fiels, default: {}`
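
Note on the hunk above: adding :remove_and_rerun to the delegate list lets any parser ask its importer whether existing entries should be torn down and re-imported. A minimal sketch of the mechanism, using hypothetical FakeImporter/FakeParser stand-ins rather than Bulkrax classes:

    require 'active_support/core_ext/module/delegation'

    class FakeImporter
      def remove_and_rerun
        true # in Bulkrax the value ultimately comes from the importer
      end
    end

    class FakeParser
      attr_reader :importerexporter
      # same mechanism as the delegate call in the diff above
      delegate :remove_and_rerun, to: :importerexporter

      def initialize(importerexporter)
        @importerexporter = importerexporter
      end
    end

    FakeParser.new(FakeImporter.new).remove_and_rerun # => true
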
@@ -47,6 +45,10 @@ module Bulkrax
       raise NotImplementedError, 'must be defined'
     end
 
+    def work_entry_class
+      entry_class
+    end
+
     # @api public
     # @abstract Subclass and override {#collection_entry_class} to implement behavior for the parser.
     def collection_entry_class
@@ -157,6 +159,22 @@ module Bulkrax
       @visibility ||= self.parser_fields['visibility'] || 'open'
     end
 
+    def create_collections
+      create_objects(['collection'])
+    end
+
+    def create_works
+      create_objects(['work'])
+    end
+
+    def create_file_sets
+      create_objects(['file_set'])
+    end
+
+    def create_relationships
+      create_objects(['relationship'])
+    end
+
     # @api public
     #
     # @param types [Array<Symbol>] the types of objects that we'll create.
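
Note on the hunk above: create_collections, create_works, create_file_sets, and create_relationships used to be abstract methods each parser had to override; they are now concrete one-liners that funnel into create_objects. Assuming a parser instance named parser (hypothetical), the calls below are equivalent:

    parser.create_works               # new wrapper
    parser.create_objects(['work'])   # what it delegates to

    # with no argument, every type is processed in order:
    parser.create_objects             # %w[collection work file_set relationship]
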
@@ -166,34 +184,95 @@ module Bulkrax
     # @see #create_works
     # @see #create_file_sets
     # @see #create_relationships
-    def create_objects(types = [])
-      types.each do |object_type|
-        send("create_#{object_type.pluralize}")
+    def create_objects(types_array = nil)
+      index = 0
+      (types_array || %w[collection work file_set relationship]).each do |type|
+        if type.eql?('relationship')
+          ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
+          next
+        end
+        send(type.pluralize).each do |current_record|
+          next unless record_has_source_identifier(current_record, index)
+          break if limit_reached?(limit, index)
+          seen[current_record[source_identifier]] = true
+          create_entry_and_job(current_record, type)
+          increment_counters(index, "#{type}": true)
+          index += 1
+        end
+        importer.record_status
+      end
+      true
+    rescue StandardError => e
+      set_status_info(e)
+    end
+
+    def rebuild_entries(types_array = nil)
+      index = 0
+      (types_array || %w[collection work file_set relationship]).each do |type|
+        # works are not guaranteed to have Work in the type
+
+        importer.entries.where(rebuild_entry_query(type, parser_fields['entry_statuses'])).find_each do |e|
+          seen[e.identifier] = true
+          e.status_info('Pending', importer.current_run)
+          if remove_and_rerun
+            delay = calculate_type_delay(type)
+            "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, e, current_run)
+          else
+            "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, e.id, current_run.id)
+          end
+          increment_counters(index)
+          index += 1
+        end
       end
     end
 
-    # @abstract Subclass and override {#create_collections} to implement behavior for the parser.
-    def create_collections
-      raise NotImplementedError, 'must be defined' if importer?
+    def rebuild_entry_query(type, statuses)
+      type_col = Bulkrax::Entry.arel_table['type']
+      status_col = Bulkrax::Entry.arel_table['status_message']
+
+      query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
+      query.and(status_col.in(statuses))
     end
 
-    # @abstract Subclass and override {#create_works} to implement behavior for the parser.
-    def create_works
-      raise NotImplementedError, 'must be defined' if importer?
+    def calculate_type_delay(type)
+      return 2.minutes if type == 'file_set'
+      return 1.minute if type == 'work'
+      return 0
     end
 
-    # @abstract Subclass and override {#create_file_sets} to implement behavior for the parser.
-    def create_file_sets
-      raise NotImplementedError, 'must be defined' if importer?
+    def record_raw_metadata(record)
+      record.to_h
     end
 
-    # @abstract Subclass and override {#create_relationships} to implement behavior for the parser.
-    def create_relationships
-      raise NotImplementedError, 'must be defined' if importer?
+    def record_deleted?(record)
+      return false unless record.key?(:delete)
+      ActiveModel::Type::Boolean.new.cast(record[:delete])
+    end
+
+    def record_remove_and_rerun?(record)
+      return false unless record.key?(:remove_and_rerun)
+      ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+    end
+
+    def create_entry_and_job(current_record, type, identifier = nil)
+      identifier ||= current_record[source_identifier]
+      new_entry = find_or_create_entry(send("#{type}_entry_class"),
+                                       identifier,
+                                       'Bulkrax::Importer',
+                                       record_raw_metadata(current_record))
+      new_entry.status_info('Pending', importer.current_run)
+      if record_deleted?(current_record)
+        "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
+      elsif record_remove_and_rerun?(current_record) || remove_and_rerun
+        delay = calculate_type_delay(type)
+        "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
+      else
+        "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
+      end
     end
 
     # Optional, define if using browse everything for file upload
-    def retrieve_cloud_files(files); end
+    def retrieve_cloud_files(_files, _importer); end
 
     # @param file [#path, #original_filename] the file object with the relevant data for the
     #   import.
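
Note on the hunk above: create_entry_and_job and rebuild_entries pick their job classes by string interpolation, and rebuild_entry_query is built with Arel because work entries are not guaranteed to contain "Work" in their type column. A sketch of both behaviors, assuming the entry and job classes shipped with this gem and a 'Failed' status as example input:

    type = 'file_set'
    "Bulkrax::Import#{type.camelize}Job".constantize
    # => Bulkrax::ImportFileSetJob

    type_col   = Bulkrax::Entry.arel_table['type']
    status_col = Bulkrax::Entry.arel_table['status_message']
    type_col.does_not_match_all(%w[collection file_set])
            .and(status_col.in(['Failed'])).to_sql
    # => roughly ("type" NOT LIKE 'collection' AND "type" NOT LIKE 'file_set')
    #    AND "status_message" IN ('Failed')
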
@@ -305,12 +384,19 @@ module Bulkrax
     end
 
     def find_or_create_entry(entryclass, identifier, type, raw_metadata = nil)
-      entry = entryclass.where(
+      # limit entry search to just this importer or exporter. Don't go moving them
+      entry = importerexporter.entries.where(
+        identifier: identifier
+      ).first
+      entry ||= entryclass.new(
         importerexporter_id: importerexporter.id,
         importerexporter_type: type,
         identifier: identifier
-      ).first_or_create!
+      )
       entry.raw_metadata = raw_metadata
+      # Setting parsed_metadata specifically for the id so we can find the object via the
+      # id in a delete. This is likely to get clobbered in a regular import, which is fine.
+      entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
       entry.save!
       entry
     end
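
Note on the hunk above: find_or_create_entry previously ran first_or_create! against the whole entries table, which could adopt and re-parent an entry belonging to a different importer that shared the same identifier; the lookup is now scoped to the current importer or exporter. A sketch of the new path, with importer_a and the identifier 'abc' as hypothetical examples:

    entry = importer_a.entries.find_by(identifier: 'abc')  # scoped lookup
    entry ||= Bulkrax::CsvEntry.new(                       # build only on a miss
      importerexporter_id: importer_a.id,
      importerexporter_type: 'Bulkrax::Importer',
      identifier: 'abc'
    )
    entry.raw_metadata = { 'id' => '123', 'title' => 'Example' }
    # the id is copied into parsed_metadata so a later delete can find the object
    entry.parsed_metadata = { id: entry.raw_metadata['id'] }
    entry.save!
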
@@ -342,6 +428,8 @@ module Bulkrax
     end
 
     def unzip(file_to_unzip)
+      return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+
       Zip::File.open(file_to_unzip) do |zip_file|
         zip_file.each do |entry|
           entry_path = File.join(importer_unzip_path, entry.name)
@@ -351,6 +439,13 @@ module Bulkrax
       end
     end
 
+    def untar(file_to_untar)
+      Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
+      command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
+      result = system(command)
+      raise "Failed to extract #{file_to_untar}" unless result
+    end
+
     def zip
       FileUtils.mkdir_p(exporter_export_zip_path)
 
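Note on the two hunks above: unzip now branches on the file name, handing .tar.gz archives to the new untar method, which shells out to tar with both paths escaped through Shellwords; zip files still go through rubyzip (now loaded by the engine instead of a top-of-file require). A standalone sketch of the combined logic, where extract_archive is a hypothetical helper rather than gem API:

    require 'fileutils'
    require 'shellwords'
    require 'zip' # rubyzip

    def extract_archive(archive, dest)
      FileUtils.mkdir_p(dest)
      if archive.end_with?('.tar.gz')
        # equivalent to the untar method added above
        system("tar -xzf #{Shellwords.escape(archive)} -C #{Shellwords.escape(dest)}") ||
          raise("Failed to extract #{archive}")
      else
        Zip::File.open(archive) do |zip_file|
          zip_file.each do |entry|
            entry_path = File.join(dest, entry.name)
            FileUtils.mkdir_p(File.dirname(entry_path))
            zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
          end
        end
      end
    end
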
data/app/parsers/bulkrax/bagit_parser.rb
@@ -1,246 +1,224 @@
 # frozen_string_literal: true
-require 'bagit'
 
-module Bulkrax
-  class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
-    include ExportBehavior
+unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
+  module Bulkrax
+    class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
+      include ExportBehavior
 
-    def self.export_supported?
-      true
-    end
-
-    def valid_import?
-      return true if import_fields.present?
-    rescue => e
-      set_status_info(e)
-      false
-    end
-
-    def entry_class
-      rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
-      rdf_format ? RdfEntry : CsvEntry
-    end
-
-    def path_to_files(filename:)
-      @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
-    end
-
-    # Take a random sample of 10 metadata_paths and work out the import fields from that
-    def import_fields
-      raise StandardError, 'No metadata files were found' if metadata_paths.blank?
-      @import_fields ||= metadata_paths.sample(10).map do |path|
-        entry_class.fields_from_data(entry_class.read_data(path))
-      end.flatten.compact.uniq
-    end
+      def self.export_supported?
+        true
+      end
 
-    # Create an Array of all metadata records
-    def records(_opts = {})
-      raise StandardError, 'No BagIt records were found' if bags.blank?
-      @records ||= bags.map do |bag|
-        path = metadata_path(bag)
-        raise StandardError, 'No metadata files were found' if path.blank?
-        data = entry_class.read_data(path)
-        get_data(bag, data)
+      def valid_import?
+        return true if import_fields.present?
+      rescue => e
+        set_status_info(e)
+        false
       end
 
-      @records = @records.flatten
-    end
+      def entry_class
+        rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
+        rdf_format ? RdfEntry : CsvEntry
+      end
 
-    def get_data(bag, data)
-      if entry_class == CsvEntry
-        data = data.map do |data_row|
-          record_data = entry_class.data_for_entry(data_row, source_identifier, self)
-          next record_data if importerexporter.metadata_only?
+      def path_to_files(filename:)
+        @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
+      end
 
-          record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
-          record_data
-        end
-      else
-        data = entry_class.data_for_entry(data, source_identifier, self)
-        data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
+      # Take a random sample of 10 metadata_paths and work out the import fields from that
+      def import_fields
+        raise StandardError, 'No metadata files were found' if metadata_paths.blank?
+        @import_fields ||= metadata_paths.sample(10).map do |path|
+          entry_class.fields_from_data(entry_class.read_data(path))
+        end.flatten.compact.uniq
      end
 
-      data
-    end
+      # Create an Array of all metadata records
+      def records(_opts = {})
+        raise StandardError, 'No BagIt records were found' if bags.blank?
+        @records ||= bags.map do |bag|
+          path = metadata_path(bag)
+          raise StandardError, 'No metadata files were found' if path.blank?
+          data = entry_class.read_data(path)
+          get_data(bag, data)
+        end
 
-    def create_works
-      entry_class == CsvEntry ? super : create_rdf_works
-    end
+        @records = @records.flatten
+      end
 
-    def create_rdf_works
-      records.each_with_index do |record, index|
-        next unless record_has_source_identifier(record, index)
-        break if limit_reached?(limit, index)
+      def get_data(bag, data)
+        if entry_class == CsvEntry
+          data = data.map do |data_row|
+            record_data = entry_class.data_for_entry(data_row, source_identifier, self)
+            next record_data if importerexporter.metadata_only?
 
-        seen[record[source_identifier]] = true
-        new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record)
-        if record[:delete].present?
-          DeleteWorkJob.send(perform_method, new_entry, current_run)
+            record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
+            record_data
+          end
         else
-          ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
+          data = entry_class.data_for_entry(data, source_identifier, self)
+          data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
         end
-        increment_counters(index, work: true)
+
+        data
       end
-      importer.record_status
-    rescue StandardError => e
-      set_status_info(e)
-    end
 
-    # export methods
+      # export methods
 
-    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
-    def write_files
-      require 'open-uri'
-      require 'socket'
+      # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
+      def write_files
+        require 'open-uri'
+        require 'socket'
 
-      folder_count = 1
-      records_in_folder = 0
-      work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
-      collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
-      file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
+        folder_count = 1
+        records_in_folder = 0
+        work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
+        collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
+        file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
 
-      work_entries[0..limit || total].each do |entry|
-        record = ActiveFedora::Base.find(entry.identifier)
-        next unless record
+        work_entries[0..limit || total].each do |entry|
+          record = Bulkrax.object_factory.find(entry.identifier)
+          next unless record
 
-        bag_entries = [entry]
+          bag_entries = [entry]
 
-        if record.member_of_collection_ids.present?
-          collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
-        end
+          if record.member_of_collection_ids.present?
+            collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
+          end
 
-        if record.file_sets.present?
-          file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
-        end
+          if record.file_sets.present?
+            file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
+          end
 
-        records_in_folder += bag_entries.count
-        if records_in_folder > records_split_count
-          folder_count += 1
-          records_in_folder = bag_entries.count
-        end
+          records_in_folder += bag_entries.count
+          if records_in_folder > records_split_count
+            folder_count += 1
+            records_in_folder = bag_entries.count
+          end
 
-        bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
-
-        record.file_sets.each do |fs|
-          file_name = filename(fs)
-          next if file_name.blank? || fs.original_file.blank?
-
-          io = open(fs.original_file.uri)
-          file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
-          file.write(io.read)
-          file.close
-          begin
-            bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
-          rescue => e
-            entry.set_status_info(e)
-            set_status_info(e)
+          bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
+
+          record.file_sets.each do |fs|
+            file_name = filename(fs)
+            next if file_name.blank? || fs.original_file.blank?
+
+            io = open(fs.original_file.uri)
+            file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
+            file.write(io.read)
+            file.close
+            begin
+              bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
+            rescue => e
+              entry.set_status_info(e)
+              set_status_info(e)
+            end
          end
-        end
 
-        CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
-          bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
-        end
+          CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
+            bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
+          end
 
-        write_triples(folder_count, entry)
-        bag.manifest!(algo: 'sha256')
+          write_triples(folder_count, entry)
+          bag.manifest!(algo: 'sha256')
+        end
       end
-    end
-    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
+      # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
 
-    def setup_csv_metadata_export_file(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
+      def setup_csv_metadata_export_file(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
 
-      File.join(path, id, 'metadata.csv')
-    end
+        File.join(path, id, 'metadata.csv')
+      end
 
-    def key_allowed(key)
-      !Bulkrax.reserved_properties.include?(key) &&
-        new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
-        key != source_identifier.to_s
-    end
+      def key_allowed(key)
+        !Bulkrax.reserved_properties.include?(key) &&
+          new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
+          key != source_identifier.to_s
+      end
 
-    def setup_triple_metadata_export_file(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
+      def setup_triple_metadata_export_file(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
 
-      File.join(path, id, 'metadata.nt')
-    end
+        File.join(path, id, 'metadata.nt')
+      end
 
-    def setup_bagit_folder(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
+      def setup_bagit_folder(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
 
-      File.join(path, id)
-    end
+        File.join(path, id)
+      end
 
-    # @todo(bjustice) - remove hyrax reference
-    def write_triples(folder_count, e)
-      sd = SolrDocument.find(e.identifier)
-      return if sd.nil?
+      # @todo(bjustice) - remove hyrax reference
+      def write_triples(folder_count, e)
+        sd = SolrDocument.find(e.identifier)
+        return if sd.nil?
 
-      req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
-      rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
-      File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
-        triples.write(rdf)
+        req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
+        rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
+        File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
+          triples.write(rdf)
+        end
       end
-    end
 
-    # @todo - investigate getting directory structure
-    # @todo - investigate using perform_later, and having the importer check for
-    # DownloadCloudFileJob before it starts
-    def retrieve_cloud_files(files)
-      # There should only be one zip file for Bagit, take the first
-      return if files['0'].blank?
-      target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
-      # Now because we want the files in place before the importer runs
-      Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
-      return target_file
-    end
+      # @todo - investigate getting directory structure
+      # @todo - investigate using perform_later, and having the importer check for
+      # DownloadCloudFileJob before it starts
+      def retrieve_cloud_files(files, _importer)
+        # There should only be one zip file for Bagit, take the first
+        return if files['0'].blank?
+        target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
+        # Now because we want the files in place before the importer runs
+        Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
+        return target_file
+      end
 
-    private
+      private
 
-    def bags
-      return @bags if @bags.present?
-      new_bag = bag(import_file_path)
-      @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
-      @bags.delete(nil)
-      raise StandardError, 'No valid bags found' if @bags.blank?
-      return @bags
-    end
+      def bags
+        return @bags if @bags.present?
+        new_bag = bag(import_file_path)
+        @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
+        @bags.delete(nil)
+        raise StandardError, 'No valid bags found' if @bags.blank?
+        return @bags
+      end
 
-    # Gather the paths to all bags; skip any stray files
-    def bag_paths
-      bags.map(&:bag_dir)
-    end
+      # Gather the paths to all bags; skip any stray files
+      def bag_paths
+        bags.map(&:bag_dir)
+      end
 
-    def metadata_file_name
-      raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
-      parser_fields['metadata_file_name']
-    end
+      def metadata_file_name
+        raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
+        parser_fields['metadata_file_name']
+      end
 
-    # Gather the paths to all metadata files matching the metadata_file_name
-    def metadata_paths
-      @metadata_paths ||= bag_paths.map do |b|
-        Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
-      end.flatten.compact
-    end
+      # Gather the paths to all metadata files matching the metadata_file_name
+      def metadata_paths
+        @metadata_paths ||= bag_paths.map do |b|
+          Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
+        end.flatten.compact
+      end
 
-    def metadata_path(bag)
-      Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
-    end
+      def metadata_path(bag)
+        Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
+      end
 
-    def bag(path)
-      return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
-      bag = BagIt::Bag.new(path)
-      return nil unless bag.valid?
-      bag
-    end
+      def bag(path)
+        return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
+        bag = BagIt::Bag.new(path)
+        return nil unless bag.valid?
+        bag
+      end
 
-    # use the version of this method from the application parser instead
-    def real_import_file_path
-      return importer_unzip_path if file? && zip?
-      parser_fields['import_file_path']
+      # use the version of this method from the application parser instead
+      def real_import_file_path
+        return importer_unzip_path if file? && zip?
+        parser_fields['import_file_path']
+      end
     end
   end
 end