bulkrax 7.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,153 +1,209 @@
  # frozen_string_literal: true

  module Bulkrax
+ ##
+ # NOTE: Historically (e.g. Bulkrax v7.0.0 and earlier) we mixed in all of the
+ # {Bulkrax::FileFactory} methods into {Bulkrax::ObjectFactory}. However, with
+ # the introduction of {Bulkrax::ValkyrieObjectFactory} we needed to account
+ # for branching logic.
+ #
+ # This refactor where we expose the bare minimum interface of file interaction
+ # should help with encapsulation.
+ #
+ # The refactor pattern was to find FileFactory methods used by the
+ # ObjectFactory and delegate those to the new {FileFactory::InnerWorkings}
+ # class. Likewise within the InnerWorkings we wanted to delegate to the given
+ # object_factory the methods that the InnerWorkings need.
+ #
+ # Futher, by preserving the FileFactory as a mixed in module, downstream
+ # implementers will hopefully experience less of an impact regarding this
+ # change.
  module FileFactory
  extend ActiveSupport::Concern

- # Find existing files or upload new files. This assumes a Work will have unique file titles;
- # and that those file titles will not have changed
- # could filter by URIs instead (slower).
- # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
- # otherwise it gets reuploaded by `work_actor`.
- # support multiple files; ensure attributes[:file] is an Array
- def upload_ids
- return [] if klass == Collection
- attributes[:file] = file_paths
- import_files
- end
+ included do
+ class_attribute :file_set_factory_inner_workings_class, default: Bulkrax::FileFactory::InnerWorkings
+
+ def file_set_factory_inner_workings
+ @file_set_factory_inner_workings ||= file_set_factory_inner_workings_class.new(object_factory: self)
+ end

- def file_attributes(update_files = false)
- @update_files = update_files
- hash = {}
- return hash if klass == Collection
- hash[:uploaded_files] = upload_ids if attributes[:file].present?
- hash[:remote_files] = new_remote_files if new_remote_files.present?
- hash
+ delegate :file_attributes, :destroy_existing_files, to: :file_set_factory_inner_workings
  end

- # Its possible to get just an array of strings here, so we need to make sure they are all hashes
- def parsed_remote_files
- return @parsed_remote_files if @parsed_remote_files.present?
- @parsed_remote_files = attributes[:remote_files] || []
- @parsed_remote_files = @parsed_remote_files.map do |file_value|
- if file_value.is_a?(Hash)
- file_value
- elsif file_value.is_a?(String)
- name = Bulkrax::Importer.safe_uri_filename(file_value)
- { url: file_value, file_name: name }
- else
- Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
- nil
+ class InnerWorkings
+ def initialize(object_factory:)
+ @object_factory = object_factory
+ end
+
+ attr_reader :object_factory
+
+ delegate :object, :klass, :attributes, :user, to: :object_factory
+
+ # Find existing files or upload new files. This assumes a Work will have unique file titles;
+ # and that those file titles will not have changed
+ # could filter by URIs instead (slower).
+ # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
+ # otherwise it gets reuploaded by `work_actor`.
+ # support multiple files; ensure attributes[:file] is an Array
+ def upload_ids
+ return [] if klass == Bulkrax.collection_model_class
+ attributes[:file] = file_paths
+ import_files
+ end
+
+ def file_attributes(update_files = false)
+ # NOTE: Unclear why we're changing a instance variable based on what was
+ # passed, which itself is derived from the instance variable we're about
+ # to change. It's very easy to mutate the initialized @update_files if
+ # you don't pass the parameter.
+ object_factory.update_files = update_files
+ hash = {}
+ return hash if klass == Bulkrax.collection_model_class
+ hash[:uploaded_files] = upload_ids if attributes[:file].present?
+ hash[:remote_files] = new_remote_files if new_remote_files.present?
+ hash
+ end
+
+ # Its possible to get just an array of strings here, so we need to make sure they are all hashes
+ def parsed_remote_files
+ return @parsed_remote_files if @parsed_remote_files.present?
+ @parsed_remote_files = attributes[:remote_files] || []
+ @parsed_remote_files = @parsed_remote_files.map do |file_value|
+ if file_value.is_a?(Hash)
+ file_value
+ elsif file_value.is_a?(String)
+ name = Bulkrax::Importer.safe_uri_filename(file_value)
+ { url: file_value, file_name: name }
+ else
+ Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
+ nil
+ end
  end
+ @parsed_remote_files.delete(nil)
+ @parsed_remote_files
  end
- @parsed_remote_files.delete(nil)
- @parsed_remote_files
- end

- def new_remote_files
- @new_remote_files ||= if object.is_a? FileSet
- parsed_remote_files.select do |file|
- # is the url valid?
- is_valid = file[:url]&.match(URI::ABS_URI)
- # does the file already exist
- is_existing = object.import_url && object.import_url == file[:url]
- is_valid && !is_existing
- end
- elsif object.present? && object.file_sets.present?
- parsed_remote_files.select do |file|
- # is the url valid?
- is_valid = file[:url]&.match(URI::ABS_URI)
- # does the file already exist
- is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
- is_valid && !is_existing
- end
- else
- parsed_remote_files.select do |file|
- file[:url]&.match(URI::ABS_URI)
- end
- end
- end
+ def new_remote_files
+ return @new_remote_files if @new_remote_files
+
+ # TODO: This code could first loop through all remote files and select
+ # only the valid ones; then load the file_sets and do comparisons.
+ file_sets = object_factory.class.file_sets_for(resource: object)
+ @new_remote_files = parsed_remote_files.select do |file|
+ # is the url valid?
+ is_valid = file[:url]&.match(URI::ABS_URI)
+ # does the file already exist
+ is_existing = file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
+ is_valid && !is_existing
+ end
+ end

- def file_paths
- @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
- end
+ def file_paths
+ @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
+ end

- # Retrieve the orginal filenames for the files to be imported
- def work_files_filenames
- object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
- end
+ # Retrieve the orginal filenames for the files to be imported
+ def work_files_filenames
+ object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
+ end

- # Retrieve the filenames for the files to be imported
- def import_files_filenames
- file_paths.map { |f| f.split('/').last }
- end
+ # Retrieve the filenames for the files to be imported
+ def import_files_filenames
+ file_paths.map { |f| f.split('/').last }
+ end

- # Called if #replace_files is true
- # Destroy all file_sets for this object
- # Reload the object to ensure the remaining methods have the most up to date object
- def destroy_existing_files
- return unless object.present? && object.file_sets.present?
- object.file_sets.each do |fs|
- Hyrax::Actors::FileSetActor.new(fs, @user).destroy
+ # Called if #replace_files is true
+ # Destroy all file_sets for this object
+ # Reload the object to ensure the remaining methods have the most up to date object
+ def destroy_existing_files
+ return unless object.present? && object.file_sets.present?
+ object.file_sets.each do |fs|
+ Hyrax::Actors::FileSetActor.new(fs, @user).destroy
+ end
+ @object = object.reload
+ log_deleted_fs(object)
  end
- @object = object.reload
- log_deleted_fs(object)
- end

- def set_removed_filesets
- local_file_sets.each do |fileset|
- fileset.files.first.create_version
+ def set_removed_filesets
+ local_file_sets.each do |fileset|
+ # TODO: We need to consider the Valkyrie pathway
+ next if fileset.is_a?(Valkyrie::Resource)
+
+ remove_file_set(file_set: fileset)
+ end
+ end
+
+ def remove_file_set(file_set:)
+ # TODO: We need to consider the Valkyrie pathway
+ file = file_set.files.first
+ file.create_version
  opts = {}
- opts[:path] = fileset.files.first.id.split('/', 2).last
+ opts[:path] = file.id.split('/', 2).last
  opts[:original_name] = 'removed.png'
  opts[:mime_type] = 'image/png'

- fileset.add_file(File.open(Bulkrax.removed_image_path), opts)
- fileset.save
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
+ file_set.add_file(File.open(Bulkrax.removed_image_path), opts)
+ file_set.save
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
  end
- end

- def local_file_sets
- @local_file_sets ||= ordered_file_sets
- end
+ def local_file_sets
+ # NOTE: we'll be mutating this list of file_sets via the import_files
+ # method
+ @local_file_sets ||= ordered_file_sets
+ end

- def ordered_file_sets
- # OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0
- object&.ordered_members.to_a.select(&:file_set?)
- end
+ def ordered_file_sets
+ Bulkrax.object_factory.ordered_file_sets_for(object)
+ end

- def import_files
- paths = file_paths.map { |path| import_file(path) }.compact
- set_removed_filesets if local_file_sets.present?
- paths
- end
+ ##
+ # @return [Array<Integer>] An array of Hyrax::UploadFile#id representing the
+ # files that we should be uploading.
+ def import_files
+ paths = file_paths.map { |path| import_file(path) }.compact
+ set_removed_filesets if local_file_sets.present?
+ paths
+ end

- def import_file(path)
- u = Hyrax::UploadedFile.new
- u.user_id = @user.id
- u.file = CarrierWave::SanitizedFile.new(path)
- update_filesets(u)
- end
+ def import_file(path)
+ u = Hyrax::UploadedFile.new
+ u.user_id = user.id
+ u.file = CarrierWave::SanitizedFile.new(path)
+ update_filesets(u)
+ end
+
+ def update_filesets(current_file)
+ if @update_files && local_file_sets.present?
+ # NOTE: We're mutating local_file_sets as we process the updated file.
+ fileset = local_file_sets.shift
+ update_file_set(file_set: fileset, uploaded: current_file)
+ else
+ current_file.save
+ current_file.id
+ end
+ end
+
+ ##
+ # @return [NilClass] indicating that we've successfully began work on the file_set.
+ def update_file_set(file_set:, uploaded:)
+ # TODO: We need to consider the Valkyrie pathway
+ file = file_set.files.first
+ uploaded_file = uploaded.file

- def update_filesets(current_file)
- if @update_files && local_file_sets.present?
- fileset = local_file_sets.shift
- return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
+ return nil if file.checksum.value == Digest::SHA1.file(uploaded_file.path).to_s

- fileset.files.first.create_version
+ file.create_version
  opts = {}
- opts[:path] = fileset.files.first.id.split('/', 2).last
- opts[:original_name] = current_file.file.file.original_filename
- opts[:mime_type] = current_file.file.content_type
+ opts[:path] = file.id.split('/', 2).last
+ opts[:original_name] = uploaded_file.file.original_filename
+ opts[:mime_type] = uploaded_file.content_type

- fileset.add_file(File.open(current_file.file.to_s), opts)
- fileset.save
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
+ file_set.add_file(File.open(uploaded_file.to_s), opts)
+ file_set.save
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
  nil
- else
- current_file.save
- current_file.id
  end
  end
  end
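
The `included do` block above makes the file-handling implementation a swappable collaborator: the object factories only need the delegated `file_attributes` and `destroy_existing_files`, and build them from whatever class the `file_set_factory_inner_workings_class` attribute points at. A minimal sketch of how a downstream application might use that seam; the subclass and the logging tweak are hypothetical, only the class attribute and the `InnerWorkings` API come from the diff above:

    # Hypothetical downstream customization of the new seam.
    class MyFileInnerWorkings < Bulkrax::FileFactory::InnerWorkings
      # Log which object the remote-file comparison runs against, then fall
      # back to the stock behavior shown in the diff.
      def new_remote_files
        Rails.logger.info("resolving remote files for #{object&.id}")
        super
      end
    end

    # The class_attribute added in the `included do` block makes the swap a one-liner.
    Bulkrax::ObjectFactory.file_set_factory_inner_workings_class = MyFileInnerWorkings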
@@ -5,7 +5,7 @@ module Bulkrax
  extend ActiveSupport::Concern

  included do
- self.default_work_type = "::FileSet"
+ self.default_work_type = Bulkrax.file_model_class.to_s
  end

  def file_reference
@@ -47,7 +47,7 @@ module Bulkrax
  end

  def child_jobs
- raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
+ raise ::StandardError, "A #{Bulkrax.file_model_class} cannot be a parent of a #{Bulkrax.collection_model_class}, Work, or other #{Bulkrax.file_model_class}"
  end
  end
  end
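
Both changes in this file replace hard-coded `::FileSet` and `Collection` references with the model classes configured on the `Bulkrax` module, which is what lets the same entry code run against ActiveFedora or Valkyrie models. A sketch of the kind of initializer configuration this implies; the writer methods are assumed from the new model-class accessors in `lib/bulkrax.rb`, and the Hyrax class names are only illustrative:

    # config/initializers/bulkrax.rb (assumed configuration; Bulkrax picks
    # sensible defaults when Hyrax is present, so this is optional)
    Bulkrax.collection_model_class = Hyrax::PcdmCollection
    Bulkrax.file_model_class       = Hyrax::FileSet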
@@ -56,6 +56,10 @@ module Bulkrax
  end
  end

+ def get_object_name(field)
+ mapping&.[](field)&.[]('object')
+ end
+
  def set_parsed_data(name, value)
  return parsed_metadata[name] = value unless multiple?(name)

@@ -125,41 +129,40 @@ module Bulkrax

  return false if excluded?(field)
  return true if supported_bulkrax_fields.include?(field)
- return factory_class.method_defined?(field) && factory_class.properties[field].present?
+
+ Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
  end

  def supported_bulkrax_fields
- @supported_bulkrax_fields ||=
- %W[
- id
- file
- remote_files
- model
- visibility
- delete
- #{related_parents_parsed_mapping}
- #{related_children_parsed_mapping}
- ]
+ @supported_bulkrax_fields ||= fields_that_are_always_singular +
+ fields_that_are_always_multiple
  end

+ ##
+ # Determine a multiple properties field
  def multiple?(field)
- @multiple_bulkrax_fields ||=
- %W[
- file
- remote_files
- rights_statement
- #{related_parents_parsed_mapping}
- #{related_children_parsed_mapping}
- ]
+ return true if fields_that_are_always_singular.include?(field.to_s)
+ return false if fields_that_are_always_multiple.include?(field.to_s)

- return true if @multiple_bulkrax_fields.include?(field)
- return false if field == 'model'
+ Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
+ end

- field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
+ def fields_that_are_always_multiple
+ %w[id delete model visibility]
  end

- def get_object_name(field)
- mapping&.[](field)&.[]('object')
+ def fields_that_are_always_singular
+ @fields_that_are_always_singular ||= %W[
+ file
+ remote_files
+ rights_statement
+ #{related_parents_parsed_mapping}
+ #{related_children_parsed_mapping}
+ ]
+ end
+
+ def schema_form_definitions
+ @schema_form_definitions ||= ::SchemaLoader.new.form_definitions_for(factory_class.name.underscore.to_sym)
  end

  # Hyrax field to use for the given import field
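
With this hunk, `field_supported?` and `multiple?` no longer inspect `factory_class.properties` directly (an ActiveFedora-ism); the schema questions are routed through `Bulkrax.object_factory`, whose `field_supported?` and `field_multi_value?` class methods answer them for either persistence layer. An illustrative call using the keyword signatures shown above; the `Work` model and `'creator'` field are placeholders:

    # Placeholders for whatever model/field an entry is mapping.
    Bulkrax.object_factory.field_supported?(field: 'creator', model: Work)
    Bulkrax.object_factory.field_multi_value?(field: 'creator', model: Work)
    # => true/false, depending on how the model declares the property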
@@ -11,7 +11,7 @@ module Bulkrax
  unless self.importerexporter.validate_only
  raise CollectionsCreatedError unless collections_created?
  @item = factory.run!
- add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
+ add_user_to_permission_templates!
  parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
  child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
  end
@@ -28,22 +28,15 @@ module Bulkrax
  end

  def add_user_to_permission_templates!
- permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: @item.id)
-
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
- permission_template_id: permission_template.id,
- agent_id: user.user_key,
- agent_type: 'user',
- access: 'manage'
- )
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
- permission_template_id: permission_template.id,
- agent_id: 'admin',
- agent_type: 'group',
- access: 'manage'
- )
-
- @item.reset_access_controls!
+ # NOTE: This is a cheat for the class is a CollectionEntry. Consider
+ # that we have default_work_type.
+ #
+ # TODO: This guard clause is not necessary as we can handle it in the
+ # underlying factory. However, to do that requires adjusting about 7
+ # failing specs. So for now this refactor appears acceptable
+ return unless defined?(::Hyrax)
+ return unless self.class.to_s.include?("Collection")
+ factory.add_user_to_collection_permissions(collection: @item, user: user)
  end

  def parent_jobs
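
The permission-template bookkeeping that used to live inline in `add_user_to_permission_templates!` now sits behind `factory.add_user_to_collection_permissions(collection:, user:)`. Roughly what that relocated method has to do, reconstructed from the lines removed above; the real implementation lives in the object factories and may differ in detail:

    # Reconstruction of the removed behavior, for orientation only.
    def add_user_to_collection_permissions(collection:, user:)
      template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)

      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: template.id,
        agent_id: user.user_key,
        agent_type: 'user',
        access: 'manage'
      )
      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: template.id,
        agent_id: 'admin',
        agent_type: 'group',
        access: 'manage'
      )

      collection.reset_access_controls!
    end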
@@ -1,5 +1,4 @@
  # frozen_string_literal: true
- require 'marcel'

  module Bulkrax
  module ImporterExporterBehavior
@@ -54,9 +53,11 @@ module Bulkrax
  filename = parser_fields&.[]('import_file_path')
  return false unless filename
  return false unless File.file?(filename)
+
  returning_value = false
  File.open(filename) do |file|
- returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
+ mime_type = ::Marcel::MimeType.for(file)
+ returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
  end
  returning_value
  end
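
The archive-detection helper now treats gzip the same as zip, which is what lets `.tar.gz` uploads get past the file-type check. A small illustration of the broadened test; the path is hypothetical:

    File.open('/tmp/imports/records.tar.gz') do |file|
      mime_type = ::Marcel::MimeType.for(file)
      # Previously only 'application/zip' passed; gzip archives now do too.
      mime_type.include?('application/zip') || mime_type.include?('application/gzip')
    end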
@@ -1,6 +1,4 @@
  # frozen_string_literal: true
- require 'zip'
- require 'marcel'

  module Bulkrax
  # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -232,7 +230,7 @@ module Bulkrax
  type_col = Bulkrax::Entry.arel_table['type']
  status_col = Bulkrax::Entry.arel_table['status_message']

- query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
+ query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
  query.and(status_col.in(statuses))
  end

@@ -242,16 +240,30 @@ module Bulkrax
  return 0
  end

+ def record_raw_metadata(record)
+ record.to_h
+ end
+
+ def record_deleted?(record)
+ return false unless record.key?(:delete)
+ ActiveModel::Type::Boolean.new.cast(record[:delete])
+ end
+
+ def record_remove_and_rerun?(record)
+ return false unless record.key?(:remove_and_rerun)
+ ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+ end
+
  def create_entry_and_job(current_record, type, identifier = nil)
  identifier ||= current_record[source_identifier]
  new_entry = find_or_create_entry(send("#{type}_entry_class"),
  identifier,
  'Bulkrax::Importer',
- current_record.to_h)
+ record_raw_metadata(current_record))
  new_entry.status_info('Pending', importer.current_run)
- if current_record[:delete].present?
+ if record_deleted?(current_record)
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
- elsif current_record[:remove_and_rerun].present? || remove_and_rerun
+ elsif record_remove_and_rerun?(current_record) || remove_and_rerun
  delay = calculate_type_delay(type)
  "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
  else
@@ -260,7 +272,7 @@ module Bulkrax
  end

  # Optional, define if using browse everything for file upload
- def retrieve_cloud_files(files); end
+ def retrieve_cloud_files(_files, _importer); end

  # @param file [#path, #original_filename] the file object that with the relevant data for the
  # import.
@@ -382,6 +394,9 @@ module Bulkrax
  identifier: identifier
  )
  entry.raw_metadata = raw_metadata
+ # Setting parsed_metadata specifically for the id so we can find the object via the
+ # id in a delete. This is likely to get clobbered in a regular import, which is fine.
+ entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
  entry.save!
  entry
  end
@@ -413,6 +428,8 @@ module Bulkrax
  end

  def unzip(file_to_unzip)
+ return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+
  Zip::File.open(file_to_unzip) do |zip_file|
  zip_file.each do |entry|
  entry_path = File.join(importer_unzip_path, entry.name)
@@ -422,6 +439,13 @@ module Bulkrax
  end
  end

+ def untar(file_to_untar)
+ Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
+ command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
+ result = system(command)
+ raise "Failed to extract #{file_to_untar}" unless result
+ end
+
  def zip
  FileUtils.mkdir_p(exporter_export_zip_path)
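
Together with the gzip detection in `ImporterExporterBehavior`, the new `untar` method gives the parsers end-to-end `.tar.gz` support: `unzip` short-circuits for gzipped tarballs and shells out to `tar` instead of rubyzip. A usage sketch, where `parser` stands for any concrete `ApplicationParser` instance and the path is illustrative:

    parser.unzip('/tmp/imports/batch.tar.gz')
    # dispatches to #untar, which runs:
    #   tar -xzf /tmp/imports/batch.tar.gz -C <importer_unzip_path>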