bulkrax 7.0.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,153 +1,209 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
+ ##
5
+ # NOTE: Historically (e.g. Bulkrax v7.0.0 and earlier) we mixed in all of the
6
+ # {Bulkrax::FileFactory} methods into {Bulkrax::ObjectFactory}. However, with
7
+ # the introduction of {Bulkrax::ValkyrieObjectFactory} we needed to account
8
+ # for branching logic.
9
+ #
10
+ # This refactor where we expose the bare minimum interface of file interaction
11
+ # should help with encapsulation.
12
+ #
13
+ # The refactor pattern was to find FileFactory methods used by the
14
+ # ObjectFactory and delegate those to the new {FileFactory::InnerWorkings}
15
+ # class. Likewise within the InnerWorkings we wanted to delegate to the given
16
+ # object_factory the methods that the InnerWorkings need.
17
+ #
18
+ # Further, by preserving the FileFactory as a mixed in module, downstream
19
+ # implementers will hopefully experience less of an impact regarding this
20
+ # change.
4
21
  module FileFactory
5
22
  extend ActiveSupport::Concern
6
23
 
7
- # Find existing files or upload new files. This assumes a Work will have unique file titles;
8
- # and that those file titles will not have changed
9
- # could filter by URIs instead (slower).
10
- # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
11
- # otherwise it gets reuploaded by `work_actor`.
12
- # support multiple files; ensure attributes[:file] is an Array
13
- def upload_ids
14
- return [] if klass == Collection
15
- attributes[:file] = file_paths
16
- import_files
17
- end
24
+ included do
25
+ class_attribute :file_set_factory_inner_workings_class, default: Bulkrax::FileFactory::InnerWorkings
26
+
27
+ def file_set_factory_inner_workings
28
+ @file_set_factory_inner_workings ||= file_set_factory_inner_workings_class.new(object_factory: self)
29
+ end
18
30
 
19
- def file_attributes(update_files = false)
20
- @update_files = update_files
21
- hash = {}
22
- return hash if klass == Collection
23
- hash[:uploaded_files] = upload_ids if attributes[:file].present?
24
- hash[:remote_files] = new_remote_files if new_remote_files.present?
25
- hash
31
+ delegate :file_attributes, :destroy_existing_files, to: :file_set_factory_inner_workings
26
32
  end
27
33
 
28
- # Its possible to get just an array of strings here, so we need to make sure they are all hashes
29
- def parsed_remote_files
30
- return @parsed_remote_files if @parsed_remote_files.present?
31
- @parsed_remote_files = attributes[:remote_files] || []
32
- @parsed_remote_files = @parsed_remote_files.map do |file_value|
33
- if file_value.is_a?(Hash)
34
- file_value
35
- elsif file_value.is_a?(String)
36
- name = Bulkrax::Importer.safe_uri_filename(file_value)
37
- { url: file_value, file_name: name }
38
- else
39
- Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
40
- nil
34
+ class InnerWorkings
35
+ def initialize(object_factory:)
36
+ @object_factory = object_factory
37
+ end
38
+
39
+ attr_reader :object_factory
40
+
41
+ delegate :object, :klass, :attributes, :user, to: :object_factory
42
+
43
+ # Find existing files or upload new files. This assumes a Work will have unique file titles;
44
+ # and that those file titles will not have changed
45
+ # could filter by URIs instead (slower).
46
+ # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
47
+ # otherwise it gets reuploaded by `work_actor`.
48
+ # support multiple files; ensure attributes[:file] is an Array
49
+ def upload_ids
50
+ return [] if klass == Bulkrax.collection_model_class
51
+ attributes[:file] = file_paths
52
+ import_files
53
+ end
54
+
55
+ def file_attributes(update_files = false)
56
+ # NOTE: Unclear why we're changing an instance variable based on what was
57
+ # passed, which itself is derived from the instance variable we're about
58
+ # to change. It's very easy to mutate the initialized @update_files if
59
+ # you don't pass the parameter.
60
+ object_factory.update_files = update_files
61
+ hash = {}
62
+ return hash if klass == Bulkrax.collection_model_class
63
+ hash[:uploaded_files] = upload_ids if attributes[:file].present?
64
+ hash[:remote_files] = new_remote_files if new_remote_files.present?
65
+ hash
66
+ end
67
+
68
+ # It's possible to get just an array of strings here, so we need to make sure they are all hashes
69
+ def parsed_remote_files
70
+ return @parsed_remote_files if @parsed_remote_files.present?
71
+ @parsed_remote_files = attributes[:remote_files] || []
72
+ @parsed_remote_files = @parsed_remote_files.map do |file_value|
73
+ if file_value.is_a?(Hash)
74
+ file_value
75
+ elsif file_value.is_a?(String)
76
+ name = Bulkrax::Importer.safe_uri_filename(file_value)
77
+ { url: file_value, file_name: name }
78
+ else
79
+ Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
80
+ nil
81
+ end
41
82
  end
83
+ @parsed_remote_files.delete(nil)
84
+ @parsed_remote_files
42
85
  end
43
- @parsed_remote_files.delete(nil)
44
- @parsed_remote_files
45
- end
46
86
 
47
- def new_remote_files
48
- @new_remote_files ||= if object.is_a? FileSet
49
- parsed_remote_files.select do |file|
50
- # is the url valid?
51
- is_valid = file[:url]&.match(URI::ABS_URI)
52
- # does the file already exist
53
- is_existing = object.import_url && object.import_url == file[:url]
54
- is_valid && !is_existing
55
- end
56
- elsif object.present? && object.file_sets.present?
57
- parsed_remote_files.select do |file|
58
- # is the url valid?
59
- is_valid = file[:url]&.match(URI::ABS_URI)
60
- # does the file already exist
61
- is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
62
- is_valid && !is_existing
63
- end
64
- else
65
- parsed_remote_files.select do |file|
66
- file[:url]&.match(URI::ABS_URI)
67
- end
68
- end
69
- end
87
+ def new_remote_files
88
+ return @new_remote_files if @new_remote_files
89
+
90
+ # TODO: This code could first loop through all remote files and select
91
+ # only the valid ones; then load the file_sets and do comparisons.
92
+ file_sets = object_factory.class.file_sets_for(resource: object)
93
+ @new_remote_files = parsed_remote_files.select do |file|
94
+ # is the url valid?
95
+ is_valid = file[:url]&.match(URI::ABS_URI)
96
+ # does the file already exist
97
+ is_existing = file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
98
+ is_valid && !is_existing
99
+ end
100
+ end
70
101
 
71
- def file_paths
72
- @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
73
- end
102
+ def file_paths
103
+ @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
104
+ end
74
105
 
75
- # Retrieve the orginal filenames for the files to be imported
76
- def work_files_filenames
77
- object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
78
- end
106
+ # Retrieve the original filenames for the files to be imported
107
+ def work_files_filenames
108
+ object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
109
+ end
79
110
 
80
- # Retrieve the filenames for the files to be imported
81
- def import_files_filenames
82
- file_paths.map { |f| f.split('/').last }
83
- end
111
+ # Retrieve the filenames for the files to be imported
112
+ def import_files_filenames
113
+ file_paths.map { |f| f.split('/').last }
114
+ end
84
115
 
85
- # Called if #replace_files is true
86
- # Destroy all file_sets for this object
87
- # Reload the object to ensure the remaining methods have the most up to date object
88
- def destroy_existing_files
89
- return unless object.present? && object.file_sets.present?
90
- object.file_sets.each do |fs|
91
- Hyrax::Actors::FileSetActor.new(fs, @user).destroy
116
+ # Called if #replace_files is true
117
+ # Destroy all file_sets for this object
118
+ # Reload the object to ensure the remaining methods have the most up to date object
119
+ def destroy_existing_files
120
+ return unless object.present? && object.file_sets.present?
121
+ object.file_sets.each do |fs|
122
+ Hyrax::Actors::FileSetActor.new(fs, @user).destroy
123
+ end
124
+ @object = object.reload
125
+ log_deleted_fs(object)
92
126
  end
93
- @object = object.reload
94
- log_deleted_fs(object)
95
- end
96
127
 
97
- def set_removed_filesets
98
- local_file_sets.each do |fileset|
99
- fileset.files.first.create_version
128
+ def set_removed_filesets
129
+ local_file_sets.each do |fileset|
130
+ # TODO: We need to consider the Valkyrie pathway
131
+ next if fileset.is_a?(Valkyrie::Resource)
132
+
133
+ remove_file_set(file_set: fileset)
134
+ end
135
+ end
136
+
137
+ def remove_file_set(file_set:)
138
+ # TODO: We need to consider the Valkyrie pathway
139
+ file = file_set.files.first
140
+ file.create_version
100
141
  opts = {}
101
- opts[:path] = fileset.files.first.id.split('/', 2).last
142
+ opts[:path] = file.id.split('/', 2).last
102
143
  opts[:original_name] = 'removed.png'
103
144
  opts[:mime_type] = 'image/png'
104
145
 
105
- fileset.add_file(File.open(Bulkrax.removed_image_path), opts)
106
- fileset.save
107
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
146
+ file_set.add_file(File.open(Bulkrax.removed_image_path), opts)
147
+ file_set.save
148
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
108
149
  end
109
- end
110
150
 
111
- def local_file_sets
112
- @local_file_sets ||= ordered_file_sets
113
- end
151
+ def local_file_sets
152
+ # NOTE: we'll be mutating this list of file_sets via the import_files
153
+ # method
154
+ @local_file_sets ||= ordered_file_sets
155
+ end
114
156
 
115
- def ordered_file_sets
116
- # OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0
117
- object&.ordered_members.to_a.select(&:file_set?)
118
- end
157
+ def ordered_file_sets
158
+ Bulkrax.object_factory.ordered_file_sets_for(object)
159
+ end
119
160
 
120
- def import_files
121
- paths = file_paths.map { |path| import_file(path) }.compact
122
- set_removed_filesets if local_file_sets.present?
123
- paths
124
- end
161
+ ##
162
+ # @return [Array<Integer>] An array of Hyrax::UploadFile#id representing the
163
+ # files that we should be uploading.
164
+ def import_files
165
+ paths = file_paths.map { |path| import_file(path) }.compact
166
+ set_removed_filesets if local_file_sets.present?
167
+ paths
168
+ end
125
169
 
126
- def import_file(path)
127
- u = Hyrax::UploadedFile.new
128
- u.user_id = @user.id
129
- u.file = CarrierWave::SanitizedFile.new(path)
130
- update_filesets(u)
131
- end
170
+ def import_file(path)
171
+ u = Hyrax::UploadedFile.new
172
+ u.user_id = user.id
173
+ u.file = CarrierWave::SanitizedFile.new(path)
174
+ update_filesets(u)
175
+ end
176
+
177
+ def update_filesets(current_file)
178
+ if @update_files && local_file_sets.present?
179
+ # NOTE: We're mutating local_file_sets as we process the updated file.
180
+ fileset = local_file_sets.shift
181
+ update_file_set(file_set: fileset, uploaded: current_file)
182
+ else
183
+ current_file.save
184
+ current_file.id
185
+ end
186
+ end
187
+
188
+ ##
189
+ # @return [NilClass] indicating that we've successfully begun work on the file_set.
190
+ def update_file_set(file_set:, uploaded:)
191
+ # TODO: We need to consider the Valkyrie pathway
192
+ file = file_set.files.first
193
+ uploaded_file = uploaded.file
132
194
 
133
- def update_filesets(current_file)
134
- if @update_files && local_file_sets.present?
135
- fileset = local_file_sets.shift
136
- return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
195
+ return nil if file.checksum.value == Digest::SHA1.file(uploaded_file.path).to_s
137
196
 
138
- fileset.files.first.create_version
197
+ file.create_version
139
198
  opts = {}
140
- opts[:path] = fileset.files.first.id.split('/', 2).last
141
- opts[:original_name] = current_file.file.file.original_filename
142
- opts[:mime_type] = current_file.file.content_type
199
+ opts[:path] = file.id.split('/', 2).last
200
+ opts[:original_name] = uploaded_file.file.original_filename
201
+ opts[:mime_type] = uploaded_file.content_type
143
202
 
144
- fileset.add_file(File.open(current_file.file.to_s), opts)
145
- fileset.save
146
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
203
+ file_set.add_file(File.open(uploaded_file.to_s), opts)
204
+ file_set.save
205
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
147
206
  nil
148
- else
149
- current_file.save
150
- current_file.id
151
207
  end
152
208
  end
153
209
  end
@@ -5,7 +5,7 @@ module Bulkrax
5
5
  extend ActiveSupport::Concern
6
6
 
7
7
  included do
8
- self.default_work_type = "::FileSet"
8
+ self.default_work_type = Bulkrax.file_model_class.to_s
9
9
  end
10
10
 
11
11
  def file_reference
@@ -47,7 +47,7 @@ module Bulkrax
47
47
  end
48
48
 
49
49
  def child_jobs
50
- raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
50
+ raise ::StandardError, "A #{Bulkrax.file_model_class} cannot be a parent of a #{Bulkrax.collection_model_class}, Work, or other #{Bulkrax.file_model_class}"
51
51
  end
52
52
  end
53
53
  end
@@ -56,6 +56,10 @@ module Bulkrax
56
56
  end
57
57
  end
58
58
 
59
+ def get_object_name(field)
60
+ mapping&.[](field)&.[]('object')
61
+ end
62
+
59
63
  def set_parsed_data(name, value)
60
64
  return parsed_metadata[name] = value unless multiple?(name)
61
65
 
@@ -125,41 +129,40 @@ module Bulkrax
125
129
 
126
130
  return false if excluded?(field)
127
131
  return true if supported_bulkrax_fields.include?(field)
128
- return factory_class.method_defined?(field) && factory_class.properties[field].present?
132
+
133
+ Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
129
134
  end
130
135
 
131
136
  def supported_bulkrax_fields
132
- @supported_bulkrax_fields ||=
133
- %W[
134
- id
135
- file
136
- remote_files
137
- model
138
- visibility
139
- delete
140
- #{related_parents_parsed_mapping}
141
- #{related_children_parsed_mapping}
142
- ]
137
+ @supported_bulkrax_fields ||= fields_that_are_always_singular +
138
+ fields_that_are_always_multiple
143
139
  end
144
140
 
141
+ ##
142
+ # Determine whether the given field holds multiple values
145
143
  def multiple?(field)
146
- @multiple_bulkrax_fields ||=
147
- %W[
148
- file
149
- remote_files
150
- rights_statement
151
- #{related_parents_parsed_mapping}
152
- #{related_children_parsed_mapping}
153
- ]
144
+ return true if fields_that_are_always_singular.include?(field.to_s)
145
+ return false if fields_that_are_always_multiple.include?(field.to_s)
154
146
 
155
- return true if @multiple_bulkrax_fields.include?(field)
156
- return false if field == 'model'
147
+ Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
148
+ end
157
149
 
158
- field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
150
+ def fields_that_are_always_multiple
151
+ %w[id delete model visibility]
159
152
  end
160
153
 
161
- def get_object_name(field)
162
- mapping&.[](field)&.[]('object')
154
+ def fields_that_are_always_singular
155
+ @fields_that_are_always_singular ||= %W[
156
+ file
157
+ remote_files
158
+ rights_statement
159
+ #{related_parents_parsed_mapping}
160
+ #{related_children_parsed_mapping}
161
+ ]
162
+ end
163
+
164
+ def schema_form_definitions
165
+ @schema_form_definitions ||= ::SchemaLoader.new.form_definitions_for(factory_class.name.underscore.to_sym)
163
166
  end
164
167
 
165
168
  # Hyrax field to use for the given import field
@@ -11,7 +11,7 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
- add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
14
+ add_user_to_permission_templates!
15
15
  parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
16
  child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
@@ -28,22 +28,15 @@ module Bulkrax
28
28
  end
29
29
 
30
30
  def add_user_to_permission_templates!
31
- permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: @item.id)
32
-
33
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
34
- permission_template_id: permission_template.id,
35
- agent_id: user.user_key,
36
- agent_type: 'user',
37
- access: 'manage'
38
- )
39
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
40
- permission_template_id: permission_template.id,
41
- agent_id: 'admin',
42
- agent_type: 'group',
43
- access: 'manage'
44
- )
45
-
46
- @item.reset_access_controls!
31
+ # NOTE: This is a cheat for checking whether the class is a CollectionEntry. Consider
32
+ # that we have default_work_type.
33
+ #
34
+ # TODO: This guard clause is not necessary as we can handle it in the
35
+ # underlying factory. However, to do that requires adjusting about 7
36
+ # failing specs. So for now this refactor appears acceptable
37
+ return unless defined?(::Hyrax)
38
+ return unless self.class.to_s.include?("Collection")
39
+ factory.add_user_to_collection_permissions(collection: @item, user: user)
47
40
  end
48
41
 
49
42
  def parent_jobs
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ImporterExporterBehavior
@@ -54,9 +53,11 @@ module Bulkrax
54
53
  filename = parser_fields&.[]('import_file_path')
55
54
  return false unless filename
56
55
  return false unless File.file?(filename)
56
+
57
57
  returning_value = false
58
58
  File.open(filename) do |file|
59
- returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
59
+ mime_type = ::Marcel::MimeType.for(file)
60
+ returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
60
61
  end
61
62
  returning_value
62
63
  end
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'zip'
3
- require 'marcel'
4
2
 
5
3
  module Bulkrax
6
4
  # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -232,7 +230,7 @@ module Bulkrax
232
230
  type_col = Bulkrax::Entry.arel_table['type']
233
231
  status_col = Bulkrax::Entry.arel_table['status_message']
234
232
 
235
- query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
233
+ query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
236
234
  query.and(status_col.in(statuses))
237
235
  end
238
236
 
@@ -242,16 +240,30 @@ module Bulkrax
242
240
  return 0
243
241
  end
244
242
 
243
+ def record_raw_metadata(record)
244
+ record.to_h
245
+ end
246
+
247
+ def record_deleted?(record)
248
+ return false unless record.key?(:delete)
249
+ ActiveModel::Type::Boolean.new.cast(record[:delete])
250
+ end
251
+
252
+ def record_remove_and_rerun?(record)
253
+ return false unless record.key?(:remove_and_rerun)
254
+ ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
255
+ end
256
+
245
257
  def create_entry_and_job(current_record, type, identifier = nil)
246
258
  identifier ||= current_record[source_identifier]
247
259
  new_entry = find_or_create_entry(send("#{type}_entry_class"),
248
260
  identifier,
249
261
  'Bulkrax::Importer',
250
- current_record.to_h)
262
+ record_raw_metadata(current_record))
251
263
  new_entry.status_info('Pending', importer.current_run)
252
- if current_record[:delete].present?
264
+ if record_deleted?(current_record)
253
265
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
254
- elsif current_record[:remove_and_rerun].present? || remove_and_rerun
266
+ elsif record_remove_and_rerun?(current_record) || remove_and_rerun
255
267
  delay = calculate_type_delay(type)
256
268
  "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
257
269
  else
@@ -260,7 +272,7 @@ module Bulkrax
260
272
  end
261
273
 
262
274
  # Optional, define if using browse everything for file upload
263
- def retrieve_cloud_files(files); end
275
+ def retrieve_cloud_files(_files, _importer); end
264
276
 
265
277
  # @param file [#path, #original_filename] the file object with the relevant data for the
266
278
  # import.
@@ -382,6 +394,9 @@ module Bulkrax
382
394
  identifier: identifier
383
395
  )
384
396
  entry.raw_metadata = raw_metadata
397
+ # Setting parsed_metadata specifically for the id so we can find the object via the
398
+ # id in a delete. This is likely to get clobbered in a regular import, which is fine.
399
+ entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
385
400
  entry.save!
386
401
  entry
387
402
  end
@@ -413,6 +428,8 @@ module Bulkrax
413
428
  end
414
429
 
415
430
  def unzip(file_to_unzip)
431
+ return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
432
+
416
433
  Zip::File.open(file_to_unzip) do |zip_file|
417
434
  zip_file.each do |entry|
418
435
  entry_path = File.join(importer_unzip_path, entry.name)
@@ -422,6 +439,13 @@ module Bulkrax
422
439
  end
423
440
  end
424
441
 
442
+ def untar(file_to_untar)
443
+ Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
444
+ command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
445
+ result = system(command)
446
+ raise "Failed to extract #{file_to_untar}" unless result
447
+ end
448
+
425
449
  def zip
426
450
  FileUtils.mkdir_p(exporter_export_zip_path)
427
451