bulkrax 7.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/concerns/loggable.rb +25 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  5. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  6. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  7. data/app/factories/bulkrax/object_factory.rb +135 -163
  8. data/app/factories/bulkrax/object_factory_interface.rb +483 -0
  9. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  10. data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
  11. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  12. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  13. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  14. data/app/jobs/bulkrax/delete_job.rb +3 -2
  15. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  16. data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
  17. data/app/jobs/bulkrax/importer_job.rb +18 -2
  18. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  19. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  20. data/app/models/bulkrax/csv_entry.rb +7 -6
  21. data/app/models/bulkrax/entry.rb +7 -11
  22. data/app/models/bulkrax/exporter.rb +2 -2
  23. data/app/models/bulkrax/importer.rb +1 -3
  24. data/app/models/bulkrax/oai_entry.rb +0 -3
  25. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  26. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  27. data/app/models/bulkrax/rdf_entry.rb +70 -69
  28. data/app/models/bulkrax/xml_entry.rb +0 -1
  29. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  30. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/file_factory.rb +178 -118
  32. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  33. data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
  34. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  36. data/app/parsers/bulkrax/application_parser.rb +31 -7
  37. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  38. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  41. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  42. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  43. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  44. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  45. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  46. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  47. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  48. data/app/views/bulkrax/entries/show.html.erb +9 -8
  49. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  50. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  51. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  52. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  53. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  54. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  55. data/app/views/bulkrax/importers/new.html.erb +1 -1
  56. data/app/views/bulkrax/importers/show.html.erb +1 -1
  57. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  58. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  59. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  60. data/config/locales/bulkrax.en.yml +7 -0
  61. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  62. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  63. data/lib/bulkrax/engine.rb +23 -6
  64. data/lib/bulkrax/version.rb +1 -1
  65. data/lib/bulkrax.rb +54 -52
  66. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  67. data/lib/tasks/bulkrax_tasks.rake +1 -0
  68. data/lib/tasks/reset.rake +4 -4
  69. metadata +25 -7
  70. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  71. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  72. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,153 +1,213 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
+ ##
5
+ # NOTE: Historically (e.g. Bulkrax v7.0.0 and earlier) we mixed in all of the
6
+ # {Bulkrax::FileFactory} methods into {Bulkrax::ObjectFactory}. However, with
7
+ # the introduction of {Bulkrax::ValkyrieObjectFactory} we needed to account
8
+ # for branching logic.
9
+ #
10
+ # This refactor where we expose the bare minimum interface of file interaction
11
+ # should help with encapsulation.
12
+ #
13
+ # The refactor pattern was to find FileFactory methods used by the
14
+ # ObjectFactory and delegate those to the new {FileFactory::InnerWorkings}
15
+ # class. Likewise within the InnerWorkings we wanted to delegate to the given
16
+ # object_factory the methods that the InnerWorkings need.
17
+ #
18
+ # Further, by preserving the FileFactory as a mixed-in module, downstream
19
+ # implementers will hopefully experience less of an impact regarding this
20
+ # change.
4
21
  module FileFactory
5
22
  extend ActiveSupport::Concern
6
23
 
7
- # Find existing files or upload new files. This assumes a Work will have unique file titles;
8
- # and that those file titles will not have changed
9
- # could filter by URIs instead (slower).
10
- # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
11
- # otherwise it gets reuploaded by `work_actor`.
12
- # support multiple files; ensure attributes[:file] is an Array
13
- def upload_ids
14
- return [] if klass == Collection
15
- attributes[:file] = file_paths
16
- import_files
17
- end
24
+ included do
25
+ class_attribute :file_set_factory_inner_workings_class, default: Bulkrax::FileFactory::InnerWorkings
26
+
27
+ def file_set_factory_inner_workings
28
+ @file_set_factory_inner_workings ||= file_set_factory_inner_workings_class.new(object_factory: self)
29
+ end
18
30
 
19
- def file_attributes(update_files = false)
20
- @update_files = update_files
21
- hash = {}
22
- return hash if klass == Collection
23
- hash[:uploaded_files] = upload_ids if attributes[:file].present?
24
- hash[:remote_files] = new_remote_files if new_remote_files.present?
25
- hash
31
+ delegate :file_attributes, :destroy_existing_files, to: :file_set_factory_inner_workings
26
32
  end
27
33
 
28
- # Its possible to get just an array of strings here, so we need to make sure they are all hashes
29
- def parsed_remote_files
30
- return @parsed_remote_files if @parsed_remote_files.present?
31
- @parsed_remote_files = attributes[:remote_files] || []
32
- @parsed_remote_files = @parsed_remote_files.map do |file_value|
33
- if file_value.is_a?(Hash)
34
- file_value
35
- elsif file_value.is_a?(String)
36
- name = Bulkrax::Importer.safe_uri_filename(file_value)
37
- { url: file_value, file_name: name }
38
- else
39
- Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
40
- nil
34
+ class InnerWorkings
35
+ include Loggable
36
+
37
+ def initialize(object_factory:)
38
+ @object_factory = object_factory
39
+ end
40
+
41
+ attr_reader :object_factory
42
+
43
+ delegate :object, :klass, :attributes, :user, to: :object_factory
44
+
45
+ # Find existing files or upload new files. This assumes a Work will have unique file titles;
46
+ # and that those file titles will not have changed
47
+ # could filter by URIs instead (slower).
48
+ # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
49
+ # otherwise it gets reuploaded by `work_actor`.
50
+ # support multiple files; ensure attributes[:file] is an Array
51
+ def upload_ids
52
+ return [] if klass == Bulkrax.collection_model_class
53
+ attributes[:file] = file_paths
54
+ import_files
55
+ end
56
+
57
+ def file_attributes(update_files = false)
58
+ # NOTE: Unclear why we're changing an instance variable based on what was
59
+ # passed, which itself is derived from the instance variable we're about
60
+ # to change. It's very easy to mutate the initialized @update_files if
61
+ # you don't pass the parameter.
62
+ object_factory.update_files = update_files
63
+ hash = {}
64
+ return hash if klass == Bulkrax.collection_model_class
65
+ hash[:uploaded_files] = upload_ids if attributes[:file].present?
66
+ hash[:remote_files] = new_remote_files if new_remote_files.present?
67
+ hash
68
+ end
69
+
70
+ # It's possible to get just an array of strings here, so we need to make sure they are all hashes
71
+ def parsed_remote_files
72
+ return @parsed_remote_files if @parsed_remote_files.present?
73
+ @parsed_remote_files = attributes[:remote_files] || []
74
+ @parsed_remote_files = @parsed_remote_files.map do |file_value|
75
+ if file_value.is_a?(Hash)
76
+ file_value
77
+ elsif file_value.is_a?(String)
78
+ name = Bulkrax::Importer.safe_uri_filename(file_value)
79
+ { url: file_value, file_name: name }
80
+ else
81
+ Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
82
+ nil
83
+ end
41
84
  end
85
+ @parsed_remote_files.delete(nil)
86
+ @parsed_remote_files
42
87
  end
43
- @parsed_remote_files.delete(nil)
44
- @parsed_remote_files
45
- end
46
88
 
47
- def new_remote_files
48
- @new_remote_files ||= if object.is_a? FileSet
49
- parsed_remote_files.select do |file|
50
- # is the url valid?
51
- is_valid = file[:url]&.match(URI::ABS_URI)
52
- # does the file already exist
53
- is_existing = object.import_url && object.import_url == file[:url]
54
- is_valid && !is_existing
55
- end
56
- elsif object.present? && object.file_sets.present?
57
- parsed_remote_files.select do |file|
58
- # is the url valid?
59
- is_valid = file[:url]&.match(URI::ABS_URI)
60
- # does the file already exist
61
- is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
62
- is_valid && !is_existing
63
- end
64
- else
65
- parsed_remote_files.select do |file|
66
- file[:url]&.match(URI::ABS_URI)
67
- end
68
- end
69
- end
89
+ def new_remote_files
90
+ return @new_remote_files if @new_remote_files
91
+
92
+ # TODO: This code could first loop through all remote files and select
93
+ # only the valid ones; then load the file_sets and do comparisons.
94
+ file_sets = object_factory.class.file_sets_for(resource: object)
95
+ @new_remote_files = parsed_remote_files.select do |file|
96
+ # is the url valid?
97
+ is_valid = file[:url]&.match(URI::ABS_URI)
98
+ # does the file already exist
99
+ is_existing = file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
100
+ is_valid && !is_existing
101
+ end
102
+ end
70
103
 
71
- def file_paths
72
- @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
73
- end
104
+ def file_paths
105
+ @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
106
+ end
74
107
 
75
- # Retrieve the orginal filenames for the files to be imported
76
- def work_files_filenames
77
- object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
78
- end
108
+ # Retrieve the original filenames for the files to be imported
109
+ def work_files_filenames
110
+ object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
111
+ end
79
112
 
80
- # Retrieve the filenames for the files to be imported
81
- def import_files_filenames
82
- file_paths.map { |f| f.split('/').last }
83
- end
113
+ # Retrieve the filenames for the files to be imported
114
+ def import_files_filenames
115
+ file_paths.map { |f| f.split('/').last }
116
+ end
84
117
 
85
- # Called if #replace_files is true
86
- # Destroy all file_sets for this object
87
- # Reload the object to ensure the remaining methods have the most up to date object
88
- def destroy_existing_files
89
- return unless object.present? && object.file_sets.present?
90
- object.file_sets.each do |fs|
91
- Hyrax::Actors::FileSetActor.new(fs, @user).destroy
118
+ # Called if #replace_files is true
119
+ # Destroy all file_sets for this object
120
+ # Reload the object to ensure the remaining methods have the most up to date object
121
+ def destroy_existing_files
122
+ return unless object.present? && object.file_sets.present?
123
+ object.file_sets.each do |fs|
124
+ Hyrax::Actors::FileSetActor.new(fs, user).destroy
125
+ end
126
+ @object = object.reload
127
+ log_deleted_fs(object)
92
128
  end
93
- @object = object.reload
94
- log_deleted_fs(object)
95
- end
96
129
 
97
- def set_removed_filesets
98
- local_file_sets.each do |fileset|
99
- fileset.files.first.create_version
130
+ def set_removed_filesets
131
+ local_file_sets.each do |fileset|
132
+ # TODO: We need to consider the Valkyrie pathway
133
+ next if fileset.is_a?(Valkyrie::Resource)
134
+
135
+ remove_file_set(file_set: fileset)
136
+ end
137
+ end
138
+
139
+ def remove_file_set(file_set:)
140
+ # TODO: We need to consider the Valkyrie pathway
141
+ file = file_set.files.first
142
+ file.create_version
100
143
  opts = {}
101
- opts[:path] = fileset.files.first.id.split('/', 2).last
144
+ opts[:path] = file.id.split('/', 2).last
102
145
  opts[:original_name] = 'removed.png'
103
146
  opts[:mime_type] = 'image/png'
104
147
 
105
- fileset.add_file(File.open(Bulkrax.removed_image_path), opts)
106
- fileset.save
107
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
148
+ file_set.add_file(File.open(Bulkrax.removed_image_path), opts)
149
+ file_set.save
150
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
108
151
  end
109
- end
110
152
 
111
- def local_file_sets
112
- @local_file_sets ||= ordered_file_sets
113
- end
153
+ def local_file_sets
154
+ # NOTE: we'll be mutating this list of file_sets via the import_files
155
+ # method
156
+ @local_file_sets ||= ordered_file_sets
157
+ end
114
158
 
115
- def ordered_file_sets
116
- # OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0
117
- object&.ordered_members.to_a.select(&:file_set?)
118
- end
159
+ def ordered_file_sets
160
+ return [] if object.blank?
119
161
 
120
- def import_files
121
- paths = file_paths.map { |path| import_file(path) }.compact
122
- set_removed_filesets if local_file_sets.present?
123
- paths
124
- end
162
+ Bulkrax.object_factory.ordered_file_sets_for(object)
163
+ end
125
164
 
126
- def import_file(path)
127
- u = Hyrax::UploadedFile.new
128
- u.user_id = @user.id
129
- u.file = CarrierWave::SanitizedFile.new(path)
130
- update_filesets(u)
131
- end
165
+ ##
166
+ # @return [Array<Integer>] An array of Hyrax::UploadedFile#id representing the
167
+ # files that we should be uploading.
168
+ def import_files
169
+ paths = file_paths.map { |path| import_file(path) }.compact
170
+ set_removed_filesets if local_file_sets.present?
171
+ paths
172
+ end
173
+
174
+ def import_file(path)
175
+ u = Hyrax::UploadedFile.new
176
+ u.user_id = user.id
177
+ u.file = CarrierWave::SanitizedFile.new(path)
178
+ update_filesets(u)
179
+ end
180
+
181
+ def update_filesets(current_file)
182
+ if @update_files && local_file_sets.present?
183
+ # NOTE: We're mutating local_file_sets as we process the updated file.
184
+ fileset = local_file_sets.shift
185
+ update_file_set(file_set: fileset, uploaded: current_file)
186
+ else
187
+ current_file.save
188
+ current_file.id
189
+ end
190
+ end
191
+
192
+ ##
193
+ # @return [NilClass] indicating that we've successfully begun work on the file_set.
194
+ def update_file_set(file_set:, uploaded:)
195
+ # TODO: We need to consider the Valkyrie pathway
196
+ file = file_set.files.first
197
+ uploaded_file = uploaded.file
132
198
 
133
- def update_filesets(current_file)
134
- if @update_files && local_file_sets.present?
135
- fileset = local_file_sets.shift
136
- return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
199
+ return nil if file.checksum.value == Digest::SHA1.file(uploaded_file.path).to_s
137
200
 
138
- fileset.files.first.create_version
201
+ file.create_version
139
202
  opts = {}
140
- opts[:path] = fileset.files.first.id.split('/', 2).last
141
- opts[:original_name] = current_file.file.file.original_filename
142
- opts[:mime_type] = current_file.file.content_type
203
+ opts[:path] = file.id.split('/', 2).last
204
+ opts[:original_name] = uploaded_file.file.original_filename
205
+ opts[:mime_type] = uploaded_file.content_type
143
206
 
144
- fileset.add_file(File.open(current_file.file.to_s), opts)
145
- fileset.save
146
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
207
+ file_set.add_file(File.open(uploaded_file.to_s), opts)
208
+ file_set.save
209
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
147
210
  nil
148
- else
149
- current_file.save
150
- current_file.id
151
211
  end
152
212
  end
153
213
  end
@@ -5,7 +5,7 @@ module Bulkrax
5
5
  extend ActiveSupport::Concern
6
6
 
7
7
  included do
8
- self.default_work_type = "::FileSet"
8
+ self.default_work_type = Bulkrax.file_model_class.to_s
9
9
  end
10
10
 
11
11
  def file_reference
@@ -47,7 +47,7 @@ module Bulkrax
47
47
  end
48
48
 
49
49
  def child_jobs
50
- raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
50
+ raise ::StandardError, "A #{Bulkrax.file_model_class} cannot be a parent of a #{Bulkrax.collection_model_class}, Work, or other #{Bulkrax.file_model_class}"
51
51
  end
52
52
  end
53
53
  end
@@ -56,6 +56,10 @@ module Bulkrax
56
56
  end
57
57
  end
58
58
 
59
+ def get_object_name(field)
60
+ mapping&.[](field)&.[]('object')
61
+ end
62
+
59
63
  def set_parsed_data(name, value)
60
64
  return parsed_metadata[name] = value unless multiple?(name)
61
65
 
@@ -125,41 +129,51 @@ module Bulkrax
125
129
 
126
130
  return false if excluded?(field)
127
131
  return true if supported_bulkrax_fields.include?(field)
128
- return factory_class.method_defined?(field) && factory_class.properties[field].present?
132
+
133
+ Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
129
134
  end
130
135
 
131
136
  def supported_bulkrax_fields
132
- @supported_bulkrax_fields ||=
133
- %W[
134
- id
135
- file
136
- remote_files
137
- model
138
- visibility
139
- delete
140
- #{related_parents_parsed_mapping}
141
- #{related_children_parsed_mapping}
142
- ]
137
+ @supported_bulkrax_fields ||= fields_that_are_always_singular +
138
+ fields_that_are_always_multiple
143
139
  end
144
140
 
141
+ ##
142
+ # Determine whether the given field accepts multiple values
145
143
  def multiple?(field)
146
- @multiple_bulkrax_fields ||=
147
- %W[
148
- file
149
- remote_files
150
- rights_statement
151
- #{related_parents_parsed_mapping}
152
- #{related_children_parsed_mapping}
153
- ]
144
+ return true if fields_that_are_always_singular.include?(field.to_s)
145
+ return false if fields_that_are_always_multiple.include?(field.to_s)
154
146
 
155
- return true if @multiple_bulkrax_fields.include?(field)
156
- return false if field == 'model'
147
+ Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
148
+ end
157
149
 
158
- field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
150
+ def fields_that_are_always_multiple
151
+ @fields_that_are_always_multiple = %w[
152
+ id
153
+ delete
154
+ model
155
+ visibility
156
+ visibility_during_embargo
157
+ embargo_release_date
158
+ visibility_after_embargo
159
+ visibility_during_lease
160
+ lease_expiration_date
161
+ visibility_after_lease
162
+ ]
159
163
  end
160
164
 
161
- def get_object_name(field)
162
- mapping&.[](field)&.[]('object')
165
+ def fields_that_are_always_singular
166
+ @fields_that_are_always_singular ||= %W[
167
+ file
168
+ remote_files
169
+ rights_statement
170
+ #{related_parents_parsed_mapping}
171
+ #{related_children_parsed_mapping}
172
+ ]
173
+ end
174
+
175
+ def schema_form_definitions
176
+ @schema_form_definitions ||= ::SchemaLoader.new.form_definitions_for(factory_class.name.underscore.to_sym)
163
177
  end
164
178
 
165
179
  # Hyrax field to use for the given import field
@@ -11,7 +11,7 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
- add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
14
+ add_user_to_permission_templates!
15
15
  parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
16
  child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
@@ -28,22 +28,15 @@ module Bulkrax
28
28
  end
29
29
 
30
30
  def add_user_to_permission_templates!
31
- permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: @item.id)
32
-
33
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
34
- permission_template_id: permission_template.id,
35
- agent_id: user.user_key,
36
- agent_type: 'user',
37
- access: 'manage'
38
- )
39
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
40
- permission_template_id: permission_template.id,
41
- agent_id: 'admin',
42
- agent_type: 'group',
43
- access: 'manage'
44
- )
45
-
46
- @item.reset_access_controls!
31
+ # NOTE: This is a cheat for when the class is a CollectionEntry. Consider
32
+ # that we have default_work_type.
33
+ #
34
+ # TODO: This guard clause is not necessary as we can handle it in the
35
+ # underlying factory. However, to do that requires adjusting about 7
36
+ # failing specs. So for now this refactor appears acceptable
37
+ return unless defined?(::Hyrax)
38
+ return unless self.class.to_s.include?("Collection")
39
+ factory.add_user_to_collection_permissions(collection: @item, user: user)
47
40
  end
48
41
 
49
42
  def parent_jobs
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ImporterExporterBehavior
@@ -54,9 +53,11 @@ module Bulkrax
54
53
  filename = parser_fields&.[]('import_file_path')
55
54
  return false unless filename
56
55
  return false unless File.file?(filename)
56
+
57
57
  returning_value = false
58
58
  File.open(filename) do |file|
59
- returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
59
+ mime_type = ::Marcel::MimeType.for(file)
60
+ returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
60
61
  end
61
62
  returning_value
62
63
  end
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'zip'
3
- require 'marcel'
4
2
 
5
3
  module Bulkrax
6
4
  # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -232,7 +230,7 @@ module Bulkrax
232
230
  type_col = Bulkrax::Entry.arel_table['type']
233
231
  status_col = Bulkrax::Entry.arel_table['status_message']
234
232
 
235
- query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
233
+ query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
236
234
  query.and(status_col.in(statuses))
237
235
  end
238
236
 
@@ -242,16 +240,30 @@ module Bulkrax
242
240
  return 0
243
241
  end
244
242
 
243
+ def record_raw_metadata(record)
244
+ record.to_h
245
+ end
246
+
247
+ def record_deleted?(record)
248
+ return false unless record.key?(:delete)
249
+ ActiveModel::Type::Boolean.new.cast(record[:delete])
250
+ end
251
+
252
+ def record_remove_and_rerun?(record)
253
+ return false unless record.key?(:remove_and_rerun)
254
+ ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
255
+ end
256
+
245
257
  def create_entry_and_job(current_record, type, identifier = nil)
246
258
  identifier ||= current_record[source_identifier]
247
259
  new_entry = find_or_create_entry(send("#{type}_entry_class"),
248
260
  identifier,
249
261
  'Bulkrax::Importer',
250
- current_record.to_h)
262
+ record_raw_metadata(current_record))
251
263
  new_entry.status_info('Pending', importer.current_run)
252
- if current_record[:delete].present?
264
+ if record_deleted?(current_record)
253
265
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
254
- elsif current_record[:remove_and_rerun].present? || remove_and_rerun
266
+ elsif record_remove_and_rerun?(current_record) || remove_and_rerun
255
267
  delay = calculate_type_delay(type)
256
268
  "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
257
269
  else
@@ -260,7 +272,7 @@ module Bulkrax
260
272
  end
261
273
 
262
274
  # Optional, define if using browse everything for file upload
263
- def retrieve_cloud_files(files); end
275
+ def retrieve_cloud_files(_files, _importer); end
264
276
 
265
277
  # @param file [#path, #original_filename] the file object that with the relevant data for the
266
278
  # import.
@@ -382,6 +394,9 @@ module Bulkrax
382
394
  identifier: identifier
383
395
  )
384
396
  entry.raw_metadata = raw_metadata
397
+ # Setting parsed_metadata specifically for the id so we can find the object via the
398
+ # id in a delete. This is likely to get clobbered in a regular import, which is fine.
399
+ entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
385
400
  entry.save!
386
401
  entry
387
402
  end
@@ -413,6 +428,8 @@ module Bulkrax
413
428
  end
414
429
 
415
430
  def unzip(file_to_unzip)
431
+ return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
432
+
416
433
  Zip::File.open(file_to_unzip) do |zip_file|
417
434
  zip_file.each do |entry|
418
435
  entry_path = File.join(importer_unzip_path, entry.name)
@@ -422,6 +439,13 @@ module Bulkrax
422
439
  end
423
440
  end
424
441
 
442
+ def untar(file_to_untar)
443
+ Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
444
+ command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
445
+ result = system(command)
446
+ raise "Failed to extract #{file_to_untar}" unless result
447
+ end
448
+
425
449
  def zip
426
450
  FileUtils.mkdir_p(exporter_export_zip_path)
427
451