bulkrax 7.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/concerns/loggable.rb +25 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  5. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  6. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  7. data/app/factories/bulkrax/object_factory.rb +135 -163
  8. data/app/factories/bulkrax/object_factory_interface.rb +483 -0
  9. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  10. data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
  11. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  12. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  13. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  14. data/app/jobs/bulkrax/delete_job.rb +3 -2
  15. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  16. data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
  17. data/app/jobs/bulkrax/importer_job.rb +18 -2
  18. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  19. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  20. data/app/models/bulkrax/csv_entry.rb +7 -6
  21. data/app/models/bulkrax/entry.rb +7 -11
  22. data/app/models/bulkrax/exporter.rb +2 -2
  23. data/app/models/bulkrax/importer.rb +1 -3
  24. data/app/models/bulkrax/oai_entry.rb +0 -3
  25. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  26. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  27. data/app/models/bulkrax/rdf_entry.rb +70 -69
  28. data/app/models/bulkrax/xml_entry.rb +0 -1
  29. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  30. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/file_factory.rb +178 -118
  32. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  33. data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
  34. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  36. data/app/parsers/bulkrax/application_parser.rb +31 -7
  37. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  38. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  41. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  42. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  43. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  44. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  45. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  46. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  47. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  48. data/app/views/bulkrax/entries/show.html.erb +9 -8
  49. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  50. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  51. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  52. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  53. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  54. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  55. data/app/views/bulkrax/importers/new.html.erb +1 -1
  56. data/app/views/bulkrax/importers/show.html.erb +1 -1
  57. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  58. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  59. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  60. data/config/locales/bulkrax.en.yml +7 -0
  61. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  62. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  63. data/lib/bulkrax/engine.rb +23 -6
  64. data/lib/bulkrax/version.rb +1 -1
  65. data/lib/bulkrax.rb +54 -52
  66. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  67. data/lib/tasks/bulkrax_tasks.rake +1 -0
  68. data/lib/tasks/reset.rake +4 -4
  69. metadata +25 -7
  70. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  71. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  72. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,153 +1,213 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
+ ##
5
+ # NOTE: Historically (e.g. Bulkrax v7.0.0 and earlier) we mixed in all of the
6
+ # {Bulkrax::FileFactory} methods into {Bulkrax::ObjectFactory}. However, with
7
+ # the introduction of {Bulkrax::ValkyrieObjectFactory} we needed to account
8
+ # for branching logic.
9
+ #
10
+ # This refactor where we expose the bare minimum interface of file interaction
11
+ # should help with encapsulation.
12
+ #
13
+ # The refactor pattern was to find FileFactory methods used by the
14
+ # ObjectFactory and delegate those to the new {FileFactory::InnerWorkings}
15
+ # class. Likewise within the InnerWorkings we wanted to delegate to the given
16
+ # object_factory the methods that the InnerWorkings need.
17
+ #
18
+ # Further, by preserving the FileFactory as a mixed-in module, downstream
19
+ # implementers will hopefully experience less of an impact regarding this
20
+ # change.
4
21
  module FileFactory
5
22
  extend ActiveSupport::Concern
6
23
 
7
- # Find existing files or upload new files. This assumes a Work will have unique file titles;
8
- # and that those file titles will not have changed
9
- # could filter by URIs instead (slower).
10
- # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
11
- # otherwise it gets reuploaded by `work_actor`.
12
- # support multiple files; ensure attributes[:file] is an Array
13
- def upload_ids
14
- return [] if klass == Collection
15
- attributes[:file] = file_paths
16
- import_files
17
- end
24
+ included do
25
+ class_attribute :file_set_factory_inner_workings_class, default: Bulkrax::FileFactory::InnerWorkings
26
+
27
+ def file_set_factory_inner_workings
28
+ @file_set_factory_inner_workings ||= file_set_factory_inner_workings_class.new(object_factory: self)
29
+ end
18
30
 
19
- def file_attributes(update_files = false)
20
- @update_files = update_files
21
- hash = {}
22
- return hash if klass == Collection
23
- hash[:uploaded_files] = upload_ids if attributes[:file].present?
24
- hash[:remote_files] = new_remote_files if new_remote_files.present?
25
- hash
31
+ delegate :file_attributes, :destroy_existing_files, to: :file_set_factory_inner_workings
26
32
  end
27
33
 
28
- # Its possible to get just an array of strings here, so we need to make sure they are all hashes
29
- def parsed_remote_files
30
- return @parsed_remote_files if @parsed_remote_files.present?
31
- @parsed_remote_files = attributes[:remote_files] || []
32
- @parsed_remote_files = @parsed_remote_files.map do |file_value|
33
- if file_value.is_a?(Hash)
34
- file_value
35
- elsif file_value.is_a?(String)
36
- name = Bulkrax::Importer.safe_uri_filename(file_value)
37
- { url: file_value, file_name: name }
38
- else
39
- Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
40
- nil
34
+ class InnerWorkings
35
+ include Loggable
36
+
37
+ def initialize(object_factory:)
38
+ @object_factory = object_factory
39
+ end
40
+
41
+ attr_reader :object_factory
42
+
43
+ delegate :object, :klass, :attributes, :user, to: :object_factory
44
+
45
+ # Find existing files or upload new files. This assumes a Work will have unique file titles;
46
+ # and that those file titles will not have changed
47
+ # could filter by URIs instead (slower).
48
+ # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
49
+ # otherwise it gets reuploaded by `work_actor`.
50
+ # support multiple files; ensure attributes[:file] is an Array
51
+ def upload_ids
52
+ return [] if klass == Bulkrax.collection_model_class
53
+ attributes[:file] = file_paths
54
+ import_files
55
+ end
56
+
57
+ def file_attributes(update_files = false)
58
+ # NOTE: Unclear why we're changing an instance variable based on what was
59
+ # passed, which itself is derived from the instance variable we're about
60
+ # to change. It's very easy to mutate the initialized @update_files if
61
+ # you don't pass the parameter.
62
+ object_factory.update_files = update_files
63
+ hash = {}
64
+ return hash if klass == Bulkrax.collection_model_class
65
+ hash[:uploaded_files] = upload_ids if attributes[:file].present?
66
+ hash[:remote_files] = new_remote_files if new_remote_files.present?
67
+ hash
68
+ end
69
+
70
+ # It's possible to get just an array of strings here, so we need to make sure they are all hashes
71
+ def parsed_remote_files
72
+ return @parsed_remote_files if @parsed_remote_files.present?
73
+ @parsed_remote_files = attributes[:remote_files] || []
74
+ @parsed_remote_files = @parsed_remote_files.map do |file_value|
75
+ if file_value.is_a?(Hash)
76
+ file_value
77
+ elsif file_value.is_a?(String)
78
+ name = Bulkrax::Importer.safe_uri_filename(file_value)
79
+ { url: file_value, file_name: name }
80
+ else
81
+ Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
82
+ nil
83
+ end
41
84
  end
85
+ @parsed_remote_files.delete(nil)
86
+ @parsed_remote_files
42
87
  end
43
- @parsed_remote_files.delete(nil)
44
- @parsed_remote_files
45
- end
46
88
 
47
- def new_remote_files
48
- @new_remote_files ||= if object.is_a? FileSet
49
- parsed_remote_files.select do |file|
50
- # is the url valid?
51
- is_valid = file[:url]&.match(URI::ABS_URI)
52
- # does the file already exist
53
- is_existing = object.import_url && object.import_url == file[:url]
54
- is_valid && !is_existing
55
- end
56
- elsif object.present? && object.file_sets.present?
57
- parsed_remote_files.select do |file|
58
- # is the url valid?
59
- is_valid = file[:url]&.match(URI::ABS_URI)
60
- # does the file already exist
61
- is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
62
- is_valid && !is_existing
63
- end
64
- else
65
- parsed_remote_files.select do |file|
66
- file[:url]&.match(URI::ABS_URI)
67
- end
68
- end
69
- end
89
+ def new_remote_files
90
+ return @new_remote_files if @new_remote_files
91
+
92
+ # TODO: This code could first loop through all remote files and select
93
+ # only the valid ones; then load the file_sets and do comparisons.
94
+ file_sets = object_factory.class.file_sets_for(resource: object)
95
+ @new_remote_files = parsed_remote_files.select do |file|
96
+ # is the url valid?
97
+ is_valid = file[:url]&.match(URI::ABS_URI)
98
+ # does the file already exist
99
+ is_existing = file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
100
+ is_valid && !is_existing
101
+ end
102
+ end
70
103
 
71
- def file_paths
72
- @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
73
- end
104
+ def file_paths
105
+ @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
106
+ end
74
107
 
75
- # Retrieve the orginal filenames for the files to be imported
76
- def work_files_filenames
77
- object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
78
- end
108
+ # Retrieve the original filenames for the files to be imported
109
+ def work_files_filenames
110
+ object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
111
+ end
79
112
 
80
- # Retrieve the filenames for the files to be imported
81
- def import_files_filenames
82
- file_paths.map { |f| f.split('/').last }
83
- end
113
+ # Retrieve the filenames for the files to be imported
114
+ def import_files_filenames
115
+ file_paths.map { |f| f.split('/').last }
116
+ end
84
117
 
85
- # Called if #replace_files is true
86
- # Destroy all file_sets for this object
87
- # Reload the object to ensure the remaining methods have the most up to date object
88
- def destroy_existing_files
89
- return unless object.present? && object.file_sets.present?
90
- object.file_sets.each do |fs|
91
- Hyrax::Actors::FileSetActor.new(fs, @user).destroy
118
+ # Called if #replace_files is true
119
+ # Destroy all file_sets for this object
120
+ # Reload the object to ensure the remaining methods have the most up to date object
121
+ def destroy_existing_files
122
+ return unless object.present? && object.file_sets.present?
123
+ object.file_sets.each do |fs|
124
+ Hyrax::Actors::FileSetActor.new(fs, user).destroy
125
+ end
126
+ @object = object.reload
127
+ log_deleted_fs(object)
92
128
  end
93
- @object = object.reload
94
- log_deleted_fs(object)
95
- end
96
129
 
97
- def set_removed_filesets
98
- local_file_sets.each do |fileset|
99
- fileset.files.first.create_version
130
+ def set_removed_filesets
131
+ local_file_sets.each do |fileset|
132
+ # TODO: We need to consider the Valkyrie pathway
133
+ next if fileset.is_a?(Valkyrie::Resource)
134
+
135
+ remove_file_set(file_set: fileset)
136
+ end
137
+ end
138
+
139
+ def remove_file_set(file_set:)
140
+ # TODO: We need to consider the Valkyrie pathway
141
+ file = file_set.files.first
142
+ file.create_version
100
143
  opts = {}
101
- opts[:path] = fileset.files.first.id.split('/', 2).last
144
+ opts[:path] = file.id.split('/', 2).last
102
145
  opts[:original_name] = 'removed.png'
103
146
  opts[:mime_type] = 'image/png'
104
147
 
105
- fileset.add_file(File.open(Bulkrax.removed_image_path), opts)
106
- fileset.save
107
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
148
+ file_set.add_file(File.open(Bulkrax.removed_image_path), opts)
149
+ file_set.save
150
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
108
151
  end
109
- end
110
152
 
111
- def local_file_sets
112
- @local_file_sets ||= ordered_file_sets
113
- end
153
+ def local_file_sets
154
+ # NOTE: we'll be mutating this list of file_sets via the import_files
155
+ # method
156
+ @local_file_sets ||= ordered_file_sets
157
+ end
114
158
 
115
- def ordered_file_sets
116
- # OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0
117
- object&.ordered_members.to_a.select(&:file_set?)
118
- end
159
+ def ordered_file_sets
160
+ return [] if object.blank?
119
161
 
120
- def import_files
121
- paths = file_paths.map { |path| import_file(path) }.compact
122
- set_removed_filesets if local_file_sets.present?
123
- paths
124
- end
162
+ Bulkrax.object_factory.ordered_file_sets_for(object)
163
+ end
125
164
 
126
- def import_file(path)
127
- u = Hyrax::UploadedFile.new
128
- u.user_id = @user.id
129
- u.file = CarrierWave::SanitizedFile.new(path)
130
- update_filesets(u)
131
- end
165
+ ##
166
+ # @return [Array<Integer>] An array of Hyrax::UploadedFile#id representing the
167
+ # files that we should be uploading.
168
+ def import_files
169
+ paths = file_paths.map { |path| import_file(path) }.compact
170
+ set_removed_filesets if local_file_sets.present?
171
+ paths
172
+ end
173
+
174
+ def import_file(path)
175
+ u = Hyrax::UploadedFile.new
176
+ u.user_id = user.id
177
+ u.file = CarrierWave::SanitizedFile.new(path)
178
+ update_filesets(u)
179
+ end
180
+
181
+ def update_filesets(current_file)
182
+ if @update_files && local_file_sets.present?
183
+ # NOTE: We're mutating local_file_sets as we process the updated file.
184
+ fileset = local_file_sets.shift
185
+ update_file_set(file_set: fileset, uploaded: current_file)
186
+ else
187
+ current_file.save
188
+ current_file.id
189
+ end
190
+ end
191
+
192
+ ##
193
+ # @return [NilClass] indicating that we've successfully begun work on the file_set.
194
+ def update_file_set(file_set:, uploaded:)
195
+ # TODO: We need to consider the Valkyrie pathway
196
+ file = file_set.files.first
197
+ uploaded_file = uploaded.file
132
198
 
133
- def update_filesets(current_file)
134
- if @update_files && local_file_sets.present?
135
- fileset = local_file_sets.shift
136
- return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
199
+ return nil if file.checksum.value == Digest::SHA1.file(uploaded_file.path).to_s
137
200
 
138
- fileset.files.first.create_version
201
+ file.create_version
139
202
  opts = {}
140
- opts[:path] = fileset.files.first.id.split('/', 2).last
141
- opts[:original_name] = current_file.file.file.original_filename
142
- opts[:mime_type] = current_file.file.content_type
203
+ opts[:path] = file.id.split('/', 2).last
204
+ opts[:original_name] = uploaded_file.file.original_filename
205
+ opts[:mime_type] = uploaded_file.content_type
143
206
 
144
- fileset.add_file(File.open(current_file.file.to_s), opts)
145
- fileset.save
146
- ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
207
+ file_set.add_file(File.open(uploaded_file.to_s), opts)
208
+ file_set.save
209
+ ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
147
210
  nil
148
- else
149
- current_file.save
150
- current_file.id
151
211
  end
152
212
  end
153
213
  end
@@ -5,7 +5,7 @@ module Bulkrax
5
5
  extend ActiveSupport::Concern
6
6
 
7
7
  included do
8
- self.default_work_type = "::FileSet"
8
+ self.default_work_type = Bulkrax.file_model_class.to_s
9
9
  end
10
10
 
11
11
  def file_reference
@@ -47,7 +47,7 @@ module Bulkrax
47
47
  end
48
48
 
49
49
  def child_jobs
50
- raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
50
+ raise ::StandardError, "A #{Bulkrax.file_model_class} cannot be a parent of a #{Bulkrax.collection_model_class}, Work, or other #{Bulkrax.file_model_class}"
51
51
  end
52
52
  end
53
53
  end
@@ -56,6 +56,10 @@ module Bulkrax
56
56
  end
57
57
  end
58
58
 
59
+ def get_object_name(field)
60
+ mapping&.[](field)&.[]('object')
61
+ end
62
+
59
63
  def set_parsed_data(name, value)
60
64
  return parsed_metadata[name] = value unless multiple?(name)
61
65
 
@@ -125,41 +129,51 @@ module Bulkrax
125
129
 
126
130
  return false if excluded?(field)
127
131
  return true if supported_bulkrax_fields.include?(field)
128
- return factory_class.method_defined?(field) && factory_class.properties[field].present?
132
+
133
+ Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
129
134
  end
130
135
 
131
136
  def supported_bulkrax_fields
132
- @supported_bulkrax_fields ||=
133
- %W[
134
- id
135
- file
136
- remote_files
137
- model
138
- visibility
139
- delete
140
- #{related_parents_parsed_mapping}
141
- #{related_children_parsed_mapping}
142
- ]
137
+ @supported_bulkrax_fields ||= fields_that_are_always_singular +
138
+ fields_that_are_always_multiple
143
139
  end
144
140
 
141
+ ##
142
+ # Determine whether the given field accepts multiple values
145
143
  def multiple?(field)
146
- @multiple_bulkrax_fields ||=
147
- %W[
148
- file
149
- remote_files
150
- rights_statement
151
- #{related_parents_parsed_mapping}
152
- #{related_children_parsed_mapping}
153
- ]
144
+ return true if fields_that_are_always_singular.include?(field.to_s)
145
+ return false if fields_that_are_always_multiple.include?(field.to_s)
154
146
 
155
- return true if @multiple_bulkrax_fields.include?(field)
156
- return false if field == 'model'
147
+ Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
148
+ end
157
149
 
158
- field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
150
+ def fields_that_are_always_multiple
151
+ @fields_that_are_always_multiple = %w[
152
+ id
153
+ delete
154
+ model
155
+ visibility
156
+ visibility_during_embargo
157
+ embargo_release_date
158
+ visibility_after_embargo
159
+ visibility_during_lease
160
+ lease_expiration_date
161
+ visibility_after_lease
162
+ ]
159
163
  end
160
164
 
161
- def get_object_name(field)
162
- mapping&.[](field)&.[]('object')
165
+ def fields_that_are_always_singular
166
+ @fields_that_are_always_singular ||= %W[
167
+ file
168
+ remote_files
169
+ rights_statement
170
+ #{related_parents_parsed_mapping}
171
+ #{related_children_parsed_mapping}
172
+ ]
173
+ end
174
+
175
+ def schema_form_definitions
176
+ @schema_form_definitions ||= ::SchemaLoader.new.form_definitions_for(factory_class.name.underscore.to_sym)
163
177
  end
164
178
 
165
179
  # Hyrax field to use for the given import field
@@ -11,7 +11,7 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
- add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
14
+ add_user_to_permission_templates!
15
15
  parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
16
  child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
@@ -28,22 +28,15 @@ module Bulkrax
28
28
  end
29
29
 
30
30
  def add_user_to_permission_templates!
31
- permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: @item.id)
32
-
33
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
34
- permission_template_id: permission_template.id,
35
- agent_id: user.user_key,
36
- agent_type: 'user',
37
- access: 'manage'
38
- )
39
- Hyrax::PermissionTemplateAccess.find_or_create_by!(
40
- permission_template_id: permission_template.id,
41
- agent_id: 'admin',
42
- agent_type: 'group',
43
- access: 'manage'
44
- )
45
-
46
- @item.reset_access_controls!
31
+ # NOTE: This is a cheat for when the class is a CollectionEntry. Consider
32
+ # that we have default_work_type.
33
+ #
34
+ # TODO: This guard clause is not necessary as we can handle it in the
35
+ # underlying factory. However, to do that requires adjusting about 7
36
+ # failing specs. So for now this refactor appears acceptable
37
+ return unless defined?(::Hyrax)
38
+ return unless self.class.to_s.include?("Collection")
39
+ factory.add_user_to_collection_permissions(collection: @item, user: user)
47
40
  end
48
41
 
49
42
  def parent_jobs
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ImporterExporterBehavior
@@ -54,9 +53,11 @@ module Bulkrax
54
53
  filename = parser_fields&.[]('import_file_path')
55
54
  return false unless filename
56
55
  return false unless File.file?(filename)
56
+
57
57
  returning_value = false
58
58
  File.open(filename) do |file|
59
- returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
59
+ mime_type = ::Marcel::MimeType.for(file)
60
+ returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
60
61
  end
61
62
  returning_value
62
63
  end
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'zip'
3
- require 'marcel'
4
2
 
5
3
  module Bulkrax
6
4
  # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -232,7 +230,7 @@ module Bulkrax
232
230
  type_col = Bulkrax::Entry.arel_table['type']
233
231
  status_col = Bulkrax::Entry.arel_table['status_message']
234
232
 
235
- query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
233
+ query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
236
234
  query.and(status_col.in(statuses))
237
235
  end
238
236
 
@@ -242,16 +240,30 @@ module Bulkrax
242
240
  return 0
243
241
  end
244
242
 
243
+ def record_raw_metadata(record)
244
+ record.to_h
245
+ end
246
+
247
+ def record_deleted?(record)
248
+ return false unless record.key?(:delete)
249
+ ActiveModel::Type::Boolean.new.cast(record[:delete])
250
+ end
251
+
252
+ def record_remove_and_rerun?(record)
253
+ return false unless record.key?(:remove_and_rerun)
254
+ ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
255
+ end
256
+
245
257
  def create_entry_and_job(current_record, type, identifier = nil)
246
258
  identifier ||= current_record[source_identifier]
247
259
  new_entry = find_or_create_entry(send("#{type}_entry_class"),
248
260
  identifier,
249
261
  'Bulkrax::Importer',
250
- current_record.to_h)
262
+ record_raw_metadata(current_record))
251
263
  new_entry.status_info('Pending', importer.current_run)
252
- if current_record[:delete].present?
264
+ if record_deleted?(current_record)
253
265
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
254
- elsif current_record[:remove_and_rerun].present? || remove_and_rerun
266
+ elsif record_remove_and_rerun?(current_record) || remove_and_rerun
255
267
  delay = calculate_type_delay(type)
256
268
  "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
257
269
  else
@@ -260,7 +272,7 @@ module Bulkrax
260
272
  end
261
273
 
262
274
  # Optional, define if using browse everything for file upload
263
- def retrieve_cloud_files(files); end
275
+ def retrieve_cloud_files(_files, _importer); end
264
276
 
265
277
  # @param file [#path, #original_filename] the file object that with the relevant data for the
266
278
  # import.
@@ -382,6 +394,9 @@ module Bulkrax
382
394
  identifier: identifier
383
395
  )
384
396
  entry.raw_metadata = raw_metadata
397
+ # Setting parsed_metadata specifically for the id so we can find the object via the
398
+ # id in a delete. This is likely to get clobbered in a regular import, which is fine.
399
+ entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
385
400
  entry.save!
386
401
  entry
387
402
  end
@@ -413,6 +428,8 @@ module Bulkrax
413
428
  end
414
429
 
415
430
  def unzip(file_to_unzip)
431
+ return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
432
+
416
433
  Zip::File.open(file_to_unzip) do |zip_file|
417
434
  zip_file.each do |entry|
418
435
  entry_path = File.join(importer_unzip_path, entry.name)
@@ -422,6 +439,13 @@ module Bulkrax
422
439
  end
423
440
  end
424
441
 
442
+ def untar(file_to_untar)
443
+ Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
444
+ command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
445
+ result = system(command)
446
+ raise "Failed to extract #{file_to_untar}" unless result
447
+ end
448
+
425
449
  def zip
426
450
  FileUtils.mkdir_p(exporter_export_zip_path)
427
451