bulkrax 7.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/concerns/loggable.rb +25 -0
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +483 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +178 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +25 -7
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
| @@ -1,153 +1,213 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            module Bulkrax
         | 
| 4 | 
            +
              ##
         | 
| 5 | 
            +
              # NOTE: Historically (e.g. Bulkrax v7.0.0 and earlier) we mixed in all of the
         | 
| 6 | 
            +
              # {Bulkrax::FileFactory} methods into {Bulkrax::ObjectFactory}.  However, with
         | 
| 7 | 
            +
              # the introduction of {Bulkrax::ValkyrieObjectFactory} we needed to account
         | 
| 8 | 
            +
              # for branching logic.
         | 
| 9 | 
            +
              #
         | 
| 10 | 
            +
              # This refactor where we expose the bare minimum interface of file interaction
         | 
| 11 | 
            +
              # should help with encapsulation.
         | 
| 12 | 
            +
              #
         | 
| 13 | 
            +
              # The refactor pattern was to find FileFactory methods used by the
         | 
| 14 | 
            +
              # ObjectFactory and delegate those to the new {FileFactory::InnerWorkings}
         | 
| 15 | 
            +
              # class.  Likewise within the InnerWorkings we wanted to delegate to the given
         | 
| 16 | 
            +
              # object_factory the methods that the InnerWorkings need.
         | 
| 17 | 
            +
              #
         | 
| 18 | 
            +
              # Further, by preserving the FileFactory as a mixed-in module, downstream
         | 
| 19 | 
            +
              # implementers will hopefully experience less of an impact regarding this
         | 
| 20 | 
            +
              # change.
         | 
| 4 21 | 
             
              module FileFactory
         | 
| 5 22 | 
             
                extend ActiveSupport::Concern
         | 
| 6 23 |  | 
| 7 | 
            -
                 | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
                def upload_ids
         | 
| 14 | 
            -
                  return [] if klass == Collection
         | 
| 15 | 
            -
                  attributes[:file] = file_paths
         | 
| 16 | 
            -
                  import_files
         | 
| 17 | 
            -
                end
         | 
| 24 | 
            +
                included do
         | 
| 25 | 
            +
                  class_attribute :file_set_factory_inner_workings_class, default: Bulkrax::FileFactory::InnerWorkings
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                  def file_set_factory_inner_workings
         | 
| 28 | 
            +
                    @file_set_factory_inner_workings ||= file_set_factory_inner_workings_class.new(object_factory: self)
         | 
| 29 | 
            +
                  end
         | 
| 18 30 |  | 
| 19 | 
            -
             | 
| 20 | 
            -
                  @update_files = update_files
         | 
| 21 | 
            -
                  hash = {}
         | 
| 22 | 
            -
                  return hash if klass == Collection
         | 
| 23 | 
            -
                  hash[:uploaded_files] = upload_ids if attributes[:file].present?
         | 
| 24 | 
            -
                  hash[:remote_files] = new_remote_files if new_remote_files.present?
         | 
| 25 | 
            -
                  hash
         | 
| 31 | 
            +
                  delegate :file_attributes, :destroy_existing_files, to: :file_set_factory_inner_workings
         | 
| 26 32 | 
             
                end
         | 
| 27 33 |  | 
| 28 | 
            -
                 | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
                   | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 34 | 
            +
                class InnerWorkings
         | 
| 35 | 
            +
                  include Loggable
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                  def initialize(object_factory:)
         | 
| 38 | 
            +
                    @object_factory = object_factory
         | 
| 39 | 
            +
                  end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                  attr_reader :object_factory
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                  delegate :object, :klass, :attributes, :user, to: :object_factory
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                  # Find existing files or upload new files. This assumes a Work will have unique file titles;
         | 
| 46 | 
            +
                  #   and that those file titles will not have changed
         | 
| 47 | 
            +
                  # could filter by URIs instead (slower).
         | 
| 48 | 
            +
                  # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
         | 
| 49 | 
            +
                  # otherwise it gets reuploaded by `work_actor`.
         | 
| 50 | 
            +
                  # support multiple files; ensure attributes[:file] is an Array
         | 
| 51 | 
            +
                  def upload_ids
         | 
| 52 | 
            +
                    return [] if klass == Bulkrax.collection_model_class
         | 
| 53 | 
            +
                    attributes[:file] = file_paths
         | 
| 54 | 
            +
                    import_files
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                  def file_attributes(update_files = false)
         | 
| 58 | 
            +
                    # NOTE: Unclear why we're changing an instance variable based on what was
         | 
| 59 | 
            +
                    # passed, which itself is derived from the instance variable we're about
         | 
| 60 | 
            +
                    # to change.  It's very easy to mutate the initialized @update_files if
         | 
| 61 | 
            +
                    # you don't pass the parameter.
         | 
| 62 | 
            +
                    object_factory.update_files = update_files
         | 
| 63 | 
            +
                    hash = {}
         | 
| 64 | 
            +
                    return hash if klass == Bulkrax.collection_model_class
         | 
| 65 | 
            +
                    hash[:uploaded_files] = upload_ids if attributes[:file].present?
         | 
| 66 | 
            +
                    hash[:remote_files] = new_remote_files if new_remote_files.present?
         | 
| 67 | 
            +
                    hash
         | 
| 68 | 
            +
                  end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                  # It's possible to get just an array of strings here, so we need to make sure they are all hashes
         | 
| 71 | 
            +
                  def parsed_remote_files
         | 
| 72 | 
            +
                    return @parsed_remote_files if @parsed_remote_files.present?
         | 
| 73 | 
            +
                    @parsed_remote_files = attributes[:remote_files] || []
         | 
| 74 | 
            +
                    @parsed_remote_files = @parsed_remote_files.map do |file_value|
         | 
| 75 | 
            +
                      if file_value.is_a?(Hash)
         | 
| 76 | 
            +
                        file_value
         | 
| 77 | 
            +
                      elsif file_value.is_a?(String)
         | 
| 78 | 
            +
                        name = Bulkrax::Importer.safe_uri_filename(file_value)
         | 
| 79 | 
            +
                        { url: file_value, file_name: name }
         | 
| 80 | 
            +
                      else
         | 
| 81 | 
            +
                        Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
         | 
| 82 | 
            +
                        nil
         | 
| 83 | 
            +
                      end
         | 
| 41 84 | 
             
                    end
         | 
| 85 | 
            +
                    @parsed_remote_files.delete(nil)
         | 
| 86 | 
            +
                    @parsed_remote_files
         | 
| 42 87 | 
             
                  end
         | 
| 43 | 
            -
                  @parsed_remote_files.delete(nil)
         | 
| 44 | 
            -
                  @parsed_remote_files
         | 
| 45 | 
            -
                end
         | 
| 46 88 |  | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
                                            is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
         | 
| 62 | 
            -
                                            is_valid && !is_existing
         | 
| 63 | 
            -
                                          end
         | 
| 64 | 
            -
                                        else
         | 
| 65 | 
            -
                                          parsed_remote_files.select do |file|
         | 
| 66 | 
            -
                                            file[:url]&.match(URI::ABS_URI)
         | 
| 67 | 
            -
                                          end
         | 
| 68 | 
            -
                                        end
         | 
| 69 | 
            -
                end
         | 
| 89 | 
            +
                  def new_remote_files
         | 
| 90 | 
            +
                    return @new_remote_files if @new_remote_files
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                    # TODO: This code could first loop through all remote files and select
         | 
| 93 | 
            +
                    # only the valid ones; then load the file_sets and do comparisons.
         | 
| 94 | 
            +
                    file_sets = object_factory.class.file_sets_for(resource: object)
         | 
| 95 | 
            +
                    @new_remote_files = parsed_remote_files.select do |file|
         | 
| 96 | 
            +
                      # is the url valid?
         | 
| 97 | 
            +
                      is_valid = file[:url]&.match(URI::ABS_URI)
         | 
| 98 | 
            +
                      # does the file already exist
         | 
| 99 | 
            +
                      is_existing = file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
         | 
| 100 | 
            +
                      is_valid && !is_existing
         | 
| 101 | 
            +
                    end
         | 
| 102 | 
            +
                  end
         | 
| 70 103 |  | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 104 | 
            +
                  def file_paths
         | 
| 105 | 
            +
                    @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
         | 
| 106 | 
            +
                  end
         | 
| 74 107 |  | 
| 75 | 
            -
             | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 108 | 
            +
                  # Retrieve the original filenames of the file sets already attached to the object
         | 
| 109 | 
            +
                  def work_files_filenames
         | 
| 110 | 
            +
                    object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
         | 
| 111 | 
            +
                  end
         | 
| 79 112 |  | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 113 | 
            +
                  # Retrieve the filenames for the files to be imported
         | 
| 114 | 
            +
                  def import_files_filenames
         | 
| 115 | 
            +
                    file_paths.map { |f| f.split('/').last }
         | 
| 116 | 
            +
                  end
         | 
| 84 117 |  | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
             | 
| 89 | 
            -
             | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 118 | 
            +
                  # Called if #replace_files is true
         | 
| 119 | 
            +
                  # Destroy all file_sets for this object
         | 
| 120 | 
            +
                  # Reload the object to ensure the remaining methods have the most up to date object
         | 
| 121 | 
            +
                  def destroy_existing_files
         | 
| 122 | 
            +
                    return unless object.present? && object.file_sets.present?
         | 
| 123 | 
            +
                    object.file_sets.each do |fs|
         | 
| 124 | 
            +
                      Hyrax::Actors::FileSetActor.new(fs, user).destroy
         | 
| 125 | 
            +
                    end
         | 
| 126 | 
            +
                    @object = object.reload
         | 
| 127 | 
            +
                    log_deleted_fs(object)
         | 
| 92 128 | 
             
                  end
         | 
| 93 | 
            -
                  @object = object.reload
         | 
| 94 | 
            -
                  log_deleted_fs(object)
         | 
| 95 | 
            -
                end
         | 
| 96 129 |  | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 130 | 
            +
                  def set_removed_filesets
         | 
| 131 | 
            +
                    local_file_sets.each do |fileset|
         | 
| 132 | 
            +
                      # TODO: We need to consider the Valkyrie pathway
         | 
| 133 | 
            +
                      next if fileset.is_a?(Valkyrie::Resource)
         | 
| 134 | 
            +
             | 
| 135 | 
            +
                      remove_file_set(file_set: fileset)
         | 
| 136 | 
            +
                    end
         | 
| 137 | 
            +
                  end
         | 
| 138 | 
            +
             | 
| 139 | 
            +
                  def remove_file_set(file_set:)
         | 
| 140 | 
            +
                    # TODO: We need to consider the Valkyrie pathway
         | 
| 141 | 
            +
                    file = file_set.files.first
         | 
| 142 | 
            +
                    file.create_version
         | 
| 100 143 | 
             
                    opts = {}
         | 
| 101 | 
            -
                    opts[:path] =  | 
| 144 | 
            +
                    opts[:path] = file.id.split('/', 2).last
         | 
| 102 145 | 
             
                    opts[:original_name] = 'removed.png'
         | 
| 103 146 | 
             
                    opts[:mime_type] = 'image/png'
         | 
| 104 147 |  | 
| 105 | 
            -
                     | 
| 106 | 
            -
                     | 
| 107 | 
            -
                    ::CreateDerivativesJob.set(wait: 1.minute).perform_later( | 
| 148 | 
            +
                    file_set.add_file(File.open(Bulkrax.removed_image_path), opts)
         | 
| 149 | 
            +
                    file_set.save
         | 
| 150 | 
            +
                    ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
         | 
| 108 151 | 
             
                  end
         | 
| 109 | 
            -
                end
         | 
| 110 152 |  | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 153 | 
            +
                  def local_file_sets
         | 
| 154 | 
            +
                    # NOTE: we'll be mutating this list of file_sets via the import_files
         | 
| 155 | 
            +
                    # method
         | 
| 156 | 
            +
                    @local_file_sets ||= ordered_file_sets
         | 
| 157 | 
            +
                  end
         | 
| 114 158 |  | 
| 115 | 
            -
             | 
| 116 | 
            -
             | 
| 117 | 
            -
                  object&.ordered_members.to_a.select(&:file_set?)
         | 
| 118 | 
            -
                end
         | 
| 159 | 
            +
                  def ordered_file_sets
         | 
| 160 | 
            +
                    return [] if object.blank?
         | 
| 119 161 |  | 
| 120 | 
            -
             | 
| 121 | 
            -
                   | 
| 122 | 
            -
                  set_removed_filesets if local_file_sets.present?
         | 
| 123 | 
            -
                  paths
         | 
| 124 | 
            -
                end
         | 
| 162 | 
            +
                    Bulkrax.object_factory.ordered_file_sets_for(object)
         | 
| 163 | 
            +
                  end
         | 
| 125 164 |  | 
| 126 | 
            -
             | 
| 127 | 
            -
                   | 
| 128 | 
            -
                   | 
| 129 | 
            -
                   | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 165 | 
            +
                  ##
         | 
| 166 | 
            +
                  # @return [Array<Integer>] An array of Hyrax::UploadFile#id representing the
         | 
| 167 | 
            +
                  #         files that we should be uploading.
         | 
| 168 | 
            +
                  def import_files
         | 
| 169 | 
            +
                    paths = file_paths.map { |path| import_file(path) }.compact
         | 
| 170 | 
            +
                    set_removed_filesets if local_file_sets.present?
         | 
| 171 | 
            +
                    paths
         | 
| 172 | 
            +
                  end
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                  def import_file(path)
         | 
| 175 | 
            +
                    u = Hyrax::UploadedFile.new
         | 
| 176 | 
            +
                    u.user_id = user.id
         | 
| 177 | 
            +
                    u.file = CarrierWave::SanitizedFile.new(path)
         | 
| 178 | 
            +
                    update_filesets(u)
         | 
| 179 | 
            +
                  end
         | 
| 180 | 
            +
             | 
| 181 | 
            +
                  def update_filesets(current_file)
         | 
| 182 | 
            +
                    if @update_files && local_file_sets.present?
         | 
| 183 | 
            +
                      # NOTE: We're mutating local_file_sets as we process the updated file.
         | 
| 184 | 
            +
                      fileset = local_file_sets.shift
         | 
| 185 | 
            +
                      update_file_set(file_set: fileset, uploaded: current_file)
         | 
| 186 | 
            +
                    else
         | 
| 187 | 
            +
                      current_file.save
         | 
| 188 | 
            +
                      current_file.id
         | 
| 189 | 
            +
                    end
         | 
| 190 | 
            +
                  end
         | 
| 191 | 
            +
             | 
| 192 | 
            +
                  ##
         | 
| 193 | 
            +
                  # @return [NilClass] indicating that we've successfully begun work on the file_set.
         | 
| 194 | 
            +
                  def update_file_set(file_set:, uploaded:)
         | 
| 195 | 
            +
                    # TODO: We need to consider the Valkyrie pathway
         | 
| 196 | 
            +
                    file = file_set.files.first
         | 
| 197 | 
            +
                    uploaded_file = uploaded.file
         | 
| 132 198 |  | 
| 133 | 
            -
             | 
| 134 | 
            -
                  if @update_files && local_file_sets.present?
         | 
| 135 | 
            -
                    fileset = local_file_sets.shift
         | 
| 136 | 
            -
                    return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
         | 
| 199 | 
            +
                    return nil if file.checksum.value == Digest::SHA1.file(uploaded_file.path).to_s
         | 
| 137 200 |  | 
| 138 | 
            -
                     | 
| 201 | 
            +
                    file.create_version
         | 
| 139 202 | 
             
                    opts = {}
         | 
| 140 | 
            -
                    opts[:path] =  | 
| 141 | 
            -
                    opts[:original_name] =  | 
| 142 | 
            -
                    opts[:mime_type] =  | 
| 203 | 
            +
                    opts[:path] = file.id.split('/', 2).last
         | 
| 204 | 
            +
                    opts[:original_name] = uploaded_file.file.original_filename
         | 
| 205 | 
            +
                    opts[:mime_type] = uploaded_file.content_type
         | 
| 143 206 |  | 
| 144 | 
            -
                     | 
| 145 | 
            -
                     | 
| 146 | 
            -
                    ::CreateDerivativesJob.set(wait: 1.minute).perform_later( | 
| 207 | 
            +
                    file_set.add_file(File.open(uploaded_file.to_s), opts)
         | 
| 208 | 
            +
                    file_set.save
         | 
| 209 | 
            +
                    ::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
         | 
| 147 210 | 
             
                    nil
         | 
| 148 | 
            -
                  else
         | 
| 149 | 
            -
                    current_file.save
         | 
| 150 | 
            -
                    current_file.id
         | 
| 151 211 | 
             
                  end
         | 
| 152 212 | 
             
                end
         | 
| 153 213 | 
             
              end
         | 
| @@ -5,7 +5,7 @@ module Bulkrax | |
| 5 5 | 
             
                extend ActiveSupport::Concern
         | 
| 6 6 |  | 
| 7 7 | 
             
                included do
         | 
| 8 | 
            -
                  self.default_work_type =  | 
| 8 | 
            +
                  self.default_work_type = Bulkrax.file_model_class.to_s
         | 
| 9 9 | 
             
                end
         | 
| 10 10 |  | 
| 11 11 | 
             
                def file_reference
         | 
| @@ -47,7 +47,7 @@ module Bulkrax | |
| 47 47 | 
             
                end
         | 
| 48 48 |  | 
| 49 49 | 
             
                def child_jobs
         | 
| 50 | 
            -
                  raise ::StandardError,  | 
| 50 | 
            +
                  raise ::StandardError, "A #{Bulkrax.file_model_class} cannot be a parent of a #{Bulkrax.collection_model_class}, Work, or other #{Bulkrax.file_model_class}"
         | 
| 51 51 | 
             
                end
         | 
| 52 52 | 
             
              end
         | 
| 53 53 | 
             
            end
         | 
| @@ -56,6 +56,10 @@ module Bulkrax | |
| 56 56 | 
             
                  end
         | 
| 57 57 | 
             
                end
         | 
| 58 58 |  | 
| 59 | 
            +
                def get_object_name(field)
         | 
| 60 | 
            +
                  mapping&.[](field)&.[]('object')
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
             | 
| 59 63 | 
             
                def set_parsed_data(name, value)
         | 
| 60 64 | 
             
                  return parsed_metadata[name] = value unless multiple?(name)
         | 
| 61 65 |  | 
| @@ -125,41 +129,51 @@ module Bulkrax | |
| 125 129 |  | 
| 126 130 | 
             
                  return false if excluded?(field)
         | 
| 127 131 | 
             
                  return true if supported_bulkrax_fields.include?(field)
         | 
| 128 | 
            -
             | 
| 132 | 
            +
             | 
| 133 | 
            +
                  Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
         | 
| 129 134 | 
             
                end
         | 
| 130 135 |  | 
| 131 136 | 
             
                def supported_bulkrax_fields
         | 
| 132 | 
            -
                  @supported_bulkrax_fields ||=
         | 
| 133 | 
            -
             | 
| 134 | 
            -
                      id
         | 
| 135 | 
            -
                      file
         | 
| 136 | 
            -
                      remote_files
         | 
| 137 | 
            -
                      model
         | 
| 138 | 
            -
                      visibility
         | 
| 139 | 
            -
                      delete
         | 
| 140 | 
            -
                      #{related_parents_parsed_mapping}
         | 
| 141 | 
            -
                      #{related_children_parsed_mapping}
         | 
| 142 | 
            -
                    ]
         | 
| 137 | 
            +
                  @supported_bulkrax_fields ||= fields_that_are_always_singular +
         | 
| 138 | 
            +
                                                fields_that_are_always_multiple
         | 
| 143 139 | 
             
                end
         | 
| 144 140 |  | 
| 141 | 
            +
                ##
         | 
| 142 | 
            +
                # Determine whether the given field holds multiple values
         | 
| 145 143 | 
             
                def multiple?(field)
         | 
| 146 | 
            -
                   | 
| 147 | 
            -
             | 
| 148 | 
            -
                      file
         | 
| 149 | 
            -
                      remote_files
         | 
| 150 | 
            -
                      rights_statement
         | 
| 151 | 
            -
                      #{related_parents_parsed_mapping}
         | 
| 152 | 
            -
                      #{related_children_parsed_mapping}
         | 
| 153 | 
            -
                    ]
         | 
| 144 | 
            +
                  return true if fields_that_are_always_singular.include?(field.to_s)
         | 
| 145 | 
            +
                  return false if fields_that_are_always_multiple.include?(field.to_s)
         | 
| 154 146 |  | 
| 155 | 
            -
                   | 
| 156 | 
            -
             | 
| 147 | 
            +
                  Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
         | 
| 148 | 
            +
                end
         | 
| 157 149 |  | 
| 158 | 
            -
             | 
| 150 | 
            +
                def fields_that_are_always_multiple
         | 
| 151 | 
            +
                  @fields_that_are_always_multiple = %w[
         | 
| 152 | 
            +
                    id
         | 
| 153 | 
            +
                    delete
         | 
| 154 | 
            +
                    model
         | 
| 155 | 
            +
                    visibility
         | 
| 156 | 
            +
                    visibility_during_embargo
         | 
| 157 | 
            +
                    embargo_release_date
         | 
| 158 | 
            +
                    visibility_after_embargo
         | 
| 159 | 
            +
                    visibility_during_lease
         | 
| 160 | 
            +
                    lease_expiration_date
         | 
| 161 | 
            +
                    visibility_after_lease
         | 
| 162 | 
            +
                  ]
         | 
| 159 163 | 
             
                end
         | 
| 160 164 |  | 
| 161 | 
            -
                def  | 
| 162 | 
            -
                   | 
| 165 | 
            +
                def fields_that_are_always_singular
         | 
| 166 | 
            +
                  @fields_that_are_always_singular ||= %W[
         | 
| 167 | 
            +
                    file
         | 
| 168 | 
            +
                    remote_files
         | 
| 169 | 
            +
                    rights_statement
         | 
| 170 | 
            +
                    #{related_parents_parsed_mapping}
         | 
| 171 | 
            +
                    #{related_children_parsed_mapping}
         | 
| 172 | 
            +
                  ]
         | 
| 173 | 
            +
                end
         | 
| 174 | 
            +
             | 
| 175 | 
            +
                def schema_form_definitions
         | 
| 176 | 
            +
                  @schema_form_definitions ||= ::SchemaLoader.new.form_definitions_for(factory_class.name.underscore.to_sym)
         | 
| 163 177 | 
             
                end
         | 
| 164 178 |  | 
| 165 179 | 
             
                # Hyrax field to use for the given import field
         | 
| @@ -11,7 +11,7 @@ module Bulkrax | |
| 11 11 | 
             
                    unless self.importerexporter.validate_only
         | 
| 12 12 | 
             
                      raise CollectionsCreatedError unless collections_created?
         | 
| 13 13 | 
             
                      @item = factory.run!
         | 
| 14 | 
            -
                      add_user_to_permission_templates! | 
| 14 | 
            +
                      add_user_to_permission_templates!
         | 
| 15 15 | 
             
                      parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
         | 
| 16 16 | 
             
                      child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
         | 
| 17 17 | 
             
                    end
         | 
| @@ -28,22 +28,15 @@ module Bulkrax | |
| 28 28 | 
             
                end
         | 
| 29 29 |  | 
| 30 30 | 
             
                def add_user_to_permission_templates!
         | 
| 31 | 
            -
                   | 
| 32 | 
            -
             | 
| 33 | 
            -
                   | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
                  )
         | 
| 39 | 
            -
                   | 
| 40 | 
            -
                    permission_template_id: permission_template.id,
         | 
| 41 | 
            -
                    agent_id: 'admin',
         | 
| 42 | 
            -
                    agent_type: 'group',
         | 
| 43 | 
            -
                    access: 'manage'
         | 
| 44 | 
            -
                  )
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                  @item.reset_access_controls!
         | 
| 31 | 
            +
                  # NOTE: This is a cheat for the class is a CollectionEntry.  Consider
         | 
| 32 | 
            +
                  # that we have default_work_type.
         | 
| 33 | 
            +
                  #
         | 
| 34 | 
            +
                  # TODO: This guard clause is not necessary as we can handle it in the
         | 
| 35 | 
            +
                  # underlying factory.  However, to do that requires adjusting about 7
         | 
| 36 | 
            +
                  # failing specs.  So for now this refactor appears acceptable
         | 
| 37 | 
            +
                  return unless defined?(::Hyrax)
         | 
| 38 | 
            +
                  return unless self.class.to_s.include?("Collection")
         | 
| 39 | 
            +
                  factory.add_user_to_collection_permissions(collection: @item, user: user)
         | 
| 47 40 | 
             
                end
         | 
| 48 41 |  | 
| 49 42 | 
             
                def parent_jobs
         | 
| @@ -1,5 +1,4 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            -
            require 'marcel'
         | 
| 3 2 |  | 
| 4 3 | 
             
            module Bulkrax
         | 
| 5 4 | 
             
              module ImporterExporterBehavior
         | 
| @@ -54,9 +53,11 @@ module Bulkrax | |
| 54 53 | 
             
                  filename = parser_fields&.[]('import_file_path')
         | 
| 55 54 | 
             
                  return false unless filename
         | 
| 56 55 | 
             
                  return false unless File.file?(filename)
         | 
| 56 | 
            +
             | 
| 57 57 | 
             
                  returning_value = false
         | 
| 58 58 | 
             
                  File.open(filename) do |file|
         | 
| 59 | 
            -
                     | 
| 59 | 
            +
                    mime_type = ::Marcel::MimeType.for(file)
         | 
| 60 | 
            +
                    returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
         | 
| 60 61 | 
             
                  end
         | 
| 61 62 | 
             
                  returning_value
         | 
| 62 63 | 
             
                end
         | 
| @@ -1,6 +1,4 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            -
            require 'zip'
         | 
| 3 | 
            -
            require 'marcel'
         | 
| 4 2 |  | 
| 5 3 | 
             
            module Bulkrax
         | 
| 6 4 | 
             
              # An abstract class that establishes the API for Bulkrax's import and export parsing.
         | 
| @@ -232,7 +230,7 @@ module Bulkrax | |
| 232 230 | 
             
                  type_col = Bulkrax::Entry.arel_table['type']
         | 
| 233 231 | 
             
                  status_col = Bulkrax::Entry.arel_table['status_message']
         | 
| 234 232 |  | 
| 235 | 
            -
                  query = (type == 'work' ? type_col. | 
| 233 | 
            +
                  query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
         | 
| 236 234 | 
             
                  query.and(status_col.in(statuses))
         | 
| 237 235 | 
             
                end
         | 
| 238 236 |  | 
| @@ -242,16 +240,30 @@ module Bulkrax | |
| 242 240 | 
             
                  return 0
         | 
| 243 241 | 
             
                end
         | 
| 244 242 |  | 
| 243 | 
            +
                def record_raw_metadata(record)
         | 
| 244 | 
            +
                  record.to_h
         | 
| 245 | 
            +
                end
         | 
| 246 | 
            +
             | 
| 247 | 
            +
                def record_deleted?(record)
         | 
| 248 | 
            +
                  return false unless record.key?(:delete)
         | 
| 249 | 
            +
                  ActiveModel::Type::Boolean.new.cast(record[:delete])
         | 
| 250 | 
            +
                end
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                def record_remove_and_rerun?(record)
         | 
| 253 | 
            +
                  return false unless record.key?(:remove_and_rerun)
         | 
| 254 | 
            +
                  ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
         | 
| 255 | 
            +
                end
         | 
| 256 | 
            +
             | 
| 245 257 | 
             
                def create_entry_and_job(current_record, type, identifier = nil)
         | 
| 246 258 | 
             
                  identifier ||= current_record[source_identifier]
         | 
| 247 259 | 
             
                  new_entry = find_or_create_entry(send("#{type}_entry_class"),
         | 
| 248 260 | 
             
                                                   identifier,
         | 
| 249 261 | 
             
                                                   'Bulkrax::Importer',
         | 
| 250 | 
            -
                                                   current_record | 
| 262 | 
            +
                                                   record_raw_metadata(current_record))
         | 
| 251 263 | 
             
                  new_entry.status_info('Pending', importer.current_run)
         | 
| 252 | 
            -
                  if current_record | 
| 264 | 
            +
                  if record_deleted?(current_record)
         | 
| 253 265 | 
             
                    "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
         | 
| 254 | 
            -
                  elsif current_record | 
| 266 | 
            +
                  elsif record_remove_and_rerun?(current_record) || remove_and_rerun
         | 
| 255 267 | 
             
                    delay = calculate_type_delay(type)
         | 
| 256 268 | 
             
                    "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
         | 
| 257 269 | 
             
                  else
         | 
| @@ -260,7 +272,7 @@ module Bulkrax | |
| 260 272 | 
             
                end
         | 
| 261 273 |  | 
| 262 274 | 
             
                # Optional, define if using browse everything for file upload
         | 
| 263 | 
            -
                def retrieve_cloud_files( | 
| 275 | 
            +
                def retrieve_cloud_files(_files, _importer); end
         | 
| 264 276 |  | 
| 265 277 | 
             
                # @param file [#path, #original_filename] the file object that with the relevant data for the
         | 
| 266 278 | 
             
                #        import.
         | 
| @@ -382,6 +394,9 @@ module Bulkrax | |
| 382 394 | 
             
                    identifier: identifier
         | 
| 383 395 | 
             
                  )
         | 
| 384 396 | 
             
                  entry.raw_metadata = raw_metadata
         | 
| 397 | 
            +
                  # Setting parsed_metadata specifically for the id so we can find the object via the
         | 
| 398 | 
            +
                  # id in a delete.  This is likely to get clobbered in a regular import, which is fine.
         | 
| 399 | 
            +
                  entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
         | 
| 385 400 | 
             
                  entry.save!
         | 
| 386 401 | 
             
                  entry
         | 
| 387 402 | 
             
                end
         | 
| @@ -413,6 +428,8 @@ module Bulkrax | |
| 413 428 | 
             
                end
         | 
| 414 429 |  | 
| 415 430 | 
             
                def unzip(file_to_unzip)
         | 
| 431 | 
            +
                  return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
         | 
| 432 | 
            +
             | 
| 416 433 | 
             
                  Zip::File.open(file_to_unzip) do |zip_file|
         | 
| 417 434 | 
             
                    zip_file.each do |entry|
         | 
| 418 435 | 
             
                      entry_path = File.join(importer_unzip_path, entry.name)
         | 
| @@ -422,6 +439,13 @@ module Bulkrax | |
| 422 439 | 
             
                  end
         | 
| 423 440 | 
             
                end
         | 
| 424 441 |  | 
| 442 | 
            +
                def untar(file_to_untar)
         | 
| 443 | 
            +
                  Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
         | 
| 444 | 
            +
                  command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
         | 
| 445 | 
            +
                  result = system(command)
         | 
| 446 | 
            +
                  raise "Failed to extract #{file_to_untar}" unless result
         | 
| 447 | 
            +
                end
         | 
| 448 | 
            +
             | 
| 425 449 | 
             
                def zip
         | 
| 426 450 | 
             
                  FileUtils.mkdir_p(exporter_export_zip_path)
         | 
| 427 451 |  |