RubyGems - bulkrax - Versions diffs - 1.0.0 → 2.0.1 - Mend

bulkrax 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

checksums.yaml +4 -4
data/README.md +1 -1
data/app/controllers/bulkrax/exporters_controller.rb +12 -4
data/app/controllers/bulkrax/importers_controller.rb +22 -17
data/app/factories/bulkrax/object_factory.rb +44 -61
data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
data/app/jobs/bulkrax/delete_work_job.rb +6 -2
data/app/jobs/bulkrax/export_work_job.rb +3 -1
data/app/jobs/bulkrax/exporter_job.rb +1 -0
data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
data/app/jobs/bulkrax/importer_job.rb +16 -1
data/app/matchers/bulkrax/application_matcher.rb +9 -6
data/app/models/bulkrax/csv_collection_entry.rb +8 -6
data/app/models/bulkrax/csv_entry.rb +139 -45
data/app/models/bulkrax/entry.rb +19 -8
data/app/models/bulkrax/exporter.rb +12 -5
data/app/models/bulkrax/importer.rb +22 -5
data/app/models/bulkrax/oai_entry.rb +5 -1
data/app/models/bulkrax/rdf_entry.rb +16 -7
data/app/models/bulkrax/xml_entry.rb +4 -0
data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
data/app/models/concerns/bulkrax/file_factory.rb +2 -1
data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
data/app/models/concerns/bulkrax/import_behavior.rb +35 -5
data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
data/app/models/concerns/bulkrax/status_info.rb +4 -4
data/app/parsers/bulkrax/application_parser.rb +59 -84
data/app/parsers/bulkrax/bagit_parser.rb +12 -3
data/app/parsers/bulkrax/csv_parser.rb +117 -62
data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
data/app/parsers/bulkrax/xml_parser.rb +5 -0
data/app/views/bulkrax/exporters/_form.html.erb +1 -1
data/app/views/bulkrax/exporters/show.html.erb +13 -1
data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
data/app/views/bulkrax/importers/edit.html.erb +2 -0
data/app/views/bulkrax/importers/index.html.erb +15 -17
data/app/views/bulkrax/importers/show.html.erb +6 -2
data/config/locales/bulkrax.en.yml +1 -0
data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
data/lib/bulkrax/engine.rb +1 -1
data/lib/bulkrax/version.rb +1 -1
data/lib/bulkrax.rb +9 -17
data/lib/generators/bulkrax/templates/bin/importer +17 -11
data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
metadata +13 -7
data/app/jobs/bulkrax/child_relationships_job.rb +0 -128

data/app/models/concerns/bulkrax/import_behavior.rb CHANGED

@@ -12,6 +12,8 @@ module Bulkrax
           raise CollectionsCreatedError unless collections_created?
           @item = factory.run!
         end
+        parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
+        child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
       rescue RSolr::Error::Http, CollectionsCreatedError => e
         raise e
       rescue StandardError => e
@@ -22,7 +24,19 @@ module Bulkrax
       return @item
     end
-    def find_or_create_collection_ids
+    def parent_jobs
+      self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
+        CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, parent_identifier: parent_identifier, importer_run: self.last_run)
+      end
+    end
+    def child_jobs
+      self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
+        CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, child_identifier: child_identifier, importer_run: self.last_run)
+      end
+    end
+    def find_collection_ids
       self.collection_ids
     end
@@ -57,15 +71,27 @@ module Bulkrax
     end
     def add_collections
-      return if find_or_create_collection_ids.blank?
-      self.parsed_metadata['collections'] = []
-      self.parsed_metadata['collections'] += find_or_create_collection_ids.map { |c| { id: c } }
+      return if find_collection_ids.blank?
+      ActiveSupport::Deprecation.warn(
+        'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+        ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+      )
+      self.parsed_metadata['member_of_collections_attributes'] = {}
+      find_collection_ids.each_with_index do |c, i|
+        self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
+      end
     end
     def factory
+      ActiveSupport::Deprecation.warn(
+        'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+        ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+      )
       @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
                                               source_identifier_value: identifier,
                                               work_identifier: parser.work_identifier,
+                                              collection_field_mapping: parser.collection_field_mapping,
                                               replace_files: replace_files,
                                               user: user,
                                               klass: factory_class,
@@ -80,7 +106,11 @@ module Bulkrax
            else
              Bulkrax.default_work_type
            end
-      fc.constantize
+      # return the name of the collection or work
+      fc.tr!(' ', '_')
+      fc.downcase! if fc.match?(/[-_]/)
+      fc.camelcase.constantize
     rescue NameError
       nil
     rescue

data/app/models/concerns/bulkrax/importer_exporter_behavior.rb CHANGED

@@ -25,10 +25,29 @@ module Bulkrax
       if collection
         current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
       else
+        # TODO: differentiate between work and collection counts for exporters
         current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
       end
       current_run.enqueued_records = index + 1
       current_run.save!
     end
+    def keys_without_numbers(keys)
+      keys.map { |key| key_without_numbers(key) }
+    end
+    def key_without_numbers(key)
+      key.gsub(/_\d+/, '').sub(/^\d+_/, '')
+    end
+    # Is this a file?
+    def file?
+      parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
+    end
+    # Is this a zip file?
+    def zip?
+      parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
+    end
   end
 end

data/app/models/concerns/bulkrax/status_info.rb CHANGED

@@ -33,13 +33,13 @@ module Bulkrax
       current_status&.created_at
     end
-    def status_info(e = nil)
+    def status_info(e = nil, current_run = nil)
       if e.nil?
-        self.statuses.create!(status_message: 'Complete', runnable: last_run)
+        self.statuses.create!(status_message: 'Complete', runnable: current_run || last_run)
       elsif e.is_a?(String)
-        self.statuses.create!(status_message: e, runnable: last_run)
+        self.statuses.create!(status_message: e, runnable: current_run || last_run)
       else
-        self.statuses.create!(status_message: 'Failed', runnable: last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
+        self.statuses.create!(status_message: 'Failed', runnable: current_run || last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
       end
     end

data/app/parsers/bulkrax/application_parser.rb CHANGED

@@ -1,15 +1,15 @@
 # frozen_string_literal: true
 module Bulkrax
-  class ApplicationParser
-    attr_accessor :importerexporter
+  class ApplicationParser # rubocop:disable Metrics/ClassLength
+    attr_accessor :importerexporter, :headers
     alias importer importerexporter
     alias exporter importerexporter
-    delegate :only_updates, :limit, :current_run, :errors,
-             :seen, :increment_counters, :parser_fields, :user,
-             :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
-             :status, :status_info, :status_at,
-             to: :importerexporter
+    delegate :only_updates, :limit, :current_run, :errors, :mapping,
+      :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
+      :key_without_numbers, :status, :status_info, :status_at,
+      :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
+      to: :importerexporter
     def self.parser_fields
       {}
@@ -25,6 +25,7 @@ module Bulkrax
     def initialize(importerexporter)
       @importerexporter = importerexporter
+      @headers = []
     end
     # @api
@@ -43,20 +44,54 @@ module Bulkrax
     end
     def source_identifier
-      @source_identifier ||= identifier_hash.values.first&.[]("from")&.first&.to_sym || :source_identifier
+      @source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
     end
     def work_identifier
-      @work_identifier ||= identifier_hash.keys.first&.to_sym || :source
+      @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
     end
-    def identifier_hash
-      @identifier_hash ||= importerexporter.mapping.select do |_, h|
-        h.key?("source_identifier")
-      end
-      raise StandardError, "more than one source_identifier declared: #{@identifier_hash.keys.join(', ')}" if @identifier_hash.length > 1
+    def related_parents_raw_mapping
+      @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
+    end
+    def related_parents_parsed_mapping
+      @related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
+    end
+    def related_children_raw_mapping
+      @related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
+    end
+    def related_children_parsed_mapping
+      @related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
+    end
+    def get_field_mapping_hash_for(key)
+      return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
+      instance_variable_set(
+        "@#{key}_hash",
+        importerexporter.mapping.with_indifferent_access.select { |_, h| h.key?(key) }
+      )
+      raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
-      @identifier_hash
+      instance_variable_get("@#{key}_hash")
+    end
+    def collection_field_mapping
+      ActiveSupport::Deprecation.warn(
+        'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+        ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+      )
+      Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
+    end
+    def model_field_mappings
+      model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
+      model_mappings |= ['model']
+      model_mappings
     end
     def perform_method
@@ -91,76 +126,19 @@ module Bulkrax
       path
     end
+    # Base path for imported and exported files
+    def base_path(type = 'import')
+      ENV['HYKU_MULTITENANT'] ? File.join(Bulkrax.send("#{type}_path"), Site.instance.account.name) : Bulkrax.send("#{type}_path")
+    end
     # Path where we'll store the import metadata and files
     #  this is used for uploaded and cloud files
     def path_for_import
-      @path_for_import = File.join(Bulkrax.import_path, importerexporter.path_string)
+      @path_for_import = File.join(base_path, importerexporter.path_string)
       FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
       @path_for_import
     end
-    # Optional, only used by certain parsers
-    # Other parsers should override with a custom or empty method
-    # Will be skipped unless the #record is a Hash
-    def create_parent_child_relationships
-      parents.each do |key, value|
-        parent = entry_class.where(
-          identifier: key,
-          importerexporter_id: importerexporter.id,
-          importerexporter_type: 'Bulkrax::Importer'
-        ).first
-        # not finding the entries here indicates that the given identifiers are incorrect
-        # in that case we should log that
-        children = value.map do |child|
-          entry_class.where(
-            identifier: child,
-            importerexporter_id: importerexporter.id,
-            importerexporter_type: 'Bulkrax::Importer'
-          ).first
-        end.compact.uniq
-        if parent.present? && (children.length != value.length)
-          # Increment the failures for the number we couldn't find
-          # Because all of our entries have been created by now, if we can't find them, the data is wrong
-          Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
-          break if children.empty?
-          Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
-        end
-        parent_id = parent.id
-        child_entry_ids = children.map(&:id)
-        ChildRelationshipsJob.perform_later(parent_id, child_entry_ids, current_run.id)
-      end
-    rescue StandardError => e
-      status_info(e)
-    end
-    def parents
-      @parents ||= setup_parents
-    end
-    def setup_parents
-      pts = []
-      records.each do |record|
-        r = if record.respond_to?(:to_h)
-              record.to_h
-            else
-              record
-            end
-        next unless r.is_a?(Hash)
-        children = if r[:children].is_a?(String)
-                     r[:children].split(/\s*[:;|]\s*/)
-                   else
-                     r[:children]
-                   end
-        next if children.blank?
-        pts << {
-          r[source_identifier] => children
-        }
-      end
-      pts.blank? ? pts : pts.inject(:merge)
-    end
     def setup_export_file
       raise StandardError, 'must be defined' if exporter?
     end
@@ -288,12 +266,9 @@ module Bulkrax
     private
     def real_import_file_path
-      if file? && zip?
-        unzip(parser_fields['import_file_path'])
-        return importer_unzip_path
-      else
-        parser_fields['import_file_path']
-      end
+      return importer_unzip_path if file? && zip?
+      parser_fields['import_file_path']
     end
   end
 end

data/app/parsers/bulkrax/bagit_parser.rb CHANGED

@@ -40,7 +40,7 @@ module Bulkrax
         raise StandardError, 'No metadata files were found' if path.blank?
         data = entry_class.read_data(path)
         data = entry_class.data_for_entry(data, source_identifier)
-        data[:file] = bag.bag_files.join('|')
+        data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
         data
       end
     end
@@ -58,7 +58,7 @@ module Bulkrax
           collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
         }
         new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
-        ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
+        ImportCollectionJob.perform_now(new_entry.id, current_run.id)
         increment_counters(index, true)
       end
     end
@@ -83,13 +83,22 @@ module Bulkrax
     end
     def collections
-      records.map { |r| r[:collection].split(/\s*[;|]\s*/) if r[:collection].present? }.flatten.compact.uniq
+      ActiveSupport::Deprecation.warn(
+        'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+        ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+      )
+      records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
     end
     def collections_total
       collections.size
     end
+    # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
+    def works_total
+      total
+    end
     def total
       metadata_paths.count
     end

data/app/parsers/bulkrax/csv_parser.rb CHANGED

@@ -2,31 +2,47 @@
 require 'csv'
 module Bulkrax
-  class CsvParser < ApplicationParser
+  class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
     include ErroredEntries
     def self.export_supported?
       true
     end
-    def initialize(importerexporter)
-      @importerexporter = importerexporter
+    def records(_opts = {})
+      file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
+      # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
+      csv_data = entry_class.read_data(file_for_import)
+      importer.parser_fields['total'] = csv_data.count
+      importer.save
+      @records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
     end
     def collections
-      # does the CSV contain a collection column?
-      return [] unless import_fields.include?(:collection)
+      ActiveSupport::Deprecation.warn(
+        'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+        ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+      )
       # retrieve a list of unique collections
-      records.map { |r| r[:collection].split(/\s*[;|]\s*/) if r[:collection].present? }.flatten.compact.uniq
+      records.map do |r|
+        collections = []
+        r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title } } if r[collection_field_mapping].present?
+        model_field_mappings.each do |model_mapping|
+          collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
+        end
+        collections
+      end.flatten.compact.uniq
     end
     def collections_total
       collections.size
     end
-    def records(_opts = {})
-      file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
-      # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
-      @records ||= entry_class.read_data(file_for_import).map { |record_data| entry_class.data_for_entry(record_data, nil) }
+    def works
+      records - collections
+    end
+    def works_total
+      works.size
     end
     # We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data
@@ -44,8 +60,9 @@ module Bulkrax
     end
     def valid_import?
-      error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_fields).join(', ')}"
-      raise StandardError, error_alert unless required_elements?(import_fields)
+      import_strings = keys_without_numbers(import_fields.map(&:to_s))
+      error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_strings).join(', ')}"
+      raise StandardError, error_alert unless required_elements?(import_strings)
       file_paths.is_a?(Array)
     rescue StandardError => e
@@ -56,26 +73,26 @@ module Bulkrax
     def create_collections
       collections.each_with_index do |collection, index|
         next if collection.blank?
-        metadata = {
-          title: [collection],
-          work_identifier => [collection],
-          visibility: 'open',
-          collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
-        }
-        new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
-        ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
+        break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
+        new_entry = find_or_create_entry(collection_entry_class, unique_collection_identifier(collection), 'Bulkrax::Importer', collection.to_h)
+        # TODO: add support for :delete option
+        ImportCollectionJob.perform_now(new_entry.id, current_run.id)
         increment_counters(index, true)
       end
+      importer.record_status
+    rescue StandardError => e
+      status_info(e)
     end
     def create_works
-      records.each_with_index do |record, index|
-        next unless record_has_source_identifier(record, index)
-        break if limit_reached?(limit, index)
+      works.each_with_index do |work, index|
+        next unless record_has_source_identifier(work, records.find_index(work))
+        break if limit_reached?(limit, records.find_index(work))
-        seen[record[source_identifier]] = true
-        new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record.to_h.compact)
-        if record[:delete].present?
+        seen[work[source_identifier]] = true
+        new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
+        if work[:delete].present?
           DeleteWorkJob.send(perform_method, new_entry, current_run)
         else
           ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
@@ -99,10 +116,6 @@ module Bulkrax
       path
     end
-    def create_parent_child_relationships
-      super
-    end
     def extra_filters
       output = ""
       if importerexporter.start_date.present?
@@ -117,6 +130,8 @@ module Bulkrax
     def current_work_ids
       case importerexporter.export_from
+      when 'all'
+        ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", rows: 2_147_483_647).map(&:id)
       when 'collection'
         ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
       when 'worktype'
@@ -126,9 +141,16 @@ module Bulkrax
         complete_statuses = Bulkrax::Status.latest_by_statusable
                                            .includes(:statusable)
                                            .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
-        complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier }
-        ActiveFedora::SolrService.query("#{work_identifier}_tesim:(#{complete_entry_identifiers.join(' OR ')})#{extra_filters}", rows: 2_000_000_000).map(&:id)
+        complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
+        extra_filters = extra_filters.presence || '*:*'
+        ActiveFedora::SolrService.get(
+          extra_filters.to_s,
+          fq: "#{work_identifier}_sim:(#{complete_entry_identifiers.join(' OR ')})",
+          fl: 'id',
+          rows: 2_000_000_000
+        )['response']['docs'].map { |obj| obj['id'] }
       end
     end
@@ -136,12 +158,18 @@ module Bulkrax
       current_work_ids.each_with_index do |wid, index|
         break if limit_reached?(limit, index)
         new_entry = find_or_create_entry(entry_class, wid, 'Bulkrax::Exporter')
-        Bulkrax::ExportWorkJob.perform_now(new_entry.id, current_run.id)
+        begin
+          entry = Bulkrax::ExportWorkJob.perform_now(new_entry.id, current_run.id)
+        rescue => e
+          Rails.logger.info("#{e.message} was detected during export")
+        end
+        self.headers |= entry.parsed_metadata.keys if entry
       end
     end
     alias create_from_collection create_new_entries
     alias create_from_importer create_new_entries
     alias create_from_worktype create_new_entries
+    alias create_from_all create_new_entries
     def entry_class
       CsvEntry
@@ -154,19 +182,11 @@ module Bulkrax
     # See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory
     #   Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
     def total
-      if importer?
-        return @total if @total&.positive?
-        # windows enocded
-        @total = `grep -c ^M #{real_import_file_path}`.to_i - 1
-        # unix encoded
-        @total = `grep -vc ^$ #{real_import_file_path}`.to_i - 1 if @total < 1
-      elsif exporter?
-        @total = importerexporter.entries.count
-      else
-        @total = 0
-      end
-      return @total
-    rescue StandardErrorr
+      @total = importer.parser_fields['total'] || 0 if importer?
+      @total = importerexporter.entries.count if exporter?
+      return @total || 0
+    rescue StandardError
       @total = 0
     end
@@ -201,31 +221,58 @@ module Bulkrax
       end
     end
-    def key_allowed(key)
-      !Bulkrax.reserved_properties.include?(key) &&
-        new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
+    def export_key_allowed(key)
+      new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
         key != source_identifier.to_s
     end
     # All possible column names
     def export_headers
-      headers = ['id']
-      headers << source_identifier.to_s
-      headers << 'model'
-      importerexporter.mapping.each_key { |key| headers << key if key_allowed(key) }
-      headers << 'file'
+      headers = sort_headers(self.headers)
+      # we don't want access_control_id exported and we want file at the end
+      headers.delete('access_control_id') if headers.include?('access_control_id')
+      # add the headers below at the beginning or end to maintain the preexisting export behavior
+      headers.prepend('model')
+      headers.prepend(source_identifier.to_s)
+      headers.prepend('id')
       headers.uniq
     end
+    def object_names
+      return @object_names if @object_names
+      @object_names = mapping.values.map { |value| value['object'] }
+      @object_names.uniq!.delete(nil)
+      @object_names
+    end
+    def sort_headers(headers)
+      # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
+      # while keeping objects grouped together
+      headers.sort_by do |item|
+        number = item.match(/\d+/)&.[](0) || 0.to_s
+        sort_number = number.rjust(4, "0")
+        object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
+        remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
+        "#{object_prefix}_#{sort_number}_#{remainder}"
+      end
+    end
     # in the parser as it is specific to the format
     def setup_export_file
-      File.join(importerexporter.exporter_export_path, 'export.csv')
+      File.join(importerexporter.exporter_export_path, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}.csv")
     end
     # Retrieve file paths for [:file] mapping in records
     #  and check all listed files exist.
     def file_paths
       raise StandardError, 'No records were found' if records.blank?
+      return [] if importerexporter.metadata_only?
       @file_paths ||= records.map do |r|
         file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
         next if r[file_mapping].blank?
@@ -244,23 +291,31 @@ module Bulkrax
     # Retrieve the path where we expect to find the files
     def path_to_files
       @path_to_files ||= File.join(
-        File.file?(import_file_path) ? File.dirname(import_file_path) : import_file_path,
+        zip? ? importer_unzip_path : File.dirname(import_file_path),
         'files'
       )
     end
     private
+    def unique_collection_identifier(collection_hash)
+      entry_uid = collection_hash[source_identifier]
+      entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
+                      Bulkrax.fill_in_blank_source_identifiers.call(self, records.find_index(collection_hash))
+                    else
+                      collection_hash[:title].split(/\s*[;|]\s*/).first
+                    end
+      entry_uid
+    end
     # Override to return the first CSV in the path, if a zip file is supplied
     # We expect a single CSV at the top level of the zip in the CSVParser
     # but we are willing to go look for it if need be
     def real_import_file_path
-      if file? && zip?
-        unzip(parser_fields['import_file_path'])
-        return Dir["#{importer_unzip_path}/**/*.csv"].first
-      else
-        parser_fields['import_file_path']
-      end
+      return Dir["#{importer_unzip_path}/**/*.csv"].first if file? && zip?
+      parser_fields['import_file_path']
     end
   end
 end

data/app/parsers/bulkrax/oai_dc_parser.rb CHANGED

@@ -75,7 +75,7 @@ module Bulkrax
         new_entry = collection_entry_class.where(importerexporter: importerexporter, identifier: unique_collection_identifier, raw_metadata: metadata).first_or_create!
         # perform now to ensure this gets created before work imports start
-        ImportWorkCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
+        ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
         increment_counters(index, true)
       end
     end
@@ -119,7 +119,10 @@ module Bulkrax
       end
     end
-    def create_parent_child_relationships; end
+    # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
+    def works_total
+      total
+    end
     def total
       @total ||= records(quick: true).doc.find(".//resumptionToken").to_a.first.attributes["completeListSize"].to_i