RubyGems - bulkrax - Versions diffs - 9.3.5 → 9.4.1 - Mend

bulkrax 9.3.5 → 9.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

checksums.yaml +4 -4
data/README.md +11 -1
data/app/assets/javascripts/bulkrax/application.js +2 -1
data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
data/app/assets/javascripts/bulkrax/datatables.js +1 -0
data/app/assets/javascripts/bulkrax/entries.js +17 -10
data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
data/app/assets/stylesheets/bulkrax/application.css +1 -1
data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
data/app/controllers/bulkrax/importers_controller.rb +28 -31
data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
data/app/errors/bulkrax/unzip_error.rb +16 -0
data/app/factories/bulkrax/object_factory.rb +3 -2
data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
data/app/jobs/bulkrax/importer_job.rb +42 -4
data/app/models/bulkrax/csv_entry.rb +27 -7
data/app/models/bulkrax/entry.rb +4 -0
data/app/models/bulkrax/importer.rb +27 -10
data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
data/app/parsers/bulkrax/application_parser.rb +63 -20
data/app/parsers/bulkrax/bagit_parser.rb +12 -0
data/app/parsers/bulkrax/csv_parser.rb +168 -25
data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
data/app/views/bulkrax/importers/index.html.erb +6 -1
data/app/views/bulkrax/importers/new.html.erb +1 -1
data/app/views/bulkrax/importers/show.html.erb +17 -1
data/config/i18n-tasks.yml +195 -0
data/config/locales/bulkrax.de.yml +508 -0
data/config/locales/bulkrax.en.yml +463 -233
data/config/locales/bulkrax.es.yml +508 -0
data/config/locales/bulkrax.fr.yml +508 -0
data/config/locales/bulkrax.it.yml +508 -0
data/config/locales/bulkrax.pt-BR.yml +508 -0
data/config/locales/bulkrax.zh.yml +507 -0
data/config/routes.rb +10 -1
data/lib/bulkrax/data/demo_scenarios.json +2235 -0
data/lib/bulkrax/version.rb +1 -1
data/lib/bulkrax.rb +31 -0
metadata +56 -16
data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
data/app/services/bulkrax/sample_csv_service.rb +0 -78
/data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0

data/app/parsers/bulkrax/csv_parser.rb CHANGED Viewed

@@ -4,7 +4,10 @@ module Bulkrax
   class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
     include ErroredEntries
     include ExportBehavior
+    include CsvParser::CsvTemplateGeneration
+    include CsvParser::CsvValidation
     attr_writer :collections, :file_sets, :works
+    attr_accessor :validation_mode
     def self.export_supported?
       true
@@ -14,12 +17,14 @@ module Bulkrax
       return @records if @records.present?
       file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
-      # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
       csv_data = entry_class.read_data(file_for_import)
-      importer.parser_fields['total'] = csv_data.count
-      importer.save
+      unless validation_mode
+        importer.parser_fields['total'] = csv_data.count
+        importer.save
+      end
       @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
+      @records
     end
     # rubocop:disable Metrics/AbcSize
@@ -95,11 +100,11 @@ module Bulkrax
     def missing_elements(record)
       keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
       keys = []
-      # Because we're persisting the mapping in the database, these are likely string keys.
-      # However, there's no guarantee.  So, we need to ensure that by running stringify.
-      importerexporter.mapping.stringify_keys.map do |k, v|
-        Array.wrap(v['from']).each do |vf|
-          keys << k if keys_from_record.include?(vf)
+      mapping_values = importerexporter.mapping.stringify_keys
+      mapping_values.each do |k, v|
+        from_values = Array.wrap(v.is_a?(Hash) ? (v['from'] || v[:from]) : nil)
+        from_values.each do |vf|
+          keys << k if vf.present? && keys_from_record.include?(vf.to_s.strip)
         end
       end
       required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
@@ -360,8 +365,11 @@ module Bulkrax
                         else
                           Bulkrax.multi_value_element_split_on
                         end
+        files_dir = path_to_files
+        raise StandardError, "Record references local files but no files directory could be resolved from the import path" if files_dir.nil?
         r[file_mapping].split(split_pattern).map do |f|
-          file = File.join(path_to_files, f.tr(' ', '_'))
+          file = File.join(files_dir, f.strip.tr(' ', '_'))
           if File.exist?(file) # rubocop:disable Style/GuardClause
             file
           else
@@ -371,23 +379,161 @@ module Bulkrax
       end.flatten.compact.uniq
     end
-    # Retrieve the path where we expect to find the files
+    # Retrieve the path where we expect to find the files for this import.
+    # After {ImporterJob#unzip_imported_file} runs (zip cases), attachments
+    # live under `{importer_unzip_path}/files/`. For a server-path-style
+    # import (the user specified a CSV file path with a sibling `files/`
+    # directory on disk), resolve relative to the CSV's directory instead.
+    #
+    # When called with `filename:`, returns the full path to that file if
+    # it exists on disk, or `nil` otherwise — callers like
+    # `Bulkrax::FileSetEntryBehavior#add_path_to_file` rely on the nil
+    # sentinel to fall back to the raw filename in their error messages.
+    #
+    # When called with no filename, returns the `files/` directory itself
+    # (only when that directory exists on disk — else `nil` so callers can
+    # raise a clear "no files directory" error).
     def path_to_files(**args)
       filename = args.fetch(:filename, '')
+      base_dir = files_dir
+      return base_dir if filename.blank? && Dir.exist?(base_dir)
+      return nil if filename.blank?
+      candidate = File.join(base_dir, filename)
+      candidate if File.exist?(candidate)
+    end
+    # Extracts a zip that contains a primary CSV. The primary CSV lands at
+    # the root of {#importer_unzip_path}; every other entry lands under
+    # {#importer_unzip_path}/files/, preserving its path relative to the
+    # primary CSV's directory.
+    #
+    # Primary-CSV selection matches the guided-import validator's rule
+    # (see {Bulkrax::ImporterFileHandler#locate_csv_entry_in_zip}): the CSV
+    # entry at the shallowest directory level. Visible errors are raised on
+    # zero CSVs or multiple CSVs at the shallowest level.
+    #
+    # @param file_to_unzip [String] absolute path to a .zip
+    # @raise [Bulkrax::UnzipError] on no CSV or ambiguous CSVs
+    def unzip_with_primary_csv(file_to_unzip)
+      dest_dir = importer_unzip_path(mkdir: true)
+      Zip::File.open(file_to_unzip) do |zip_file|
+        entries = real_zip_entries(zip_file)
+        primary = select_primary_csv!(entries)
+        primary_dir = File.dirname(primary.name)
+        entries.each do |entry|
+          if entry == primary
+            extract_to(zip_file, entry, dest_dir, File.basename(entry.name))
+          else
+            extract_to(zip_file, entry, dest_dir, File.join('files', relative_to(primary_dir, entry.name)))
+          end
+        end
+      end
+    end
-      return @path_to_files if @path_to_files.present? && filename.blank?
-      @path_to_files = File.join(
-          zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
-        )
+    # Extracts a zip that accompanies a separately-uploaded CSV. Every
+    # entry lands under {#importer_unzip_path}/files/ — including any
+    # CSVs inside the zip, which are treated as attachments since the
+    # primary CSV was uploaded outside the zip. Strips a single top-level
+    # wrapper directory if present, so users can zip either the contents
+    # or the enclosing folder.
+    #
+    # @param file_to_unzip [String] absolute path to a .zip
+    def unzip_attachments_only(file_to_unzip)
+      dest_dir = importer_unzip_path(mkdir: true)
+      Zip::File.open(file_to_unzip) do |zip_file|
+        entries = real_zip_entries(zip_file)
+        wrapper = single_top_level_wrapper(entries)
+        entries.each do |entry|
+          relative = wrapper ? entry.name.delete_prefix("#{wrapper}/") : entry.name
+          next if relative.empty?
+          extract_to(zip_file, entry, dest_dir, File.join('files', relative))
+        end
+      end
+    end
-      return @path_to_files if File.exist?(@path_to_files)
+    # File names referenced in CSVs have spaces replaced with underscores.
+    # @see #file_paths
+    def remove_spaces_from_filenames
+      files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
+      files_with_spaces = files.select { |f| f.split('/').last.include?(' ') }
+      return if files_with_spaces.blank?
-      # TODO: This method silently returns nil if there is no file & no zip file
-      File.join(importer_unzip_path, 'files', filename) if file? && zip?
+      files_with_spaces.map! { |path| Pathname.new(path) }
+      files_with_spaces.each do |path|
+        filename_without_spaces = path.basename.to_s.tr(' ', '_')
+        path.rename(File.join(path.dirname, filename_without_spaces))
+      end
     end
     private
+    # Memoized base directory under which import attachments live. Kept
+    # separate from `#path_to_files`' per-filename return value to avoid
+    # cross-contamination between directory lookups and file lookups.
+    def files_dir
+      @files_dir ||= begin
+        has_attachments_zip = parser_fields['attachments_zip_path'].present? && zip_file?(parser_fields['attachments_zip_path'])
+        base = zip? || has_attachments_zip ? importer_unzip_path : File.dirname(import_file_path)
+        File.join(base, 'files')
+      end
+    end
+    # Returns zip entries filtered down to real files (no directories, no
+    # macOS junk). Raises {Bulkrax::UnzipError} if any entry's name would
+    # escape the destination directory (Zip Slip).
+    def real_zip_entries(zip_file)
+      entries = zip_file.entries.select { |e| e.file? && !macos_junk_entry?(e.name) }
+      entries.each { |e| reject_unsafe_entry!(e.name) }
+      entries
+    end
+    # Picks the single primary CSV from zip entries, enforcing the
+    # shallowest-level rule. Raises {Bulkrax::UnzipError} on failure.
+    def select_primary_csv!(entries)
+      csvs = entries.select { |e| e.name.end_with?('.csv') }
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.no_csv') if csvs.empty?
+      by_depth = csvs.group_by { |e| e.name.count('/') }
+      shallowest = by_depth[by_depth.keys.min]
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.multiple_csv') if shallowest.size > 1
+      shallowest.first
+    end
+    # If every entry shares a single top-level directory, returns that
+    # directory name; otherwise nil.
+    def single_top_level_wrapper(entries)
+      tops = entries.map { |e| e.name.split('/').first }.uniq
+      return nil unless tops.size == 1
+      # If the single top segment is a file (no slashes in the entry), not a dir,
+      # there's no wrapper to strip.
+      return nil if entries.any? { |e| e.name == tops.first }
+      tops.first
+    end
+    # Returns `path` with `prefix/` removed from the front, if present, and
+    # a leading `files/` segment also stripped so callers can join under
+    # `files/` without doubling when the zip already uses that convention.
+    def relative_to(prefix, path)
+      remaining = prefix == '.' || prefix.empty? ? path : path.delete_prefix("#{prefix}/")
+      remaining.delete_prefix('files/')
+    end
+    # Extracts a zip entry to `dest_dir/relative_dest`. Creates intermediate
+    # directories and honors the rubyzip 2/3 extract-method signature.
+    # The destination path is validated by {#safe_extract_path} — an unsafe
+    # `relative_dest` raises {Bulkrax::UnzipError} before any write.
+    def extract_to(zip_file, entry, dest_dir, relative_dest)
+      dest_path = safe_extract_path(dest_dir, relative_dest)
+      FileUtils.mkdir_p(File.dirname(dest_path))
+      return if File.exist?(dest_path)
+      extract_zip_entry(zip_file, entry, dest_dir, relative_dest, dest_path)
+    end
     def unique_collection_identifier(collection_hash)
       entry_uid = collection_hash[source_identifier]
       entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
@@ -402,16 +548,13 @@ module Bulkrax
     # Override to return the first CSV in the path, if a zip file is supplied
     # We expect a single CSV at the top level of the zip in the CSVParser
     # but we are willing to go look for it if need be
+    # When the user uploaded a zip containing a CSV, the job extracts the
+    # primary CSV to the root of `importer_unzip_path` (see
+    # {#unzip_with_primary_csv}). Any non-primary CSVs live under `files/`
+    # and are treated as attachments, so a shallow glob suffices.
     def real_import_file_path
-      return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip?
+      return Dir["#{importer_unzip_path}/*.csv"].first if file? && zip?
       parser_fields['import_file_path']
     end
-    # If there are CSVs that are meant to be attachments in the files directory,
-    # we don't want to consider them as the import CSV
-    def in_files_dir?(path)
-      File.dirname(path).ends_with?('files')
-    end
   end
 end

data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb ADDED Viewed

@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+module Bulkrax
+  class CsvParser < ApplicationParser
+    module CsvTemplateGeneration
+      extend ActiveSupport::Concern
+      class_methods do
+        # Generate a CSV template for the specified models.
+        #
+        # @param models [Array<String>, String] Model names or 'all' for all available models
+        # @param output [String] Output format: 'file' or 'csv_string'
+        # @param admin_set_id [String, nil] Optional admin set ID for context
+        # @param args [Hash] Additional arguments passed to output method (e.g., file_path)
+        # @return [String] File path (for 'file' output) or CSV string (for 'csv_string' output)
+        def generate_template(models: [], output: 'file', admin_set_id: nil, **args)
+          raise NameError, "Hyrax is not defined" unless defined?(::Hyrax)
+          TemplateContext.new(models: models, admin_set_id: admin_set_id).send("to_#{output}", **args)
+        end
+      end
+      ##
+      # Holds state for a single template generation run.
+      # Provides the interface expected by CsvTemplate:: components.
+      class TemplateContext
+        attr_reader :mappings, :all_models, :admin_set_id, :field_analyzer, :mapping_manager
+        def initialize(models: nil, admin_set_id: nil)
+          @admin_set_id = admin_set_id
+          @mapping_manager = CsvTemplate::MappingManager.new
+          @mappings = @mapping_manager.mappings
+          @field_analyzer = CsvTemplate::FieldAnalyzer.new(@mappings, admin_set_id)
+          @all_models = CsvTemplate::ModelLoader.new(Array.wrap(models)).models
+          @csv_builder = CsvTemplate::CsvBuilder.new(self)
+        end
+        def to_file(file_path: nil)
+          file_path ||= CsvTemplate::FilePathGenerator.default_path(@admin_set_id)
+          @csv_builder.write_to_file(file_path)
+          file_path
+        end
+        def to_csv_string
+          @csv_builder.generate_string
+        end
+        def field_metadata_for_all_models
+          @field_metadata ||= @all_models.each_with_object({}) do |model, hash|
+            field_list = @field_analyzer.find_or_create_field_list_for(model_name: model)
+            hash[model] = {
+              properties: field_list.dig(model, "properties") || [],
+              required_terms: field_list.dig(model, "required_terms") || [],
+              controlled_vocab_terms: field_list.dig(model, "controlled_vocab_terms") || []
+            }
+          end
+        end
+        def valid_headers_for_models
+          @valid_headers ||= begin
+            column_builder = CsvTemplate::ColumnBuilder.new(self)
+            all_columns = column_builder.all_columns
+            all_columns - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
+                             rescue StandardError => e
+                               Rails.logger.error("Error building valid headers: #{e.message}")
+                               standard_fields = %w[model source_identifier parent parents file]
+                               model_fields = field_metadata_for_all_models.values.flat_map { |m| m[:properties] }
+                               (standard_fields + model_fields).uniq
+          end
+        end
+      end
+    end
+  end
+end

data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb ADDED Viewed

@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+module Bulkrax
+  class CsvParser < ApplicationParser
+    module CsvValidation
+      extend ActiveSupport::Concern
+      included do
+        # Lightweight struct used to satisfy the CsvTemplate::ColumnBuilder
+        # interface without constructing a full template context.
+        ValidationContext = Struct.new(:mapping_manager, :field_analyzer, :all_models, :mappings, keyword_init: true)
+      end
+      class_methods do
+        include CsvValidationHelpers
+        # Validate a CSV (and optional zip) without a persisted Importer record.
+        #
+        # @param csv_file [File, ActionDispatch::Http::UploadedFile, String] path or file object
+        # @param zip_file [File, ActionDispatch::Http::UploadedFile, nil]
+        # @param admin_set_id [String, nil]
+        # @return [Hash] validation result compatible with the guided import UI
+        def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
+          raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer =
+            parse_csv_inputs(csv_file, admin_set_id)
+          all_ids          = csv_data.map { |r| r[:source_identifier] }.compact.to_set
+          header_issues    = check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer)
+          missing_required = header_issues[:missing_required]
+          notices, row_errors, file_validator, collections, works, file_sets =
+            run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id)
+          result = assemble_result(
+            headers: headers, missing_required: missing_required, header_issues: header_issues,
+            row_errors: row_errors, csv_data: csv_data, file_validator: file_validator,
+            collections: collections, works: works, file_sets: file_sets, notices: notices
+          )
+          apply_rights_statement_validation_override!(result, missing_required)
+          result[:raw_csv_data] = csv_data
+          result
+        end
+        private
+        # Builds notices, runs row validators, file validator, and hierarchy extraction.
+        # Returns [notices, row_errors, file_validator, collections, works, file_sets].
+        def run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id) # rubocop:disable Metrics/ParameterLists
+          find_record = build_find_record
+          notices     = []
+          append_missing_source_id!(missing_required, headers, source_id_key, csv_data.map { |r| r[:model] }.compact.uniq)
+          append_missing_model_notice!(notices, headers, csv_data)
+          row_errors                       = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices)
+          file_validator                   = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
+          collections, works, file_sets    = extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
+          [notices, row_errors, file_validator, collections, works, file_sets]
+        end
+        # Reads the CSV, resolves mappings, parses rows, and builds field metadata.
+        # Returns the values needed by all subsequent validation steps.
+        def parse_csv_inputs(csv_file, admin_set_id)
+          # Use CsvEntry.read_data so header normalisation is identical to a real import.
+          raw_csv = CsvEntry.read_data(csv_file)
+          headers = raw_csv.headers.map(&:to_s)
+          mapping_manager = CsvTemplate::MappingManager.new
+          mappings        = mapping_manager.mappings
+          source_id_key = resolve_validation_key(mapping_manager, flag: 'source_identifier', default: :source_identifier)
+          parent_key    = resolve_validation_key(mapping_manager, flag: 'related_parents_field_mapping',  default: :parents)
+          children_key  = resolve_validation_key(mapping_manager, flag: 'related_children_field_mapping', default: :children)
+          file_key      = resolve_validation_key(mapping_manager, key: 'file',                            default: :file)
+          csv_data       = parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
+          all_models     = csv_data.map { |r| r[:model] }.compact.uniq
+          all_models    |= [Bulkrax.default_work_type] if Bulkrax.default_work_type.present?
+          field_analyzer = CsvTemplate::FieldAnalyzer.new(mappings, admin_set_id)
+          field_metadata = build_validation_field_metadata(all_models, field_analyzer)
+          [raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer]
+        end
+        # Runs all header-level checks and returns a hash of results.
+        def check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer) # rubocop:disable Metrics/ParameterLists
+          all_models    = field_metadata.keys
+          valid_headers = build_valid_validation_headers(mapping_manager, field_analyzer,
+                                                         all_models, mappings, field_metadata)
+          suffixed      = headers.select { |h| h.match?(/_\d+\z/) }
+          valid_headers = (valid_headers + suffixed).uniq
+          {
+            missing_required: find_missing_required_headers(headers, field_metadata, mapping_manager),
+            unrecognized: find_unrecognized_validation_headers(headers, valid_headers),
+            empty_columns: find_empty_column_positions(headers, raw_csv)
+          }
+        end
+        def extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
+          extract_validation_items(
+            csv_data, all_ids, find_record,
+            parent_split_pattern: resolve_parent_split_pattern(mappings),
+            child_split_pattern: resolve_children_split_pattern(mappings) || '|'
+          )
+        end
+        # Runs all registered row validators and returns the collected errors.
+        def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices = []) # rubocop:disable Metrics/ParameterLists
+          context = {
+            errors: [],
+            warnings: [],
+            seen_ids: {},
+            all_ids: all_ids,
+            source_identifier: source_id_key.to_s,
+            parent_split_pattern: resolve_parent_split_pattern(mappings),
+            child_split_pattern: resolve_children_split_pattern(mappings),
+            parent_column: resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents'),
+            children_column: resolve_relationship_column(mappings, 'related_children_field_mapping', 'children'),
+            mappings: mappings,
+            field_metadata: field_metadata,
+            find_record_by_source_identifier: find_record,
+            relationship_graph: build_relationship_graph(csv_data, mappings),
+            notices: notices
+          }
+          csv_data.each_with_index do |record, index|
+            row_number = index + 2 # 1-indexed, plus header row
+            Bulkrax.csv_row_validators.each { |v| v.call(record, row_number, context) }
+          end
+          context[:errors]
+        end
+      end
+    end
+  end
+end