RubyGems - bulkrax - Versions diffs - 9.4.0 → 9.4.2 - Mend

bulkrax 9.4.0 → 9.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

checksums.yaml +4 -4
data/README.md +8 -2
data/app/assets/javascripts/bulkrax/datatables.js +43 -8
data/app/assets/javascripts/bulkrax/importers_stepper.js +221 -26
data/app/assets/stylesheets/bulkrax/stepper/_review.scss +14 -12
data/app/controllers/bulkrax/entries_controller.rb +2 -2
data/app/controllers/bulkrax/exporters_controller.rb +3 -3
data/app/controllers/bulkrax/guided_imports_controller.rb +3 -1
data/app/controllers/bulkrax/importers_controller.rb +5 -5
data/app/controllers/concerns/bulkrax/importer_file_handler.rb +1 -6
data/app/errors/bulkrax/unzip_error.rb +16 -0
data/app/jobs/bulkrax/importer_job.rb +40 -9
data/app/matchers/bulkrax/application_matcher.rb +5 -6
data/app/models/bulkrax/csv_entry.rb +1 -1
data/app/models/bulkrax/importer.rb +3 -16
data/app/parsers/bulkrax/application_parser.rb +50 -33
data/app/parsers/bulkrax/bagit_parser.rb +12 -0
data/app/parsers/bulkrax/csv_parser.rb +163 -49
data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +4 -1
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +10 -8
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +69 -36
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +9 -7
data/app/services/bulkrax/csv_template/file_validator.rb +1 -1
data/app/services/bulkrax/csv_template/mapping_manager.rb +15 -6
data/app/services/bulkrax/csv_template/split_formatter.rb +10 -3
data/app/services/bulkrax/split_pattern_coercion.rb +42 -0
data/app/services/bulkrax/stepper_response_formatter.rb +2 -1
data/app/services/bulkrax/validation_error_csv_builder.rb +36 -12
data/app/validators/bulkrax/csv_row/child_reference.rb +2 -1
data/app/validators/bulkrax/csv_row/parent_reference.rb +1 -1
data/app/validators/bulkrax/csv_row/required_values.rb +17 -3
data/app/views/bulkrax/exporters/edit.html.erb +1 -1
data/app/views/bulkrax/exporters/index.html.erb +3 -1
data/app/views/bulkrax/exporters/new.html.erb +1 -1
data/app/views/bulkrax/exporters/show.html.erb +1 -1
data/app/views/bulkrax/guided_imports/new.html.erb +7 -0
data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +3 -3
data/app/views/bulkrax/importers/index.html.erb +2 -0
data/app/views/bulkrax/importers/new.html.erb +1 -1
data/app/views/bulkrax/importers/show.html.erb +3 -1
data/app/views/bulkrax/shared/_datatable_i18n.html.erb +3 -0
data/config/locales/bulkrax.de.yml +95 -2
data/config/locales/bulkrax.en.yml +58 -2
data/config/locales/bulkrax.es.yml +95 -2
data/config/locales/bulkrax.fr.yml +95 -2
data/config/locales/bulkrax.it.yml +95 -2
data/config/locales/bulkrax.pt-BR.yml +95 -2
data/config/locales/bulkrax.zh.yml +96 -2
data/db/migrate/20260424081537_remove_parents_from_bulkrax_importer_runs.rb +9 -0
data/lib/bulkrax/version.rb +1 -1
data/lib/bulkrax.rb +15 -1
metadata +8 -4

data/app/jobs/bulkrax/importer_job.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Bulkrax
       import(importer, only_updates_since_last_import)
       update_current_run_counters(importer)
       schedule(importer) if importer.schedulable?
-    rescue ::CSV::MalformedCSVError => e
+    rescue ::CSV::MalformedCSVError, Bulkrax::UnzipError => e
       importer.set_status_info(e)
     end
@@ -26,18 +26,49 @@ module Bulkrax
       importer.import_objects
     end
+    # Populates `importer_unzip_path` with the uploaded file(s), leaving
+    # the working directory in the shape each parser expects.
+    #
+    # Dispatch by parser capability rather than class name:
+    # - CsvParser (and subclasses that replicate its shape) implements
+    #   `#unzip_with_primary_csv` and `#unzip_attachments_only`, which
+    #   place the primary CSV at root and attachments under `files/`.
+    # - Other parsers (XML, raw BagIt) inherit the base-class `#unzip`,
+    #   which extracts the zip verbatim.
+    # - The separate attachments-zip flow is CSV-only (guided import is
+    #   the only UI that produces it).
+    #
+    # A retry of this job gets a clean working directory: any prior
+    # extraction state from an earlier attempt is wiped, so nothing runs
+    # against partially-populated state.
     def unzip_imported_file(parser)
       return unless parser.file?
+      reset_unzip_path(parser)
+      import_file_path = parser.parser_fields['import_file_path']
+      attachments_zip_path = parser.parser_fields['attachments_zip_path']
       if parser.zip?
-        # we have a zip file, and we need to unzip it before we can import the files
-        parser.unzip(parser.parser_fields['import_file_path'])
-        parser.remove_spaces_from_filenames
-      elsif parser.zip_file?(parser.parser_fields['attachments_zip_path'])
-        # we have a separate csv and zip file. We need to unzip the zip file, and move the csv file to the unzip location before we can import the files
-        parser.unzip(parser.parser_fields['attachments_zip_path'])
-        parser.copy_file(parser.parser_fields['import_file_path'])
-        parser.remove_spaces_from_filenames
+        if parser.respond_to?(:unzip_with_primary_csv)
+          parser.unzip_with_primary_csv(import_file_path)
+        else
+          parser.unzip(import_file_path)
+        end
+      elsif parser.respond_to?(:unzip_attachments_only) && parser.zip_file?(attachments_zip_path)
+        parser.copy_file(import_file_path)
+        parser.unzip_attachments_only(attachments_zip_path)
+      else
+        parser.copy_file(import_file_path)
       end
+      parser.remove_spaces_from_filenames if parser.respond_to?(:remove_spaces_from_filenames)
+    end
+    def reset_unzip_path(parser)
+      path = parser.importer_unzip_path
+      FileUtils.rm_rf(path) if Dir.exist?(path)
+      FileUtils.mkdir_p(path)
     end
     def update_current_run_counters(importer)

data/app/matchers/bulkrax/application_matcher.rb CHANGED Viewed

@@ -33,12 +33,11 @@ module Bulkrax
     end
     def process_split
-      if self.split.is_a?(TrueClass)
-        @result = @result.split(Bulkrax.multi_value_element_split_on)
-      elsif self.split
-        @result = @result.split(Regexp.new(self.split))
-        @result = @result.map(&:strip).select(&:present?)
-      end
+      pattern = Bulkrax::SplitPatternCoercion.coerce(self.split)
+      return unless pattern
+      @result = @result.split(pattern)
+      @result = @result.map(&:strip).select(&:present?) unless self.split.is_a?(TrueClass)
     end
     def process_parse

data/app/models/bulkrax/csv_entry.rb CHANGED Viewed

@@ -165,7 +165,7 @@ module Bulkrax
     def add_file
       self.parsed_metadata['file'] ||= []
       if record['file']&.is_a?(String)
-        self.parsed_metadata['file'] = record['file'].split(Bulkrax.multi_value_element_split_on)
+        self.parsed_metadata['file'] = record['file'].split(Bulkrax::CsvParser.file_split_pattern)
       elsif record['file'].is_a?(Array)
         self.parsed_metadata['file'] = record['file']
       end

data/app/models/bulkrax/importer.rb CHANGED Viewed

@@ -266,22 +266,9 @@ module Bulkrax
     # end
     def importer_unzip_path(mkdir: false)
-      entry = parser_fields&.[]('import_file_path')
-      if entry.is_a?(String) && entry.end_with?('.zip') && File.file?(entry) && parser_fields["file_style"] != I18n.t('bulkrax.importer.xml.file_style.server_path')
-        unzip_dir = File.dirname(entry)
-        FileUtils.mkdir_p(unzip_dir) if mkdir
-        return unzip_dir
-      end
-      @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
-      return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
-      # turns "tmp/imports/tenant/import_1_20250122035229_1" to "tmp/imports/tenant/import_1_20250122035229"
-      base_importer_unzip_path = @importer_unzip_path.split('_')[0...-1].join('_')
-      # If we don't have an existing unzip path, we'll try and find it.
-      # Just in case there are multiple paths, we sort by the number at the end of the path and get the last one
-      @importer_unzip_path = Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last
+      path = File.join(parser.base_path, "import_#{path_string}")
+      FileUtils.mkdir_p(path) if mkdir
+      path
     end
     def errored_entries_csv_path

data/app/parsers/bulkrax/application_parser.rb CHANGED Viewed

@@ -430,39 +430,72 @@ module Bulkrax
       zip
     end
+    # Extracts a zip verbatim into {#importer_unzip_path}, preserving the zip's
+    # internal structure. Filters macOS junk (`__MACOSX/`, `.DS_Store`, `._*`).
+    # Parser subclasses that need to interpret the zip's structure (e.g.
+    # {Bulkrax::CsvParser#unzip_with_primary_csv}) should call a more specific
+    # method rather than this one.
     def unzip(file_to_unzip)
       return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+      dest_dir = importer_unzip_path(mkdir: true)
       Zip::File.open(file_to_unzip) do |zip_file|
-        real_entries = zip_file.reject { |e| macos_junk_entry?(e.name) }
-        top_level_dirs = real_entries.map { |e| e.name.split('/').first }.uniq
-        strip_prefix = top_level_dirs.size == 1 ? "#{top_level_dirs.first}/" : nil
-        dest_dir = importer_unzip_path(mkdir: true)
         zip_file.each do |entry|
           next unless entry.file?
           next if macos_junk_entry?(entry.name)
-          name = strip_prefix ? entry.name.delete_prefix(strip_prefix) : entry.name
-          next if name.empty?
-          dest_path = File.join(dest_dir, name)
+          reject_unsafe_entry!(entry.name)
+          dest_path = safe_extract_path(dest_dir, entry.name)
           FileUtils.mkdir_p(File.dirname(dest_path))
-          unless File.exist?(dest_path)
-            # rubyzip 2.x: extract(entry, absolute_dest_path)
-            # rubyzip 3.x: extract(entry, relative_name, destination_directory: dir)
-            if zip_file.method(:extract).arity == 2
-              zip_file.extract(entry, dest_path)
-            else
-              zip_file.extract(entry, name, destination_directory: dest_dir)
-            end
-          end
+          next if File.exist?(dest_path)
+          extract_zip_entry(zip_file, entry, dest_dir, entry.name, dest_path)
         end
       end
     end
+    # rubyzip 2.x: extract(entry, absolute_dest_path)
+    # rubyzip 3.x: extract(entry, relative_name, destination_directory: dir)
+    #
+    # Callers are responsible for passing a `dest_path` produced by
+    # {#safe_extract_path} so the write can't escape `dest_dir`.
+    def extract_zip_entry(zip_file, entry, dest_dir, relative_name, dest_path)
+      if zip_file.method(:extract).arity == 2
+        zip_file.extract(entry, dest_path)
+      else
+        zip_file.extract(entry, relative_name, destination_directory: dest_dir)
+      end
+    end
     def macos_junk_entry?(name)
       name.start_with?('__MACOSX/') || name.split('/').any? { |part| part == '.DS_Store' || part.start_with?('._') }
     end
+    # Zip Slip preflight — reject entries whose names are obviously unsafe
+    # (absolute paths, `..` segments) before we touch the filesystem.
+    # {#safe_extract_path} is the final line of defense; this check just
+    # fails fast with a clear message.
+    #
+    # @raise [Bulkrax::UnzipError] if the entry name is unsafe
+    def reject_unsafe_entry!(name)
+      return unless name.start_with?('/') || name.split('/').include?('..')
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.unsafe_entry', name: name)
+    end
+    # Zip Slip chokepoint. Resolves `relative_dest` against `dest_dir` and
+    # returns the absolute destination path — but only if it stays inside
+    # `dest_dir`. Callers must use this value rather than building their
+    # own path with `File.join`, so the path returned is always safe by
+    # construction.
+    #
+    # @return [String] absolute destination path, validated to be inside `dest_dir`
+    # @raise  [Bulkrax::UnzipError] if `relative_dest` escapes `dest_dir`
+    def safe_extract_path(dest_dir, relative_dest)
+      expanded_dest_dir = File.expand_path(dest_dir)
+      dest_path = File.expand_path(relative_dest.to_s, expanded_dest_dir)
+      return dest_path if dest_path == expanded_dest_dir
+      return dest_path if dest_path.start_with?("#{expanded_dest_dir}#{File::SEPARATOR}")
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.unsafe_entry', name: relative_dest)
+    end
     def copy_file(file_to_copy)
       destination = File.join(importer_unzip_path(mkdir: true), File.basename(file_to_copy))
       FileUtils.cp(file_to_copy, destination)
@@ -475,21 +508,6 @@ module Bulkrax
       raise "Failed to extract #{file_to_untar}" unless result
     end
-    # File names referenced in CSVs have spaces replaced with underscores
-    # @see Bulkrax::CsvParser#file_paths
-    def remove_spaces_from_filenames
-      files = Dir.glob(File.join(importer_unzip_path, 'files', '*')).uniq
-      files_with_spaces = files.select { |f| f.split('/').last.match?(' ') }
-      return if files_with_spaces.blank?
-      files_with_spaces.map! { |path| Pathname.new(path) }
-      files_with_spaces.each do |path|
-        filename = path.basename
-        filename_without_spaces = filename.to_s.tr(' ', '_')
-        path.rename(File.join(path.dirname, filename_without_spaces))
-      end
-    end
     def zip
       FileUtils.mkdir_p(exporter_export_zip_path)
@@ -515,7 +533,6 @@ module Bulkrax
     # @return [String]
     def real_import_file_path
-      return importer_unzip_path if file? && zip?
       parser_fields['import_file_path']
     end
   end

data/app/parsers/bulkrax/bagit_parser.rb CHANGED Viewed

@@ -25,6 +25,18 @@ unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
         @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
       end
+      # BagIt archives are not CSV imports: they don't contain a primary
+      # CSV at a shallowest level, and their structure (bagit.txt + data/
+      # + manifests) must be preserved verbatim. Override both CSV-flavored
+      # unzip entry points to use the base-class verbatim extraction.
+      def unzip_with_primary_csv(file_to_unzip)
+        unzip(file_to_unzip)
+      end
+      def unzip_attachments_only(file_to_unzip)
+        unzip(file_to_unzip)
+      end
       # Take a random sample of 10 metadata_paths and work out the import fields from that
       def import_fields
         raise StandardError, 'No metadata files were found' if metadata_paths.blank?

data/app/parsers/bulkrax/csv_parser.rb CHANGED Viewed

@@ -13,6 +13,16 @@ module Bulkrax
       true
     end
+    # @return [Regexp] the pattern String#split should use on a `file` cell.
+    #   Honours the `file` mapping's `split:` when set, otherwise falls back
+    #   to {Bulkrax.multi_value_element_split_on}.
+    def self.file_split_pattern
+      file_mapping = Bulkrax.field_mappings.dig(to_s, 'file') ||
+                     Bulkrax.field_mappings.dig(to_s, :file) || {}
+      split_value  = file_mapping['split'] || file_mapping[:split]
+      Bulkrax::SplitPatternCoercion.coerce(split_value) || Bulkrax.multi_value_element_split_on
+    end
     def records(_opts = {})
       return @records if @records.present?
@@ -352,20 +362,13 @@ module Bulkrax
       raise StandardError, 'No records were found' if records.blank?
       return [] if importerexporter.metadata_only?
+      # Compute once — these don't vary per record.
+      file_mapping  = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
+      split_pattern = self.class.file_split_pattern
+      files_dir     = path_to_files
       @file_paths ||= records.map do |r|
-        file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
         next if r[file_mapping].blank?
-        split_value = Bulkrax.field_mappings.dig(self.class.to_s, :file, :split)
-        split_pattern = case split_value
-                        when Regexp
-                          split_value
-                        when String
-                          Regexp.new(split_value)
-                        else
-                          Bulkrax.multi_value_element_split_on
-                        end
-        files_dir = path_to_files
         raise StandardError, "Record references local files but no files directory could be resolved from the import path" if files_dir.nil?
         r[file_mapping].split(split_pattern).map do |f|
@@ -379,47 +382,161 @@ module Bulkrax
       end.flatten.compact.uniq
     end
-    # Retrieve the path where we expect to find the files
+    # Retrieve the path where we expect to find the files for this import.
+    # After {ImporterJob#unzip_imported_file} runs (zip cases), attachments
+    # live under `{importer_unzip_path}/files/`. For a server-path-style
+    # import (the user specified a CSV file path with a sibling `files/`
+    # directory on disk), resolve relative to the CSV's directory instead.
+    #
+    # When called with `filename:`, returns the full path to that file if
+    # it exists on disk, or `nil` otherwise — callers like
+    # `Bulkrax::FileSetEntryBehavior#add_path_to_file` rely on the nil
+    # sentinel to fall back to the raw filename in their error messages.
+    #
+    # When called with no filename, returns the `files/` directory itself
+    # (only when that directory exists on disk — else `nil` so callers can
+    # raise a clear "no files directory" error).
     def path_to_files(**args)
       filename = args.fetch(:filename, '')
+      base_dir = files_dir
+      return base_dir if filename.blank? && Dir.exist?(base_dir)
+      return nil if filename.blank?
+      candidate = File.join(base_dir, filename)
+      candidate if File.exist?(candidate)
+    end
+    # Extracts a zip that contains a primary CSV. The primary CSV lands at
+    # the root of {#importer_unzip_path}; every other entry lands under
+    # {#importer_unzip_path}/files/, preserving its path relative to the
+    # primary CSV's directory.
+    #
+    # Primary-CSV selection matches the guided-import validator's rule
+    # (see {Bulkrax::ImporterFileHandler#locate_csv_entry_in_zip}): the CSV
+    # entry at the shallowest directory level. Visible errors are raised on
+    # zero CSVs or multiple CSVs at the shallowest level.
+    #
+    # @param file_to_unzip [String] absolute path to a .zip
+    # @raise [Bulkrax::UnzipError] on no CSV or ambiguous CSVs
+    def unzip_with_primary_csv(file_to_unzip)
+      dest_dir = importer_unzip_path(mkdir: true)
+      Zip::File.open(file_to_unzip) do |zip_file|
+        entries = real_zip_entries(zip_file)
+        primary = select_primary_csv!(entries)
+        primary_dir = File.dirname(primary.name)
+        entries.each do |entry|
+          if entry == primary
+            extract_to(zip_file, entry, dest_dir, File.basename(entry.name))
+          else
+            extract_to(zip_file, entry, dest_dir, File.join('files', relative_to(primary_dir, entry.name)))
+          end
+        end
+      end
+    end
-      return @path_to_files if @path_to_files.present? && filename.blank?
-      # The zip file could be either the main import file, or a separate attachments zip file.
-      # We want to check for both of those before we determine the path to the files.
-      have_zip_file = zip? || (parser_fields['attachments_zip_path'] && zip_file?(parser_fields['attachments_zip_path']))
-      @path_to_files = File.join(
-          have_zip_file ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
-        )
-      return @path_to_files if File.exist?(@path_to_files)
-      # TODO: This method silently returns nil if there is no file & no zip file
-      File.join(importer_unzip_path, 'files', filename) if file? && zip?
+    # Extracts a zip that accompanies a separately-uploaded CSV. Every
+    # entry lands under {#importer_unzip_path}/files/ — including any
+    # CSVs inside the zip, which are treated as attachments since the
+    # primary CSV was uploaded outside the zip. Strips a single top-level
+    # wrapper directory if present, so users can zip either the contents
+    # or the enclosing folder.
+    #
+    # @param file_to_unzip [String] absolute path to a .zip
+    def unzip_attachments_only(file_to_unzip)
+      dest_dir = importer_unzip_path(mkdir: true)
+      Zip::File.open(file_to_unzip) do |zip_file|
+        entries = real_zip_entries(zip_file)
+        wrapper = single_top_level_wrapper(entries)
+        entries.each do |entry|
+          relative = wrapper ? entry.name.delete_prefix("#{wrapper}/") : entry.name
+          next if relative.empty?
+          extract_to(zip_file, entry, dest_dir, File.join('files', relative))
+        end
+      end
     end
-    def unzip(file_to_unzip)
-      super
-      normalize_unzipped_files_structure(importer_unzip_path)
+    # File names referenced in CSVs have spaces replaced with underscores.
+    # @see #file_paths
+    def remove_spaces_from_filenames
+      files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
+      files_with_spaces = files.select { |f| f.split('/').last.include?(' ') }
+      return if files_with_spaces.blank?
+      files_with_spaces.map! { |path| Pathname.new(path) }
+      files_with_spaces.each do |path|
+        filename_without_spaces = path.basename.to_s.tr(' ', '_')
+        path.rename(File.join(path.dirname, filename_without_spaces))
+      end
     end
     private
-    # Ensure files extracted from a zip always land in a `files/` subdirectory
-    # regardless of how the zip was structured. If files were extracted directly
-    # into dest_dir (flat zip with no `files/` folder), move them into
-    # dest_dir/files/ so that path_to_files can reliably locate them.
-    def normalize_unzipped_files_structure(dest_dir)
-      flat_files = Dir.glob(File.join(dest_dir, '*')).select { |f| File.file?(f) && !f.end_with?('.csv') }
-      return if flat_files.empty?
-      files_dir = File.join(dest_dir, 'files')
-      FileUtils.mkdir_p(files_dir)
-      flat_files.each do |f|
-        dest = File.join(files_dir, File.basename(f))
-        FileUtils.mv(f, dest) unless File.exist?(dest)
+    # Memoized base directory under which import attachments live. Kept
+    # separate from `#path_to_files`' per-filename return value to avoid
+    # cross-contamination between directory lookups and file lookups.
+    def files_dir
+      @files_dir ||= begin
+        has_attachments_zip = parser_fields['attachments_zip_path'].present? && zip_file?(parser_fields['attachments_zip_path'])
+        base = zip? || has_attachments_zip ? importer_unzip_path : File.dirname(import_file_path)
+        File.join(base, 'files')
       end
     end
+    # Returns zip entries filtered down to real files (no directories, no
+    # macOS junk). Raises {Bulkrax::UnzipError} if any entry's name would
+    # escape the destination directory (Zip Slip).
+    def real_zip_entries(zip_file)
+      entries = zip_file.entries.select { |e| e.file? && !macos_junk_entry?(e.name) }
+      entries.each { |e| reject_unsafe_entry!(e.name) }
+      entries
+    end
+    # Picks the single primary CSV from zip entries, enforcing the
+    # shallowest-level rule. Raises {Bulkrax::UnzipError} on failure.
+    def select_primary_csv!(entries)
+      csvs = entries.select { |e| e.name.end_with?('.csv') }
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.no_csv') if csvs.empty?
+      by_depth = csvs.group_by { |e| e.name.count('/') }
+      shallowest = by_depth[by_depth.keys.min]
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.multiple_csv') if shallowest.size > 1
+      shallowest.first
+    end
+    # If every entry shares a single top-level directory, returns that
+    # directory name; otherwise nil.
+    def single_top_level_wrapper(entries)
+      tops = entries.map { |e| e.name.split('/').first }.uniq
+      return nil unless tops.size == 1
+      # If the single top segment is a file (no slashes in the entry), not a dir,
+      # there's no wrapper to strip.
+      return nil if entries.any? { |e| e.name == tops.first }
+      tops.first
+    end
+    # Returns `path` with `prefix/` removed from the front, if present, and
+    # a leading `files/` segment also stripped so callers can join under
+    # `files/` without doubling when the zip already uses that convention.
+    def relative_to(prefix, path)
+      remaining = prefix == '.' || prefix.empty? ? path : path.delete_prefix("#{prefix}/")
+      remaining.delete_prefix('files/')
+    end
+    # Extracts a zip entry to `dest_dir/relative_dest`. Creates intermediate
+    # directories and honors the rubyzip 2/3 extract-method signature.
+    # The destination path is validated by {#safe_extract_path} — an unsafe
+    # `relative_dest` raises {Bulkrax::UnzipError} before any write.
+    def extract_to(zip_file, entry, dest_dir, relative_dest)
+      dest_path = safe_extract_path(dest_dir, relative_dest)
+      FileUtils.mkdir_p(File.dirname(dest_path))
+      return if File.exist?(dest_path)
+      extract_zip_entry(zip_file, entry, dest_dir, relative_dest, dest_path)
+    end
     def unique_collection_identifier(collection_hash)
       entry_uid = collection_hash[source_identifier]
       entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
@@ -434,16 +551,13 @@ module Bulkrax
     # Override to return the first CSV in the path, if a zip file is supplied
     # We expect a single CSV at the top level of the zip in the CSVParser
     # but we are willing to go look for it if need be
+    # When the user uploaded a zip containing a CSV, the job extracts the
+    # primary CSV to the root of `importer_unzip_path` (see
+    # {#unzip_with_primary_csv}). Any non-primary CSVs live under `files/`
+    # and are treated as attachments, so a shallow glob suffices.
     def real_import_file_path
-      return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip?
+      return Dir["#{importer_unzip_path}/*.csv"].first if file? && zip?
       parser_fields['import_file_path']
     end
-    # If there are CSVs that are meant to be attachments in the files directory,
-    # we don't want to consider them as the import CSV
-    def in_files_dir?(path)
-      File.dirname(path).ends_with?('files')
-    end
   end
 end

data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb CHANGED Viewed

@@ -27,7 +27,10 @@ module Bulkrax
         def initialize(models: nil, admin_set_id: nil)
           @admin_set_id = admin_set_id
-          @mapping_manager = CsvTemplate::MappingManager.new
+          # Template generation excludes system-maintained fields (generated:
+          # true) so users don't see columns like date_uploaded, depositor,
+          # etc. on the downloadable template.
+          @mapping_manager = CsvTemplate::MappingManager.new(include_generated: false)
           @mappings = @mapping_manager.mappings
           @field_analyzer = CsvTemplate::FieldAnalyzer.new(@mappings, admin_set_id)
           @all_models = CsvTemplate::ModelLoader.new(Array.wrap(models)).models

data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb CHANGED Viewed

@@ -28,14 +28,13 @@ module Bulkrax
           header_issues    = check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer)
           missing_required = header_issues[:missing_required]
           notices, row_errors, file_validator, collections, works, file_sets =
-            run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id)
+            run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id, mapping_manager: mapping_manager)
           result = assemble_result(
             headers: headers, missing_required: missing_required, header_issues: header_issues,
             row_errors: row_errors, csv_data: csv_data, file_validator: file_validator,
             collections: collections, works: works, file_sets: file_sets, notices: notices
           )
-          apply_rights_statement_validation_override!(result, missing_required)
           result[:raw_csv_data] = csv_data
           result
         end
@@ -44,13 +43,13 @@ module Bulkrax
         # Builds notices, runs row validators, file validator, and hierarchy extraction.
         # Returns [notices, row_errors, file_validator, collections, works, file_sets].
-        def run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id) # rubocop:disable Metrics/ParameterLists
+        def run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id, mapping_manager: nil) # rubocop:disable Metrics/ParameterLists
           find_record = build_find_record
           notices     = []
           append_missing_source_id!(missing_required, headers, source_id_key, csv_data.map { |r| r[:model] }.compact.uniq)
           append_missing_model_notice!(notices, headers, csv_data)
-          row_errors                       = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices)
+          row_errors                       = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices, mapping_manager: mapping_manager)
           file_validator                   = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
           collections, works, file_sets    = extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
           [notices, row_errors, file_validator, collections, works, file_sets]
@@ -72,7 +71,7 @@ module Bulkrax
           file_key      = resolve_validation_key(mapping_manager, key: 'file',                            default: :file)
           csv_data       = parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
-          all_models     = csv_data.map { |r| r[:model] }.compact.uniq
+          all_models     = csv_data.map { |r| r[:model].to_s }.reject(&:blank?).uniq
           all_models    |= [Bulkrax.default_work_type] if Bulkrax.default_work_type.present?
           field_analyzer = CsvTemplate::FieldAnalyzer.new(mappings, admin_set_id)
           field_metadata = build_validation_field_metadata(all_models, field_analyzer)
@@ -90,7 +89,9 @@ module Bulkrax
           {
             missing_required: find_missing_required_headers(headers, field_metadata, mapping_manager),
-            unrecognized: find_unrecognized_validation_headers(headers, valid_headers),
+            unrecognized: find_unrecognized_validation_headers(headers, valid_headers,
+                                                               mapping_manager: mapping_manager,
+                                                               field_metadata: field_metadata),
             empty_columns: find_empty_column_positions(headers, raw_csv)
           }
         end
@@ -99,12 +100,12 @@ module Bulkrax
           extract_validation_items(
             csv_data, all_ids, find_record,
             parent_split_pattern: resolve_parent_split_pattern(mappings),
-            child_split_pattern: resolve_children_split_pattern(mappings) || '|'
+            child_split_pattern: resolve_children_split_pattern(mappings) || Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
           )
         end
         # Runs all registered row validators and returns the collected errors.
-        def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices = []) # rubocop:disable Metrics/ParameterLists
+        def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices = [], mapping_manager: nil) # rubocop:disable Metrics/ParameterLists
           context = {
             errors: [],
             warnings: [],
@@ -116,6 +117,7 @@ module Bulkrax
             parent_column: resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents'),
             children_column: resolve_relationship_column(mappings, 'related_children_field_mapping', 'children'),
             mappings: mappings,
+            mapping_manager: mapping_manager,
             field_metadata: field_metadata,
             find_record_by_source_identifier: find_record,
             relationship_graph: build_relationship_graph(csv_data, mappings),