RubyGems - bulkrax - Versions diffs - 9.4.0 → 9.4.1 - Mend

bulkrax 9.4.0 → 9.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/app/controllers/concerns/bulkrax/importer_file_handler.rb +1 -6
data/app/errors/bulkrax/unzip_error.rb +16 -0
data/app/jobs/bulkrax/importer_job.rb +40 -9
data/app/models/bulkrax/importer.rb +3 -16
data/app/parsers/bulkrax/application_parser.rb +50 -33
data/app/parsers/bulkrax/bagit_parser.rb +12 -0
data/app/parsers/bulkrax/csv_parser.rb +148 -37
data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +1 -1
data/config/locales/bulkrax.de.yml +6 -2
data/config/locales/bulkrax.en.yml +6 -2
data/config/locales/bulkrax.es.yml +6 -2
data/config/locales/bulkrax.fr.yml +6 -2
data/config/locales/bulkrax.it.yml +6 -2
data/config/locales/bulkrax.pt-BR.yml +6 -2
data/config/locales/bulkrax.zh.yml +6 -2
data/lib/bulkrax/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 05440b3212ae1571e13cd9ee789c0222b8113a2cf717e4fc84a14f3c2aa2627a
-  data.tar.gz: 99d843e51cb362be57e279246fc63937b0772429368142a5a383e7b0a1ccfa4a
+  metadata.gz: 420e0b83f78ad1c411b0532bda121bebe74a651a9d1abec51549273896a00bcb
+  data.tar.gz: 892e143d2de6c714121804bf547b9473545c1071ecce6ef9a6269cb60eeaf66f
 SHA512:
-  metadata.gz: 7c8d2bd4ec608ceea8b567ebf39972c3e14b300731b8202d133e9a541e29995e69ed6f1dc1528ed5d0c877e38664f05260a60147985da08b65a8c3630cb80ebb
-  data.tar.gz: b4a1956d149a23c0bfc299ef3235a36c1d680784283957cb8fe56b06a3745f94d810318a58c9b791a193a76cd218f78129d8e8773b0ff78f8c73bb38d9e6975f
+  metadata.gz: 497fe999aa3d39f3e7281b5e743a75d5b6c60ba93d0a7a40bd63bdfe248b0c35e52dffaa9b8aebe59489e68999ea9a0e22826ed30ffea0f2d8927cfaa61852d5
+  data.tar.gz: 176a04163d610ad5241b96ecd107ae4bc79e64dddaff94f2fa80b7b93ce48951934ffb8f4a3438169c185f7c5a6e9f468aa8371c3c1c3d282d7c9d59ebfde946

data/app/controllers/concerns/bulkrax/importer_file_handler.rb CHANGED Viewed

@@ -121,12 +121,7 @@ module Bulkrax
       csv_by_depth = get_directory_depth_for_each_csv(csv_entries)
       csvs_at_level = determine_csvs_at_shallowest_level(csv_by_depth)
-      csvs_by_directory = csvs_at_level.group_by { |entry| File.dirname(entry.name) }
-      csvs_by_directory.each do |_dir, csvs|
-        return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv_same_dir')) if csvs.count > 1
-      end
-      return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv_same_level')) if csvs_at_level.size > 1
+      return StepperResponseFormatter.error(message: I18n.t('bulkrax.importer.guided_import.validation.multiple_csv')) if csvs_at_level.size > 1
       csvs_at_level.first
     end

data/app/errors/bulkrax/unzip_error.rb ADDED Viewed

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+module Bulkrax
+  # Raised when a zip cannot be safely or meaningfully extracted during
+  # import. Covered scenarios include:
+  #
+  # - A single upload zip has no CSV at any level.
+  # - A single upload zip has multiple CSVs at its shallowest level
+  #   (primary CSV cannot be determined).
+  # - A zip entry's name would escape the destination directory
+  #   (Zip Slip: absolute paths, `..` traversal, etc.).
+  #
+  # Defined in its own file so Zeitwerk can autoload the constant by name
+  # from any parser or job that raises or rescues it.
+  class UnzipError < StandardError; end
+end

data/app/jobs/bulkrax/importer_job.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Bulkrax
       import(importer, only_updates_since_last_import)
       update_current_run_counters(importer)
       schedule(importer) if importer.schedulable?
-    rescue ::CSV::MalformedCSVError => e
+    rescue ::CSV::MalformedCSVError, Bulkrax::UnzipError => e
       importer.set_status_info(e)
     end
@@ -26,18 +26,49 @@ module Bulkrax
       importer.import_objects
     end
+    # Populates `importer_unzip_path` with the uploaded file(s), leaving
+    # the working directory in the shape each parser expects.
+    #
+    # Dispatch by parser capability rather than class name:
+    # - CsvParser (and subclasses that replicate its shape) implements
+    #   `#unzip_with_primary_csv` and `#unzip_attachments_only`, which
+    #   place the primary CSV at root and attachments under `files/`.
+    # - Other parsers (XML, raw BagIt) inherit the base-class `#unzip`,
+    #   which extracts the zip verbatim.
+    # - The separate attachments-zip flow is CSV-only (guided import is
+    #   the only UI that produces it).
+    #
+    # A retry of this job gets a clean working directory: any prior
+    # extraction state from an earlier attempt is wiped, so nothing runs
+    # against partially-populated state.
     def unzip_imported_file(parser)
       return unless parser.file?
+      reset_unzip_path(parser)
+      import_file_path = parser.parser_fields['import_file_path']
+      attachments_zip_path = parser.parser_fields['attachments_zip_path']
       if parser.zip?
-        # we have a zip file, and we need to unzip it before we can import the files
-        parser.unzip(parser.parser_fields['import_file_path'])
-        parser.remove_spaces_from_filenames
-      elsif parser.zip_file?(parser.parser_fields['attachments_zip_path'])
-        # we have a separate csv and zip file. We need to unzip the zip file, and move the csv file to the unzip location before we can import the files
-        parser.unzip(parser.parser_fields['attachments_zip_path'])
-        parser.copy_file(parser.parser_fields['import_file_path'])
-        parser.remove_spaces_from_filenames
+        if parser.respond_to?(:unzip_with_primary_csv)
+          parser.unzip_with_primary_csv(import_file_path)
+        else
+          parser.unzip(import_file_path)
+        end
+      elsif parser.respond_to?(:unzip_attachments_only) && parser.zip_file?(attachments_zip_path)
+        parser.copy_file(import_file_path)
+        parser.unzip_attachments_only(attachments_zip_path)
+      else
+        parser.copy_file(import_file_path)
       end
+      parser.remove_spaces_from_filenames if parser.respond_to?(:remove_spaces_from_filenames)
+    end
+    def reset_unzip_path(parser)
+      path = parser.importer_unzip_path
+      FileUtils.rm_rf(path) if Dir.exist?(path)
+      FileUtils.mkdir_p(path)
     end
     def update_current_run_counters(importer)

data/app/models/bulkrax/importer.rb CHANGED Viewed

@@ -266,22 +266,9 @@ module Bulkrax
     # end
     def importer_unzip_path(mkdir: false)
-      entry = parser_fields&.[]('import_file_path')
-      if entry.is_a?(String) && entry.end_with?('.zip') && File.file?(entry) && parser_fields["file_style"] != I18n.t('bulkrax.importer.xml.file_style.server_path')
-        unzip_dir = File.dirname(entry)
-        FileUtils.mkdir_p(unzip_dir) if mkdir
-        return unzip_dir
-      end
-      @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
-      return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
-      # turns "tmp/imports/tenant/import_1_20250122035229_1" to "tmp/imports/tenant/import_1_20250122035229"
-      base_importer_unzip_path = @importer_unzip_path.split('_')[0...-1].join('_')
-      # If we don't have an existing unzip path, we'll try and find it.
-      # Just in case there are multiple paths, we sort by the number at the end of the path and get the last one
-      @importer_unzip_path = Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last
+      path = File.join(parser.base_path, "import_#{path_string}")
+      FileUtils.mkdir_p(path) if mkdir
+      path
     end
     def errored_entries_csv_path

data/app/parsers/bulkrax/application_parser.rb CHANGED Viewed

@@ -430,39 +430,72 @@ module Bulkrax
       zip
     end
+    # Extracts a zip verbatim into {#importer_unzip_path}, preserving the zip's
+    # internal structure. Filters macOS junk (`__MACOSX/`, `.DS_Store`, `._*`).
+    # Parser subclasses that need to interpret the zip's structure (e.g.
+    # {Bulkrax::CsvParser#unzip_with_primary_csv}) should call a more specific
+    # method rather than this one.
     def unzip(file_to_unzip)
       return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+      dest_dir = importer_unzip_path(mkdir: true)
       Zip::File.open(file_to_unzip) do |zip_file|
-        real_entries = zip_file.reject { |e| macos_junk_entry?(e.name) }
-        top_level_dirs = real_entries.map { |e| e.name.split('/').first }.uniq
-        strip_prefix = top_level_dirs.size == 1 ? "#{top_level_dirs.first}/" : nil
-        dest_dir = importer_unzip_path(mkdir: true)
         zip_file.each do |entry|
           next unless entry.file?
           next if macos_junk_entry?(entry.name)
-          name = strip_prefix ? entry.name.delete_prefix(strip_prefix) : entry.name
-          next if name.empty?
-          dest_path = File.join(dest_dir, name)
+          reject_unsafe_entry!(entry.name)
+          dest_path = safe_extract_path(dest_dir, entry.name)
           FileUtils.mkdir_p(File.dirname(dest_path))
-          unless File.exist?(dest_path)
-            # rubyzip 2.x: extract(entry, absolute_dest_path)
-            # rubyzip 3.x: extract(entry, relative_name, destination_directory: dir)
-            if zip_file.method(:extract).arity == 2
-              zip_file.extract(entry, dest_path)
-            else
-              zip_file.extract(entry, name, destination_directory: dest_dir)
-            end
-          end
+          next if File.exist?(dest_path)
+          extract_zip_entry(zip_file, entry, dest_dir, entry.name, dest_path)
         end
       end
     end
+    # rubyzip 2.x: extract(entry, absolute_dest_path)
+    # rubyzip 3.x: extract(entry, relative_name, destination_directory: dir)
+    #
+    # Callers are responsible for passing a `dest_path` produced by
+    # {#safe_extract_path} so the write can't escape `dest_dir`.
+    def extract_zip_entry(zip_file, entry, dest_dir, relative_name, dest_path)
+      if zip_file.method(:extract).arity == 2
+        zip_file.extract(entry, dest_path)
+      else
+        zip_file.extract(entry, relative_name, destination_directory: dest_dir)
+      end
+    end
     def macos_junk_entry?(name)
       name.start_with?('__MACOSX/') || name.split('/').any? { |part| part == '.DS_Store' || part.start_with?('._') }
     end
+    # Zip Slip preflight — reject entries whose names are obviously unsafe
+    # (absolute paths, `..` segments) before we touch the filesystem.
+    # {#safe_extract_path} is the final line of defense; this check just
+    # fails fast with a clear message.
+    #
+    # @raise [Bulkrax::UnzipError] if the entry name is unsafe
+    def reject_unsafe_entry!(name)
+      return unless name.start_with?('/') || name.split('/').include?('..')
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.unsafe_entry', name: name)
+    end
+    # Zip Slip chokepoint. Resolves `relative_dest` against `dest_dir` and
+    # returns the absolute destination path — but only if it stays inside
+    # `dest_dir`. Callers must use this value rather than building their
+    # own path with `File.join`, so the path returned is always safe by
+    # construction.
+    #
+    # @return [String] absolute destination path, validated to be inside `dest_dir`
+    # @raise  [Bulkrax::UnzipError] if `relative_dest` escapes `dest_dir`
+    def safe_extract_path(dest_dir, relative_dest)
+      expanded_dest_dir = File.expand_path(dest_dir)
+      dest_path = File.expand_path(relative_dest.to_s, expanded_dest_dir)
+      return dest_path if dest_path == expanded_dest_dir
+      return dest_path if dest_path.start_with?("#{expanded_dest_dir}#{File::SEPARATOR}")
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.unsafe_entry', name: relative_dest)
+    end
     def copy_file(file_to_copy)
       destination = File.join(importer_unzip_path(mkdir: true), File.basename(file_to_copy))
       FileUtils.cp(file_to_copy, destination)
@@ -475,21 +508,6 @@ module Bulkrax
       raise "Failed to extract #{file_to_untar}" unless result
     end
-    # File names referenced in CSVs have spaces replaced with underscores
-    # @see Bulkrax::CsvParser#file_paths
-    def remove_spaces_from_filenames
-      files = Dir.glob(File.join(importer_unzip_path, 'files', '*')).uniq
-      files_with_spaces = files.select { |f| f.split('/').last.match?(' ') }
-      return if files_with_spaces.blank?
-      files_with_spaces.map! { |path| Pathname.new(path) }
-      files_with_spaces.each do |path|
-        filename = path.basename
-        filename_without_spaces = filename.to_s.tr(' ', '_')
-        path.rename(File.join(path.dirname, filename_without_spaces))
-      end
-    end
     def zip
       FileUtils.mkdir_p(exporter_export_zip_path)
@@ -515,7 +533,6 @@ module Bulkrax
     # @return [String]
     def real_import_file_path
-      return importer_unzip_path if file? && zip?
       parser_fields['import_file_path']
     end
   end

data/app/parsers/bulkrax/bagit_parser.rb CHANGED Viewed

@@ -25,6 +25,18 @@ unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
         @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
       end
+      # BagIt archives are not CSV imports: they don't contain a primary
+      # CSV at a shallowest level, and their structure (bagit.txt + data/
+      # + manifests) must be preserved verbatim. Override both CSV-flavored
+      # unzip entry points to use the base-class verbatim extraction.
+      def unzip_with_primary_csv(file_to_unzip)
+        unzip(file_to_unzip)
+      end
+      def unzip_attachments_only(file_to_unzip)
+        unzip(file_to_unzip)
+      end
       # Take a random sample of 10 metadata_paths and work out the import fields from that
       def import_fields
         raise StandardError, 'No metadata files were found' if metadata_paths.blank?

data/app/parsers/bulkrax/csv_parser.rb CHANGED Viewed

@@ -379,47 +379,161 @@ module Bulkrax
       end.flatten.compact.uniq
     end
-    # Retrieve the path where we expect to find the files
+    # Retrieve the path where we expect to find the files for this import.
+    # After {ImporterJob#unzip_imported_file} runs (zip cases), attachments
+    # live under `{importer_unzip_path}/files/`. For a server-path-style
+    # import (the user specified a CSV file path with a sibling `files/`
+    # directory on disk), resolve relative to the CSV's directory instead.
+    #
+    # When called with `filename:`, returns the full path to that file if
+    # it exists on disk, or `nil` otherwise — callers like
+    # `Bulkrax::FileSetEntryBehavior#add_path_to_file` rely on the nil
+    # sentinel to fall back to the raw filename in their error messages.
+    #
+    # When called with no filename, returns the `files/` directory itself
+    # (only when that directory exists on disk — else `nil` so callers can
+    # raise a clear "no files directory" error).
     def path_to_files(**args)
       filename = args.fetch(:filename, '')
+      base_dir = files_dir
+      return base_dir if filename.blank? && Dir.exist?(base_dir)
+      return nil if filename.blank?
+      candidate = File.join(base_dir, filename)
+      candidate if File.exist?(candidate)
+    end
+    # Extracts a zip that contains a primary CSV. The primary CSV lands at
+    # the root of {#importer_unzip_path}; every other entry lands under
+    # {#importer_unzip_path}/files/, preserving its path relative to the
+    # primary CSV's directory.
+    #
+    # Primary-CSV selection matches the guided-import validator's rule
+    # (see {Bulkrax::ImporterFileHandler#locate_csv_entry_in_zip}): the CSV
+    # entry at the shallowest directory level. Visible errors are raised on
+    # zero CSVs or multiple CSVs at the shallowest level.
+    #
+    # @param file_to_unzip [String] absolute path to a .zip
+    # @raise [Bulkrax::UnzipError] on no CSV or ambiguous CSVs
+    def unzip_with_primary_csv(file_to_unzip)
+      dest_dir = importer_unzip_path(mkdir: true)
+      Zip::File.open(file_to_unzip) do |zip_file|
+        entries = real_zip_entries(zip_file)
+        primary = select_primary_csv!(entries)
+        primary_dir = File.dirname(primary.name)
+        entries.each do |entry|
+          if entry == primary
+            extract_to(zip_file, entry, dest_dir, File.basename(entry.name))
+          else
+            extract_to(zip_file, entry, dest_dir, File.join('files', relative_to(primary_dir, entry.name)))
+          end
+        end
+      end
+    end
-      return @path_to_files if @path_to_files.present? && filename.blank?
-      # The zip file could be either the main import file, or a separate attachments zip file.
-      # We want to check for both of those before we determine the path to the files.
-      have_zip_file = zip? || (parser_fields['attachments_zip_path'] && zip_file?(parser_fields['attachments_zip_path']))
-      @path_to_files = File.join(
-          have_zip_file ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
-        )
-      return @path_to_files if File.exist?(@path_to_files)
-      # TODO: This method silently returns nil if there is no file & no zip file
-      File.join(importer_unzip_path, 'files', filename) if file? && zip?
+    # Extracts a zip that accompanies a separately-uploaded CSV. Every
+    # entry lands under {#importer_unzip_path}/files/ — including any
+    # CSVs inside the zip, which are treated as attachments since the
+    # primary CSV was uploaded outside the zip. Strips a single top-level
+    # wrapper directory if present, so users can zip either the contents
+    # or the enclosing folder.
+    #
+    # @param file_to_unzip [String] absolute path to a .zip
+    def unzip_attachments_only(file_to_unzip)
+      dest_dir = importer_unzip_path(mkdir: true)
+      Zip::File.open(file_to_unzip) do |zip_file|
+        entries = real_zip_entries(zip_file)
+        wrapper = single_top_level_wrapper(entries)
+        entries.each do |entry|
+          relative = wrapper ? entry.name.delete_prefix("#{wrapper}/") : entry.name
+          next if relative.empty?
+          extract_to(zip_file, entry, dest_dir, File.join('files', relative))
+        end
+      end
     end
-    def unzip(file_to_unzip)
-      super
-      normalize_unzipped_files_structure(importer_unzip_path)
+    # File names referenced in CSVs have spaces replaced with underscores.
+    # @see #file_paths
+    def remove_spaces_from_filenames
+      files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
+      files_with_spaces = files.select { |f| f.split('/').last.include?(' ') }
+      return if files_with_spaces.blank?
+      files_with_spaces.map! { |path| Pathname.new(path) }
+      files_with_spaces.each do |path|
+        filename_without_spaces = path.basename.to_s.tr(' ', '_')
+        path.rename(File.join(path.dirname, filename_without_spaces))
+      end
     end
     private
-    # Ensure files extracted from a zip always land in a `files/` subdirectory
-    # regardless of how the zip was structured. If files were extracted directly
-    # into dest_dir (flat zip with no `files/` folder), move them into
-    # dest_dir/files/ so that path_to_files can reliably locate them.
-    def normalize_unzipped_files_structure(dest_dir)
-      flat_files = Dir.glob(File.join(dest_dir, '*')).select { |f| File.file?(f) && !f.end_with?('.csv') }
-      return if flat_files.empty?
-      files_dir = File.join(dest_dir, 'files')
-      FileUtils.mkdir_p(files_dir)
-      flat_files.each do |f|
-        dest = File.join(files_dir, File.basename(f))
-        FileUtils.mv(f, dest) unless File.exist?(dest)
+    # Memoized base directory under which import attachments live. Kept
+    # separate from `#path_to_files`' per-filename return value to avoid
+    # cross-contamination between directory lookups and file lookups.
+    def files_dir
+      @files_dir ||= begin
+        has_attachments_zip = parser_fields['attachments_zip_path'].present? && zip_file?(parser_fields['attachments_zip_path'])
+        base = zip? || has_attachments_zip ? importer_unzip_path : File.dirname(import_file_path)
+        File.join(base, 'files')
       end
     end
+    # Returns zip entries filtered down to real files (no directories, no
+    # macOS junk). Raises {Bulkrax::UnzipError} if any entry's name would
+    # escape the destination directory (Zip Slip).
+    def real_zip_entries(zip_file)
+      entries = zip_file.entries.select { |e| e.file? && !macos_junk_entry?(e.name) }
+      entries.each { |e| reject_unsafe_entry!(e.name) }
+      entries
+    end
+    # Picks the single primary CSV from zip entries, enforcing the
+    # shallowest-level rule. Raises {Bulkrax::UnzipError} on failure.
+    def select_primary_csv!(entries)
+      csvs = entries.select { |e| e.name.end_with?('.csv') }
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.no_csv') if csvs.empty?
+      by_depth = csvs.group_by { |e| e.name.count('/') }
+      shallowest = by_depth[by_depth.keys.min]
+      raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.multiple_csv') if shallowest.size > 1
+      shallowest.first
+    end
+    # If every entry shares a single top-level directory, returns that
+    # directory name; otherwise nil.
+    def single_top_level_wrapper(entries)
+      tops = entries.map { |e| e.name.split('/').first }.uniq
+      return nil unless tops.size == 1
+      # If the single top segment is a file (no slashes in the entry), not a dir,
+      # there's no wrapper to strip.
+      return nil if entries.any? { |e| e.name == tops.first }
+      tops.first
+    end
+    # Returns `path` with `prefix/` removed from the front, if present, and
+    # a leading `files/` segment also stripped so callers can join under
+    # `files/` without doubling when the zip already uses that convention.
+    def relative_to(prefix, path)
+      remaining = prefix == '.' || prefix.empty? ? path : path.delete_prefix("#{prefix}/")
+      remaining.delete_prefix('files/')
+    end
+    # Extracts a zip entry to `dest_dir/relative_dest`. Creates intermediate
+    # directories and honors the rubyzip 2/3 extract-method signature.
+    # The destination path is validated by {#safe_extract_path} — an unsafe
+    # `relative_dest` raises {Bulkrax::UnzipError} before any write.
+    def extract_to(zip_file, entry, dest_dir, relative_dest)
+      dest_path = safe_extract_path(dest_dir, relative_dest)
+      FileUtils.mkdir_p(File.dirname(dest_path))
+      return if File.exist?(dest_path)
+      extract_zip_entry(zip_file, entry, dest_dir, relative_dest, dest_path)
+    end
     def unique_collection_identifier(collection_hash)
       entry_uid = collection_hash[source_identifier]
       entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
@@ -434,16 +548,13 @@ module Bulkrax
     # Override to return the first CSV in the path, if a zip file is supplied
     # We expect a single CSV at the top level of the zip in the CSVParser
     # but we are willing to go look for it if need be
+    # When the user uploaded a zip containing a CSV, the job extracts the
+    # primary CSV to the root of `importer_unzip_path` (see
+    # {#unzip_with_primary_csv}). Any non-primary CSVs live under `files/`
+    # and are treated as attachments, so a shallow glob suffices.
     def real_import_file_path
-      return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip?
+      return Dir["#{importer_unzip_path}/*.csv"].first if file? && zip?
       parser_fields['import_file_path']
     end
-    # If there are CSVs that are meant to be attachments in the files directory,
-    # we don't want to consider them as the import CSV
-    def in_files_dir?(path)
-      File.dirname(path).ends_with?('files')
-    end
   end
 end

data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb CHANGED Viewed

@@ -46,7 +46,7 @@ module Bulkrax
       end
       def build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
-        svc = ValidationContext.new(
+        svc = Bulkrax::CsvParser::ValidationContext.new(
           mapping_manager: mapping_manager,
           field_analyzer: field_analyzer,
           all_models: all_models,

data/config/locales/bulkrax.de.yml CHANGED Viewed

@@ -315,8 +315,7 @@ de:
           missing_required_hint: Fügen Sie diese Spalte zu Ihrer CSV-Datei hinzu.
           missing_required_title: Fehlende Pflichtfelder
           missing_rights_desc: Ihre CSV-Datei enthält keine Spalte „rights_statement“. Sie können diese entweder Ihrer CSV-Datei hinzufügen oder im nächsten Schritt eine Standard-Rechteerklärung auswählen.
-          multiple_csv_same_dir: Mehrere CSV-Dateien im selben Verzeichnis innerhalb der ZIP-Datei gefunden
-          multiple_csv_same_level: Mehrere CSV-Dateien auf derselben Ebene innerhalb der ZIP-Datei gefunden
+          multiple_csv: Mehrere CSV-Dateien befinden sich auf der obersten Ebene des ZIP-Archivs, sodass die primäre CSV nicht bestimmt werden kann. Belassen Sie genau eine CSV auf dieser Ebene; weitere CSVs müssen tiefer verschachtelt sein.
           no_csv_in_zip: Es wurden keine CSV-Dateien im ZIP-Archiv gefunden.
           no_csv_uploaded: Es wurde keine CSV-Metadatendatei hochgeladen.
           no_files_uploaded: Es wurden keine Dateien hochgeladen.
@@ -346,6 +345,11 @@ de:
           notices_title: Importhinweise
           unrecognized_desc: 'Diese Spalten werden beim Import ignoriert:'
           unrecognized_title: Nicht anerkannte Felder
+      unzip:
+        errors:
+          multiple_csv: Mehrere CSV-Dateien befinden sich auf der obersten Ebene des ZIP-Archivs, sodass die primäre CSV nicht bestimmt werden kann. Belassen Sie genau eine CSV auf dieser Ebene; weitere CSVs müssen tiefer verschachtelt sein.
+          no_csv: Es wurden keine CSV-Dateien im ZIP-Archiv gefunden.
+          unsafe_entry: "Das ZIP enthält einen Eintrag mit unsicherem Pfad (%{name}). Einträge dürfen weder absolute Pfade noch Referenzen auf übergeordnete Verzeichnisse verwenden."
       validations:
         errors_prohibited:
           one: 'Ein Fehler verhinderte das Speichern dieses Importers:'

data/config/locales/bulkrax.en.yml CHANGED Viewed

@@ -342,8 +342,7 @@ en:
           missing_required_hint: add this column to your CSV
           missing_required_title: Missing Required Fields
           missing_rights_desc: Your CSV does not include a rights_statement column. You can add it to your CSV or select a Default Rights Statement in the next step.
-          multiple_csv_same_dir: Multiple CSV files found in the same directory within ZIP
-          multiple_csv_same_level: Multiple CSV files found at the same level within ZIP
+          multiple_csv: Multiple CSV files share the shallowest directory level in the ZIP, so the primary CSV cannot be determined. Keep exactly one CSV at that level; any additional CSVs must be nested deeper.
           no_csv_in_zip: No CSV files found in ZIP
           no_csv_uploaded: No CSV metadata file uploaded
           no_files_uploaded: No files uploaded
@@ -382,6 +381,11 @@ en:
           notices_title: Import Notices
           unrecognized_desc: 'These columns will be ignored during import:'
           unrecognized_title: Unrecognized Fields
+      unzip:
+        errors:
+          multiple_csv: Multiple CSV files share the shallowest directory level in the ZIP, so the primary CSV cannot be determined. Keep exactly one CSV at that level; any additional CSVs must be nested deeper.
+          no_csv: No CSV file found in the ZIP
+          unsafe_entry: "The ZIP contains an entry with an unsafe path (%{name}). Entries must not use absolute paths or parent-directory references."
       validations:
         errors_prohibited:
           one: '1 error prohibited this importer from being saved:'

data/config/locales/bulkrax.es.yml CHANGED Viewed

@@ -315,8 +315,7 @@ es:
           missing_required_hint: Añade esta columna a tu CSV
           missing_required_title: Campos obligatorios faltantes
           missing_rights_desc: Su archivo CSV no incluye la columna "rights_statement". Puede añadirla o seleccionar una "Declaración de derechos predeterminada" en el siguiente paso.
-          multiple_csv_same_dir: Se encontraron varios archivos CSV en el mismo directorio dentro de ZIP
-          multiple_csv_same_level: Se encontraron varios archivos CSV en el mismo nivel dentro de ZIP
+          multiple_csv: Hay varios archivos CSV en el nivel menos profundo del ZIP, por lo que no se puede determinar el CSV principal. Mantén exactamente un CSV en ese nivel; cualquier CSV adicional debe estar anidado más profundamente.
           no_csv_in_zip: No se encontraron archivos CSV en ZIP
           no_csv_uploaded: No se cargó ningún archivo de metadatos CSV
           no_files_uploaded: No hay archivos subidos
@@ -346,6 +345,11 @@ es:
           notices_title: Avisos de importación
           unrecognized_desc: 'Estas columnas se ignorarán durante la importación:'
           unrecognized_title: Campos no reconocidos
+      unzip:
+        errors:
+          multiple_csv: Hay varios archivos CSV en el nivel menos profundo del ZIP, por lo que no se puede determinar el CSV principal. Mantén exactamente un CSV en ese nivel; cualquier CSV adicional debe estar anidado más profundamente.
+          no_csv: No se encontraron archivos CSV en ZIP
+          unsafe_entry: "El ZIP contiene una entrada con una ruta no segura (%{name}). Las entradas no deben usar rutas absolutas ni referencias al directorio padre."
       validations:
         errors_prohibited:
           one: '1 error impidió que se guardara este importador:'

data/config/locales/bulkrax.fr.yml CHANGED Viewed

@@ -315,8 +315,7 @@ fr:
           missing_required_hint: Ajoutez cette colonne à votre fichier CSV.
           missing_required_title: Champs obligatoires manquants
           missing_rights_desc: Votre fichier CSV ne contient pas de colonne « droits_statement ». Vous pouvez l'ajouter ou sélectionner une déclaration de droits par défaut à l'étape suivante.
-          multiple_csv_same_dir: Plusieurs fichiers CSV trouvés dans le même répertoire à l'intérieur du fichier ZIP
-          multiple_csv_same_level: Plusieurs fichiers CSV trouvés au même niveau dans le fichier ZIP
+          multiple_csv: Plusieurs fichiers CSV se trouvent au niveau le moins profond dans le ZIP, ce qui empêche d'identifier le CSV principal. Conservez exactement un fichier CSV à ce niveau ; les CSV supplémentaires doivent être imbriqués plus profondément.
           no_csv_in_zip: Aucun fichier CSV trouvé dans le fichier ZIP
           no_csv_uploaded: Aucun fichier de métadonnées CSV n'a été téléchargé.
           no_files_uploaded: Aucun fichier téléchargé
@@ -346,6 +345,11 @@ fr:
           notices_title: Avis d'importation
           unrecognized_desc: 'Ces colonnes seront ignorées lors de l''importation :'
           unrecognized_title: Champs non reconnus
+      unzip:
+        errors:
+          multiple_csv: Plusieurs fichiers CSV se trouvent au niveau le moins profond dans le ZIP, ce qui empêche d'identifier le CSV principal. Conservez exactement un fichier CSV à ce niveau ; les CSV supplémentaires doivent être imbriqués plus profondément.
+          no_csv: Aucun fichier CSV trouvé dans le fichier ZIP
+          unsafe_entry: "Le ZIP contient une entrée avec un chemin non sûr (%{name}). Les entrées ne doivent pas utiliser de chemins absolus ni de références au répertoire parent."
       validations:
         errors_prohibited:
           one: 'Une erreur a empêché l''enregistrement de cet importateur :'

data/config/locales/bulkrax.it.yml CHANGED Viewed

@@ -315,8 +315,7 @@ it:
           missing_required_hint: aggiungi questa colonna al tuo CSV
           missing_required_title: Campi obbligatori mancanti
           missing_rights_desc: Il tuo file CSV non include una colonna rights_statement. Puoi aggiungerla al tuo file CSV o selezionare una colonna "Default Rights Statement" nel passaggio successivo.
-          multiple_csv_same_dir: Sono stati trovati più file CSV nella stessa directory all'interno di ZIP
-          multiple_csv_same_level: Sono stati trovati più file CSV allo stesso livello all'interno dello ZIP
+          multiple_csv: Sono stati trovati più file CSV al livello meno profondo all'interno dello ZIP, quindi non è possibile determinare il CSV principale. Mantieni esattamente un CSV a quel livello; eventuali CSV aggiuntivi devono essere annidati più in profondità.
           no_csv_in_zip: Nessun file CSV trovato nello ZIP
           no_csv_uploaded: Nessun file di metadati CSV caricato
           no_files_uploaded: Nessun file caricato
@@ -346,6 +345,11 @@ it:
           notices_title: Avvisi di importazione
           unrecognized_desc: 'Queste colonne verranno ignorate durante l''importazione:'
           unrecognized_title: Campi non riconosciuti
+      unzip:
+        errors:
+          multiple_csv: Sono stati trovati più file CSV al livello meno profondo all'interno dello ZIP, quindi non è possibile determinare il CSV principale. Mantieni esattamente un CSV a quel livello; eventuali CSV aggiuntivi devono essere annidati più in profondità.
+          no_csv: Nessun file CSV trovato nello ZIP
+          unsafe_entry: "Lo ZIP contiene una voce con un percorso non sicuro (%{name}). Le voci non devono utilizzare percorsi assoluti né riferimenti alla directory superiore."
       validations:
         errors_prohibited:
           one: '1 errore ha impedito il salvataggio di questo importatore:'

data/config/locales/bulkrax.pt-BR.yml CHANGED Viewed

@@ -315,8 +315,7 @@ pt-BR:
           missing_required_hint: Adicione esta coluna ao seu arquivo CSV.
           missing_required_title: Campos obrigatórios ausentes
           missing_rights_desc: Seu arquivo CSV não inclui uma coluna `rights_statement`. Você pode adicioná-la ao seu CSV ou selecionar uma Declaração de Direitos Padrão na próxima etapa.
-          multiple_csv_same_dir: Vários arquivos CSV encontrados no mesmo diretório dentro do arquivo ZIP.
-          multiple_csv_same_level: Vários arquivos CSV encontrados no mesmo nível dentro do arquivo ZIP.
+          multiple_csv: Vários arquivos CSV estão no nível menos profundo dentro do ZIP, então não é possível determinar qual é o CSV principal. Mantenha exatamente um CSV nesse nível; CSVs adicionais devem estar aninhados em níveis mais profundos.
           no_csv_in_zip: Nenhum arquivo CSV encontrado no arquivo ZIP.
           no_csv_uploaded: Nenhum arquivo de metadados CSV foi carregado.
           no_files_uploaded: Nenhum arquivo foi enviado.
@@ -346,6 +345,11 @@ pt-BR:
           notices_title: Avisos de importação
           unrecognized_desc: 'Estas colunas serão ignoradas durante a importação:'
           unrecognized_title: Campos não reconhecidos
+      unzip:
+        errors:
+          multiple_csv: Vários arquivos CSV estão no nível menos profundo dentro do ZIP, então não é possível determinar qual é o CSV principal. Mantenha exatamente um CSV nesse nível; CSVs adicionais devem estar aninhados em níveis mais profundos.
+          no_csv: Nenhum arquivo CSV encontrado no arquivo ZIP.
+          unsafe_entry: "O ZIP contém uma entrada com caminho inseguro (%{name}). As entradas não devem usar caminhos absolutos nem referências ao diretório pai."
       validations:
         errors_prohibited:
           one: '1 erro impediu que este importador fosse salvo:'

data/config/locales/bulkrax.zh.yml CHANGED Viewed

@@ -314,8 +314,7 @@ zh:
           missing_required_hint: 将此列添加到您的 CSV 文件中
           missing_required_title: 缺少必填字段
           missing_rights_desc: 您的 CSV 文件不包含 rights_statement 列。您可以在下一步中将其添加到 CSV 文件中，或选择默认的权利声明。
-          multiple_csv_same_dir: ZIP 文件中的同一目录下发现了多个 CSV 文件
-          multiple_csv_same_level: 在 ZIP 文件中的同一层级发现了多个 CSV 文件
+          multiple_csv: ZIP 文件中的同一最浅层级下发现了多个 CSV 文件，无法确定主 CSV。请在该层级仅保留一个 CSV，其他 CSV 必须位于更深的目录中。
           no_csv_in_zip: ZIP 文件中未找到 CSV 文件
           no_csv_uploaded: 未上传 CSV 元数据文件。
           no_files_uploaded: 未上传任何文件。
@@ -345,6 +344,11 @@ zh:
           notices_title: 导入通知
           unrecognized_desc: 导入过程中将忽略以下列：
           unrecognized_title: 未识别字段
+      unzip:
+        errors:
+          multiple_csv: ZIP 文件中的同一最浅层级下发现了多个 CSV 文件，无法确定主 CSV。请在该层级仅保留一个 CSV，其他 CSV 必须位于更深的目录中。
+          no_csv: ZIP 文件中未找到 CSV 文件
+          unsafe_entry: "ZIP 文件包含路径不安全的条目（%{name}）。条目不得使用绝对路径或父目录引用。"
       validations:
         errors_prohibited:
           one: 1 个错误导致此导入程序无法保存：

data/lib/bulkrax/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Bulkrax
-  VERSION = '9.4.0'
+  VERSION = '9.4.1'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bulkrax
 version: !ruby/object:Gem::Version
-  version: 9.4.0
+  version: 9.4.1
 platform: ruby
 authors:
 - Rob Kaufman
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-04-15 00:00:00.000000000 Z
+date: 2026-04-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -365,6 +365,7 @@ files:
 - app/controllers/concerns/bulkrax/download_behavior.rb
 - app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb
 - app/controllers/concerns/bulkrax/importer_file_handler.rb
+- app/errors/bulkrax/unzip_error.rb
 - app/factories/bulkrax/object_factory.rb
 - app/factories/bulkrax/object_factory_interface.rb
 - app/factories/bulkrax/valkyrie_object_factory.rb