bulkrax 9.3.5 → 9.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  12. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  22. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  23. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  24. data/app/controllers/bulkrax/importers_controller.rb +28 -31
  25. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  26. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
  27. data/app/errors/bulkrax/unzip_error.rb +16 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/importer_job.rb +42 -4
  31. data/app/models/bulkrax/csv_entry.rb +27 -7
  32. data/app/models/bulkrax/entry.rb +4 -0
  33. data/app/models/bulkrax/importer.rb +27 -10
  34. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  36. data/app/parsers/bulkrax/application_parser.rb +63 -20
  37. data/app/parsers/bulkrax/bagit_parser.rb +12 -0
  38. data/app/parsers/bulkrax/csv_parser.rb +168 -25
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  66. data/app/views/bulkrax/importers/index.html.erb +6 -1
  67. data/app/views/bulkrax/importers/new.html.erb +1 -1
  68. data/app/views/bulkrax/importers/show.html.erb +17 -1
  69. data/config/i18n-tasks.yml +195 -0
  70. data/config/locales/bulkrax.de.yml +508 -0
  71. data/config/locales/bulkrax.en.yml +463 -233
  72. data/config/locales/bulkrax.es.yml +508 -0
  73. data/config/locales/bulkrax.fr.yml +508 -0
  74. data/config/locales/bulkrax.it.yml +508 -0
  75. data/config/locales/bulkrax.pt-BR.yml +508 -0
  76. data/config/locales/bulkrax.zh.yml +507 -0
  77. data/config/routes.rb +10 -1
  78. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  79. data/lib/bulkrax/version.rb +1 -1
  80. data/lib/bulkrax.rb +31 -0
  81. metadata +56 -16
  82. data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
  83. data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
  84. data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
  85. data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
  86. data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
  87. data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
  88. data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
  89. data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
  90. data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
  91. data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
  92. data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
  93. data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
  94. data/app/services/bulkrax/sample_csv_service.rb +0 -78
  95. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -4,7 +4,10 @@ module Bulkrax
4
4
  class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
5
5
  include ErroredEntries
6
6
  include ExportBehavior
7
+ include CsvParser::CsvTemplateGeneration
8
+ include CsvParser::CsvValidation
7
9
  attr_writer :collections, :file_sets, :works
10
+ attr_accessor :validation_mode
8
11
 
9
12
  def self.export_supported?
10
13
  true
@@ -14,12 +17,14 @@ module Bulkrax
14
17
  return @records if @records.present?
15
18
 
16
19
  file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
17
- # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
18
20
  csv_data = entry_class.read_data(file_for_import)
19
- importer.parser_fields['total'] = csv_data.count
20
- importer.save
21
+ unless validation_mode
22
+ importer.parser_fields['total'] = csv_data.count
23
+ importer.save
24
+ end
21
25
 
22
26
  @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
27
+ @records
23
28
  end
24
29
 
25
30
  # rubocop:disable Metrics/AbcSize
@@ -95,11 +100,11 @@ module Bulkrax
95
100
  def missing_elements(record)
96
101
  keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
97
102
  keys = []
98
- # Because we're persisting the mapping in the database, these are likely string keys.
99
- # However, there's no guarantee. So, we need to ensure that by running stringify.
100
- importerexporter.mapping.stringify_keys.map do |k, v|
101
- Array.wrap(v['from']).each do |vf|
102
- keys << k if keys_from_record.include?(vf)
103
+ mapping_values = importerexporter.mapping.stringify_keys
104
+ mapping_values.each do |k, v|
105
+ from_values = Array.wrap(v.is_a?(Hash) ? (v['from'] || v[:from]) : nil)
106
+ from_values.each do |vf|
107
+ keys << k if vf.present? && keys_from_record.include?(vf.to_s.strip)
103
108
  end
104
109
  end
105
110
  required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
@@ -360,8 +365,11 @@ module Bulkrax
360
365
  else
361
366
  Bulkrax.multi_value_element_split_on
362
367
  end
368
+ files_dir = path_to_files
369
+ raise StandardError, "Record references local files but no files directory could be resolved from the import path" if files_dir.nil?
370
+
363
371
  r[file_mapping].split(split_pattern).map do |f|
364
- file = File.join(path_to_files, f.tr(' ', '_'))
372
+ file = File.join(files_dir, f.strip.tr(' ', '_'))
365
373
  if File.exist?(file) # rubocop:disable Style/GuardClause
366
374
  file
367
375
  else
@@ -371,23 +379,161 @@ module Bulkrax
371
379
  end.flatten.compact.uniq
372
380
  end
373
381
 
374
- # Retrieve the path where we expect to find the files
382
# Locates this import's attachments on disk.
#
# The base directory is whatever {#files_dir} resolves: the `files/` folder
# under the importer's unzip path for zip-based imports, or the `files/`
# folder next to the CSV for a server-path-style import.
#
# With `filename:` the full path to that file is returned when it exists on
# disk, otherwise `nil` (callers such as
# `Bulkrax::FileSetEntryBehavior#add_path_to_file` rely on that `nil` to fall
# back to the raw filename in their error messages).
#
# Without a filename the `files/` directory itself is returned, but only when
# it exists on disk; otherwise `nil`, so callers can raise a clear
# "no files directory" error.
#
# @param args [Hash] keyword options; only `:filename` is read
# @return [String, nil] the resolved directory or file path, or nil if absent
def path_to_files(**args)
  requested = args.fetch(:filename, '')
  attachments_root = files_dir

  if requested.blank?
    # Directory lookup: existence is only probed on this branch.
    return Dir.exist?(attachments_root) ? attachments_root : nil
  end

  full_path = File.join(attachments_root, requested)
  File.exist?(full_path) ? full_path : nil
end
405
+
406
# Unpacks a zip whose contents include the import's primary CSV.
#
# The primary CSV is written to the root of {#importer_unzip_path} under its
# base name. Every other entry is written below `files/`, keeping its path
# relative to the directory that held the primary CSV (see {#relative_to}).
#
# The primary CSV is chosen by {#select_primary_csv!}: the CSV entry at the
# shallowest directory level, matching the guided-import validator's rule
# (see {Bulkrax::ImporterFileHandler#locate_csv_entry_in_zip}).
#
# @param file_to_unzip [String] absolute path to a .zip
# @raise [Bulkrax::UnzipError] when no CSV, or several equally shallow CSVs,
#   are found
def unzip_with_primary_csv(file_to_unzip)
  target_root = importer_unzip_path(mkdir: true)

  Zip::File.open(file_to_unzip) do |archive|
    members = real_zip_entries(archive)
    csv_entry = select_primary_csv!(members)
    csv_dir = File.dirname(csv_entry.name)

    members.each do |member|
      destination =
        if member == csv_entry
          File.basename(member.name)
        else
          File.join('files', relative_to(csv_dir, member.name))
        end
      extract_to(archive, member, target_root, destination)
    end
  end
end
377
434
 
378
- return @path_to_files if @path_to_files.present? && filename.blank?
379
- @path_to_files = File.join(
380
- zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
381
- )
435
# Unpacks a zip that was uploaded alongside a separately supplied CSV.
#
# Every entry is written below {#importer_unzip_path}/files/, CSVs included:
# the primary CSV lives outside the zip, so anything inside is an attachment.
# When all entries share one top-level directory (see
# {#single_top_level_wrapper}) that wrapper is removed first, so users may zip
# either the contents or the enclosing folder.
#
# NOTE(review): unlike {#relative_to}, a leading `files/` segment left after
# wrapper removal is kept, so such entries land under `files/files/` — confirm
# this is intended.
#
# @param file_to_unzip [String] absolute path to a .zip
def unzip_attachments_only(file_to_unzip)
  target_root = importer_unzip_path(mkdir: true)

  Zip::File.open(file_to_unzip) do |archive|
    members = real_zip_entries(archive)
    wrapper_dir = single_top_level_wrapper(members)
    wrapper_prefix = wrapper_dir && "#{wrapper_dir}/"

    members.each do |member|
      inner_path = wrapper_prefix ? member.name.delete_prefix(wrapper_prefix) : member.name
      # Nothing is left to name the file once the wrapper is removed.
      next if inner_path.empty?

      extract_to(archive, member, target_root, File.join('files', inner_path))
    end
  end
end
382
456
 
383
- return @path_to_files if File.exist?(@path_to_files)
457
+ # File names referenced in CSVs have spaces replaced with underscores.
458
+ # @see #file_paths
459
+ def remove_spaces_from_filenames
460
+ files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
461
+ files_with_spaces = files.select { |f| f.split('/').last.include?(' ') }
462
+ return if files_with_spaces.blank?
384
463
 
385
- # TODO: This method silently returns nil if there is no file & no zip file
386
- File.join(importer_unzip_path, 'files', filename) if file? && zip?
464
+ files_with_spaces.map! { |path| Pathname.new(path) }
465
+ files_with_spaces.each do |path|
466
+ filename_without_spaces = path.basename.to_s.tr(' ', '_')
467
+ path.rename(File.join(path.dirname, filename_without_spaces))
468
+ end
387
469
  end
388
470
 
389
471
  private
390
472
 
473
# Base directory under which this import's attachments live (memoized).
#
# It is held in its own instance variable, apart from the per-filename result
# of `#path_to_files`, so directory lookups and file lookups cannot
# contaminate each other.
#
# The `files/` folder is placed under {#importer_unzip_path} when the import
# itself is a zip or when `parser_fields['attachments_zip_path']` names a zip
# file; otherwise it sits beside the import CSV.
#
# @return [String] path ending in `files`
def files_dir
  return @files_dir if @files_dir

  attachments_zip = parser_fields['attachments_zip_path']
  attachments_zipped = attachments_zip.present? && zip_file?(attachments_zip)
  root =
    if zip? || attachments_zipped
      importer_unzip_path
    else
      File.dirname(import_file_path)
    end

  @files_dir = File.join(root, 'files')
end
483
+
484
# Narrows a zip's entries to real files: directory entries and macOS junk
# (as judged by `macos_junk_entry?`) are dropped. Each remaining entry name is
# then passed to `reject_unsafe_entry!`, which is expected to raise
# {Bulkrax::UnzipError} for names that would escape the destination
# directory (Zip Slip).
#
# @param zip_file [#entries] an opened zip archive
# @return [Array] the surviving entries, in archive order
def real_zip_entries(zip_file)
  kept = zip_file.entries.reject { |entry| !entry.file? || macos_junk_entry?(entry.name) }
  # Array#each returns its receiver, so the vetted list is the return value.
  kept.each { |entry| reject_unsafe_entry!(entry.name) }
end
492
+
493
# Chooses the one primary CSV among zip entries using the shallowest-level
# rule: only CSV entries with the fewest `/` separators in their name are
# candidates, and exactly one such candidate must exist.
#
# @param entries [Array<#name>] zip entries to inspect
# @return [Object] the selected CSV entry
# @raise [Bulkrax::UnzipError] when there is no CSV entry, or more than one
#   at the shallowest level
def select_primary_csv!(entries)
  csv_entries = entries.select { |entry| entry.name.end_with?('.csv') }
  raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.no_csv') if csv_entries.empty?

  depth_of = ->(entry) { entry.name.count('/') }
  shallowest_depth = csv_entries.map(&depth_of).min
  candidates = csv_entries.select { |entry| depth_of.call(entry) == shallowest_depth }
  raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.multiple_csv') if candidates.size > 1

  candidates.first
end
506
+
507
# Detects a single directory that wraps every entry of a zip.
#
# @param entries [Array<#name>] zip entries to inspect
# @return [String, nil] the shared top-level directory name, or nil when the
#   entries do not share one first path segment, or when that segment is
#   itself an entry's whole name (a file at the root, not a directory)
def single_top_level_wrapper(entries)
  roots = entries.map { |entry| entry.name.split('/').first }.uniq
  return nil unless roots.size == 1

  root = roots.first
  root_is_a_file = entries.any? { |entry| entry.name == root }
  root_is_a_file ? nil : root
end
517
+
518
# Rebases `path` onto `prefix` for extraction under `files/`.
#
# A leading `prefix/` is removed unless the prefix is `.` or empty, and then a
# leading `files/` segment is removed as well, so callers can join the result
# under `files/` without doubling when the zip already uses that layout.
#
# @param prefix [String] directory of the primary CSV inside the zip
# @param path [String] entry name inside the zip
# @return [String] a new string; the caller's `path` is not modified
def relative_to(prefix, path)
  unless prefix == '.' || prefix.empty?
    path = path.delete_prefix("#{prefix}/")
  end

  path.delete_prefix('files/')
end
525
+
526
# Writes one zip entry to `dest_dir/relative_dest`.
#
# The destination is first resolved through {#safe_extract_path}, which per
# the original note raises {Bulkrax::UnzipError} for an unsafe
# `relative_dest` before anything is written. Intermediate directories are
# created, and an already existing destination file is left untouched. The
# write itself is delegated to `extract_zip_entry`, which per the original
# note copes with the rubyzip 2/3 extract-method signature.
#
# @param zip_file [Object] the opened zip archive
# @param entry [Object] the entry to extract
# @param dest_dir [String] extraction root
# @param relative_dest [String] path of the file below `dest_dir`
# @return [Object, nil] nil when the file already existed, otherwise whatever
#   `extract_zip_entry` returns
def extract_to(zip_file, entry, dest_dir, relative_dest)
  target = safe_extract_path(dest_dir, relative_dest)
  FileUtils.mkdir_p(File.dirname(target))

  extract_zip_entry(zip_file, entry, dest_dir, relative_dest, target) unless File.exist?(target)
end
536
+
391
537
  def unique_collection_identifier(collection_hash)
392
538
  entry_uid = collection_hash[source_identifier]
393
539
  entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
@@ -402,16 +548,13 @@ module Bulkrax
402
548
  # Override to return the first CSV in the path, if a zip file is supplied
403
549
  # We expect a single CSV at the top level of the zip in the CSVParser
404
550
  # but we are willing to go look for it if need be
551
+ # When the user uploaded a zip containing a CSV, the job extracts the
552
+ # primary CSV to the root of `importer_unzip_path` (see
553
+ # {#unzip_with_primary_csv}). Any non-primary CSVs live under `files/`
554
+ # and are treated as attachments, so a shallow glob suffices.
405
555
  def real_import_file_path
406
- return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip?
407
-
556
+ return Dir["#{importer_unzip_path}/*.csv"].first if file? && zip?
408
557
  parser_fields['import_file_path']
409
558
  end
410
-
411
- # If there are CSVs that are meant to be attachments in the files directory,
412
- # we don't want to consider them as the import CSV
413
- def in_files_dir?(path)
414
- File.dirname(path).ends_with?('files')
415
- end
416
559
  end
417
560
  end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvParser < ApplicationParser
5
+ module CsvTemplateGeneration
6
+ extend ActiveSupport::Concern
7
+
8
+ class_methods do
9
# Generate a CSV template for the specified models.
#
# The output format selects a `TemplateContext#to_<output>` writer. Only
# public methods declared on TemplateContext itself are accepted and the call
# goes through `public_send`, so an arbitrary `output` string cannot reach
# inherited or private `to_*` methods.
#
# @param models [Array<String>, String] Model names or 'all' for all available models
# @param output [String] Output format: 'file' or 'csv_string'
# @param admin_set_id [String, nil] Optional admin set ID for context
# @param args [Hash] Additional arguments passed to output method (e.g., file_path)
# @return [String] File path (for 'file' output) or CSV string (for 'csv_string' output)
# @raise [NameError] if Hyrax is not defined
# @raise [ArgumentError] if `output` does not name a supported writer
def generate_template(models: [], output: 'file', admin_set_id: nil, **args)
  raise NameError, "Hyrax is not defined" unless defined?(::Hyrax)

  writer = "to_#{output}"
  # Compare as strings so caller-supplied text is never turned into a symbol.
  supported = TemplateContext.public_instance_methods(false).map(&:to_s)
  raise ArgumentError, "Unsupported template output: #{output.inspect}" unless supported.include?(writer)

  TemplateContext.new(models: models, admin_set_id: admin_set_id).public_send(writer, **args)
end
20
+ end
21
+
22
##
# State for one template generation run.
# Exposes the readers and helpers that the CsvTemplate:: components call on
# the context object they are given.
class TemplateContext
  attr_reader :mappings, :all_models, :admin_set_id, :field_analyzer, :mapping_manager

  # @param models [Array<String>, String, nil] model names; wrapped in an Array
  #   before being handed to CsvTemplate::ModelLoader
  # @param admin_set_id [String, nil] optional admin set used for context
  def initialize(models: nil, admin_set_id: nil)
    @admin_set_id = admin_set_id
    @mapping_manager = CsvTemplate::MappingManager.new
    @mappings = @mapping_manager.mappings
    @field_analyzer = CsvTemplate::FieldAnalyzer.new(@mappings, admin_set_id)
    @all_models = CsvTemplate::ModelLoader.new(Array.wrap(models)).models
    # Built last: the builder receives this context with the state above set.
    @csv_builder = CsvTemplate::CsvBuilder.new(self)
  end

  # Writes the template to disk.
  #
  # @param file_path [String, nil] destination; when nil the path comes from
  #   CsvTemplate::FilePathGenerator.default_path
  # @return [String] the path that was written
  def to_file(file_path: nil)
    destination = file_path || CsvTemplate::FilePathGenerator.default_path(@admin_set_id)
    @csv_builder.write_to_file(destination)
    destination
  end

  # @return [String] the template as produced by the CSV builder
  def to_csv_string
    @csv_builder.generate_string
  end

  # Field lists per model, memoized after the first successful build.
  #
  # @return [Hash] model => { properties:, required_terms:,
  #   controlled_vocab_terms: }, each defaulting to an empty Array
  def field_metadata_for_all_models
    return @field_metadata if @field_metadata

    metadata = {}
    @all_models.each do |model|
      fields = @field_analyzer.find_or_create_field_list_for(model_name: model)
      metadata[model] = {
        properties: fields.dig(model, "properties") || [],
        required_terms: fields.dig(model, "required_terms") || [],
        controlled_vocab_terms: fields.dig(model, "controlled_vocab_terms") || []
      }
    end
    @field_metadata = metadata
  end

  # Column names considered valid for the loaded models (memoized).
  #
  # When building the columns raises a StandardError, the error is logged and
  # a fallback of standard fields plus every model property is used instead.
  #
  # @return [Array<String>]
  def valid_headers_for_models
    return @valid_headers if @valid_headers

    builder = CsvTemplate::ColumnBuilder.new(self)
    @valid_headers = builder.all_columns - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
  rescue StandardError => e
    Rails.logger.error("Error building valid headers: #{e.message}")
    fallback = %w[model source_identifier parent parents file]
    properties = field_metadata_for_all_models.values.flat_map { |meta| meta[:properties] }
    @valid_headers = (fallback + properties).uniq
  end
end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvParser < ApplicationParser
5
+ module CsvValidation
6
+ extend ActiveSupport::Concern
7
+
8
# Lightweight struct used to satisfy the CsvTemplate::ColumnBuilder
# interface without constructing a full template context.
#
# Assigned directly in the module body rather than inside an `included do`
# block: a constant assigned in a block is still defined in the enclosing
# lexical scope (this module), and would be re-assigned, with an
# "already initialized constant" warning, each time the concern is included.
ValidationContext = Struct.new(:mapping_manager, :field_analyzer, :all_models, :mappings, keyword_init: true)
13
+
14
+ class_methods do
15
+ include CsvValidationHelpers
16
+
17
# Validate a CSV (and optional zip) without a persisted Importer record.
#
# The CSV is parsed once, header-level checks run first, then the row, file
# and hierarchy validations; everything is folded into one result hash by
# `assemble_result`. The parsed rows are attached under `:raw_csv_data`.
#
# @param csv_file [File, ActionDispatch::Http::UploadedFile, String] path or file object
# @param zip_file [File, ActionDispatch::Http::UploadedFile, nil]
# @param admin_set_id [String, nil]
# @return [Hash] validation result compatible with the guided import UI
def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
  parsed = parse_csv_inputs(csv_file, admin_set_id)
  raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer = parsed

  all_ids = csv_data.map { |row| row[:source_identifier] }.compact.to_set
  header_issues = check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer)
  missing_required = header_issues[:missing_required]

  notices, row_errors, file_validator, collections, works, file_sets = run_validations(
    csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id
  )

  assemble_result(
    headers: headers, missing_required: missing_required, header_issues: header_issues,
    row_errors: row_errors, csv_data: csv_data, file_validator: file_validator,
    collections: collections, works: works, file_sets: file_sets, notices: notices
  ).tap do |result|
    apply_rights_statement_validation_override!(result, missing_required)
    result[:raw_csv_data] = csv_data
  end
end
42
+
43
+ private
44
+
45
# Collects notices, then runs the row validators, builds the file validator
# and extracts the hierarchy items.
#
# `missing_required` and the notices list are handed to the `append_*!`
# helpers before the row validators run.
#
# @return [Array] `[notices, row_errors, file_validator, collections, works, file_sets]`
def run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id) # rubocop:disable Metrics/ParameterLists
  record_finder = build_find_record
  notices = []
  models_in_csv = csv_data.map { |row| row[:model] }.compact.uniq

  append_missing_source_id!(missing_required, headers, source_id_key, models_in_csv)
  append_missing_model_notice!(notices, headers, csv_data)

  row_errors = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, record_finder, notices)
  file_validator = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
  # Only the first three hierarchy values are carried into the result.
  collections, works, file_sets = extract_hierarchy_items(csv_data, all_ids, record_finder, mappings)

  [notices, row_errors, file_validator, collections, works, file_sets]
end
58
+
59
# Reads the CSV, resolves mappings and column keys, parses the rows and
# builds field metadata for every model involved.
#
# The CSV is read through CsvEntry.read_data so header normalisation is
# identical to a real import. `Bulkrax.default_work_type`, when present, is
# added to the models found in the rows.
#
# @return [Array] `[raw_csv, headers, mapping_manager, mappings,
#   source_id_key, csv_data, field_metadata, field_analyzer]`
def parse_csv_inputs(csv_file, admin_set_id)
  raw_csv = CsvEntry.read_data(csv_file)
  headers = raw_csv.headers.map(&:to_s)

  mapping_manager = CsvTemplate::MappingManager.new
  mappings = mapping_manager.mappings

  source_id_key = resolve_validation_key(mapping_manager, flag: 'source_identifier', default: :source_identifier)
  relationship_and_file_keys = [
    resolve_validation_key(mapping_manager, flag: 'related_parents_field_mapping', default: :parents),
    resolve_validation_key(mapping_manager, flag: 'related_children_field_mapping', default: :children),
    resolve_validation_key(mapping_manager, key: 'file', default: :file)
  ]
  csv_data = parse_validation_rows(raw_csv, source_id_key, *relationship_and_file_keys)

  all_models = csv_data.map { |row| row[:model] }.compact.uniq
  default_type = Bulkrax.default_work_type
  all_models |= [default_type] if default_type.present?

  field_analyzer = CsvTemplate::FieldAnalyzer.new(mappings, admin_set_id)
  field_metadata = build_validation_field_metadata(all_models, field_analyzer)

  [raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer]
end
82
+
83
# Runs every header-level check.
#
# Headers ending in `_<digits>` are always treated as recognised, in addition
# to the headers built from the mappings and field metadata.
#
# @return [Hash] `{ missing_required:, unrecognized:, empty_columns: }`
def check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer) # rubocop:disable Metrics/ParameterLists
  known_headers = build_valid_validation_headers(
    mapping_manager, field_analyzer, field_metadata.keys, mappings, field_metadata
  )
  numbered_headers = headers.select { |header| header.match?(/_\d+\z/) }
  recognised = (known_headers + numbered_headers).uniq

  issues = {}
  issues[:missing_required] = find_missing_required_headers(headers, field_metadata, mapping_manager)
  issues[:unrecognized] = find_unrecognized_validation_headers(headers, recognised)
  issues[:empty_columns] = find_empty_column_positions(headers, raw_csv)
  issues
end
97
+
98
# Extracts the hierarchy items from the parsed rows by delegating to
# `extract_validation_items` with the split patterns resolved from the
# mappings. The child split pattern falls back to `'|'` when none resolves;
# the parent pattern is passed through as resolved.
#
# @return [Object] whatever `extract_validation_items` returns
def extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
  parent_pattern = resolve_parent_split_pattern(mappings)
  child_pattern = resolve_children_split_pattern(mappings) || '|'

  extract_validation_items(
    csv_data, all_ids, find_record,
    parent_split_pattern: parent_pattern,
    child_split_pattern: child_pattern
  )
end
105
+
106
# Runs every validator registered in `Bulkrax.csv_row_validators` against
# each row, sharing one context hash across all rows and validators.
#
# Validators are called with `(record, row_number, context)`; row numbers
# start at 2 because they are 1-indexed and the header occupies row 1.
#
# @param notices [Array] list validators may append notices to
# @return [Array] the context's `:errors` after all validators have run
def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices = []) # rubocop:disable Metrics/ParameterLists
  shared_context = {
    errors: [],
    warnings: [],
    seen_ids: {},
    all_ids: all_ids,
    source_identifier: source_id_key.to_s,
    parent_split_pattern: resolve_parent_split_pattern(mappings),
    child_split_pattern: resolve_children_split_pattern(mappings),
    parent_column: resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents'),
    children_column: resolve_relationship_column(mappings, 'related_children_field_mapping', 'children'),
    mappings: mappings,
    field_metadata: field_metadata,
    find_record_by_source_identifier: find_record,
    relationship_graph: build_relationship_graph(csv_data, mappings),
    notices: notices
  }

  csv_data.each.with_index(2) do |record, row_number|
    Bulkrax.csv_row_validators.each do |validator|
      validator.call(record, row_number, shared_context)
    end
  end

  shared_context[:errors]
end
130
+ end
131
+ end
132
+ end
133
+ end