bulkrax 9.3.5 → 9.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  12. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  22. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  23. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  24. data/app/controllers/bulkrax/importers_controller.rb +28 -31
  25. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  26. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +217 -0
  27. data/app/factories/bulkrax/object_factory.rb +3 -2
  28. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  29. data/app/jobs/bulkrax/importer_job.rb +11 -4
  30. data/app/models/bulkrax/csv_entry.rb +27 -7
  31. data/app/models/bulkrax/entry.rb +4 -0
  32. data/app/models/bulkrax/importer.rb +31 -1
  33. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  34. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  35. data/app/parsers/bulkrax/application_parser.rb +31 -5
  36. data/app/parsers/bulkrax/csv_parser.rb +42 -10
  37. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  38. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  41. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  42. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  43. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  44. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  45. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  46. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  47. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  48. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  49. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  50. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  51. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  52. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  53. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  54. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  55. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  56. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  57. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  58. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  59. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  60. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  61. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  62. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  63. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  64. data/app/views/bulkrax/importers/index.html.erb +6 -1
  65. data/app/views/bulkrax/importers/new.html.erb +1 -1
  66. data/app/views/bulkrax/importers/show.html.erb +17 -1
  67. data/config/i18n-tasks.yml +195 -0
  68. data/config/locales/bulkrax.de.yml +504 -0
  69. data/config/locales/bulkrax.en.yml +459 -233
  70. data/config/locales/bulkrax.es.yml +504 -0
  71. data/config/locales/bulkrax.fr.yml +504 -0
  72. data/config/locales/bulkrax.it.yml +504 -0
  73. data/config/locales/bulkrax.pt-BR.yml +504 -0
  74. data/config/locales/bulkrax.zh.yml +503 -0
  75. data/config/routes.rb +10 -1
  76. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  77. data/lib/bulkrax/version.rb +1 -1
  78. data/lib/bulkrax.rb +31 -0
  79. metadata +55 -16
  80. data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
  81. data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
  82. data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
  83. data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
  84. data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
  85. data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
  86. data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
  87. data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
  88. data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
  89. data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
  90. data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
  91. data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
  92. data/app/services/bulkrax/sample_csv_service.rb +0 -78
  93. data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -4,7 +4,10 @@ module Bulkrax
4
4
  class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
5
5
  include ErroredEntries
6
6
  include ExportBehavior
7
+ include CsvParser::CsvTemplateGeneration
8
+ include CsvParser::CsvValidation
7
9
  attr_writer :collections, :file_sets, :works
10
+ attr_accessor :validation_mode
8
11
 
9
12
  def self.export_supported?
10
13
  true
@@ -14,12 +17,14 @@ module Bulkrax
14
17
  return @records if @records.present?
15
18
 
16
19
  file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
17
- # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
18
20
  csv_data = entry_class.read_data(file_for_import)
19
- importer.parser_fields['total'] = csv_data.count
20
- importer.save
21
+ unless validation_mode
22
+ importer.parser_fields['total'] = csv_data.count
23
+ importer.save
24
+ end
21
25
 
22
26
  @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
27
+ @records
23
28
  end
24
29
 
25
30
  # rubocop:disable Metrics/AbcSize
@@ -95,11 +100,11 @@ module Bulkrax
95
100
  def missing_elements(record)
96
101
  keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
97
102
  keys = []
98
- # Because we're persisting the mapping in the database, these are likely string keys.
99
- # However, there's no guarantee. So, we need to ensure that by running stringify.
100
- importerexporter.mapping.stringify_keys.map do |k, v|
101
- Array.wrap(v['from']).each do |vf|
102
- keys << k if keys_from_record.include?(vf)
103
+ mapping_values = importerexporter.mapping.stringify_keys
104
+ mapping_values.each do |k, v|
105
+ from_values = Array.wrap(v.is_a?(Hash) ? (v['from'] || v[:from]) : nil)
106
+ from_values.each do |vf|
107
+ keys << k if vf.present? && keys_from_record.include?(vf.to_s.strip)
103
108
  end
104
109
  end
105
110
  required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
@@ -360,8 +365,11 @@ module Bulkrax
360
365
  else
361
366
  Bulkrax.multi_value_element_split_on
362
367
  end
368
+ files_dir = path_to_files
369
+ raise StandardError, "Record references local files but no files directory could be resolved from the import path" if files_dir.nil?
370
+
363
371
  r[file_mapping].split(split_pattern).map do |f|
364
- file = File.join(path_to_files, f.tr(' ', '_'))
372
+ file = File.join(files_dir, f.strip.tr(' ', '_'))
365
373
  if File.exist?(file) # rubocop:disable Style/GuardClause
366
374
  file
367
375
  else
@@ -376,8 +384,11 @@ module Bulkrax
376
384
  filename = args.fetch(:filename, '')
377
385
 
378
386
  return @path_to_files if @path_to_files.present? && filename.blank?
387
+ # The zip file could be either the main import file, or a separate attachments zip file.
388
+ # We want to check for both of those before we determine the path to the files.
389
+ have_zip_file = zip? || (parser_fields['attachments_zip_path'] && zip_file?(parser_fields['attachments_zip_path']))
379
390
  @path_to_files = File.join(
380
- zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
391
+ have_zip_file ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
381
392
  )
382
393
 
383
394
  return @path_to_files if File.exist?(@path_to_files)
@@ -386,8 +397,29 @@ module Bulkrax
386
397
  File.join(importer_unzip_path, 'files', filename) if file? && zip?
387
398
  end
388
399
 
400
+ def unzip(file_to_unzip)
401
+ super
402
+ normalize_unzipped_files_structure(importer_unzip_path)
403
+ end
404
+
389
405
  private
390
406
 
407
+ # Ensure files extracted from a zip always land in a `files/` subdirectory
408
+ # regardless of how the zip was structured. If files were extracted directly
409
+ # into dest_dir (flat zip with no `files/` folder), move them into
410
+ # dest_dir/files/ so that path_to_files can reliably locate them.
411
+ def normalize_unzipped_files_structure(dest_dir)
412
+ flat_files = Dir.glob(File.join(dest_dir, '*')).select { |f| File.file?(f) && !f.end_with?('.csv') }
413
+ return if flat_files.empty?
414
+
415
+ files_dir = File.join(dest_dir, 'files')
416
+ FileUtils.mkdir_p(files_dir)
417
+ flat_files.each do |f|
418
+ dest = File.join(files_dir, File.basename(f))
419
+ FileUtils.mv(f, dest) unless File.exist?(dest)
420
+ end
421
+ end
422
+
391
423
  def unique_collection_identifier(collection_hash)
392
424
  entry_uid = collection_hash[source_identifier]
393
425
  entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvParser < ApplicationParser
5
+ module CsvTemplateGeneration
6
+ extend ActiveSupport::Concern
7
+
8
+ class_methods do
9
+ # Generate a CSV template for the specified models.
10
+ #
11
+ # @param models [Array<String>, String] Model names or 'all' for all available models
12
+ # @param output [String] Output format: 'file' or 'csv_string'
13
+ # @param admin_set_id [String, nil] Optional admin set ID for context
14
+ # @param args [Hash] Additional arguments passed to output method (e.g., file_path)
15
+ # @return [String] File path (for 'file' output) or CSV string (for 'csv_string' output)
16
+ def generate_template(models: [], output: 'file', admin_set_id: nil, **args)
17
+ raise NameError, "Hyrax is not defined" unless defined?(::Hyrax)
18
+ TemplateContext.new(models: models, admin_set_id: admin_set_id).send("to_#{output}", **args)
19
+ end
20
+ end
21
+
22
+ ##
23
+ # Holds state for a single template generation run.
24
+ # Provides the interface expected by CsvTemplate:: components.
25
+ class TemplateContext
26
+ attr_reader :mappings, :all_models, :admin_set_id, :field_analyzer, :mapping_manager
27
+
28
+ def initialize(models: nil, admin_set_id: nil)
29
+ @admin_set_id = admin_set_id
30
+ @mapping_manager = CsvTemplate::MappingManager.new
31
+ @mappings = @mapping_manager.mappings
32
+ @field_analyzer = CsvTemplate::FieldAnalyzer.new(@mappings, admin_set_id)
33
+ @all_models = CsvTemplate::ModelLoader.new(Array.wrap(models)).models
34
+ @csv_builder = CsvTemplate::CsvBuilder.new(self)
35
+ end
36
+
37
+ def to_file(file_path: nil)
38
+ file_path ||= CsvTemplate::FilePathGenerator.default_path(@admin_set_id)
39
+ @csv_builder.write_to_file(file_path)
40
+ file_path
41
+ end
42
+
43
+ def to_csv_string
44
+ @csv_builder.generate_string
45
+ end
46
+
47
+ def field_metadata_for_all_models
48
+ @field_metadata ||= @all_models.each_with_object({}) do |model, hash|
49
+ field_list = @field_analyzer.find_or_create_field_list_for(model_name: model)
50
+ hash[model] = {
51
+ properties: field_list.dig(model, "properties") || [],
52
+ required_terms: field_list.dig(model, "required_terms") || [],
53
+ controlled_vocab_terms: field_list.dig(model, "controlled_vocab_terms") || []
54
+ }
55
+ end
56
+ end
57
+
58
+ def valid_headers_for_models
59
+ @valid_headers ||= begin
60
+ column_builder = CsvTemplate::ColumnBuilder.new(self)
61
+ all_columns = column_builder.all_columns
62
+ all_columns - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
63
+ rescue StandardError => e
64
+ Rails.logger.error("Error building valid headers: #{e.message}")
65
+ standard_fields = %w[model source_identifier parent parents file]
66
+ model_fields = field_metadata_for_all_models.values.flat_map { |m| m[:properties] }
67
+ (standard_fields + model_fields).uniq
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvParser < ApplicationParser
5
+ module CsvValidation
6
+ extend ActiveSupport::Concern
7
+
8
+ included do
9
+ # Lightweight struct used to satisfy the CsvTemplate::ColumnBuilder
10
+ # interface without constructing a full template context.
11
+ ValidationContext = Struct.new(:mapping_manager, :field_analyzer, :all_models, :mappings, keyword_init: true)
12
+ end
13
+
14
+ class_methods do
15
+ include CsvValidationHelpers
16
+
17
+ # Validate a CSV (and optional zip) without a persisted Importer record.
18
+ #
19
+ # @param csv_file [File, ActionDispatch::Http::UploadedFile, String] path or file object
20
+ # @param zip_file [File, ActionDispatch::Http::UploadedFile, nil]
21
+ # @param admin_set_id [String, nil]
22
+ # @return [Hash] validation result compatible with the guided import UI
23
+ def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
24
+ raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer =
25
+ parse_csv_inputs(csv_file, admin_set_id)
26
+
27
+ all_ids = csv_data.map { |r| r[:source_identifier] }.compact.to_set
28
+ header_issues = check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer)
29
+ missing_required = header_issues[:missing_required]
30
+ notices, row_errors, file_validator, collections, works, file_sets =
31
+ run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id)
32
+
33
+ result = assemble_result(
34
+ headers: headers, missing_required: missing_required, header_issues: header_issues,
35
+ row_errors: row_errors, csv_data: csv_data, file_validator: file_validator,
36
+ collections: collections, works: works, file_sets: file_sets, notices: notices
37
+ )
38
+ apply_rights_statement_validation_override!(result, missing_required)
39
+ result[:raw_csv_data] = csv_data
40
+ result
41
+ end
42
+
43
+ private
44
+
45
+ # Builds notices, runs row validators, file validator, and hierarchy extraction.
46
+ # Returns [notices, row_errors, file_validator, collections, works, file_sets].
47
+ def run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id) # rubocop:disable Metrics/ParameterLists
48
+ find_record = build_find_record
49
+ notices = []
50
+ append_missing_source_id!(missing_required, headers, source_id_key, csv_data.map { |r| r[:model] }.compact.uniq)
51
+ append_missing_model_notice!(notices, headers, csv_data)
52
+
53
+ row_errors = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices)
54
+ file_validator = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
55
+ collections, works, file_sets = extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
56
+ [notices, row_errors, file_validator, collections, works, file_sets]
57
+ end
58
+
59
+ # Reads the CSV, resolves mappings, parses rows, and builds field metadata.
60
+ # Returns the values needed by all subsequent validation steps.
61
+ def parse_csv_inputs(csv_file, admin_set_id)
62
+ # Use CsvEntry.read_data so header normalisation is identical to a real import.
63
+ raw_csv = CsvEntry.read_data(csv_file)
64
+ headers = raw_csv.headers.map(&:to_s)
65
+
66
+ mapping_manager = CsvTemplate::MappingManager.new
67
+ mappings = mapping_manager.mappings
68
+
69
+ source_id_key = resolve_validation_key(mapping_manager, flag: 'source_identifier', default: :source_identifier)
70
+ parent_key = resolve_validation_key(mapping_manager, flag: 'related_parents_field_mapping', default: :parents)
71
+ children_key = resolve_validation_key(mapping_manager, flag: 'related_children_field_mapping', default: :children)
72
+ file_key = resolve_validation_key(mapping_manager, key: 'file', default: :file)
73
+
74
+ csv_data = parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
75
+ all_models = csv_data.map { |r| r[:model] }.compact.uniq
76
+ all_models |= [Bulkrax.default_work_type] if Bulkrax.default_work_type.present?
77
+ field_analyzer = CsvTemplate::FieldAnalyzer.new(mappings, admin_set_id)
78
+ field_metadata = build_validation_field_metadata(all_models, field_analyzer)
79
+
80
+ [raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer]
81
+ end
82
+
83
+ # Runs all header-level checks and returns a hash of results.
84
+ def check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer) # rubocop:disable Metrics/ParameterLists
85
+ all_models = field_metadata.keys
86
+ valid_headers = build_valid_validation_headers(mapping_manager, field_analyzer,
87
+ all_models, mappings, field_metadata)
88
+ suffixed = headers.select { |h| h.match?(/_\d+\z/) }
89
+ valid_headers = (valid_headers + suffixed).uniq
90
+
91
+ {
92
+ missing_required: find_missing_required_headers(headers, field_metadata, mapping_manager),
93
+ unrecognized: find_unrecognized_validation_headers(headers, valid_headers),
94
+ empty_columns: find_empty_column_positions(headers, raw_csv)
95
+ }
96
+ end
97
+
98
+ def extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
99
+ extract_validation_items(
100
+ csv_data, all_ids, find_record,
101
+ parent_split_pattern: resolve_parent_split_pattern(mappings),
102
+ child_split_pattern: resolve_children_split_pattern(mappings) || '|'
103
+ )
104
+ end
105
+
106
+ # Runs all registered row validators and returns the collected errors.
107
+ def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices = []) # rubocop:disable Metrics/ParameterLists
108
+ context = {
109
+ errors: [],
110
+ warnings: [],
111
+ seen_ids: {},
112
+ all_ids: all_ids,
113
+ source_identifier: source_id_key.to_s,
114
+ parent_split_pattern: resolve_parent_split_pattern(mappings),
115
+ child_split_pattern: resolve_children_split_pattern(mappings),
116
+ parent_column: resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents'),
117
+ children_column: resolve_relationship_column(mappings, 'related_children_field_mapping', 'children'),
118
+ mappings: mappings,
119
+ field_metadata: field_metadata,
120
+ find_record_by_source_identifier: find_record,
121
+ relationship_graph: build_relationship_graph(csv_data, mappings),
122
+ notices: notices
123
+ }
124
+ csv_data.each_with_index do |record, index|
125
+ row_number = index + 2 # 1-indexed, plus header row
126
+ Bulkrax.csv_row_validators.each { |v| v.call(record, row_number, context) }
127
+ end
128
+ context[:errors]
129
+ end
130
+ end
131
+ end
132
+ end
133
+ end
@@ -0,0 +1,282 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class CsvParser < ApplicationParser
5
+ # Private helper methods for CsvValidation.
6
+ module CsvValidationHelpers # rubocop:disable Metrics/ModuleLength
7
+ include CsvValidationHierarchy
8
+
9
+ # Resolve a symbol key from mappings for use as a record hash key.
10
+ # Returns a Symbol matching the parser's symbol-keyed record hash.
11
+ def resolve_validation_key(mapping_manager, key: nil, flag: nil, default:)
12
+ options = mapping_manager.resolve_column_name(key: key, flag: flag, default: default.to_s)
13
+ options.first&.to_sym || default
14
+ end
15
+
16
+ # Parse rows from a CsvEntry.read_data result into the canonical record shape.
17
+ # CsvEntry.read_data returns CSV::Row objects with symbol headers; blank rows
18
+ # are already filtered by CsvWrapper.
19
+ def parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
20
+ raw_csv.map do |row|
21
+ # CSV::Row#to_h converts symbol headers → string-keyed hash
22
+ row_hash = row.to_h.transform_keys(&:to_s)
23
+ {
24
+ source_identifier: row[source_id_key],
25
+ model: row[:model],
26
+ parent: row[parent_key],
27
+ children: row[children_key],
28
+ file: row[file_key],
29
+ raw_row: row_hash
30
+ }
31
+ end
32
+ rescue StandardError => e
33
+ Rails.logger.error("CsvParser.validate_csv: error parsing rows – #{e.message}")
34
+ []
35
+ end
36
+
37
+ def build_validation_field_metadata(all_models, field_analyzer)
38
+ all_models.each_with_object({}) do |model, hash|
39
+ field_list = field_analyzer.find_or_create_field_list_for(model_name: model)
40
+ hash[model] = {
41
+ properties: field_list.dig(model, 'properties') || [],
42
+ required_terms: field_list.dig(model, 'required_terms') || [],
43
+ controlled_vocab_terms: field_list.dig(model, 'controlled_vocab_terms') || []
44
+ }
45
+ end
46
+ end
47
+
48
+ def build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
49
+ svc = ValidationContext.new(
50
+ mapping_manager: mapping_manager,
51
+ field_analyzer: field_analyzer,
52
+ all_models: all_models,
53
+ mappings: mappings
54
+ )
55
+ all_cols = CsvTemplate::ColumnBuilder.new(svc).all_columns
56
+ all_cols - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
57
+ rescue StandardError => e
58
+ Rails.logger.error("CsvParser.validate_csv: error building valid headers – #{e.message}")
59
+ standard = %w[model source_identifier parents children file]
60
+ model_fields = field_metadata.values.flat_map { |m| m[:properties] }
61
+ .map { |prop| mapping_manager.key_to_mapped_column(prop) }
62
+ (standard + model_fields).uniq
63
+ end
64
+
65
+ def find_missing_required_headers(headers, field_metadata, mapping_manager)
66
+ csv_keys = headers.map { |h| mapping_manager.mapped_to_key(h).sub(/_\d+\z/, '') }.uniq
67
+ missing = []
68
+ field_metadata.each do |model, meta|
69
+ (meta[:required_terms] || []).each do |field|
70
+ missing << { model: model, field: field } unless csv_keys.include?(field)
71
+ end
72
+ end
73
+ missing.uniq
74
+ end
75
+
76
+ def find_unrecognized_validation_headers(headers, valid_headers)
77
+ checker = DidYouMean::SpellChecker.new(dictionary: valid_headers)
78
+ headers
79
+ .reject { |h| h.blank? || valid_headers.include?(h) || valid_headers.include?(h.sub(/_\d+\z/, '')) }
80
+ .index_with { |h| checker.correct(h).first }
81
+ end
82
+
83
+ def find_empty_column_positions(headers, raw_csv)
84
+ headers.each_with_index.filter_map do |h, i|
85
+ next if h.present?
86
+ has_data = raw_csv.any? { |row| row.fields[i].present? }
87
+ i + 1 if has_data
88
+ end
89
+ end
90
+
91
+ # Adds a missing source_identifier entry to missing_required when the column
92
+ # is absent and fill_in_blank_source_identifiers is not configured.
93
+ def append_missing_source_id!(missing_required, headers, source_id_key, all_models)
94
+ return if headers.map(&:to_s).include?(source_id_key.to_s)
95
+ return if Bulkrax.fill_in_blank_source_identifiers.present?
96
+
97
+ all_models.each { |model| missing_required << { model: model, field: source_id_key.to_s } }
98
+ end
99
+
100
+ # Adds a file-level notice when the model column is absent or every row has a blank
101
+ # model value, indicating that the default work type will be used for all rows.
102
+ # When this notice is present the per-row default_work_type_used warnings are
103
+ # suppressed in the formatter — no need to repeat the same message for every row.
104
+ def append_missing_model_notice!(notices, headers, csv_data)
105
+ default_model = Bulkrax.default_work_type
106
+ return if default_model.blank?
107
+
108
+ model_column_present = headers.map(&:to_s).include?('model')
109
+ all_rows_blank = model_column_present && csv_data.all? { |r| r[:model].blank? }
110
+
111
+ return if model_column_present && !all_rows_blank
112
+
113
+ key_suffix = all_rows_blank ? 'column_empty' : 'column_missing'
114
+ base_key = 'bulkrax.importer.guided_import.validation.default_work_type_notice'
115
+ notices << {
116
+ field: 'model',
117
+ default_work_type: default_model,
118
+ message: I18n.t("#{base_key}.message_#{key_suffix}", default_work_type: default_model),
119
+ suggestion: I18n.t("#{base_key}.suggestion_#{key_suffix}")
120
+ }
121
+ end
122
+
123
+ def apply_rights_statement_validation_override!(result, missing_required)
124
+ only_rights = missing_required.present? &&
125
+ missing_required.all? { |h| h[:field].to_s == 'rights_statement' }
126
+ return unless only_rights && !result[:isValid]
127
+ return if result[:headers].blank?
128
+ return if result[:missingFiles]&.any?
129
+
130
+ result[:isValid] = true
131
+ result[:hasWarnings] = true
132
+ end
133
+
134
+ # Assembles the final result hash returned to the guided import UI.
135
+ def assemble_result(headers:, missing_required:, header_issues:, row_errors:, csv_data:, file_validator:, collections:, works:, file_sets:, notices: []) # rubocop:disable Metrics/ParameterLists
136
+ row_error_entries = row_errors.select { |e| e[:severity] == 'error' }
137
+ row_warning_entries = row_errors.select { |e| e[:severity] == 'warning' }
138
+ has_errors = missing_required.any? || headers.blank? || csv_data.empty? ||
139
+ file_validator.missing_files.any? || row_error_entries.any?
140
+ has_warnings = header_issues[:unrecognized].any? || header_issues[:empty_columns].any? ||
141
+ file_validator.possible_missing_files? || row_warning_entries.any? || notices.any?
142
+
143
+ {
144
+ headers: headers,
145
+ missingRequired: missing_required,
146
+ notices: notices,
147
+ unrecognized: header_issues[:unrecognized],
148
+ emptyColumns: header_issues[:empty_columns],
149
+ rowCount: csv_data.length,
150
+ isValid: !has_errors,
151
+ hasWarnings: has_warnings,
152
+ rowErrors: row_errors,
153
+ collections: collections,
154
+ works: works,
155
+ fileSets: file_sets,
156
+ totalItems: csv_data.length,
157
+ fileReferences: file_validator.count_references,
158
+ missingFiles: file_validator.missing_files,
159
+ foundFiles: file_validator.found_files_count,
160
+ zipIncluded: file_validator.zip_included?
161
+ }
162
+ end
163
+
164
+ # Builds the find_record lambda used by row validators and hierarchy extraction.
165
+ def build_find_record
166
+ all_mappings = Bulkrax.field_mappings['Bulkrax::CsvParser'] || {}
167
+ work_identifier = all_mappings.find { |_k, v| v['source_identifier'] == true }&.first || 'source'
168
+ work_identifier_search = Array.wrap(all_mappings.dig(work_identifier, 'search_field')).first&.to_s ||
169
+ "#{work_identifier}_sim"
170
+ ->(id) { find_record_by_source_identifier(id, work_identifier, work_identifier_search) }
171
+ end
172
+
173
+ # Attempt to locate an existing repository record by its identifier.
174
+ # The identifier may be a repository object ID or a source_identifier property value.
175
+ # Checks the repository directly (by ID, then by Solr property search) — a Bulkrax
176
+ # Entry record alone is not sufficient, as the object may never have been created.
177
+ #
178
+ # @param identifier [String]
179
+ # @param work_identifier [String] the source_identifier property name (e.g. "source")
180
+ # @param work_identifier_search [String] the Solr field for source_identifier (e.g. "source_sim")
181
+ # @return [Boolean] true if a matching repository object is found
182
+ def find_record_by_source_identifier(identifier, work_identifier, work_identifier_search)
183
+ return false if identifier.blank?
184
+
185
+ return true if Bulkrax.object_factory.find_or_nil(identifier).present?
186
+
187
+ [Bulkrax.collection_model_class, *Bulkrax.curation_concerns].any? do |klass|
188
+ Bulkrax.object_factory.search_by_property(
189
+ value: identifier,
190
+ klass: klass,
191
+ search_field: work_identifier_search,
192
+ name_field: work_identifier
193
+ ).present?
194
+ end
195
+ rescue StandardError
196
+ false
197
+ end
198
+
199
+ # Returns the raw CSV column name (String) for a relationship field.
200
+ # Looks for the mapping entry flagged with +flag+ and returns its first +from+ value,
201
+ # falling back to +default+ when none is found.
202
+ def resolve_relationship_column(mappings, flag, default)
203
+ entry = mappings.find { |_k, v| v.is_a?(Hash) && v[flag] }
204
+ entry&.last&.dig('from')&.first || default
205
+ end
206
+
207
+ def resolve_parent_split_pattern(mappings)
208
+ split_val = mappings.dig('parents', 'split') || mappings.dig(:parents, :split)
209
+ return nil if split_val.blank?
210
+ return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true
211
+
212
+ split_val
213
+ end
214
+
215
+ def resolve_children_split_pattern(mappings)
216
+ split_val = mappings.dig('children', 'split') || mappings.dig(:children, :split)
217
+ return nil if split_val.blank?
218
+ return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true
219
+
220
+ split_val
221
+ end
222
+
223
+ # Builds a graph of { source_identifier => [parent_ids] } from all CSV records.
224
+ # Used by CircularReference validator to detect cycles across the whole CSV.
225
+ #
226
+ # Parent edges are collected from both directions:
227
+ # - explicit parent declarations (parents / parents_N columns)
228
+ # - inverted child declarations (children / children_N columns), mirroring
229
+ # the normalisation done in importers_stepper.js#normalizeRelationships
230
+ def build_relationship_graph(csv_data, mappings)
231
+ parent_column = resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents')
232
+ children_column = resolve_relationship_column(mappings, 'related_children_field_mapping', 'children')
233
+ parent_suffix = /\A#{Regexp.escape(parent_column)}_\d+\z/
234
+ children_suffix = /\A#{Regexp.escape(children_column)}_\d+\z/
235
+
236
+ graph = build_parent_edges(csv_data, parent_suffix, resolve_parent_split_pattern(mappings))
237
+ invert_child_edges(graph, csv_data, children_suffix, resolve_children_split_pattern(mappings))
238
+ graph
239
+ end
240
+
241
+ def build_parent_edges(csv_data, suffix_pattern, split_pattern)
242
+ csv_data.each_with_object({}) do |record, graph|
243
+ id = record[:source_identifier]
244
+ next if id.blank?
245
+
246
+ base_ids = split_or_single(record[:parent], split_pattern)
247
+ suffix_ids = suffixed_values(record[:raw_row], suffix_pattern)
248
+ graph[id] = (base_ids + suffix_ids).uniq
249
+ end
250
+ end
251
+
252
+ def invert_child_edges(graph, csv_data, suffix_pattern, split_pattern)
253
+ csv_data.each do |record|
254
+ id = record[:source_identifier]
255
+ next if id.blank?
256
+
257
+ child_ids = split_or_single(record[:children], split_pattern) +
258
+ suffixed_values(record[:raw_row], suffix_pattern)
259
+ child_ids.each do |child_id|
260
+ graph[child_id] ||= []
261
+ graph[child_id] << id unless graph[child_id].include?(id)
262
+ end
263
+ end
264
+ end
265
+
266
+ def split_or_single(value, split_pattern)
267
+ if split_pattern
268
+ value.to_s.split(split_pattern).map(&:strip).reject(&:blank?)
269
+ elsif value.present?
270
+ [value.to_s.strip]
271
+ else
272
+ []
273
+ end
274
+ end
275
+
276
+ def suffixed_values(raw_row, suffix_pattern)
277
+ raw_row.select { |k, _| k.to_s.match?(suffix_pattern) }
278
+ .values.map(&:to_s).map(&:strip).reject(&:blank?)
279
+ end
280
+ end
281
+ end
282
+ end