bulkrax 9.3.4 → 9.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/import_export.scss +9 -2
  12. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  22. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  23. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  24. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  25. data/app/controllers/bulkrax/importers_controller.rb +34 -28
  26. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  27. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +217 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/export_work_job.rb +1 -3
  31. data/app/jobs/bulkrax/importer_job.rb +11 -4
  32. data/app/models/bulkrax/csv_entry.rb +27 -7
  33. data/app/models/bulkrax/entry.rb +4 -0
  34. data/app/models/bulkrax/importer.rb +31 -1
  35. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  36. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  37. data/app/parsers/bulkrax/application_parser.rb +31 -5
  38. data/app/parsers/bulkrax/csv_parser.rb +42 -10
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +1 -1
  66. data/app/views/bulkrax/entries/_raw_metadata.html.erb +1 -1
  67. data/app/views/bulkrax/entries/show.html.erb +6 -6
  68. data/app/views/bulkrax/exporters/_form.html.erb +19 -43
  69. data/app/views/bulkrax/exporters/edit.html.erb +2 -2
  70. data/app/views/bulkrax/exporters/index.html.erb +5 -5
  71. data/app/views/bulkrax/exporters/new.html.erb +3 -5
  72. data/app/views/bulkrax/exporters/show.html.erb +3 -3
  73. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  74. data/app/views/bulkrax/importers/_bagit_fields.html.erb +9 -9
  75. data/app/views/bulkrax/importers/_browse_everything.html.erb +1 -1
  76. data/app/views/bulkrax/importers/_csv_fields.html.erb +11 -11
  77. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +23 -23
  78. data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +2 -2
  79. data/app/views/bulkrax/importers/_file_uploader.html.erb +3 -3
  80. data/app/views/bulkrax/importers/_form.html.erb +4 -5
  81. data/app/views/bulkrax/importers/_oai_fields.html.erb +8 -18
  82. data/app/views/bulkrax/importers/_xml_fields.html.erb +13 -13
  83. data/app/views/bulkrax/importers/edit.html.erb +2 -2
  84. data/app/views/bulkrax/importers/index.html.erb +19 -14
  85. data/app/views/bulkrax/importers/new.html.erb +10 -9
  86. data/app/views/bulkrax/importers/show.html.erb +23 -7
  87. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +6 -6
  88. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +11 -11
  89. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +3 -3
  90. data/config/i18n-tasks.yml +195 -0
  91. data/config/locales/bulkrax.de.yml +504 -0
  92. data/config/locales/bulkrax.en.yml +487 -28
  93. data/config/locales/bulkrax.es.yml +504 -0
  94. data/config/locales/bulkrax.fr.yml +504 -0
  95. data/config/locales/bulkrax.it.yml +504 -0
  96. data/config/locales/bulkrax.pt-BR.yml +504 -0
  97. data/config/locales/bulkrax.zh.yml +503 -0
  98. data/config/routes.rb +10 -0
  99. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  100. data/lib/bulkrax/version.rb +1 -1
  101. data/lib/bulkrax.rb +31 -3
  102. data/lib/tasks/bulkrax_tasks.rake +0 -102
  103. metadata +55 -3
  104. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ module CsvTemplate
5
# Inspects a model's schema to report which fields are required and which
# are backed by a controlled vocabulary.
class SchemaAnalyzer
  # @param klass [Class] model class; its schema is only read when it responds to `schema`
  # @param admin_set_id [String, nil] when present (and Hyrax exposes `schema_for`)
  #   the schema is requested for that admin set rather than read from the class
  def initialize(klass:, admin_set_id: nil)
    @klass = klass
    @admin_set_id = admin_set_id
    @schema = load_schema
  end

  # @return [Array<String>] names of schema fields whose `meta["form"]["required"]`
  #   is exactly true; empty when no schema was loaded or reading it raises
  def required_terms
    return [] if @schema.blank?

    required_fields = @schema.select { |field| required_in_form?(field) }
    required_fields.map { |field| field.name.to_s }
  rescue StandardError
    []
  end

  # @return [Array<String>] controlled-vocabulary field names declared in the
  #   schema; when the schema declares none, falls back to the names derived
  #   from the Questioning Authority local registry. Empty on any error.
  def controlled_vocab_terms
    return [] unless @schema

    declared = extract_controlled_properties
    return registered_controlled_vocab_fields if declared.empty?

    declared
  rescue StandardError
    []
  end

  private

  # Resolves the schema once at construction time. Any failure yields nil so
  # the public readers can answer with an empty list instead of raising.
  def load_schema
    return nil unless @klass.respond_to?(:schema)
    return Hyrax.schema_for(klass: @klass, admin_set_id: @admin_set_id) if admin_set_schema_available?

    @klass.new.singleton_class.schema || @klass.schema
  rescue StandardError
    nil
  end

  # True only when an admin set id was given and Hyrax is loaded with `schema_for`.
  def admin_set_schema_available?
    @admin_set_id.present? && defined?(Hyrax) && Hyrax.respond_to?(:schema_for)
  end

  # A field counts as required when its form metadata is a Hash flagging it so.
  def required_in_form?(field)
    return false unless field.respond_to?(:meta)

    form = field.meta["form"]
    form.is_a?(Hash) && form["required"] == true
  end

  # Names of properties whose `controlled_values.sources` metadata is set to
  # something other than the 'null' placeholder.
  def extract_controlled_properties
    return [] unless @schema

    @schema.filter_map do |property|
      property.name.to_s if controlled?(property)
    end
  end

  def controlled?(property)
    return false unless property.respond_to?(:meta)

    sources = property.meta&.dig('controlled_values', 'sources')
    !(sources.nil? || sources == ['null'] || sources == 'null')
  end

  # Singularized registry keys for every file-based local authority.
  def registered_controlled_vocab_fields
    qa_registry.filter_map do |name, authority|
      next unless authority.klass == Qa::Authorities::Local::FileBasedAuthority

      name.singularize
    end
  end

  # Memoized view of the registry's internal `@hash` (read via reflection).
  def qa_registry
    @qa_registry ||= Qa::Authorities::Local.registry.instance_variable_get('@hash')
  end
end
69
+ end
70
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ module CsvTemplate
5
# Turns a field mapping's `split` setting into a human-readable sentence
# naming the delimiter characters that separate multiple values.
class SplitFormatter
  # @param split_value [nil, true, String, Object] the mapping's split setting:
  #   nil means the property does not split, true means "use
  #   Bulkrax.multi_value_element_split_on", a String is a regex-like pattern
  # @return [String, Object] a description sentence; any other kind of value
  #   is returned unchanged
  def format(split_value)
    return "Property does not split." if split_value.nil?

    if split_value == true
      parse_pattern(Bulkrax.multi_value_element_split_on.source)
    elsif split_value.is_a?(String)
      parse_pattern(split_value)
    else
      split_value
    end
  end

  private

  def parse_pattern(pattern)
    format_message(extract_characters(pattern))
  end

  # Picks the delimiter characters out of a pattern: the contents of the
  # first `[...]` character class if there is one, otherwise the character
  # following the first backslash, otherwise the pattern itself.
  def extract_characters(pattern)
    if (match = pattern.match(/\[([^\]]+)\]/))
      match[1]
    elsif (single = pattern.match(/\\(.)/))
      single[1]
    else
      pattern
    end
  end

  # Lists each delimiter character, e.g. ";:|" => ";, :, or |".
  # The leading items are joined with ", " so the list reads consistently
  # with the ", or" placed before the final item.
  def format_message(chars)
    formatted = chars.chars.then do |c|
      c.length > 1 ? "#{c[0..-2].join(', ')}, or #{c.last}" : c.first
    end
    "Split multiple values with #{formatted}"
  end
end
43
+ end
44
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ module CsvTemplate
5
# Decides what text goes into a template cell for a given column and model:
# 'Required', 'Optional', 'Unknown', a model class name, or nothing at all.
class ValueDeterminer
  # @param service [Object] template service; this class reads its
  #   `mapping_manager` and `mappings`
  def initialize(service)
    @service = service
    @column_builder = CsvTemplate::ColumnBuilder.new(service)
  end

  # @param column [String] CSV column header
  # @param model_name [String] key into field_list
  # @param field_list [Hash] per-model hash with "properties" and 'required_terms'
  # @return [String, nil] cell value, or nil when the column does not apply
  def determine_value(column, model_name, field_list)
    key = @service.mapping_manager.mapped_to_key(column)
    required_terms = field_list.dig(model_name, 'required_terms')
    properties = field_list.dig(model_name, "properties")

    return mark_required_or_optional(key, required_terms) if properties&.include?(key)
    return unless special_column?(column, key)

    special_value(column, key, model_name, required_terms)
  end

  private

  # Columns that get a value even though they are not schema properties:
  # model/work_type, visibility, source_identifier, rights_statement,
  # relationship columns and file columns.
  def special_column?(column, key)
    visibility_cols = CsvTemplate::ColumnDescriptor.new.send(:extract_column_names, :visibility)

    return true if key.in?(['model', 'work_type']) || column.in?(visibility_cols)
    return true if column == 'source_identifier' || column == 'rights_statement'

    relationship_column?(column) || file_column?(column)
  end

  def special_value(column, key, model_name, required_terms)
    if key.in?(['model', 'work_type'])
      CsvTemplate::ModelLoader.determine_klass_for(model_name).to_s
    elsif column == 'source_identifier'
      'Required'
    elsif column == 'rights_statement'
      mark_required_or_optional(key, required_terms)
    elsif file_column?(column) && model_name.in?([Bulkrax.collection_model_class].compact.map(&:to_s))
      # File columns are left empty for the collection model.
      nil
    else
      'Optional'
    end
  end

  # 'Unknown' when no required-terms list is available for the model.
  def mark_required_or_optional(field, required_terms)
    return 'Unknown' unless required_terms
    return 'Required' if required_terms.include?(field)

    'Optional'
  end

  def relationship_column?(column)
    children = @service.mapping_manager.find_by_flag("related_children_field_mapping", 'children')
    parents = @service.mapping_manager.find_by_flag("related_parents_field_mapping", 'parents')
    column.in?([children, parents])
  end

  # True when the column is the first "from" header mapped to any of the
  # file-related properties listed in ColumnDescriptor::COLUMN_DESCRIPTIONS.
  def file_column?(column)
    file_cols = []
    CsvTemplate::ColumnDescriptor::COLUMN_DESCRIPTIONS[:files].each do |property_hash|
      property_hash.each_key do |key|
        header = @service.mappings.dig(key, "from")&.first
        file_cols << header if header
      end
    end
    column.in?(file_cols)
  end
end
67
+ end
68
+ end
@@ -0,0 +1,347 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ ##
5
+ # Formats validation data from CsvParser.validate_csv into the structure
6
+ # expected by the importers_stepper.js frontend component.
7
+ #
8
+ # This service acts as a presentation layer, transforming raw validation data
9
+ # into a structured response with proper status messages, severity levels,
10
+ # and formatted issue lists that the JavaScript can render correctly.
11
+ #
12
+ # @example Basic usage
13
+ # validation_data = CsvParser.validate_csv(csv_file: file, zip_file: zip)
14
+ # formatted_response = StepperResponseFormatter.format(validation_data)
15
+ # render json: formatted_response
16
+ #
17
+ # @example Error response
18
+ # error_response = StepperResponseFormatter.error(message: "Unable to process files")
19
+ # render json: error_response, status: :ok
20
+ #
21
+ # rubocop:disable Metrics/ClassLength
22
class StepperResponseFormatter
  # Format validation data for the stepper frontend
  #
  # @param data [Hash] Raw validation data. Keys read by this class:
  #   - headers: Array of CSV column names
  #   - missingRequired: Array of hashes of missing required fields by model (e.g. {model: 'GenericWork', field: 'source_identifier'})
  #   - unrecognized: Hash of unrecognized column name => suggested name (or nil)
  #   - emptyColumns: Array of columns that have no header
  #   - notices: Array of hashes with :field, :message and optional :suggestion
  #   - rowErrors: Array of hashes with :row, :column, :severity, :message and optional :suggestion
  #   - rowCount: Total number of data rows
  #   - isValid: Boolean indicating validation success
  #   - hasWarnings: Boolean indicating presence of warnings
  #   - collections, works, fileSets, totalItems: passed through unchanged
  #   - fileReferences: Count of file references
  #   - missingFiles: Array of missing file names
  #   - foundFiles: Count of found files
  #   - zipIncluded: Boolean indicating if zip was provided
  # @return [Hash] Formatted response ready for JSON rendering
  def self.format(data)
    new(data).format
  end

  # Generate an error response for validation failures
  #
  # @param message [String] Error message to display
  # @param summary [String] Optional summary (defaults to message)
  # @return [Hash] Error response structure
  def self.error(message: I18n.t('bulkrax.importer.guided_import.validation.unable_to_process'), summary: nil)
    {
      totalItems: 0,
      collections: [],
      works: [],
      fileSets: [],
      isValid: false,
      hasWarnings: false,
      messages: {
        validationStatus: {
          severity: 'error',
          icon: 'fa-times-circle',
          title: I18n.t('bulkrax.importer.guided_import.validation.failed'),
          summary: summary || message,
          details: I18n.t('bulkrax.importer.guided_import.validation.critical_errors'),
          defaultOpen: true
        },
        issues: []
      }
    }
  end

  # @param data [Hash] raw (or already formatted) validation data
  def initialize(data)
    @data = data
  end

  # Format the validation data with messages structure.
  # If data already contains a messages structure, it is returned as-is.
  #
  # @return [Hash] Complete formatted response
  def format
    return @data if already_formatted?

    {
      headers: @data[:headers],
      missingRequired: @data[:missingRequired],
      unrecognized: @data[:unrecognized],
      rowCount: @data[:rowCount],
      isValid: @data[:isValid],
      hasWarnings: @data[:hasWarnings],
      rowErrors: @data[:rowErrors],
      collections: @data[:collections],
      works: @data[:works],
      fileSets: @data[:fileSets],
      totalItems: @data[:totalItems],
      fileReferences: @data[:fileReferences],
      missingFiles: @data[:missingFiles],
      foundFiles: @data[:foundFiles],
      zipIncluded: @data[:zipIncluded],
      messages: build_messages
    }
  end

  private

  # Check if data is already formatted with messages structure
  #
  # @return [Boolean] true if data already has a messages hash with :validationStatus
  def already_formatted?
    @data.key?(:messages) &&
      @data[:messages].is_a?(Hash) &&
      @data[:messages].key?(:validationStatus)
  end

  # Build the messages structure with validationStatus and issues.
  # Each issue is only added when its source data is present; builders that
  # return nil (nothing left after filtering) are dropped by `compact`.
  #
  # @return [Hash] Messages structure for frontend
  def build_messages
    issues = []
    issues << missing_required_issue if @data[:missingRequired]&.any?
    issues << notices_issue if @data[:notices]&.any?
    issues << unrecognized_fields_issue if @data[:unrecognized]&.any? || @data[:emptyColumns]&.any?
    issues << file_references_issue if @data[:fileReferences]&.positive?
    issues << row_errors_issue if @data[:rowErrors]&.any? { |e| e[:severity] == 'error' }
    issues << row_warnings_issue if @data[:rowErrors]&.any? { |e| e[:severity] == 'warning' }

    {
      validationStatus: validation_status,
      issues: issues.compact
    }
  end

  # Generate the main validation status object
  #
  # @return [Hash] Validation status with severity, icon, title, summary, details
  def validation_status
    severity, icon, title = determine_severity_level
    headers = Array(@data[:headers])
    # :unrecognized is optional (build_messages guards it with &.), so a
    # missing value must be treated as "no unrecognized columns" here too.
    unrecognized_names = (@data[:unrecognized] || {}).keys
    recognized = headers.reject(&:blank?) - unrecognized_names

    {
      severity: severity,
      icon: icon,
      title: title,
      summary: I18n.t('bulkrax.importer.guided_import.validation.columns_detected', columns: headers.length, records: @data[:rowCount]),
      details: details_message(recognized),
      defaultOpen: true
    }
  end

  # Determine severity level based on validation state
  #
  # @return [Array<String>] [severity, icon, title]
  def determine_severity_level
    if !@data[:isValid]
      ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.failed')]
    elsif @data[:hasWarnings]
      ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.passed_warnings')]
    else
      ['success', 'fa-check-circle', I18n.t('bulkrax.importer.guided_import.validation.passed')]
    end
  end

  # Generate details message for validation status
  #
  # @param recognized [Array<String>] List of recognized field names
  # @return [String] Details message
  def details_message(recognized)
    if @data[:isValid]
      I18n.t('bulkrax.importer.guided_import.validation.recognized_fields', fields: recognized.join(', '))
    else
      I18n.t('bulkrax.importer.guided_import.validation.critical_errors')
    end
  end

  # Format missing required fields issue. When the only missing field is
  # rights_statement the issue is downgraded from an error to a warning.
  #
  # @return [Hash] Missing required fields issue structure
  def missing_required_issue
    missing = @data[:missingRequired]
    only_rights_statement = missing&.all? { |h| h[:field].to_s == 'rights_statement' }

    if only_rights_statement
      severity = 'warning'
      icon = 'fa-exclamation-triangle'
      description = I18n.t('bulkrax.importer.guided_import.validation.missing_rights_desc')
    else
      severity = 'error'
      icon = 'fa-times-circle'
      description = I18n.t('bulkrax.importer.guided_import.validation.missing_required_desc')
    end

    {
      type: 'missing_required_fields',
      severity: severity,
      icon: icon,
      title: I18n.t('bulkrax.importer.guided_import.validation.missing_required_title'),
      count: missing.length,
      description: description,
      items: missing,
      defaultOpen: false
    }
  end

  # Format unrecognized fields issue
  #
  # @return [Hash] Unrecognized fields issue structure
  def unrecognized_fields_issue
    all_items = unrecognized_fields_issue_items
    {
      type: 'unrecognized_fields',
      severity: 'warning',
      icon: 'fa-exclamation-triangle',
      title: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_title'),
      count: all_items.length,
      description: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_desc'),
      items: all_items,
      defaultOpen: false
    }
  end

  # Items for the unrecognized-fields issue: named columns that have a
  # suggestion first, then named columns without one, then header-less columns.
  #
  # @return [Array<Hash>] hashes with :field and :message
  def unrecognized_fields_issue_items
    with_suggestion, without_suggestion = (@data[:unrecognized] || {}).partition(&:last)
    named = (with_suggestion + without_suggestion).map do |name, suggestion|
      message = suggestion ? I18n.t('bulkrax.importer.guided_import.validation.did_you_mean', suggestion: suggestion) : nil
      { field: name, message: message }
    end
    empty = (@data[:emptyColumns] || []).map do |col|
      { field: I18n.t('bulkrax.importer.guided_import.validation.empty_column', column: col), message: nil }
    end
    named + empty
  end

  # Format file references issue
  #
  # @return [Hash, nil] File references issue structure, or nil when a zip
  #   was included and no files are missing
  def file_references_issue
    missing_files = @data[:missingFiles] || []

    if missing_files.any? && @data[:zipIncluded]
      missing_files_issue
    elsif !@data[:zipIncluded]
      no_zip_issue
    end
  end

  # Format issue for missing files in ZIP
  #
  # @return [Hash] Missing files issue structure
  def missing_files_issue
    missing_files = @data[:missingFiles]

    {
      type: 'file_references',
      severity: 'warning',
      icon: 'fa-info-circle',
      title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
      count: @data[:fileReferences],
      summary: I18n.t('bulkrax.importer.guided_import.validation.files_found_in_zip', found: @data[:foundFiles], total: @data[:fileReferences]),
      description: I18n.t('bulkrax.importer.guided_import.validation.files_missing_from_zip', count: missing_files.length, files_word: 'file'.pluralize(missing_files.length)),
      items: missing_files.map { |file| { field: file, message: I18n.t('bulkrax.importer.guided_import.validation.missing_from_zip') } },
      defaultOpen: false
    }
  end

  # Format issue for no ZIP uploaded
  #
  # @return [Hash] No ZIP issue structure
  def no_zip_issue
    {
      type: 'file_references',
      severity: 'warning',
      icon: 'fa-exclamation-triangle',
      title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
      count: @data[:fileReferences],
      summary: I18n.t('bulkrax.importer.guided_import.validation.files_referenced', count: @data[:fileReferences]),
      description: I18n.t('bulkrax.importer.guided_import.validation.no_zip_desc'),
      items: [],
      defaultOpen: false
    }
  end

  # Format row-level errors that remain after filtering.
  #
  # @return [Hash, nil] issue structure, or nil when filtering leaves nothing
  def row_errors_issue
    entries = filtered_row_errors.select { |e| e[:severity] == 'error' }
    return nil if entries.empty?

    {
      type: 'row_level_errors',
      severity: 'error',
      icon: 'fa-times-circle',
      title: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.title_errors'),
      count: entries.length,
      description: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.description'),
      items: row_error_items(entries),
      defaultOpen: false
    }
  end

  # Format row-level warnings that remain after filtering.
  #
  # @return [Hash, nil] issue structure, or nil when filtering leaves nothing
  def row_warnings_issue
    entries = filtered_row_errors.select { |e| e[:severity] == 'warning' }
    return nil if entries.empty?

    {
      type: 'row_level_warnings',
      severity: 'warning',
      icon: 'fa-exclamation-triangle',
      title: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.title_warnings'),
      count: entries.length,
      description: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.description'),
      items: row_error_items(entries),
      defaultOpen: false
    }
  end

  # Format notices; each item's message is the notice message followed by
  # its suggestion when one is present.
  #
  # @return [Hash] Notices issue structure
  def notices_issue
    {
      type: 'notices',
      severity: 'warning',
      icon: 'fa-info-circle',
      title: I18n.t('bulkrax.importer.guided_import.validation.notices_title'),
      count: @data[:notices].length,
      description: I18n.t('bulkrax.importer.guided_import.validation.notices_desc'),
      items: @data[:notices].map { |n| { field: n[:field], message: [n[:message], n[:suggestion]].compact.join(' ') } },
      defaultOpen: false
    }
  end

  # Row errors minus those whose column is already reported as a missing
  # required column or as a notice, so the same problem is not listed twice.
  #
  # @return [Array<Hash>]
  def filtered_row_errors
    missing_required_columns = @data[:missingRequired]&.map { |h| h[:field].to_s } || []
    notice_columns = @data[:notices]&.map { |n| n[:field].to_s } || []
    suppressed_columns = (missing_required_columns + notice_columns).uniq
    (@data[:rowErrors] || []).reject { |e| suppressed_columns.include?(e[:column].to_s) }
  end

  # @param errors [Array<Hash>] row errors with :row, :column, :message, optional :suggestion
  # @return [Array<Hash>] hashes with :field (row/column label) and :message
  def row_error_items(errors)
    errors.map do |error|
      message = error[:message]
      message = [message, error[:suggestion]].compact.join(' ') if error[:suggestion].present?
      {
        field: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.row_label', row: error[:row], column: error[:column]),
        message: message
      }
    end
  end
end
346
+ end
347
+ # rubocop:enable Metrics/ClassLength
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
+ module Bulkrax
6
# Builds a CSV string containing all validation errors from a guided import.
# File-level errors (missing required columns, unrecognized headers, empty
# columns, missing files) appear first as summary rows with a blank `row`
# cell. Data rows follow, one output row per input row, with any error
# messages for that row joined by ' | ' in the `errors` cell.
#
# Usage:
#   csv = Bulkrax::ValidationErrorCsvBuilder.build(
#     headers: result[:headers],
#     csv_data: result[:raw_csv_data],
#     row_errors: result[:rowErrors],
#     file_errors: {
#       missing_required: result[:missingRequired],
#       unrecognized: result[:unrecognized],
#       empty_columns: result[:emptyColumns],
#       missing_files: result[:missingFiles]
#     }
#   )
class ValidationErrorCsvBuilder
  # @param headers [Array<String>] original CSV headers in order
  # @param csv_data [Array<Hash>] one entry per data row; each hash has
  #   :raw_row (String-keyed hash of column=>value)
  # @param row_errors [Array<Hash>] each hash has :row (Integer) and :message (String)
  # @param file_errors [Hash] file-level issues:
  #   - :missing_required [Array<Hash>] each hash has :model and :field
  #   - :unrecognized [Hash] column_name => suggestion_or_nil
  #   - :empty_columns [Array<Integer>] 1-based column positions with no header
  #   - :missing_files [Array<String>] filenames referenced but not found
  # @return [String] CSV content
  def self.build(headers:, csv_data:, row_errors:, file_errors: {})
    new(headers: headers, csv_data: csv_data, row_errors: row_errors, file_errors: file_errors).build
  end

  def initialize(headers:, csv_data:, row_errors:, file_errors:)
    @headers = headers
    @csv_data = csv_data
    # nil is tolerated for both error collections and treated as "none".
    @row_errors = row_errors || []
    @file_errors = file_errors || {}
  end

  # @return [String] CSV with a `row,errors,<headers...>` header line
  def build
    errors_by_row = group_errors_by_row
    blank_data = Array.new(@headers.length)

    CSV.generate(force_quotes: false) do |csv|
      csv << ['row', 'errors'] + @headers

      file_level_error_rows.each do |message|
        csv << [nil, message] + blank_data
      end

      @csv_data.each_with_index do |record, index|
        row_number = index + 2 # header is row 1; first data row is row 2
        error_messages = errors_by_row[row_number]&.map { |e| e[:message] }&.join(' | ')
        raw_row = record[:raw_row] || {}
        csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] }
      end
    end
  end

  private

  # @return [Hash{Integer=>Array<Hash>}] row errors keyed by their :row number
  def group_errors_by_row
    @row_errors.group_by { |error| error[:row] }
  end

  # @return [Array<String>] one message per file-level problem, in the order:
  #   missing required columns, unrecognized columns, empty columns, missing files
  def file_level_error_rows
    messages = []

    Array(@file_errors[:missing_required]).each do |entry|
      messages << "Missing required column '#{entry[:field]}' (#{entry[:model]})"
    end

    Hash(@file_errors[:unrecognized]).each do |col, suggestion|
      msg = "Unrecognized column '#{col}'"
      msg += " (did you mean '#{suggestion}'?)" if suggestion.present?
      messages << msg
    end

    # Positions are already 1-based column numbers, so report them as given.
    Array(@file_errors[:empty_columns]).each do |pos|
      messages << "Column #{pos} has no header and will be ignored during import"
    end

    Array(@file_errors[:missing_files]).each do |filename|
      messages << "Missing file: #{filename}"
    end

    messages
  end
end
99
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ module CsvRow
5
##
# Row validator: every child reference on a row must resolve either to a
# source identifier found in context[:all_ids] or to a record returned by the
# context[:find_record_by_source_identifier] callable (when one is supplied).
# Unresolved references are appended to context[:errors].
# Nothing is checked when all_ids is empty and
# Bulkrax.fill_in_blank_source_identifiers is configured.
module ChildReference
  # @param record [Hash] parsed row; reads :children, :raw_row and :source_identifier
  # @param row_index [Integer] row number recorded on each error
  # @param context [Hash] reads :all_ids, :find_record_by_source_identifier,
  #   :child_split_pattern, :children_column; appends to :errors
  def self.call(record, row_index, context)
    known_ids = context[:all_ids]
    return if known_ids.empty? && Bulkrax.fill_in_blank_source_identifiers.present?

    lookup = context[:find_record_by_source_identifier]

    collect_child_ids(record, context).each do |child_id|
      resolved = known_ids.include?(child_id) || lookup&.call(child_id)
      context[:errors] << reference_error(record, row_index, child_id) unless resolved
    end
  end

  # Gathers child ids from the parsed :children value (split on the
  # configured pattern, '|' by default) and from any raw columns named
  # "<children_column>_<digits>", stripped, without blanks or duplicates.
  def self.collect_child_ids(record, context)
    delimiter = context[:child_split_pattern] || '|'
    column_name = context[:children_column] || 'children'

    inline_ids = record[:children].to_s.split(delimiter).map(&:strip).reject(&:blank?)

    numbered_column = /\A#{Regexp.escape(column_name)}_\d+\z/
    numbered_ids = record[:raw_row]
                   .filter_map { |header, value| value.to_s.strip if header.to_s.match?(numbered_column) }
                   .reject(&:blank?)

    (inline_ids + numbered_ids).uniq
  end
  private_class_method :collect_child_ids

  # Error entry describing one child id that could not be resolved.
  def self.reference_error(record, row_index, child_id)
    {
      row: row_index,
      source_identifier: record[:source_identifier],
      severity: 'error',
      category: 'invalid_child_reference',
      column: 'children',
      value: child_id,
      message: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.message',
                      value: child_id,
                      field: 'source_identifier'),
      suggestion: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.suggestion')
    }
  end
  private_class_method :reference_error
end
55
+ end
56
+ end