bulkrax 9.3.5 → 9.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  12. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  22. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  23. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  24. data/app/controllers/bulkrax/importers_controller.rb +28 -31
  25. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  26. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
  27. data/app/errors/bulkrax/unzip_error.rb +16 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/importer_job.rb +42 -4
  31. data/app/models/bulkrax/csv_entry.rb +27 -7
  32. data/app/models/bulkrax/entry.rb +4 -0
  33. data/app/models/bulkrax/importer.rb +27 -10
  34. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  36. data/app/parsers/bulkrax/application_parser.rb +63 -20
  37. data/app/parsers/bulkrax/bagit_parser.rb +12 -0
  38. data/app/parsers/bulkrax/csv_parser.rb +168 -25
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  66. data/app/views/bulkrax/importers/index.html.erb +6 -1
  67. data/app/views/bulkrax/importers/new.html.erb +1 -1
  68. data/app/views/bulkrax/importers/show.html.erb +17 -1
  69. data/config/i18n-tasks.yml +195 -0
  70. data/config/locales/bulkrax.de.yml +508 -0
  71. data/config/locales/bulkrax.en.yml +463 -233
  72. data/config/locales/bulkrax.es.yml +508 -0
  73. data/config/locales/bulkrax.fr.yml +508 -0
  74. data/config/locales/bulkrax.it.yml +508 -0
  75. data/config/locales/bulkrax.pt-BR.yml +508 -0
  76. data/config/locales/bulkrax.zh.yml +507 -0
  77. data/config/routes.rb +10 -1
  78. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  79. data/lib/bulkrax/version.rb +1 -1
  80. data/lib/bulkrax.rb +31 -0
  81. metadata +56 -16
  82. data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
  83. data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
  84. data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
  85. data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
  86. data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
  87. data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
  88. data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
  89. data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
  90. data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
  91. data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
  92. data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
  93. data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
  94. data/app/services/bulkrax/sample_csv_service.rb +0 -78
  95. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -0,0 +1,347 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  ##
  # Formats validation data from CsvParser.validate_csv into the structure
  # expected by the importers_stepper.js frontend component.
  #
  # This service acts as a presentation layer: it copies the raw validation
  # data through and adds a +messages+ structure made of one overall
  # +validationStatus+ plus a list of +issues+ (severity, icon, title, count,
  # description, items) that the JavaScript can render directly.
  #
  # @example Basic usage
  #   validation_data = CsvParser.validate_csv(csv_file: file, zip_file: zip)
  #   formatted_response = StepperResponseFormatter.format(validation_data)
  #   render json: formatted_response
  #
  # @example Error response
  #   error_response = StepperResponseFormatter.error(message: "Unable to process files")
  #   render json: error_response, status: :ok
  #
  # rubocop:disable Metrics/ClassLength
  class StepperResponseFormatter
    # Keys copied unchanged (and in this order) from the raw data into the response.
    PASSTHROUGH_KEYS = %i[
      headers missingRequired unrecognized rowCount isValid hasWarnings rowErrors
      collections works fileSets totalItems fileReferences missingFiles foundFiles zipIncluded
    ].freeze
    private_constant :PASSTHROUGH_KEYS

    # Format validation data for the stepper frontend.
    #
    # @param data [Hash] Raw validation data from CsvParser.validate_csv. Keys read here:
    #   - headers: Array of CSV column names
    #   - missingRequired: Array of hashes with :model and :field
    #     (e.g. {model: 'GenericWork', field: 'source_identifier'})
    #   - unrecognized: Hash of unrecognized column name => suggested name (or nil)
    #   - emptyColumns: Array of identifiers for columns that have no header
    #   - notices: Array of hashes with :field, :message and optional :suggestion
    #   - rowErrors: Array of hashes with :row, :column, :severity ('error'/'warning'),
    #     :message and optional :suggestion
    #   - rowCount: Total number of data rows
    #   - isValid: Boolean indicating validation success
    #   - hasWarnings: Boolean indicating presence of warnings
    #   - collections / works / fileSets: Arrays of items, passed through untouched
    #   - totalItems: Total count of items
    #   - fileReferences: Count of file references
    #   - missingFiles: Array of missing file names
    #   - foundFiles: Count of found files
    #   - zipIncluded: Boolean indicating if zip was provided
    # @return [Hash] Formatted response ready for JSON rendering
    def self.format(data)
      new(data).format
    end

    # Generate an error response for validation failures.
    #
    # @param message [String] Error message to display
    # @param summary [String, nil] Optional summary; falls back to +message+ when nil
    # @return [Hash] Error response structure (no items, isValid false, no issues)
    def self.error(message: I18n.t('bulkrax.importer.guided_import.validation.unable_to_process'), summary: nil)
      {
        totalItems: 0,
        collections: [],
        works: [],
        fileSets: [],
        isValid: false,
        hasWarnings: false,
        messages: {
          validationStatus: {
            severity: 'error',
            icon: 'fa-times-circle',
            title: I18n.t('bulkrax.importer.guided_import.validation.failed'),
            summary: summary || message,
            details: I18n.t('bulkrax.importer.guided_import.validation.critical_errors'),
            defaultOpen: true
          },
          issues: []
        }
      }
    end

    # @param data [Hash] raw (or already formatted) validation data
    def initialize(data)
      @data = data
    end

    # Format the validation data with the messages structure.
    # Data that already carries a messages structure is returned as-is
    # (the very same object, not a copy).
    #
    # @return [Hash] Complete formatted response
    def format
      return @data if already_formatted?

      response = PASSTHROUGH_KEYS.each_with_object({}) { |key, copy| copy[key] = @data[key] }
      response[:messages] = build_messages
      response
    end

    private

    # @return [Boolean] true if data already has a messages hash containing :validationStatus
    def already_formatted?
      @data.key?(:messages) &&
        @data[:messages].is_a?(Hash) &&
        @data[:messages].key?(:validationStatus)
    end

    # Build the messages structure with validationStatus and issues.
    # Issue builders may return nil (nothing left to report); those are dropped.
    #
    # @return [Hash] Messages structure for frontend
    def build_messages
      issues = []
      issues << missing_required_issue if @data[:missingRequired]&.any?
      issues << notices_issue if @data[:notices]&.any?
      issues << unrecognized_fields_issue if @data[:unrecognized]&.any? || @data[:emptyColumns]&.any?
      issues << file_references_issue if @data[:fileReferences]&.positive?
      issues << row_errors_issue if @data[:rowErrors]&.any? { |e| e[:severity] == 'error' }
      issues << row_warnings_issue if @data[:rowErrors]&.any? { |e| e[:severity] == 'warning' }

      { validationStatus: validation_status, issues: issues.compact }
    end

    # Generate the main validation status object.
    #
    # :headers and :unrecognized are optional in the raw data, so both are
    # defaulted here instead of being dereferenced directly.
    #
    # @return [Hash] Validation status with severity, icon, title, summary, details
    def validation_status
      severity, icon, title = determine_severity_level
      headers = Array(@data[:headers])
      recognized = headers.reject(&:blank?) - (@data[:unrecognized] || {}).keys

      {
        severity: severity,
        icon: icon,
        title: title,
        summary: I18n.t('bulkrax.importer.guided_import.validation.columns_detected', columns: headers.length, records: @data[:rowCount]),
        details: details_message(recognized),
        defaultOpen: true
      }
    end

    # Determine severity level based on validation state: invalid wins over
    # warnings, warnings win over plain success.
    #
    # @return [Array<String>] [severity, icon, title]
    def determine_severity_level
      unless @data[:isValid]
        return ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.failed')]
      end

      if @data[:hasWarnings]
        ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.passed_warnings')]
      else
        ['success', 'fa-check-circle', I18n.t('bulkrax.importer.guided_import.validation.passed')]
      end
    end

    # Generate details message for validation status.
    #
    # @param recognized [Array<String>] List of recognized field names
    # @return [String] Recognized-fields list when valid, critical-errors text otherwise
    def details_message(recognized)
      if @data[:isValid]
        I18n.t('bulkrax.importer.guided_import.validation.recognized_fields', fields: recognized.join(', '))
      else
        I18n.t('bulkrax.importer.guided_import.validation.critical_errors')
      end
    end

    # Format missing required fields issue. When the only missing field is
    # rights_statement the issue is downgraded from an error to a warning.
    #
    # @return [Hash] Missing required fields issue structure
    def missing_required_issue
      missing = @data[:missingRequired] || []
      only_rights_statement = missing.all? { |h| h[:field].to_s == 'rights_statement' }

      severity, icon, description =
        if only_rights_statement
          ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.missing_rights_desc')]
        else
          ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.missing_required_desc')]
        end

      {
        type: 'missing_required_fields',
        severity: severity,
        icon: icon,
        title: I18n.t('bulkrax.importer.guided_import.validation.missing_required_title'),
        count: missing.length,
        description: description,
        items: missing,
        defaultOpen: false
      }
    end

    # Format unrecognized fields issue (unrecognized headers plus header-less columns).
    #
    # @return [Hash] Unrecognized fields issue structure
    def unrecognized_fields_issue
      all_items = unrecognized_fields_issue_items
      {
        type: 'unrecognized_fields',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_title'),
        count: all_items.length,
        description: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_desc'),
        items: all_items,
        defaultOpen: false
      }
    end

    # Items for the unrecognized fields issue: columns that have a suggestion
    # first, then columns without one, then the header-less columns.
    #
    # @return [Array<Hash>] hashes with :field and :message (nil when there is no hint)
    def unrecognized_fields_issue_items
      suggested, unsuggested = (@data[:unrecognized] || {}).partition { |_column, suggestion| suggestion }
      named = (suggested + unsuggested).map do |column, suggestion|
        hint = suggestion ? I18n.t('bulkrax.importer.guided_import.validation.did_you_mean', suggestion: suggestion) : nil
        { field: column, message: hint }
      end
      empty = (@data[:emptyColumns] || []).map do |col|
        { field: I18n.t('bulkrax.importer.guided_import.validation.empty_column', column: col), message: nil }
      end
      named + empty
    end

    # Format file references issue.
    #
    # @return [Hash, nil] no-ZIP issue when no ZIP was provided, missing-files
    #   issue when a ZIP was provided but files are missing, nil otherwise
    def file_references_issue
      return no_zip_issue unless @data[:zipIncluded]

      missing_files_issue if (@data[:missingFiles] || []).any?
    end

    # Format issue for files referenced by the CSV but missing from the ZIP.
    #
    # @return [Hash] Missing files issue structure
    def missing_files_issue
      missing_files = @data[:missingFiles]

      {
        type: 'file_references',
        severity: 'warning',
        icon: 'fa-info-circle',
        title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
        count: @data[:fileReferences],
        summary: I18n.t('bulkrax.importer.guided_import.validation.files_found_in_zip', found: @data[:foundFiles], total: @data[:fileReferences]),
        description: I18n.t('bulkrax.importer.guided_import.validation.files_missing_from_zip', count: missing_files.length, files_word: 'file'.pluralize(missing_files.length)),
        items: missing_files.map { |file| { field: file, message: I18n.t('bulkrax.importer.guided_import.validation.missing_from_zip') } },
        defaultOpen: false
      }
    end

    # Format issue for file references when no ZIP was uploaded.
    #
    # @return [Hash] No ZIP issue structure
    def no_zip_issue
      {
        type: 'file_references',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
        count: @data[:fileReferences],
        summary: I18n.t('bulkrax.importer.guided_import.validation.files_referenced', count: @data[:fileReferences]),
        description: I18n.t('bulkrax.importer.guided_import.validation.no_zip_desc'),
        items: [],
        defaultOpen: false
      }
    end

    # Format the issue listing row-level errors.
    #
    # @return [Hash, nil] nil when every error row was suppressed by filtering
    def row_errors_issue
      entries = filtered_row_errors.select { |e| e[:severity] == 'error' }
      return nil if entries.empty?

      row_level_issue(
        entries,
        type: 'row_level_errors',
        severity: 'error',
        icon: 'fa-times-circle',
        title: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.title_errors')
      )
    end

    # Format the issue listing row-level warnings.
    #
    # @return [Hash, nil] nil when every warning row was suppressed by filtering
    def row_warnings_issue
      entries = filtered_row_errors.select { |e| e[:severity] == 'warning' }
      return nil if entries.empty?

      row_level_issue(
        entries,
        type: 'row_level_warnings',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.title_warnings')
      )
    end

    # Shared structure for the row-level error and warning issues.
    #
    # @param entries [Array<Hash>] non-empty list of row error hashes
    # @return [Hash] Row-level issue structure
    def row_level_issue(entries, type:, severity:, icon:, title:)
      {
        type: type,
        severity: severity,
        icon: icon,
        title: title,
        count: entries.length,
        description: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.description'),
        items: row_error_items(entries),
        defaultOpen: false
      }
    end

    # Format the notices issue; each item joins the notice message and its
    # optional suggestion with a space.
    #
    # @return [Hash] Notices issue structure
    def notices_issue
      {
        type: 'notices',
        severity: 'warning',
        icon: 'fa-info-circle',
        title: I18n.t('bulkrax.importer.guided_import.validation.notices_title'),
        count: @data[:notices].length,
        description: I18n.t('bulkrax.importer.guided_import.validation.notices_desc'),
        items: @data[:notices].map { |n| { field: n[:field], message: [n[:message], n[:suggestion]].compact.join(' ') } },
        defaultOpen: false
      }
    end

    # Row errors minus those whose column is already reported at file level
    # (as a missing required column or as a notice), so the same problem is
    # not listed twice.
    #
    # @return [Array<Hash>] remaining row error hashes
    def filtered_row_errors
      missing_required_columns = @data[:missingRequired]&.map { |h| h[:field].to_s } || []
      notice_columns = @data[:notices]&.map { |n| n[:field].to_s } || []
      suppressed_columns = (missing_required_columns + notice_columns).uniq
      (@data[:rowErrors] || []).reject { |e| suppressed_columns.include?(e[:column].to_s) }
    end

    # Convert row error hashes into display items.
    #
    # @param errors [Array<Hash>] row error hashes
    # @return [Array<Hash>] hashes with :field (row/column label) and :message
    def row_error_items(errors)
      errors.map do |error|
        message = error[:message]
        message = [message, error[:suggestion]].compact.join(' ') if error[:suggestion].present?
        {
          field: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.row_label', row: error[:row], column: error[:column]),
          message: message
        }
      end
    end
  end
end
347
+ # rubocop:enable Metrics/ClassLength
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
module Bulkrax
  # Produces a CSV string listing every validation error from a guided import.
  #
  # Layout of the output:
  #   1. a header row: 'row', 'errors', then the original CSV headers;
  #   2. one summary row per file-level problem (missing required columns,
  #      unrecognized headers, header-less columns, missing files) with a
  #      blank `row` cell and blank data cells;
  #   3. one row per input data row, carrying its row number, its error
  #      messages joined with ' | ' (blank when it has none) and its
  #      original values.
  #
  # Usage:
  #   csv = Bulkrax::ValidationErrorCsvBuilder.build(
  #     headers: result[:headers],
  #     csv_data: result[:raw_csv_data],
  #     row_errors: result[:rowErrors],
  #     file_errors: {
  #       missing_required: result[:missingRequired],
  #       unrecognized: result[:unrecognized],
  #       empty_columns: result[:emptyColumns],
  #       missing_files: result[:missingFiles]
  #     }
  #   )
  class ValidationErrorCsvBuilder
    # @param headers [Array<String>] original CSV headers in order
    # @param csv_data [Array<Hash>] one entry per data row; each hash has
    #   :raw_row (String-keyed hash of column=>value)
    # @param row_errors [Array<Hash>] each hash has :row (Integer) and :message (String)
    # @param file_errors [Hash] file-level issues:
    #   - :missing_required [Array<Hash>] each hash has :model and :field
    #   - :unrecognized [Hash] column_name => suggestion_or_nil
    #   - :empty_columns [Array<Integer>] 1-based column positions with no header
    #   - :missing_files [Array<String>] filenames referenced but not found
    # @return [String] CSV content
    def self.build(headers:, csv_data:, row_errors:, file_errors: {})
      new(headers: headers, csv_data: csv_data, row_errors: row_errors, file_errors: file_errors).build
    end

    def initialize(headers:, csv_data:, row_errors:, file_errors:)
      @headers = headers
      @csv_data = csv_data
      @row_errors = row_errors
      @file_errors = file_errors
    end

    # @return [String] the generated CSV document
    def build
      grouped = group_errors_by_row
      empty_cells = Array.new(@headers.length)

      CSV.generate(force_quotes: false) do |csv|
        csv << %w[row errors] + @headers

        file_level_error_rows.each { |message| csv << [nil, message] + empty_cells }

        # Row numbers start at 2: the header is row 1, the first data row is row 2.
        @csv_data.each.with_index(2) do |record, row_number|
          joined = grouped[row_number]&.map { |error| error[:message] }&.join(' | ')
          values = record[:raw_row] || {}
          csv << [row_number, joined] + @headers.map { |header| values[header] }
        end
      end
    end

    private

    # @return [Hash{Integer => Array<Hash>}] row errors keyed by their :row number
    def group_errors_by_row
      @row_errors.group_by { |error| error[:row] }
    end

    # @return [Array<String>] file-level messages in display order: missing
    #   required columns, unrecognized columns, header-less columns, missing files
    def file_level_error_rows
      missing_required_messages + unrecognized_messages + empty_column_messages + missing_file_messages
    end

    def missing_required_messages
      Array(@file_errors[:missing_required]).map do |entry|
        "Missing required column '#{entry[:field]}' (#{entry[:model]})"
      end
    end

    def unrecognized_messages
      Hash(@file_errors[:unrecognized]).map do |col, suggestion|
        hint = suggestion.present? ? " (did you mean '#{suggestion}'?)" : ''
        "Unrecognized column '#{col}'#{hint}"
      end
    end

    # NOTE(review): the +2 presumably maps the position onto this CSV, which has
    # two extra leading columns ('row', 'errors') — confirm against the caller's indexing.
    def empty_column_messages
      Array(@file_errors[:empty_columns]).map do |pos|
        "Column #{pos + 2} has no header and will be ignored during import"
      end
    end

    def missing_file_messages
      Array(@file_errors[:missing_files]).map { |filename| "Missing file: #{filename}" }
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvRow
    ##
    # Row validator for child references.
    #
    # Every child id named by a row must resolve either to another source
    # identifier in the same CSV (context[:all_ids]) or to an existing record
    # found through the optional callable
    # context[:find_record_by_source_identifier]. Each id that resolves to
    # neither gets one error hash appended to context[:errors].
    #
    # Validation is skipped entirely when all_ids is empty and
    # Bulkrax.fill_in_blank_source_identifiers is configured, since generated
    # identifiers cannot be cross-referenced at validation time.
    module ChildReference
      # @param record [Hash] parsed row; reads :children, :raw_row, :source_identifier
      # @param row_index [Integer] row number reported in the error
      # @param context [Hash] shared validation context (see module docs); also
      #   honours :child_split_pattern and :children_column
      def self.call(record, row_index, context)
        known_ids = context[:all_ids]
        return if known_ids.empty? && Bulkrax.fill_in_blank_source_identifiers.present?

        lookup = context[:find_record_by_source_identifier]

        collect_child_ids(record, context).each do |child_id|
          # The CSV itself is checked first so the lookup is only hit when needed.
          resolved = known_ids.include?(child_id) || lookup&.call(child_id)
          context[:errors] << unresolved_child_error(record, row_index, child_id) unless resolved
        end
      end

      # Error hash for a child id that resolved to nothing.
      def self.unresolved_child_error(record, row_index, child_id)
        {
          row: row_index,
          source_identifier: record[:source_identifier],
          severity: 'error',
          category: 'invalid_child_reference',
          column: 'children',
          value: child_id,
          message: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.message',
                          value: child_id,
                          field: 'source_identifier'),
          suggestion: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.suggestion')
        }
      end
      private_class_method :unresolved_child_error

      # Unique, non-blank child ids of a row: the delimited values of
      # record[:children] followed by the values of any numbered columns
      # (<children_column>_1, <children_column>_2, ...) in record[:raw_row].
      def self.collect_child_ids(record, context)
        delimiter = context[:child_split_pattern] || '|'
        column_name = context[:children_column] || 'children'
        numbered_column = /\A#{Regexp.escape(column_name)}_\d+\z/

        delimited = record[:children].to_s.split(delimiter).map(&:strip).reject(&:blank?)

        numbered = record[:raw_row]
                   .select { |key, _value| key.to_s.match?(numbered_column) }
                   .map { |_key, value| value.to_s.strip }
                   .reject(&:blank?)

        (delimited + numbered).uniq
      end
      private_class_method :collect_child_ids
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvRow
    ##
    # Detects circular parent-child relationships in the CSV.
    # A circular reference occurs when following the parent chain from any record
    # eventually leads back to itself (e.g. A→B→C→A, or a record naming itself).
    #
    # On first invocation the validator computes, from the directed graph
    # (child → parents), the set of every record id that lies on at least one
    # cycle and caches it in context[:circular_reference_ids]. Subsequent
    # per-row calls simply check membership in that set.
    #
    # Requires context key:
    #   :relationship_graph – Hash { source_identifier => [parent_ids] } built by
    #                         run_row_validators before iterating rows.
    module CircularReference
      # Appends an error to context[:errors] when the record's source
      # identifier participates in a cycle.
      #
      # @param record [Hash] parsed row; reads :source_identifier
      # @param row_index [Integer] row number reported in the error
      # @param context [Hash] shared validation context (see module docs)
      def self.call(record, row_index, context)
        cycle_ids = context[:circular_reference_ids] ||= detect_cycle_ids(context[:relationship_graph] || {})
        return unless cycle_ids.include?(record[:source_identifier])

        context[:errors] << {
          row: row_index,
          source_identifier: record[:source_identifier],
          severity: 'error',
          category: 'circular_reference',
          column: 'parents',
          value: record[:source_identifier],
          message: I18n.t('bulkrax.importer.guided_import.validation.circular_reference_validator.errors.message',
                          value: record[:source_identifier]),
          suggestion: I18n.t('bulkrax.importer.guided_import.validation.circular_reference_validator.errors.suggestion')
        }
      end

      # Returns the set of all source identifiers that participate in at least one cycle.
      #
      # Uses Tarjan's strongly-connected-components algorithm: a node is on a
      # cycle exactly when its component has more than one member or it lists
      # itself as a parent. A plain DFS with a global "visited" set is not
      # enough here, because a node whose only route into a cycle passes
      # through an already-visited node would never be marked
      # (e.g. A→[B, C], B→[D], C→[D], D→[A] would miss C).
      def self.detect_cycle_ids(graph)
        state = { index: {}, lowlink: {}, stack: [], on_stack: Set.new, cycle_ids: Set.new }
        all_nodes = graph.keys.to_set | graph.values.flatten.to_set

        all_nodes.each do |node|
          strong_connect(node, graph, state) unless state[:index].key?(node)
        end

        state[:cycle_ids]
      end
      private_class_method :detect_cycle_ids

      # Recursive step of Tarjan's algorithm for a single, not yet indexed node.
      def self.strong_connect(node, graph, state) # rubocop:disable Metrics/MethodLength
        index = state[:index]
        lowlink = state[:lowlink]
        order = index.size
        index[node] = order
        lowlink[node] = order
        state[:stack].push(node)
        state[:on_stack].add(node)
        parents = graph[node] || []

        parents.each do |parent|
          if !index.key?(parent)
            strong_connect(parent, graph, state)
            lowlink[node] = [lowlink[node], lowlink[parent]].min
          elsif state[:on_stack].include?(parent)
            # Edge back into the component currently being built.
            lowlink[node] = [lowlink[node], index[parent]].min
          end
        end

        # Only the root of a component (lowlink == own index) closes it.
        return unless lowlink[node] == index[node]

        component = pop_component(node, state)
        state[:cycle_ids].merge(component) if component.size > 1 || parents.include?(node)
      end
      private_class_method :strong_connect

      # Pops the finished component (everything above and including +root+) off the stack.
      def self.pop_component(root, state)
        component = []
        loop do
          member = state[:stack].pop
          state[:on_stack].delete(member)
          component << member
          break if member == root
        end
        component
      end
      private_class_method :pop_component
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvRow
    ##
    # Row validator for controlled-vocabulary fields.
    #
    # For the row's model, context[:field_metadata][model][:controlled_vocab_terms]
    # lists the fields to check. Each non-blank value is looked up in the local
    # QA authority for that field; a value that is not found, or whose term is
    # marked inactive, gets an error hash appended to context[:errors].
    # Fields without a matching authority are skipped.
    module ControlledVocabulary
      # @param record [Hash] parsed row; reads :model, :raw_row, :source_identifier
      # @param row_index [Integer] row number reported in the error
      # @param context [Hash] shared validation context; reads :field_metadata, appends to :errors
      def self.call(record, row_index, context)
        all_metadata = context[:field_metadata]
        return if all_metadata.blank?

        model_metadata = all_metadata[record[:model]]
        return if model_metadata.blank?

        vocab_fields = model_metadata[:controlled_vocab_terms]
        return if vocab_fields.blank?

        vocab_fields.each do |field|
          error = invalid_value_error(record, row_index, field)
          context[:errors] << error if error
        end
      end

      # Checks one field of the row.
      #
      # @return [Hash, nil] error hash, or nil when the value is blank, the field
      #   has no authority, or the value maps to a term that is not inactive
      def self.invalid_value_error(record, row_index, field) # rubocop:disable Metrics/MethodLength
        value = record[:raw_row][field]
        return if value.blank?

        authority = load_authority(field)
        return if authority.nil?

        term = authority.find(value)
        return unless term.blank? || term.dig('active') == false

        {
          row: row_index,
          source_identifier: record[:source_identifier],
          severity: 'error',
          category: 'invalid_controlled_value',
          column: field,
          value: value,
          message: I18n.t('bulkrax.importer.guided_import.validation.controlled_vocabulary_validator.errors.message',
                          value: value, field: field),
          suggestion: suggestion(value, authority)
        }
      end
      private_class_method :invalid_value_error

      # Finds the local authority for a field, trying the pluralized field name
      # first and the field name itself second.
      #
      # @return [Object, nil] the authority, or nil when neither name is a valid subauthority
      def self.load_authority(field)
        [field.pluralize, field].each do |name|
          return Qa::Authorities::Local.subauthority_for(name)
        rescue Qa::InvalidSubAuthority
          next
        end
        nil
      end
      private_class_method :load_authority

      # "Did you mean" hint built from the closest active label, or the generic
      # fallback text when the spell checker offers nothing.
      def self.suggestion(value, authority)
        checker = DidYouMean::SpellChecker.new(dictionary: dictionary_for(authority))
        closest = checker.correct(value).first
        return fallback_suggestion if closest.nil?

        I18n.t('bulkrax.importer.guided_import.validation.did_you_mean', suggestion: closest)
      end
      private_class_method :suggestion

      def self.fallback_suggestion
        I18n.t('bulkrax.importer.guided_import.validation.controlled_vocabulary_validator.errors.suggestion')
      end
      private_class_method :fallback_suggestion

      # Unique labels of the authority's terms whose 'active' flag is exactly true.
      def self.dictionary_for(authority)
        labels = authority.all.each_with_object([]) do |term, collected|
          label = term['label'] if term['active'] == true
          collected << label if label
        end
        labels.uniq
      end
      private_class_method :dictionary_for
    end
  end
end