bulkrax 9.3.5 → 9.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/app/assets/javascripts/bulkrax/application.js +2 -1
- data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
- data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
- data/app/assets/javascripts/bulkrax/datatables.js +1 -0
- data/app/assets/javascripts/bulkrax/entries.js +17 -10
- data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
- data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
- data/app/assets/stylesheets/bulkrax/application.css +1 -1
- data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
- data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
- data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
- data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
- data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
- data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
- data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
- data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
- data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
- data/app/controllers/bulkrax/importers_controller.rb +28 -31
- data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
- data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
- data/app/errors/bulkrax/unzip_error.rb +16 -0
- data/app/factories/bulkrax/object_factory.rb +3 -2
- data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
- data/app/jobs/bulkrax/importer_job.rb +42 -4
- data/app/models/bulkrax/csv_entry.rb +27 -7
- data/app/models/bulkrax/entry.rb +4 -0
- data/app/models/bulkrax/importer.rb +27 -10
- data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
- data/app/parsers/bulkrax/application_parser.rb +63 -20
- data/app/parsers/bulkrax/bagit_parser.rb +12 -0
- data/app/parsers/bulkrax/csv_parser.rb +168 -25
- data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
- data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
- data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
- data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
- data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
- data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
- data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
- data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
- data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
- data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
- data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
- data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
- data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
- data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
- data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
- data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
- data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
- data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
- data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
- data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
- data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
- data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
- data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
- data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
- data/app/views/bulkrax/importers/index.html.erb +6 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +17 -1
- data/config/i18n-tasks.yml +195 -0
- data/config/locales/bulkrax.de.yml +508 -0
- data/config/locales/bulkrax.en.yml +463 -233
- data/config/locales/bulkrax.es.yml +508 -0
- data/config/locales/bulkrax.fr.yml +508 -0
- data/config/locales/bulkrax.it.yml +508 -0
- data/config/locales/bulkrax.pt-BR.yml +508 -0
- data/config/locales/bulkrax.zh.yml +507 -0
- data/config/routes.rb +10 -1
- data/lib/bulkrax/data/demo_scenarios.json +2235 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +31 -0
- metadata +56 -16
- data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
- data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
- data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
- data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
- data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
- data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
- data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
- data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
- data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
- data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
- data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
- data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
- data/app/services/bulkrax/sample_csv_service.rb +0 -78
- /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
  ##
  # Formats validation data from CsvParser.validate_csv into the structure
  # expected by the importers_stepper.js frontend component.
  #
  # This service acts as a presentation layer, transforming raw validation data
  # into a structured response with proper status messages, severity levels,
  # and formatted issue lists that the JavaScript can render correctly.
  #
  # @example Basic usage
  #   validation_data = CsvParser.validate_csv(csv_file: file, zip_file: zip)
  #   formatted_response = StepperResponseFormatter.format(validation_data)
  #   render json: formatted_response
  #
  # @example Error response
  #   error_response = StepperResponseFormatter.error(message: "Unable to process files")
  #   render json: error_response, status: :ok
  #
  # rubocop:disable Metrics/ClassLength
  class StepperResponseFormatter
    # Keys copied unchanged from the raw validation data into the response,
    # in the order they appear in the rendered hash.
    PASSTHROUGH_KEYS = %i[
      headers missingRequired unrecognized rowCount isValid hasWarnings rowErrors
      collections works fileSets totalItems fileReferences missingFiles foundFiles zipIncluded
    ].freeze

    # Format validation data for the stepper frontend
    #
    # @param data [Hash] Raw validation data from CsvParser.validate_csv containing:
    #   - headers: Array of CSV column names
    #   - missingRequired: Array of hashes of missing required fields by model (e.g. {model: 'GenericWork', field: 'source_identifier'})
    #   - unrecognized: Hash of unrecognized column name => suggested column name (or nil)
    #   - emptyColumns: Array of columns that have no header
    #   - notices: Array of hashes with :field, :message and optional :suggestion
    #   - rowErrors: Array of hashes with :row, :column, :severity ('error' or 'warning'), :message and optional :suggestion
    #   - rowCount: Total number of data rows
    #   - isValid: Boolean indicating validation success
    #   - hasWarnings: Boolean indicating presence of warnings
    #   - collections: Array of collection items with id, title, type, parentIds (array), childIds (array)
    #   - works: Array of work items with id, title, type, parentIds (array), childIds (array)
    #   - fileSets: Array of file set items
    #   - totalItems: Total count of items
    #   - fileReferences: Count of file references
    #   - missingFiles: Array of missing file names
    #   - foundFiles: Count of found files
    #   - zipIncluded: Boolean indicating if zip was provided
    # @return [Hash] Formatted response ready for JSON rendering
    def self.format(data)
      new(data).format
    end

    # Generate an error response for validation failures
    #
    # @param message [String] Error message to display
    # @param summary [String] Optional summary (falls back to +message+)
    # @return [Hash] Error response structure
    def self.error(message: I18n.t('bulkrax.importer.guided_import.validation.unable_to_process'), summary: nil)
      {
        totalItems: 0,
        collections: [],
        works: [],
        fileSets: [],
        isValid: false,
        hasWarnings: false,
        messages: {
          validationStatus: {
            severity: 'error',
            icon: 'fa-times-circle',
            title: I18n.t('bulkrax.importer.guided_import.validation.failed'),
            summary: summary || message,
            details: I18n.t('bulkrax.importer.guided_import.validation.critical_errors'),
            defaultOpen: true
          },
          issues: []
        }
      }
    end

    # @param data [Hash] Raw validation data (see .format)
    def initialize(data)
      @data = data
    end

    # Format the validation data with messages structure.
    # If data already contains a messages structure, it is returned as-is.
    #
    # @return [Hash] Complete formatted response
    def format
      return @data if already_formatted?

      PASSTHROUGH_KEYS.to_h { |key| [key, @data[key]] }.merge(messages: build_messages)
    end

    private

    # Check if data is already formatted with messages structure
    #
    # @return [Boolean] true if data already has a messages hash with a validationStatus key
    def already_formatted?
      @data.key?(:messages) &&
        @data[:messages].is_a?(Hash) &&
        @data[:messages].key?(:validationStatus)
    end

    # Build the messages structure with validationStatus and issues
    #
    # @return [Hash] Messages structure for frontend
    def build_messages
      issues = []
      issues << missing_required_issue if @data[:missingRequired]&.any?
      issues << notices_issue if @data[:notices]&.any?
      issues << unrecognized_fields_issue if @data[:unrecognized]&.any? || @data[:emptyColumns]&.any?
      issues << file_references_issue if @data[:fileReferences]&.positive?
      issues << row_errors_issue if @data[:rowErrors]&.any? { |e| e[:severity] == 'error' }
      issues << row_warnings_issue if @data[:rowErrors]&.any? { |e| e[:severity] == 'warning' }

      {
        validationStatus: validation_status,
        # Some builders return nil when they have nothing to report.
        issues: issues.compact
      }
    end

    # Generate the main validation status object
    #
    # @return [Hash] Validation status with severity, icon, title, summary, details
    def validation_status
      severity, icon, title = determine_severity_level
      headers = Array(@data[:headers])
      # :unrecognized may be absent; guard before asking for its keys.
      recognized = headers.reject(&:blank?) - (@data[:unrecognized] || {}).keys

      {
        severity: severity,
        icon: icon,
        title: title,
        summary: I18n.t('bulkrax.importer.guided_import.validation.columns_detected', columns: headers.length, records: @data[:rowCount]),
        details: details_message(recognized),
        defaultOpen: true
      }
    end

    # Determine severity level based on validation state
    #
    # @return [Array<String>] [severity, icon, title]
    def determine_severity_level
      unless @data[:isValid]
        return ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.failed')]
      end

      if @data[:hasWarnings]
        ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.passed_warnings')]
      else
        ['success', 'fa-check-circle', I18n.t('bulkrax.importer.guided_import.validation.passed')]
      end
    end

    # Generate details message for validation status
    #
    # @param recognized [Array<String>] List of recognized field names
    # @return [String] Details message
    def details_message(recognized)
      if @data[:isValid]
        I18n.t('bulkrax.importer.guided_import.validation.recognized_fields', fields: recognized.join(', '))
      else
        I18n.t('bulkrax.importer.guided_import.validation.critical_errors')
      end
    end

    # Format missing required fields issue.
    # When the only missing field is rights_statement the issue is reported
    # as a warning; otherwise it is an error.
    #
    # @return [Hash] Missing required fields issue structure
    def missing_required_issue
      missing = Array(@data[:missingRequired])
      only_rights_statement = missing.all? { |h| h[:field].to_s == 'rights_statement' }

      severity, icon, description =
        if only_rights_statement
          ['warning', 'fa-exclamation-triangle', I18n.t('bulkrax.importer.guided_import.validation.missing_rights_desc')]
        else
          ['error', 'fa-times-circle', I18n.t('bulkrax.importer.guided_import.validation.missing_required_desc')]
        end

      {
        type: 'missing_required_fields',
        severity: severity,
        icon: icon,
        title: I18n.t('bulkrax.importer.guided_import.validation.missing_required_title'),
        count: missing.length,
        description: description,
        items: @data[:missingRequired],
        defaultOpen: false
      }
    end

    # Format unrecognized fields issue
    #
    # @return [Hash] Unrecognized fields issue structure
    def unrecognized_fields_issue
      all_items = unrecognized_fields_issue_items
      {
        type: 'unrecognized_fields',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_title'),
        count: all_items.length,
        description: I18n.t('bulkrax.importer.guided_import.validation.unrecognized_desc'),
        items: all_items,
        defaultOpen: false
      }
    end

    # Items for the unrecognized fields issue: unrecognized columns that have a
    # suggestion first, then those without, then header-less columns.
    #
    # @return [Array<Hash>] hashes with :field and :message
    def unrecognized_fields_issue_items
      with_suggestion, without_suggestion = (@data[:unrecognized] || {}).partition { |_name, suggested| suggested }
      named = (with_suggestion + without_suggestion).map do |name, suggested|
        { field: name, message: suggested ? I18n.t('bulkrax.importer.guided_import.validation.did_you_mean', suggestion: suggested) : nil }
      end
      empty = (@data[:emptyColumns] || []).map do |col|
        { field: I18n.t('bulkrax.importer.guided_import.validation.empty_column', column: col), message: nil }
      end
      named + empty
    end

    # Format file references issue
    #
    # @return [Hash, nil] File references issue structure, or nil when a zip
    #   was included and no files are missing
    def file_references_issue
      missing_files = @data[:missingFiles] || []

      if missing_files.any? && @data[:zipIncluded]
        missing_files_issue
      elsif !@data[:zipIncluded]
        no_zip_issue
      end
    end

    # Format issue for missing files in ZIP
    #
    # @return [Hash] Missing files issue structure
    def missing_files_issue
      missing_files = @data[:missingFiles]

      {
        type: 'file_references',
        severity: 'warning',
        icon: 'fa-info-circle',
        title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
        count: @data[:fileReferences],
        summary: I18n.t('bulkrax.importer.guided_import.validation.files_found_in_zip', found: @data[:foundFiles], total: @data[:fileReferences]),
        description: I18n.t('bulkrax.importer.guided_import.validation.files_missing_from_zip', count: missing_files.length, files_word: 'file'.pluralize(missing_files.length)),
        items: missing_files.map { |file| { field: file, message: I18n.t('bulkrax.importer.guided_import.validation.missing_from_zip') } },
        defaultOpen: false
      }
    end

    # Format issue for no ZIP uploaded
    #
    # @return [Hash] No ZIP issue structure
    def no_zip_issue
      {
        type: 'file_references',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.validation.file_references_title'),
        count: @data[:fileReferences],
        summary: I18n.t('bulkrax.importer.guided_import.validation.files_referenced', count: @data[:fileReferences]),
        description: I18n.t('bulkrax.importer.guided_import.validation.no_zip_desc'),
        items: [],
        defaultOpen: false
      }
    end

    # Format row-level entries whose severity is 'error'
    #
    # @return [Hash, nil] Issue structure, or nil when every such entry was filtered out
    def row_errors_issue
      entries = filtered_row_errors.select { |e| e[:severity] == 'error' }
      return nil if entries.empty?

      row_level_issue(
        entries,
        type: 'row_level_errors',
        severity: 'error',
        icon: 'fa-times-circle',
        title: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.title_errors')
      )
    end

    # Format row-level entries whose severity is 'warning'
    #
    # @return [Hash, nil] Issue structure, or nil when every such entry was filtered out
    def row_warnings_issue
      entries = filtered_row_errors.select { |e| e[:severity] == 'warning' }
      return nil if entries.empty?

      row_level_issue(
        entries,
        type: 'row_level_warnings',
        severity: 'warning',
        icon: 'fa-exclamation-triangle',
        title: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.title_warnings')
      )
    end

    # Shared issue structure for row-level errors and warnings
    #
    # @param entries [Array<Hash>] non-empty list of row error hashes
    # @return [Hash] Issue structure
    def row_level_issue(entries, type:, severity:, icon:, title:)
      {
        type: type,
        severity: severity,
        icon: icon,
        title: title,
        count: entries.length,
        description: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.description'),
        items: row_error_items(entries),
        defaultOpen: false
      }
    end

    # Format notices issue
    #
    # @return [Hash] Notices issue structure
    def notices_issue
      {
        type: 'notices',
        severity: 'warning',
        icon: 'fa-info-circle',
        title: I18n.t('bulkrax.importer.guided_import.validation.notices_title'),
        count: @data[:notices].length,
        description: I18n.t('bulkrax.importer.guided_import.validation.notices_desc'),
        items: @data[:notices].map { |n| { field: n[:field], message: [n[:message], n[:suggestion]].compact.join(' ') } },
        defaultOpen: false
      }
    end

    # Row errors minus those whose column is already reported as a missing
    # required field or as a notice.
    #
    # @return [Array<Hash>] remaining row error hashes
    def filtered_row_errors
      missing_required_columns = @data[:missingRequired]&.map { |h| h[:field].to_s } || []
      notice_columns = @data[:notices]&.map { |n| n[:field].to_s } || []
      suppressed_columns = (missing_required_columns + notice_columns).uniq
      (@data[:rowErrors] || []).reject { |e| suppressed_columns.include?(e[:column].to_s) }
    end

    # Convert row error hashes into the {field, message} items the frontend renders
    #
    # @param errors [Array<Hash>] row error hashes
    # @return [Array<Hash>] hashes with :field (row/column label) and :message
    def row_error_items(errors)
      errors.map do |error|
        message = error[:message]
        message = [message, error[:suggestion]].compact.join(' ') if error[:suggestion].present?
        {
          field: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.row_label', row: error[:row], column: error[:column]),
          message: message
        }
      end
    end
  end
end
|
|
347
|
+
# rubocop:enable Metrics/ClassLength
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'csv'
|
|
4
|
+
|
|
5
|
+
module Bulkrax
  # Builds a CSV string containing all validation errors from a guided import.
  # File-level errors (missing required columns, unrecognized headers, empty
  # columns, missing files) appear first as summary rows with a blank `row`
  # cell. Data rows follow, one output row per data row, with any row-level
  # error messages joined by ' | ' in the `errors` cell.
  #
  # Usage:
  #   csv = Bulkrax::ValidationErrorCsvBuilder.build(
  #     headers: result[:headers],
  #     csv_data: result[:raw_csv_data],
  #     row_errors: result[:rowErrors],
  #     file_errors: {
  #       missing_required: result[:missingRequired],
  #       unrecognized: result[:unrecognized],
  #       empty_columns: result[:emptyColumns],
  #       missing_files: result[:missingFiles]
  #     }
  #   )
  class ValidationErrorCsvBuilder
    # @param headers [Array<String>] original CSV headers in order
    # @param csv_data [Array<Hash>] one entry per data row; each hash has
    #   :raw_row (String-keyed hash of column=>value)
    # @param row_errors [Array<Hash>] each hash has :row (Integer) and :message (String)
    # @param file_errors [Hash] file-level issues:
    #   - :missing_required [Array<Hash>] each hash has :model and :field
    #   - :unrecognized [Hash] column_name => suggestion_or_nil
    #   - :empty_columns [Array<Integer>] 1-based column positions with no header
    #   - :missing_files [Array<String>] filenames referenced but not found
    # @return [String] CSV content
    def self.build(headers:, csv_data:, row_errors:, file_errors: {})
      new(headers: headers, csv_data: csv_data, row_errors: row_errors, file_errors: file_errors).build
    end

    # Accepts the same arguments as .build. Any argument given as nil is
    # treated as empty so a validation result without that section can be
    # passed straight through.
    def initialize(headers:, csv_data:, row_errors:, file_errors:)
      @headers = headers || []
      @csv_data = csv_data || []
      @row_errors = row_errors || []
      @file_errors = file_errors || {}
    end

    # @return [String] CSV content: header row, file-level error rows, then data rows
    def build
      errors_by_row = group_errors_by_row
      blank_data = Array.new(@headers.length)

      CSV.generate(force_quotes: false) do |csv|
        csv << ['row', 'errors'] + @headers

        file_level_error_rows.each do |message|
          csv << [nil, message] + blank_data
        end

        @csv_data.each_with_index do |record, index|
          row_number = index + 2 # header is row 1; first data row is row 2
          error_messages = errors_by_row[row_number]&.map { |e| e[:message] }&.join(' | ')
          raw_row = record[:raw_row] || {}
          csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] }
        end
      end
    end

    private

    # @return [Hash{Integer => Array<Hash>}] row errors keyed by their :row value
    def group_errors_by_row
      @row_errors.group_by { |error| error[:row] }
    end

    # @return [Array<String>] one message per file-level problem
    def file_level_error_rows
      messages = []

      Array(@file_errors[:missing_required]).each do |entry|
        messages << "Missing required column '#{entry[:field]}' (#{entry[:model]})"
      end

      Hash(@file_errors[:unrecognized]).each do |col, suggestion|
        msg = "Unrecognized column '#{col}'"
        msg += " (did you mean '#{suggestion}'?)" if suggestion.present?
        messages << msg
      end

      Array(@file_errors[:empty_columns]).each do |pos|
        # NOTE(review): the +2 presumably accounts for the 'row' and 'errors'
        # columns prepended to this CSV - confirm against the caller.
        messages << "Column #{pos + 2} has no header and will be ignored during import"
      end

      Array(@file_errors[:missing_files]).each do |filename|
        messages << "Missing file: #{filename}"
      end

      messages
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
  module CsvRow
    ##
    # Validates that any child references in a row point to source identifiers
    # that exist either elsewhere in the same CSV or as existing repository records.
    # Uses context[:all_ids] (Set of all source identifiers) to validate references
    # within the CSV, and context[:find_record_by_source_identifier] (callable) to
    # look up existing records in the same way the importer does at runtime.
    # Skips validation when all_ids is empty and fill_in_blank_source_identifiers is
    # configured, since generated identifiers cannot be cross-referenced at validation time.
    module ChildReference
      # Appends one error hash to context[:errors] for every child id in the
      # row that is neither in context[:all_ids] nor found by the lookup callable.
      #
      # @param record [Hash] parsed row; reads :source_identifier, :children and :raw_row
      # @param row_index [Integer] row number reported in the error
      # @param context [Hash] shared validation state (see module comment)
      def self.call(record, row_index, context)
        all_ids = context[:all_ids]
        return if all_ids.empty? && Bulkrax.fill_in_blank_source_identifiers.present?

        find_record = context[:find_record_by_source_identifier]

        collect_child_ids(record, context).each do |child_id|
          next if all_ids.include?(child_id)
          next if find_record&.call(child_id)

          context[:errors] << {
            row: row_index,
            source_identifier: record[:source_identifier],
            severity: 'error',
            category: 'invalid_child_reference',
            column: 'children',
            value: child_id,
            message: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.message',
                            value: child_id,
                            field: 'source_identifier'),
            suggestion: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.suggestion')
          }
        end
      end

      # Gathers the distinct child ids named by a row: the split values of
      # record[:children] plus the values of any raw columns named
      # "<children_column>_<digits>".
      #
      # @return [Array<String>] stripped, non-blank, de-duplicated ids
      def self.collect_child_ids(record, context)
        split_pattern = context[:child_split_pattern] || '|'
        children_column = context[:children_column] || 'children'

        base_ids = record[:children].to_s.split(split_pattern).map(&:strip).reject(&:blank?)

        suffix_pattern = /\A#{Regexp.escape(children_column)}_\d+\z/
        # A record may carry no raw row; treat that as having no suffixed columns.
        raw_row = record[:raw_row] || {}
        suffix_ids = raw_row
                     .select { |column, _| column.to_s.match?(suffix_pattern) }
                     .values
                     .map { |value| value.to_s.strip }
                     .reject(&:blank?)

        (base_ids + suffix_ids).uniq
      end
      private_class_method :collect_child_ids
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
  module CsvRow
    ##
    # Detects circular parent-child relationships in the CSV.
    # A circular reference occurs when following the parent chain from any record
    # eventually leads back to itself (e.g. A→B→C→A).
    #
    # The validator computes, on first invocation, the set of all record ids
    # involved in any cycle of the directed graph (child → parents) and caches it
    # in context[:circular_reference_ids].
    # Subsequent per-row calls simply check membership in that set.
    #
    # Requires context key:
    #   :relationship_graph – Hash { source_identifier => [parent_ids] } built by
    #                         run_row_validators before iterating rows.
    module CircularReference
      # Appends an error to context[:errors] when the record's source identifier
      # is part of a cycle.
      #
      # @param record [Hash] parsed row; reads :source_identifier
      # @param row_index [Integer] row number reported in the error
      # @param context [Hash] shared validation state (see module comment)
      def self.call(record, row_index, context)
        cycle_ids = context[:circular_reference_ids] ||= detect_cycle_ids(context[:relationship_graph] || {})
        return unless cycle_ids.include?(record[:source_identifier])

        context[:errors] << {
          row: row_index,
          source_identifier: record[:source_identifier],
          severity: 'error',
          category: 'circular_reference',
          column: 'parents',
          value: record[:source_identifier],
          message: I18n.t('bulkrax.importer.guided_import.validation.circular_reference_validator.errors.message',
                          value: record[:source_identifier]),
          suggestion: I18n.t('bulkrax.importer.guided_import.validation.circular_reference_validator.errors.suggestion')
        }
      end

      # Returns the set of all source identifiers that participate in at least one cycle.
      #
      # Uses Tarjan's strongly connected components algorithm: a node is on a
      # cycle exactly when its component has more than one member or it lists
      # itself as a parent. A plain "mark the current DFS path on a back-edge"
      # approach misses nodes whose cycle closes through an already-visited node.
      #
      # @param graph [Hash] { source_identifier => [parent_ids] }
      # @return [Set] ids that are part of a cycle
      def self.detect_cycle_ids(graph)
        state = { counter: 0, index: {}, lowlink: {}, stack: [], on_stack: Set.new, cycle_ids: Set.new }
        all_nodes = graph.keys.to_set | graph.values.flatten.to_set

        all_nodes.each do |node|
          strong_connect(node, graph, state) unless state[:index].key?(node)
        end

        state[:cycle_ids]
      end
      private_class_method :detect_cycle_ids

      # Recursive step of Tarjan's algorithm. Assigns discovery indexes, tracks
      # the lowest index reachable from +node+, and when +node+ is the root of a
      # component pops that component off the stack and records it if cyclic.
      def self.strong_connect(node, graph, state) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
        state[:index][node] = state[:lowlink][node] = state[:counter]
        state[:counter] += 1
        state[:stack].push(node)
        state[:on_stack].add(node)

        neighbors = graph[node] || []
        neighbors.each do |neighbor|
          if !state[:index].key?(neighbor)
            strong_connect(neighbor, graph, state)
            state[:lowlink][node] = [state[:lowlink][node], state[:lowlink][neighbor]].min
          elsif state[:on_stack].include?(neighbor)
            # Neighbor belongs to the component currently being built.
            state[:lowlink][node] = [state[:lowlink][node], state[:index][neighbor]].min
          end
        end

        return unless state[:lowlink][node] == state[:index][node]

        component = []
        loop do
          member = state[:stack].pop
          state[:on_stack].delete(member)
          component << member
          break if member.eql?(node)
        end

        # A single-node component is only a cycle when the node points at itself.
        state[:cycle_ids].merge(component) if component.length > 1 || neighbors.include?(node)
      end
      private_class_method :strong_connect
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
  module CsvRow
    ##
    # Validates that controlled vocabulary values in each row are valid according
    # to the QA authority for that field.
    module ControlledVocabulary
      # Appends an error to context[:errors] for every controlled field whose
      # value has no term in its authority or whose term is inactive.
      # Fields with a blank value or no resolvable authority are skipped.
      #
      # @param record [Hash] parsed row; reads :model, :source_identifier and :raw_row
      # @param row_index [Integer] row number reported in the error
      # @param context [Hash] reads :field_metadata (model => { controlled_vocab_terms: [...] })
      #   and appends to :errors
      def self.call(record, row_index, context) # rubocop:disable Metrics/MethodLength
        field_metadata = context[:field_metadata]
        return if field_metadata.blank?

        metadata = field_metadata[record[:model]]
        return if metadata.blank?

        controlled_terms = metadata[:controlled_vocab_terms] || []
        return if controlled_terms.blank?

        # A record may carry no raw row; every field then reads as blank.
        raw_row = record[:raw_row] || {}

        controlled_terms.each do |field|
          value = raw_row[field]
          next if value.blank?

          authority = load_authority(field)
          next if authority.nil?

          term = authority.find(value)
          next unless term.blank? || term['active'] == false

          context[:errors] << {
            row: row_index,
            source_identifier: record[:source_identifier],
            severity: 'error',
            category: 'invalid_controlled_value',
            column: field,
            value: value,
            message: I18n.t('bulkrax.importer.guided_import.validation.controlled_vocabulary_validator.errors.message',
                            value: value, field: field),
            suggestion: suggestion(value, authority)
          }
        end
      end

      # Looks up the local QA subauthority for a field, trying the pluralized
      # field name first and then the name as given.
      #
      # @return [Object, nil] the authority, or nil when neither name resolves
      def self.load_authority(field)
        Qa::Authorities::Local.subauthority_for(field.pluralize)
      rescue Qa::InvalidSubAuthority
        begin
          Qa::Authorities::Local.subauthority_for(field)
        rescue Qa::InvalidSubAuthority
          nil
        end
      end
      private_class_method :load_authority

      # Builds the suggestion text for an invalid value: a "did you mean"
      # message when the spell checker finds a close active label, otherwise
      # the generic fallback.
      def self.suggestion(value, authority)
        closest_label = DidYouMean::SpellChecker.new(dictionary: dictionary_for(authority)).correct(value).first
        return fallback_suggestion if closest_label.nil?

        I18n.t('bulkrax.importer.guided_import.validation.did_you_mean', suggestion: closest_label)
      end
      private_class_method :suggestion

      # Generic suggestion used when no close match exists.
      def self.fallback_suggestion
        I18n.t('bulkrax.importer.guided_import.validation.controlled_vocabulary_validator.errors.suggestion')
      end
      private_class_method :fallback_suggestion

      # Distinct labels of the authority's active terms.
      def self.dictionary_for(authority)
        authority.all.filter_map { |term| term['label'] if term['active'] == true }.uniq
      end
      private_class_method :dictionary_for
    end
  end
end
|