bulkrax 9.4.0 → 9.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -2
- data/app/assets/javascripts/bulkrax/datatables.js +43 -8
- data/app/assets/javascripts/bulkrax/importers_stepper.js +221 -26
- data/app/assets/stylesheets/bulkrax/stepper/_review.scss +14 -12
- data/app/controllers/bulkrax/entries_controller.rb +2 -2
- data/app/controllers/bulkrax/exporters_controller.rb +3 -3
- data/app/controllers/bulkrax/guided_imports_controller.rb +3 -1
- data/app/controllers/bulkrax/importers_controller.rb +5 -5
- data/app/controllers/concerns/bulkrax/importer_file_handler.rb +1 -6
- data/app/errors/bulkrax/unzip_error.rb +16 -0
- data/app/jobs/bulkrax/importer_job.rb +40 -9
- data/app/matchers/bulkrax/application_matcher.rb +5 -6
- data/app/models/bulkrax/csv_entry.rb +1 -1
- data/app/models/bulkrax/importer.rb +3 -16
- data/app/parsers/bulkrax/application_parser.rb +50 -33
- data/app/parsers/bulkrax/bagit_parser.rb +12 -0
- data/app/parsers/bulkrax/csv_parser.rb +163 -49
- data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +4 -1
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +10 -8
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +69 -36
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +9 -7
- data/app/services/bulkrax/csv_template/file_validator.rb +1 -1
- data/app/services/bulkrax/csv_template/mapping_manager.rb +15 -6
- data/app/services/bulkrax/csv_template/split_formatter.rb +10 -3
- data/app/services/bulkrax/split_pattern_coercion.rb +42 -0
- data/app/services/bulkrax/stepper_response_formatter.rb +2 -1
- data/app/services/bulkrax/validation_error_csv_builder.rb +36 -12
- data/app/validators/bulkrax/csv_row/child_reference.rb +2 -1
- data/app/validators/bulkrax/csv_row/parent_reference.rb +1 -1
- data/app/validators/bulkrax/csv_row/required_values.rb +17 -3
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/index.html.erb +3 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +1 -1
- data/app/views/bulkrax/guided_imports/new.html.erb +7 -0
- data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +3 -3
- data/app/views/bulkrax/importers/index.html.erb +2 -0
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +3 -1
- data/app/views/bulkrax/shared/_datatable_i18n.html.erb +3 -0
- data/config/locales/bulkrax.de.yml +95 -2
- data/config/locales/bulkrax.en.yml +58 -2
- data/config/locales/bulkrax.es.yml +95 -2
- data/config/locales/bulkrax.fr.yml +95 -2
- data/config/locales/bulkrax.it.yml +95 -2
- data/config/locales/bulkrax.pt-BR.yml +95 -2
- data/config/locales/bulkrax.zh.yml +96 -2
- data/db/migrate/20260424081537_remove_parents_from_bulkrax_importer_runs.rb +9 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +15 -1
- metadata +8 -4
|
@@ -46,14 +46,20 @@ module Bulkrax
|
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
|
|
49
|
-
svc = ValidationContext.new(
|
|
49
|
+
svc = Bulkrax::CsvParser::ValidationContext.new(
|
|
50
50
|
mapping_manager: mapping_manager,
|
|
51
51
|
field_analyzer: field_analyzer,
|
|
52
52
|
all_models: all_models,
|
|
53
53
|
mappings: mappings
|
|
54
54
|
)
|
|
55
55
|
all_cols = CsvTemplate::ColumnBuilder.new(svc).all_columns
|
|
56
|
-
|
|
56
|
+
# ColumnBuilder only emits the first `from:` alias per non-property key
|
|
57
|
+
# (core/file/relationship). Accept every alias so a CSV using a
|
|
58
|
+
# non-primary alias like `file` (when mappings are `from: ['item', 'file']`)
|
|
59
|
+
# isn't flagged unrecognised. Property-level aliases are handled
|
|
60
|
+
# separately by find_unrecognized_validation_headers via mapped_to_key.
|
|
61
|
+
non_property_aliases = non_property_mapping_aliases(mappings)
|
|
62
|
+
(all_cols + non_property_aliases).uniq - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
|
|
57
63
|
rescue StandardError => e
|
|
58
64
|
Rails.logger.error("CsvParser.validate_csv: error building valid headers – #{e.message}")
|
|
59
65
|
standard = %w[model source_identifier parents children file]
|
|
@@ -62,6 +68,20 @@ module Bulkrax
|
|
|
62
68
|
(standard + model_fields).uniq
|
|
63
69
|
end
|
|
64
70
|
|
|
71
|
+
# Returns every `from:` alias for mapping keys that describe non-property
|
|
72
|
+
# columns (core/file/relationship). These keys are fixed by the descriptor
|
|
73
|
+
# rather than discovered per-model, so every alias is unambiguously valid.
|
|
74
|
+
def non_property_mapping_aliases(mappings)
|
|
75
|
+
descriptor = CsvTemplate::ColumnDescriptor.new
|
|
76
|
+
non_property_keys = descriptor.core_columns +
|
|
77
|
+
CsvTemplate::ColumnDescriptor::COLUMN_DESCRIPTIONS[:files].flat_map(&:keys) +
|
|
78
|
+
CsvTemplate::ColumnDescriptor::COLUMN_DESCRIPTIONS[:relationships].flat_map(&:keys)
|
|
79
|
+
non_property_keys.flat_map do |key|
|
|
80
|
+
entry = mappings[key]
|
|
81
|
+
entry.is_a?(Hash) ? Array(entry["from"]) : []
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
65
85
|
def find_missing_required_headers(headers, field_metadata, mapping_manager)
|
|
66
86
|
csv_keys = headers.map { |h| mapping_manager.mapped_to_key(h).sub(/_\d+\z/, '') }.uniq
|
|
67
87
|
missing = []
|
|
@@ -73,11 +93,23 @@ module Bulkrax
|
|
|
73
93
|
missing.uniq
|
|
74
94
|
end
|
|
75
95
|
|
|
76
|
-
|
|
96
|
+
# A header is considered recognised if it appears in valid_headers or
|
|
97
|
+
# if it matches any alias in a known property's `from` array. The real
|
|
98
|
+
# importer (CsvParser#missing_elements) scans every `from` entry when
|
|
99
|
+
# matching incoming columns, so the validator has to use the same rule
|
|
100
|
+
# — otherwise a CSV that imports cleanly gets flagged for columns like
|
|
101
|
+
# `creator` when the mapping declares `creator: { from: ['author', 'creator'] }`.
|
|
102
|
+
def find_unrecognized_validation_headers(headers, valid_headers, mapping_manager: nil, field_metadata: nil)
|
|
103
|
+
known_property_keys = (field_metadata || {}).values.flat_map { |m| Array(m[:properties]) }.to_set
|
|
77
104
|
checker = DidYouMean::SpellChecker.new(dictionary: valid_headers)
|
|
78
|
-
headers
|
|
79
|
-
|
|
80
|
-
|
|
105
|
+
unrecognized = headers.reject do |h|
|
|
106
|
+
next true if h.blank?
|
|
107
|
+
base = h.sub(/_\d+\z/, '')
|
|
108
|
+
next true if valid_headers.include?(h) || valid_headers.include?(base)
|
|
109
|
+
mapped_key = mapping_manager&.mapped_to_key(base)
|
|
110
|
+
mapped_key && known_property_keys.include?(mapped_key)
|
|
111
|
+
end
|
|
112
|
+
unrecognized.index_with { |h| checker.correct(h).first }
|
|
81
113
|
end
|
|
82
114
|
|
|
83
115
|
def find_empty_column_positions(headers, raw_csv)
|
|
@@ -120,25 +152,12 @@ module Bulkrax
|
|
|
120
152
|
}
|
|
121
153
|
end
|
|
122
154
|
|
|
123
|
-
def apply_rights_statement_validation_override!(result, missing_required)
|
|
124
|
-
only_rights = missing_required.present? &&
|
|
125
|
-
missing_required.all? { |h| h[:field].to_s == 'rights_statement' }
|
|
126
|
-
return unless only_rights && !result[:isValid]
|
|
127
|
-
return if result[:headers].blank?
|
|
128
|
-
return if result[:missingFiles]&.any?
|
|
129
|
-
|
|
130
|
-
result[:isValid] = true
|
|
131
|
-
result[:hasWarnings] = true
|
|
132
|
-
end
|
|
133
|
-
|
|
134
155
|
# Assembles the final result hash returned to the guided import UI.
|
|
135
156
|
def assemble_result(headers:, missing_required:, header_issues:, row_errors:, csv_data:, file_validator:, collections:, works:, file_sets:, notices: []) # rubocop:disable Metrics/ParameterLists
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
has_warnings = header_issues[:unrecognized].any? || header_issues[:empty_columns].any? ||
|
|
141
|
-
file_validator.possible_missing_files? || row_warning_entries.any? || notices.any?
|
|
157
|
+
is_valid, has_warnings = determine_validity(
|
|
158
|
+
headers: headers, missing_required: missing_required, header_issues: header_issues,
|
|
159
|
+
row_errors: row_errors, csv_data: csv_data, file_validator: file_validator, notices: notices
|
|
160
|
+
)
|
|
142
161
|
|
|
143
162
|
{
|
|
144
163
|
headers: headers,
|
|
@@ -147,7 +166,7 @@ module Bulkrax
|
|
|
147
166
|
unrecognized: header_issues[:unrecognized],
|
|
148
167
|
emptyColumns: header_issues[:empty_columns],
|
|
149
168
|
rowCount: csv_data.length,
|
|
150
|
-
isValid:
|
|
169
|
+
isValid: is_valid,
|
|
151
170
|
hasWarnings: has_warnings,
|
|
152
171
|
rowErrors: row_errors,
|
|
153
172
|
collections: collections,
|
|
@@ -161,6 +180,27 @@ module Bulkrax
|
|
|
161
180
|
}
|
|
162
181
|
end
|
|
163
182
|
|
|
183
|
+
# Returns [is_valid, has_warnings] for the assembled result.
|
|
184
|
+
# rights_statement can be supplied on Step 2, so a CSV missing ONLY the
|
|
185
|
+
# rights_statement column is valid-with-warnings rather than a blocker;
|
|
186
|
+
# the display formatter styles that case as a warning accordion.
|
|
187
|
+
def determine_validity(headers:, missing_required:, header_issues:, row_errors:, csv_data:, file_validator:, notices:) # rubocop:disable Metrics/ParameterLists
|
|
188
|
+
row_error_entries = row_errors.select { |e| e[:severity] == 'error' }
|
|
189
|
+
row_warning_entries = row_errors.select { |e| e[:severity] == 'warning' }
|
|
190
|
+
|
|
191
|
+
only_rights_missing = missing_required.present? &&
|
|
192
|
+
missing_required.all? { |h| h[:field].to_s == 'rights_statement' }
|
|
193
|
+
blocking_missing_required = missing_required.any? && !only_rights_missing
|
|
194
|
+
|
|
195
|
+
has_errors = blocking_missing_required || headers.blank? || csv_data.empty? ||
|
|
196
|
+
file_validator.missing_files.any? || row_error_entries.any?
|
|
197
|
+
has_warnings = header_issues[:unrecognized].any? || header_issues[:empty_columns].any? ||
|
|
198
|
+
file_validator.possible_missing_files? || row_warning_entries.any? ||
|
|
199
|
+
notices.any? || only_rights_missing
|
|
200
|
+
|
|
201
|
+
[!has_errors, has_warnings]
|
|
202
|
+
end
|
|
203
|
+
|
|
164
204
|
# Builds the find_record lambda used by row validators and hierarchy extraction.
|
|
165
205
|
def build_find_record
|
|
166
206
|
all_mappings = Bulkrax.field_mappings['Bulkrax::CsvParser'] || {}
|
|
@@ -205,19 +245,11 @@ module Bulkrax
|
|
|
205
245
|
end
|
|
206
246
|
|
|
207
247
|
def resolve_parent_split_pattern(mappings)
|
|
208
|
-
|
|
209
|
-
return nil if split_val.blank?
|
|
210
|
-
return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true
|
|
211
|
-
|
|
212
|
-
split_val
|
|
248
|
+
Bulkrax::SplitPatternCoercion.coerce(mappings.dig('parents', 'split') || mappings.dig(:parents, :split))
|
|
213
249
|
end
|
|
214
250
|
|
|
215
251
|
def resolve_children_split_pattern(mappings)
|
|
216
|
-
|
|
217
|
-
return nil if split_val.blank?
|
|
218
|
-
return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true
|
|
219
|
-
|
|
220
|
-
split_val
|
|
252
|
+
Bulkrax::SplitPatternCoercion.coerce(mappings.dig('children', 'split') || mappings.dig(:children, :split))
|
|
221
253
|
end
|
|
222
254
|
|
|
223
255
|
# Builds a graph of { source_identifier => [parent_ids] } from all CSV records.
|
|
@@ -264,8 +296,9 @@ module Bulkrax
|
|
|
264
296
|
end
|
|
265
297
|
|
|
266
298
|
def split_or_single(value, split_pattern)
|
|
267
|
-
|
|
268
|
-
|
|
299
|
+
coerced = Bulkrax::SplitPatternCoercion.coerce(split_pattern)
|
|
300
|
+
if coerced
|
|
301
|
+
value.to_s.split(coerced).map(&:strip).reject(&:blank?)
|
|
269
302
|
elsif value.present?
|
|
270
303
|
[value.to_s.strip]
|
|
271
304
|
else
|
|
@@ -5,7 +5,7 @@ module Bulkrax
|
|
|
5
5
|
# Hierarchy-building helpers for CsvValidation. Handles extracting and
|
|
6
6
|
# categorising items from parsed CSV data for the guided import tree view.
|
|
7
7
|
module CsvValidationHierarchy
|
|
8
|
-
def extract_validation_items(csv_data, all_ids = Set.new, find_record = nil, parent_split_pattern: nil, child_split_pattern:
|
|
8
|
+
def extract_validation_items(csv_data, all_ids = Set.new, find_record = nil, parent_split_pattern: nil, child_split_pattern: Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON)
|
|
9
9
|
child_to_parents = build_child_to_parents_map(csv_data, child_split_pattern: child_split_pattern)
|
|
10
10
|
collections = []
|
|
11
11
|
works = []
|
|
@@ -19,7 +19,7 @@ module Bulkrax
|
|
|
19
19
|
[collections, works, file_sets]
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
-
def build_child_to_parents_map(csv_data, child_split_pattern:
|
|
22
|
+
def build_child_to_parents_map(csv_data, child_split_pattern: Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON)
|
|
23
23
|
Hash.new { |h, k| h[k] = [] }.tap do |map|
|
|
24
24
|
csv_data.each do |item|
|
|
25
25
|
next if item[:source_identifier].blank?
|
|
@@ -31,7 +31,7 @@ module Bulkrax
|
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
-
def categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets, find_record = nil, parent_split_pattern: nil, child_split_pattern:
|
|
34
|
+
def categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets, find_record = nil, parent_split_pattern: nil, child_split_pattern: Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON) # rubocop:disable Metrics/ParameterLists
|
|
35
35
|
item_id = item[:source_identifier]
|
|
36
36
|
model_str = item[:model].to_s
|
|
37
37
|
|
|
@@ -51,7 +51,7 @@ module Bulkrax
|
|
|
51
51
|
item_id = item[:source_identifier]
|
|
52
52
|
title = item[:raw_row]['title'] || item_id
|
|
53
53
|
parents = collect_relationship_ids(item[:parent], item[:raw_row], 'parents', split_pattern: opts[:parent])
|
|
54
|
-
children = collect_relationship_ids(item[:children], item[:raw_row], 'children', split_pattern: opts[:child] ||
|
|
54
|
+
children = collect_relationship_ids(item[:children], item[:raw_row], 'children', split_pattern: opts[:child] || Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON)
|
|
55
55
|
|
|
56
56
|
{
|
|
57
57
|
id: item_id,
|
|
@@ -65,12 +65,14 @@ module Bulkrax
|
|
|
65
65
|
}
|
|
66
66
|
end
|
|
67
67
|
|
|
68
|
-
def parse_relationship_field(value, split_pattern:
|
|
68
|
+
def parse_relationship_field(value, split_pattern: Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON)
|
|
69
69
|
return [] if value.blank?
|
|
70
|
-
|
|
70
|
+
|
|
71
|
+
pattern = Bulkrax::SplitPatternCoercion.coerce(split_pattern) || Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
|
72
|
+
value.to_s.split(pattern).map(&:strip).reject(&:blank?)
|
|
71
73
|
end
|
|
72
74
|
|
|
73
|
-
def collect_relationship_ids(base_value, raw_row, column, split_pattern:
|
|
75
|
+
def collect_relationship_ids(base_value, raw_row, column, split_pattern: Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON)
|
|
74
76
|
base_ids = parse_relationship_field(base_value, split_pattern: split_pattern)
|
|
75
77
|
suffix_pattern = /\A#{Regexp.escape(column)}_\d+\z/
|
|
76
78
|
suffix_ids = raw_row
|
|
@@ -46,7 +46,7 @@ module Bulkrax
|
|
|
46
46
|
@referenced_files ||= @csv_data.flat_map do |item|
|
|
47
47
|
next [] if item[:file].blank?
|
|
48
48
|
|
|
49
|
-
item[:file].split(Bulkrax.
|
|
49
|
+
item[:file].split(Bulkrax::CsvParser.file_split_pattern).map { |f| File.basename(f.strip) }
|
|
50
50
|
end.compact
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -6,8 +6,16 @@ module Bulkrax
|
|
|
6
6
|
class MappingManager
|
|
7
7
|
attr_reader :mappings
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
# @param include_generated [Boolean] when false, excludes mapping entries
|
|
10
|
+
# flagged `generated: true` (system-maintained fields like
|
|
11
|
+
# date_uploaded, depositor, source_identifier). Template generation
|
|
12
|
+
# passes +false+ so the downloadable template doesn't expose
|
|
13
|
+
# system columns; import validation uses the default +true+ so that
|
|
14
|
+
# user-configured mappings like `rights_statement` (which Bulkrax
|
|
15
|
+
# ships with `generated: true`) are still recognised when the CSV
|
|
16
|
+
# uses one of their `from:` aliases.
|
|
17
|
+
def initialize(include_generated: true)
|
|
18
|
+
@mappings = load_mappings(include_generated: include_generated)
|
|
11
19
|
end
|
|
12
20
|
|
|
13
21
|
def mapped_to_key(column_str)
|
|
@@ -45,10 +53,11 @@ module Bulkrax
|
|
|
45
53
|
|
|
46
54
|
private
|
|
47
55
|
|
|
48
|
-
def load_mappings
|
|
49
|
-
Bulkrax.field_mappings["Bulkrax::CsvParser"]
|
|
50
|
-
|
|
51
|
-
|
|
56
|
+
def load_mappings(include_generated:)
|
|
57
|
+
raw = Bulkrax.field_mappings["Bulkrax::CsvParser"]
|
|
58
|
+
return raw if include_generated
|
|
59
|
+
|
|
60
|
+
raw.reject { |_key, value| value["generated"] == true }
|
|
52
61
|
end
|
|
53
62
|
end
|
|
54
63
|
end
|
|
@@ -9,6 +9,8 @@ module Bulkrax
|
|
|
9
9
|
|
|
10
10
|
if split_value == true
|
|
11
11
|
parse_pattern(Bulkrax.multi_value_element_split_on.source)
|
|
12
|
+
elsif split_value.is_a?(Regexp)
|
|
13
|
+
parse_pattern(split_value.source)
|
|
12
14
|
elsif split_value.is_a?(String)
|
|
13
15
|
parse_pattern(split_value)
|
|
14
16
|
else
|
|
@@ -34,9 +36,14 @@ module Bulkrax
|
|
|
34
36
|
end
|
|
35
37
|
|
|
36
38
|
def format_message(chars)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
list = chars.chars
|
|
40
|
+
# Use spaces rather than commas between delimiters so the message
|
|
41
|
+
# stays unambiguous when one of the delimiters IS a comma.
|
|
42
|
+
formatted = if list.length <= 1
|
|
43
|
+
list.first || chars # no extractable chars → surface as-is
|
|
44
|
+
else
|
|
45
|
+
"#{list[0..-2].join(' ')} or #{list.last}"
|
|
46
|
+
end
|
|
40
47
|
"Split multiple values with #{formatted}"
|
|
41
48
|
end
|
|
42
49
|
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
# Coerces a stored split pattern into a Regexp suitable for String#split.
|
|
5
|
+
#
|
|
6
|
+
# Bulkrax field mappings are persisted as JSON in several host applications
|
|
7
|
+
# (e.g. Hyku), so the `split` value for a mapping can show up in several
|
|
8
|
+
# forms. This module is the single place that normalises them:
|
|
9
|
+
#
|
|
10
|
+
# * nil / blank → `nil` (caller should treat as "no split")
|
|
11
|
+
# * already a Regexp → returned unchanged
|
|
12
|
+
# * `true` → {Bulkrax.multi_value_element_split_on}
|
|
13
|
+
# * String, any content → `Regexp.new(str)` — the String is treated as a
|
|
14
|
+
# regex source, matching the long-standing
|
|
15
|
+
# contract in {Bulkrax::ApplicationMatcher}.
|
|
16
|
+
# `"\\|"` → `/\|/`; a serialised regex like
|
|
17
|
+
# `"(?-mix:\\s*[;|]\\s*)"` rebuilds into an
|
|
18
|
+
# equivalent Regexp.
|
|
19
|
+
# * invalid regex source → `nil` (we neither raise nor hand back an
|
|
20
|
+
# unusable value to String#split).
|
|
21
|
+
# * any other type → `nil` (likewise — never returns something
|
|
22
|
+
# String#split can't accept).
|
|
23
|
+
#
|
|
24
|
+
# Import, validation, and hierarchy code paths all route through here so
|
|
25
|
+
# the behaviour is consistent regardless of how the mapping was persisted.
|
|
26
|
+
#
|
|
27
|
+
# @param split_val [nil, true, Regexp, String, Object] the configured split
|
|
28
|
+
# @return [nil, Regexp] a pattern ready for String#split, or nil when
|
|
29
|
+
# no usable pattern can be derived from the input.
|
|
30
|
+
module SplitPatternCoercion
|
|
31
|
+
def self.coerce(split_val)
|
|
32
|
+
return nil if split_val.blank?
|
|
33
|
+
return Bulkrax.multi_value_element_split_on if split_val == true
|
|
34
|
+
return split_val if split_val.is_a?(Regexp)
|
|
35
|
+
return nil unless split_val.is_a?(String)
|
|
36
|
+
|
|
37
|
+
Regexp.new(split_val)
|
|
38
|
+
rescue RegexpError
|
|
39
|
+
nil
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -338,7 +338,8 @@ module Bulkrax
|
|
|
338
338
|
message = [message, error[:suggestion]].compact.join(' ') if error[:suggestion].present?
|
|
339
339
|
{
|
|
340
340
|
field: I18n.t('bulkrax.importer.guided_import.stepper_response_formatter.row_errors_issue.row_label', row: error[:row], column: error[:column]),
|
|
341
|
-
message: message
|
|
341
|
+
message: message,
|
|
342
|
+
category: error[:category]
|
|
342
343
|
}
|
|
343
344
|
end
|
|
344
345
|
end
|
|
@@ -4,9 +4,18 @@ require 'csv'
|
|
|
4
4
|
|
|
5
5
|
module Bulkrax
|
|
6
6
|
# Builds a CSV string containing all validation errors from a guided import.
|
|
7
|
+
#
|
|
8
|
+
# Output columns, in order:
|
|
9
|
+
# 1. row — 1-based row number from the source CSV (blank for file-level rows)
|
|
10
|
+
# 2. errors — all error messages for that row, joined with " | "
|
|
11
|
+
# 3. categories — distinct validator categories for that row's errors (e.g.
|
|
12
|
+
# "missing_required_value | invalid_parent_reference"),
|
|
13
|
+
# joined with " | "; blank for file-level rows
|
|
14
|
+
# 4..N. the original CSV headers, carrying the raw cell values
|
|
15
|
+
#
|
|
7
16
|
# File-level errors (missing required columns, unrecognized headers, empty
|
|
8
17
|
# columns, missing files) appear first as summary rows with a blank `row`
|
|
9
|
-
# cell. Row-level errors follow, one output row per
|
|
18
|
+
# and `categories` cell. Row-level errors follow, one output row per data row.
|
|
10
19
|
#
|
|
11
20
|
# Usage:
|
|
12
21
|
# csv = Bulkrax::ValidationErrorCsvBuilder.build(
|
|
@@ -21,10 +30,21 @@ module Bulkrax
|
|
|
21
30
|
# }
|
|
22
31
|
# )
|
|
23
32
|
class ValidationErrorCsvBuilder
|
|
33
|
+
I18N_BASE = 'bulkrax.importer.guided_import.validation.validation_error_csv_builder'
|
|
34
|
+
private_constant :I18N_BASE
|
|
35
|
+
|
|
24
36
|
# @param headers [Array<String>] original CSV headers in order
|
|
25
37
|
# @param csv_data [Array<Hash>] one entry per data row; each hash has
|
|
26
38
|
# :raw_row (String-keyed hash of column=>value)
|
|
27
|
-
# @param row_errors [Array<Hash>] each hash
|
|
39
|
+
# @param row_errors [Array<Hash>] each hash describes a single row-level
|
|
40
|
+
# validation result with the following keys:
|
|
41
|
+
# - :row [Integer] 1-based source row number (header is row 1)
|
|
42
|
+
# - :message [String] human-readable error/warning message
|
|
43
|
+
# - :category [String, nil] validator category slug used to populate the
|
|
44
|
+
# `categories` output column (e.g. 'missing_required_value',
|
|
45
|
+
# 'invalid_parent_reference'); omitted/nil categories are dropped
|
|
46
|
+
# - :severity, :column, :value, :suggestion, :source_identifier — not
|
|
47
|
+
# emitted by this builder but commonly present on the same hash
|
|
28
48
|
# @param file_errors [Hash] file-level issues:
|
|
29
49
|
# - :missing_required [Array<Hash>] each hash has :model and :field
|
|
30
50
|
# - :unrecognized [Hash] column_name => suggestion_or_nil
|
|
@@ -47,17 +67,19 @@ module Bulkrax
|
|
|
47
67
|
blank_data = Array.new(@headers.length)
|
|
48
68
|
|
|
49
69
|
CSV.generate(force_quotes: false) do |csv|
|
|
50
|
-
csv << ['row', 'errors'] + @headers
|
|
70
|
+
csv << ['row', 'errors', 'categories'] + @headers
|
|
51
71
|
|
|
52
72
|
file_level_error_rows.each do |message|
|
|
53
|
-
csv << [nil, message] + blank_data
|
|
73
|
+
csv << [nil, message, nil] + blank_data
|
|
54
74
|
end
|
|
55
75
|
|
|
56
76
|
@csv_data.each_with_index do |record, index|
|
|
57
77
|
row_number = index + 2 # header is row 1; first data row is row 2
|
|
58
|
-
|
|
78
|
+
row_errors = errors_by_row[row_number]
|
|
79
|
+
error_messages = row_errors&.map { |e| e[:message] }&.join(' | ')
|
|
80
|
+
error_categories = row_errors&.map { |e| e[:category] }&.compact&.uniq&.join(' | ')
|
|
59
81
|
raw_row = record[:raw_row] || {}
|
|
60
|
-
csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] }
|
|
82
|
+
csv << [row_number, error_messages, error_categories] + @headers.map { |h| raw_row[h] }
|
|
61
83
|
end
|
|
62
84
|
end
|
|
63
85
|
end
|
|
@@ -76,21 +98,23 @@ module Bulkrax
|
|
|
76
98
|
messages = []
|
|
77
99
|
|
|
78
100
|
Array(@file_errors[:missing_required]).each do |entry|
|
|
79
|
-
messages << "
|
|
101
|
+
messages << I18n.t("#{I18N_BASE}.missing_required_column", field: entry[:field], model: entry[:model])
|
|
80
102
|
end
|
|
81
103
|
|
|
82
104
|
Hash(@file_errors[:unrecognized]).each do |col, suggestion|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
105
|
+
messages << if suggestion.present?
|
|
106
|
+
I18n.t("#{I18N_BASE}.unrecognized_column_with_suggestion", column: col, suggestion: suggestion)
|
|
107
|
+
else
|
|
108
|
+
I18n.t("#{I18N_BASE}.unrecognized_column", column: col)
|
|
109
|
+
end
|
|
86
110
|
end
|
|
87
111
|
|
|
88
112
|
Array(@file_errors[:empty_columns]).each do |pos|
|
|
89
|
-
messages << "
|
|
113
|
+
messages << I18n.t("#{I18N_BASE}.empty_column", column: pos + 2)
|
|
90
114
|
end
|
|
91
115
|
|
|
92
116
|
Array(@file_errors[:missing_files]).each do |filename|
|
|
93
|
-
messages << "
|
|
117
|
+
messages << I18n.t("#{I18N_BASE}.missing_file", filename: filename)
|
|
94
118
|
end
|
|
95
119
|
|
|
96
120
|
messages
|
|
@@ -37,7 +37,8 @@ module Bulkrax
|
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
def self.collect_child_ids(record, context)
|
|
40
|
-
split_pattern = context[:child_split_pattern] ||
|
|
40
|
+
split_pattern = Bulkrax::SplitPatternCoercion.coerce(context[:child_split_pattern]) ||
|
|
41
|
+
Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
|
41
42
|
children_column = context[:children_column] || 'children'
|
|
42
43
|
|
|
43
44
|
base_ids = record[:children].to_s.split(split_pattern).map(&:strip).reject(&:blank?)
|
|
@@ -34,7 +34,7 @@ module Bulkrax
|
|
|
34
34
|
end
|
|
35
35
|
|
|
36
36
|
def self.collect_parent_ids(record, context)
|
|
37
|
-
split_pattern = context[:parent_split_pattern]
|
|
37
|
+
split_pattern = Bulkrax::SplitPatternCoercion.coerce(context[:parent_split_pattern])
|
|
38
38
|
parent_column = context[:parent_column] || 'parents'
|
|
39
39
|
|
|
40
40
|
base_ids = if split_pattern
|
|
@@ -17,7 +17,7 @@ module Bulkrax
|
|
|
17
17
|
return if metadata.blank?
|
|
18
18
|
|
|
19
19
|
add_default_work_type_warning(context, record, row_index, model) if using_default
|
|
20
|
-
add_missing_required_value_errors(context, record, row_index, metadata)
|
|
20
|
+
add_missing_required_value_errors(context, record, row_index, metadata, context[:mapping_manager])
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def self.add_default_work_type_warning(context, record, row_index, model)
|
|
@@ -38,9 +38,11 @@ module Bulkrax
|
|
|
38
38
|
end
|
|
39
39
|
private_class_method :add_default_work_type_warning
|
|
40
40
|
|
|
41
|
-
def self.add_missing_required_value_errors(context, record, row_index, metadata)
|
|
41
|
+
def self.add_missing_required_value_errors(context, record, row_index, metadata, mapping_manager)
|
|
42
42
|
(metadata[:required_terms] || []).each do |field|
|
|
43
|
-
|
|
43
|
+
column_present = record[:raw_row].keys.any? { |key| resolve_header(key.to_s, mapping_manager) == field }
|
|
44
|
+
next unless column_present
|
|
45
|
+
next if record[:raw_row].any? { |key, value| resolve_header(key.to_s, mapping_manager) == field && value.present? }
|
|
44
46
|
|
|
45
47
|
context[:errors] << {
|
|
46
48
|
row: row_index,
|
|
@@ -56,6 +58,18 @@ module Bulkrax
|
|
|
56
58
|
end
|
|
57
59
|
private_class_method :add_missing_required_value_errors
|
|
58
60
|
|
|
61
|
+
# Resolves a raw CSV header into its mapping key so that `from:` aliases
|
|
62
|
+
# are honoured (e.g. a column named `rights` satisfies the requirement
|
|
63
|
+
# for `rights_statement` when the mapping declares
|
|
64
|
+
# `rights_statement: { from: ['rights', 'rights_statement', ...] }`).
|
|
65
|
+
# Numeric suffixes (e.g. `title_1`) are stripped before lookup so they
|
|
66
|
+
# satisfy the unsuffixed required field.
|
|
67
|
+
def self.resolve_header(header, mapping_manager)
|
|
68
|
+
base = normalize_header(header)
|
|
69
|
+
mapping_manager ? mapping_manager.mapped_to_key(base) : base
|
|
70
|
+
end
|
|
71
|
+
private_class_method :resolve_header
|
|
72
|
+
|
|
59
73
|
def self.normalize_header(header)
|
|
60
74
|
header.sub(/_\d+\z/, '')
|
|
61
75
|
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<% provide :page_header do %>
|
|
2
|
-
<h1><span class="fa fa-cloud-download" aria-hidden="true"></span>
|
|
2
|
+
<h1><span class="fa fa-cloud-download" aria-hidden="true"></span> <%= t('bulkrax.headings.exporters') %></h1>
|
|
3
3
|
<div class="pull-right">
|
|
4
4
|
<%= link_to new_exporter_path, class: 'btn btn-primary', data: { turbolinks: false } do %>
|
|
5
5
|
<span class="fa fa-edit" aria-hidden="true"></span> <%= t(:'helpers.action.exporter.new') %>
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
<meta name="turbolinks-cache-control" content="no-cache">
|
|
12
12
|
<% end %>
|
|
13
13
|
|
|
14
|
+
<%= render 'bulkrax/shared/datatable_i18n' %>
|
|
15
|
+
|
|
14
16
|
<div class="panel card panel-default">
|
|
15
17
|
<div class="panel-body card-body">
|
|
16
18
|
<div class="table-responsive">
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
<% if File.exist?(@exporter.exporter_export_zip_path) %>
|
|
11
11
|
<%= simple_form_for @exporter, method: :get, url: exporter_download_path(@exporter), html: { class: 'form-inline bulkrax-p-align' } do |form| %>
|
|
12
|
-
<strong
|
|
12
|
+
<strong><%= t('bulkrax.exporter.labels.download') %>:</strong>
|
|
13
13
|
<%= render 'downloads', exporter: @exporter, form: form %>
|
|
14
14
|
<%= form.button :submit, value: t('helpers.action.exporter.download'), data: { disable_with: false } %>
|
|
15
15
|
<% end %>
|
|
@@ -520,6 +520,13 @@
|
|
|
520
520
|
</div>
|
|
521
521
|
</div>
|
|
522
522
|
|
|
523
|
+
<div class="review-section review-errors" style="display: none;">
|
|
524
|
+
<h4><%= t('bulkrax.importer.guided_import.step3.section_errors') %></h4>
|
|
525
|
+
<div class="review-errors-list">
|
|
526
|
+
<!-- Will be populated by JS -->
|
|
527
|
+
</div>
|
|
528
|
+
</div>
|
|
529
|
+
|
|
523
530
|
<div class="review-section review-warnings" style="display: none;">
|
|
524
531
|
<h4><%= t('bulkrax.importer.guided_import.step3.section_warnings') %></h4>
|
|
525
532
|
<div class="review-warnings-list">
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
<div class="modal-dialog" role="document">
|
|
3
3
|
<div class="modal-content">
|
|
4
4
|
<div class="modal-body">
|
|
5
|
-
<h5
|
|
5
|
+
<h5><%= t('bulkrax.importer.edit_form.entry_modal_title') %></h5>
|
|
6
6
|
<hr />
|
|
7
|
-
<p
|
|
7
|
+
<p><%= t('bulkrax.importer.edit_form.entry_rebuild_hint') %></p>
|
|
8
8
|
<%= link_to t('helpers.action.importer.build'), item_entry_path(item, e), method: :patch, class: 'btn btn-primary' %>
|
|
9
9
|
<hr />
|
|
10
|
-
<p
|
|
10
|
+
<p><%= t('bulkrax.importer.edit_form.entry_remove_and_recreate_hint') %></p>
|
|
11
11
|
<%= link_to t('helpers.action.importer.remove_and_build'), item_entry_path(item, e, destroy_first: true), method: :patch, class: 'btn btn-primary' %>
|
|
12
12
|
</div>
|
|
13
13
|
<div class="modal-footer">
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
<%= render 'bulkrax/shared/datatable_i18n' %>
|
|
2
|
+
|
|
1
3
|
<div class="col-xs-12 main-header d-flex justify-content-between align-items-center">
|
|
2
4
|
<h1><span class="fa fa-cloud-upload" aria-hidden="true"></span> Importer: <%= @importer.name %></h1>
|
|
3
5
|
<div class="pull-right">
|
|
@@ -58,7 +60,7 @@
|
|
|
58
60
|
<div class="accordion-container">
|
|
59
61
|
<div class="accordion-heading" role="tab" id="parser-fields-heading">
|
|
60
62
|
<a class="accordion-title" role="button" data-toggle="collapse" data-target="#parser-fields-importer-show" aria-expanded="true" aria-controls="parser-fields-importer-show">
|
|
61
|
-
|
|
63
|
+
<%= t('bulkrax.importer.labels.parser_fields') %>:
|
|
62
64
|
</a>
|
|
63
65
|
<a role="button" data-toggle="collapse" data-target="#parser-fields-importer-show" aria-expanded="true" aria-controls="parser-fields-importer-show">
|
|
64
66
|
<div class="accordion-icon fa fa-times-circle" aria-hidden="true"></div>
|