bulkrax 9.3.5 → 9.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  12. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  22. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  23. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  24. data/app/controllers/bulkrax/importers_controller.rb +28 -31
  25. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  26. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
  27. data/app/errors/bulkrax/unzip_error.rb +16 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/importer_job.rb +42 -4
  31. data/app/models/bulkrax/csv_entry.rb +27 -7
  32. data/app/models/bulkrax/entry.rb +4 -0
  33. data/app/models/bulkrax/importer.rb +27 -10
  34. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  36. data/app/parsers/bulkrax/application_parser.rb +63 -20
  37. data/app/parsers/bulkrax/bagit_parser.rb +12 -0
  38. data/app/parsers/bulkrax/csv_parser.rb +168 -25
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  66. data/app/views/bulkrax/importers/index.html.erb +6 -1
  67. data/app/views/bulkrax/importers/new.html.erb +1 -1
  68. data/app/views/bulkrax/importers/show.html.erb +17 -1
  69. data/config/i18n-tasks.yml +195 -0
  70. data/config/locales/bulkrax.de.yml +508 -0
  71. data/config/locales/bulkrax.en.yml +463 -233
  72. data/config/locales/bulkrax.es.yml +508 -0
  73. data/config/locales/bulkrax.fr.yml +508 -0
  74. data/config/locales/bulkrax.it.yml +508 -0
  75. data/config/locales/bulkrax.pt-BR.yml +508 -0
  76. data/config/locales/bulkrax.zh.yml +507 -0
  77. data/config/routes.rb +10 -1
  78. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  79. data/lib/bulkrax/version.rb +1 -1
  80. data/lib/bulkrax.rb +31 -0
  81. metadata +56 -16
  82. data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
  83. data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
  84. data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
  85. data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
  86. data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
  87. data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
  88. data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
  89. data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
  90. data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
  91. data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
  92. data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
  93. data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
  94. data/app/services/bulkrax/sample_csv_service.rb +0 -78
  95. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
# frozen_string_literal: true

module Bulkrax
  class CsvParser < ApplicationParser
    # Private helper methods for CsvValidation.
    module CsvValidationHelpers # rubocop:disable Metrics/ModuleLength
      include CsvValidationHierarchy

      # Resolve a symbol key from mappings for use as a record hash key.
      # Returns a Symbol matching the parser's symbol-keyed record hash.
      def resolve_validation_key(mapping_manager, key: nil, flag: nil, default:)
        options = mapping_manager.resolve_column_name(key: key, flag: flag, default: default.to_s)
        options.first&.to_sym || default
      end

      # Parse rows from a CsvEntry.read_data result into the canonical record shape.
      # CsvEntry.read_data returns CSV::Row objects with symbol headers; blank rows
      # are already filtered by CsvWrapper. Returns [] (and logs) on any parse error.
      def parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
        raw_csv.map do |row|
          # CSV::Row#to_h converts symbol headers → string-keyed hash
          row_hash = row.to_h.transform_keys(&:to_s)
          {
            source_identifier: row[source_id_key],
            model: row[:model],
            parent: row[parent_key],
            children: row[children_key],
            file: row[file_key],
            raw_row: row_hash
          }
        end
      rescue StandardError => e
        Rails.logger.error("CsvParser.validate_csv: error parsing rows – #{e.message}")
        []
      end

      # Builds { model_name => { properties:, required_terms:, controlled_vocab_terms: } }
      # for every model the validator needs to know about.
      def build_validation_field_metadata(all_models, field_analyzer)
        all_models.each_with_object({}) do |model, hash|
          field_list = field_analyzer.find_or_create_field_list_for(model_name: model)
          hash[model] = {
            properties: field_list.dig(model, 'properties') || [],
            required_terms: field_list.dig(model, 'required_terms') || [],
            controlled_vocab_terms: field_list.dig(model, 'controlled_vocab_terms') || []
          }
        end
      end

      # Computes the complete set of recognised header names. Falls back to a
      # minimal standard + per-model property list when the template column
      # builder raises.
      def build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
        svc = Bulkrax::CsvParser::ValidationContext.new(
          mapping_manager: mapping_manager,
          field_analyzer: field_analyzer,
          all_models: all_models,
          mappings: mappings
        )
        all_cols = CsvTemplate::ColumnBuilder.new(svc).all_columns
        all_cols - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
      rescue StandardError => e
        Rails.logger.error("CsvParser.validate_csv: error building valid headers – #{e.message}")
        standard = %w[model source_identifier parents children file]
        model_fields = field_metadata.values.flat_map { |m| m[:properties] }
                                     .map { |prop| mapping_manager.key_to_mapped_column(prop) }
        (standard + model_fields).uniq
      end

      # Returns [{ model:, field: }] entries for every required term that has no
      # corresponding CSV header (numeric `_N` suffixes stripped before comparing).
      def find_missing_required_headers(headers, field_metadata, mapping_manager)
        csv_keys = headers.map { |h| mapping_manager.mapped_to_key(h).sub(/_\d+\z/, '') }.uniq
        missing = []
        field_metadata.each do |model, meta|
          (meta[:required_terms] || []).each do |field|
            missing << { model: model, field: field } unless csv_keys.include?(field)
          end
        end
        missing.uniq
      end

      # Returns { unknown_header => suggested_correction_or_nil } using
      # DidYouMean spell-checking against the valid header dictionary.
      def find_unrecognized_validation_headers(headers, valid_headers)
        checker = DidYouMean::SpellChecker.new(dictionary: valid_headers)
        headers
          .reject { |h| h.blank? || valid_headers.include?(h) || valid_headers.include?(h.sub(/_\d+\z/, '')) }
          .index_with { |h| checker.correct(h).first }
      end

      # 1-based positions of columns that have no header but contain data.
      def find_empty_column_positions(headers, raw_csv)
        headers.each_with_index.filter_map do |h, i|
          next if h.present?
          has_data = raw_csv.any? { |row| row.fields[i].present? }
          i + 1 if has_data
        end
      end

      # Adds a missing source_identifier entry to missing_required when the column
      # is absent and fill_in_blank_source_identifiers is not configured.
      def append_missing_source_id!(missing_required, headers, source_id_key, all_models)
        return if headers.map(&:to_s).include?(source_id_key.to_s)
        return if Bulkrax.fill_in_blank_source_identifiers.present?

        all_models.each { |model| missing_required << { model: model, field: source_id_key.to_s } }
      end

      # Adds a file-level notice when the model column is absent or every row has a blank
      # model value, indicating that the default work type will be used for all rows.
      # When this notice is present the per-row default_work_type_used warnings are
      # suppressed in the formatter — no need to repeat the same message for every row.
      def append_missing_model_notice!(notices, headers, csv_data)
        default_model = Bulkrax.default_work_type
        return if default_model.blank?

        model_column_present = headers.map(&:to_s).include?('model')
        all_rows_blank = model_column_present && csv_data.all? { |r| r[:model].blank? }

        return if model_column_present && !all_rows_blank

        key_suffix = all_rows_blank ? 'column_empty' : 'column_missing'
        base_key = 'bulkrax.importer.guided_import.validation.default_work_type_notice'
        notices << {
          field: 'model',
          default_work_type: default_model,
          message: I18n.t("#{base_key}.message_#{key_suffix}", default_work_type: default_model),
          suggestion: I18n.t("#{base_key}.suggestion_#{key_suffix}")
        }
      end

      # Downgrades a failing result to valid-with-warnings when the ONLY missing
      # required field is rights_statement (which can be supplied on the importer
      # configuration screen instead of in the CSV).
      def apply_rights_statement_validation_override!(result, missing_required)
        only_rights = missing_required.present? &&
                      missing_required.all? { |h| h[:field].to_s == 'rights_statement' }
        return unless only_rights && !result[:isValid]
        return if result[:headers].blank?
        return if result[:missingFiles]&.any?

        result[:isValid] = true
        result[:hasWarnings] = true
      end

      # Assembles the final result hash returned to the guided import UI.
      def assemble_result(headers:, missing_required:, header_issues:, row_errors:, csv_data:, file_validator:, collections:, works:, file_sets:, notices: []) # rubocop:disable Metrics/ParameterLists
        row_error_entries = row_errors.select { |e| e[:severity] == 'error' }
        row_warning_entries = row_errors.select { |e| e[:severity] == 'warning' }
        has_errors = missing_required.any? || headers.blank? || csv_data.empty? ||
                     file_validator.missing_files.any? || row_error_entries.any?
        has_warnings = header_issues[:unrecognized].any? || header_issues[:empty_columns].any? ||
                       file_validator.possible_missing_files? || row_warning_entries.any? || notices.any?

        {
          headers: headers,
          missingRequired: missing_required,
          notices: notices,
          unrecognized: header_issues[:unrecognized],
          emptyColumns: header_issues[:empty_columns],
          rowCount: csv_data.length,
          isValid: !has_errors,
          hasWarnings: has_warnings,
          rowErrors: row_errors,
          collections: collections,
          works: works,
          fileSets: file_sets,
          totalItems: csv_data.length,
          fileReferences: file_validator.count_references,
          missingFiles: file_validator.missing_files,
          foundFiles: file_validator.found_files_count,
          zipIncluded: file_validator.zip_included?
        }
      end

      # Builds the find_record lambda used by row validators and hierarchy extraction.
      def build_find_record
        all_mappings = Bulkrax.field_mappings['Bulkrax::CsvParser'] || {}
        work_identifier = all_mappings.find { |_k, v| v['source_identifier'] == true }&.first || 'source'
        work_identifier_search = Array.wrap(all_mappings.dig(work_identifier, 'search_field')).first&.to_s ||
                                 "#{work_identifier}_sim"
        ->(id) { find_record_by_source_identifier(id, work_identifier, work_identifier_search) }
      end

      # Attempt to locate an existing repository record by its identifier.
      # The identifier may be a repository object ID or a source_identifier property value.
      # Checks the repository directly (by ID, then by Solr property search) — a Bulkrax
      # Entry record alone is not sufficient, as the object may never have been created.
      #
      # @param identifier [String]
      # @param work_identifier [String] the source_identifier property name (e.g. "source")
      # @param work_identifier_search [String] the Solr field for source_identifier (e.g. "source_sim")
      # @return [Boolean] true if a matching repository object is found
      def find_record_by_source_identifier(identifier, work_identifier, work_identifier_search)
        return false if identifier.blank?

        return true if Bulkrax.object_factory.find_or_nil(identifier).present?

        [Bulkrax.collection_model_class, *Bulkrax.curation_concerns].any? do |klass|
          Bulkrax.object_factory.search_by_property(
            value: identifier,
            klass: klass,
            search_field: work_identifier_search,
            name_field: work_identifier
          ).present?
        end
      rescue StandardError
        false
      end

      # Returns the raw CSV column name (String) for a relationship field.
      # Looks for the mapping entry flagged with +flag+ and returns its first +from+ value,
      # falling back to +default+ when none is found.
      def resolve_relationship_column(mappings, flag, default)
        entry = mappings.find { |_k, v| v.is_a?(Hash) && v[flag] }
        entry&.last&.dig('from')&.first || default
      end

      def resolve_parent_split_pattern(mappings)
        resolve_split_pattern(mappings, :parents)
      end

      def resolve_children_split_pattern(mappings)
        resolve_split_pattern(mappings, :children)
      end

      # Shared implementation for the parent/children split-pattern lookups.
      # Accepts string- or symbol-keyed mapping hashes; a literal +true+ split
      # value means "use the configured default split pattern".
      def resolve_split_pattern(mappings, field)
        split_val = mappings.dig(field.to_s, 'split') || mappings.dig(field.to_sym, :split)
        return nil if split_val.blank?
        return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true

        split_val
      end

      # Builds a graph of { source_identifier => [parent_ids] } from all CSV records.
      # Used by CircularReference validator to detect cycles across the whole CSV.
      #
      # Parent edges are collected from both directions:
      #   - explicit parent declarations (parents / parents_N columns)
      #   - inverted child declarations (children / children_N columns), mirroring
      #     the normalisation done in importers_stepper.js#normalizeRelationships
      def build_relationship_graph(csv_data, mappings)
        parent_column = resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents')
        children_column = resolve_relationship_column(mappings, 'related_children_field_mapping', 'children')
        parent_suffix = /\A#{Regexp.escape(parent_column)}_\d+\z/
        children_suffix = /\A#{Regexp.escape(children_column)}_\d+\z/

        graph = build_parent_edges(csv_data, parent_suffix, resolve_parent_split_pattern(mappings))
        invert_child_edges(graph, csv_data, children_suffix, resolve_children_split_pattern(mappings))
        graph
      end

      # { id => [declared parent ids] } from the base parent column plus any
      # parents_N suffix columns.
      def build_parent_edges(csv_data, suffix_pattern, split_pattern)
        csv_data.each_with_object({}) do |record, graph|
          id = record[:source_identifier]
          next if id.blank?

          base_ids = split_or_single(record[:parent], split_pattern)
          suffix_ids = suffixed_values(record[:raw_row], suffix_pattern)
          graph[id] = (base_ids + suffix_ids).uniq
        end
      end

      # For every "A declares child B" edge, records A as a parent of B in the graph.
      def invert_child_edges(graph, csv_data, suffix_pattern, split_pattern)
        csv_data.each do |record|
          id = record[:source_identifier]
          next if id.blank?

          child_ids = split_or_single(record[:children], split_pattern) +
                      suffixed_values(record[:raw_row], suffix_pattern)
          child_ids.each do |child_id|
            graph[child_id] ||= []
            graph[child_id] << id unless graph[child_id].include?(id)
          end
        end
      end

      # Splits +value+ on +split_pattern+ when given, otherwise wraps the single
      # (present) value in an array. Always returns stripped, non-blank strings.
      def split_or_single(value, split_pattern)
        if split_pattern
          value.to_s.split(split_pattern).map(&:strip).reject(&:blank?)
        elsif value.present?
          [value.to_s.strip]
        else
          []
        end
      end

      # Collects stripped, non-blank values from every raw-row column whose name
      # matches +suffix_pattern+ (e.g. parents_1, parents_2, …).
      def suffixed_values(raw_row, suffix_pattern)
        raw_row.select { |k, _| k.to_s.match?(suffix_pattern) }
               .values.map(&:to_s).map(&:strip).reject(&:blank?)
      end
    end
  end
end
# frozen_string_literal: true

module Bulkrax
  class CsvParser < ApplicationParser
    # Hierarchy-building helpers for CsvValidation. Handles extracting and
    # categorising items from parsed CSV data for the guided import tree view.
    module CsvValidationHierarchy
      # Buckets every parsed CSV record into collections, works and file sets.
      # Returns [collections, works, file_sets].
      def extract_validation_items(csv_data, all_ids = Set.new, find_record = nil, parent_split_pattern: nil, child_split_pattern: '|')
        parents_by_child = build_child_to_parents_map(csv_data, child_split_pattern: child_split_pattern)
        buckets = { collections: [], works: [], file_sets: [] }

        csv_data.each do |record|
          categorise_validation_item(record, parents_by_child, all_ids,
                                     buckets[:collections], buckets[:works], buckets[:file_sets], find_record,
                                     parent_split_pattern: parent_split_pattern, child_split_pattern: child_split_pattern)
        end

        buckets.values
      end

      # Inverts the child declarations: { child_id => [ids of rows declaring it] }.
      # Unknown keys default to an empty array.
      def build_child_to_parents_map(csv_data, child_split_pattern: '|')
        map = Hash.new { |hash, key| hash[key] = [] }
        csv_data.each do |record|
          declaring_id = record[:source_identifier]
          next if declaring_id.blank?

          child_ids = collect_relationship_ids(record[:children], record[:raw_row], 'children', split_pattern: child_split_pattern)
          child_ids.each { |child_id| map[child_id] << declaring_id }
        end
        map
      end

      # Routes a single record into the matching bucket based on its model value
      # (case-insensitive).
      def categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets, find_record = nil, parent_split_pattern: nil, child_split_pattern: '|') # rubocop:disable Metrics/ParameterLists
        opts = { type: nil, find_record: find_record, parent: parent_split_pattern, child: child_split_pattern }

        case item[:model].to_s.downcase
        when 'collection', 'collectionresource'
          collections << build_item_hash(item, child_to_parents, all_ids, opts.merge(type: 'collection'))
        when 'fileset', 'hyrax::fileset'
          file_sets << { id: item[:source_identifier], title: item[:raw_row]['title'] || item[:source_identifier], type: 'file_set' }
        else
          works << build_item_hash(item, child_to_parents, all_ids, opts.merge(type: 'work'))
        end
      end

      # Builds the tree-view node for a collection or work, including resolvable
      # (in-CSV) and external (repository-only) parent/child id lists.
      def build_item_hash(item, child_to_parents, all_ids, opts = {}) # rubocop:disable Metrics/MethodLength
        item_id = item[:source_identifier]
        finder = opts[:find_record]
        declared_parents = collect_relationship_ids(item[:parent], item[:raw_row], 'parents', split_pattern: opts[:parent])
        declared_children = collect_relationship_ids(item[:children], item[:raw_row], 'children', split_pattern: opts[:child] || '|')
        inherited_parents = child_to_parents[item_id] || []

        {
          id: item_id,
          title: item[:raw_row]['title'] || item_id,
          type: opts[:type],
          existing: finder&.call(item_id) || false,
          parentIds: (resolvable_ids(declared_parents, all_ids) + resolvable_ids(inherited_parents, all_ids)).uniq,
          childIds: resolvable_ids(declared_children, all_ids),
          existingParentIds: external_ids(declared_parents, all_ids, finder),
          existingChildIds: external_ids(declared_children, all_ids, finder)
        }
      end

      # Splits a delimited relationship cell into stripped, non-blank ids.
      def parse_relationship_field(value, split_pattern: '|')
        return [] if value.blank?

        value.to_s.split(split_pattern).map(&:strip).reject(&:blank?)
      end

      # Combines ids from the base relationship column with any numbered
      # suffix columns (e.g. parents_1, parents_2), de-duplicated.
      def collect_relationship_ids(base_value, raw_row, column, split_pattern: '|')
        pattern = /\A#{Regexp.escape(column)}_\d+\z/
        from_suffix_columns = raw_row
                              .select { |key, _| key.to_s.match?(pattern) }
                              .values
                              .map { |value| value.to_s.strip }
                              .reject(&:blank?)
        (parse_relationship_field(base_value, split_pattern: split_pattern) + from_suffix_columns).uniq
      end

      # Only the ids that refer to rows present in this CSV.
      def resolvable_ids(ids, all_ids)
        ids.select { |candidate| all_ids.include?(candidate) }
      end

      # Returns ids from the list that are NOT in the CSV but exist in the repository.
      def external_ids(ids, all_ids, find_record)
        return [] if find_record.nil?

        outside_csv = ids.reject { |candidate| all_ids.include?(candidate) }
        outside_csv.select { |candidate| find_record.call(candidate) }
      end
    end
  end
end
# frozen_string_literal: true

module Bulkrax
  module CsvTemplate
    # Builds column headers for CSV
    class ColumnBuilder
      # @param service [Object] context exposing #mapping_manager,
      #   #field_analyzer and #all_models
      def initialize(service)
        @service = service
        @descriptor = CsvTemplate::ColumnDescriptor.new
      end

      # Every header: required columns first, then the model property columns.
      def all_columns
        required_columns + property_columns
      end

      # Columns that must always be present (core + relationship + file).
      # Memoized: #all_columns needs it directly AND via #property_columns,
      # and each component round-trips through the mapping manager.
      def required_columns
        @required_columns ||= mapped_core_columns + relationship_columns + file_columns
      end

      private

      # Core/visibility columns translated through the importer's field mappings.
      def mapped_core_columns
        @descriptor.core_columns.map do |column|
          @service.mapping_manager.key_to_mapped_column(column)
        end
      end

      # Mapped model property columns, excluding anything already required; sorted.
      def property_columns
        field_lists = @service.all_models.map do |m|
          @service.field_analyzer.find_or_create_field_list_for(model_name: m)
        end

        properties = field_lists
                     .flat_map { |item| item.values.flat_map { |config| config["properties"] || [] } }
                     .uniq
                     .map { |property| @service.mapping_manager.key_to_mapped_column(property) }
                     .uniq

        (properties - required_columns).sort
      end

      # Parent/child relationship columns resolved from the field-mapping flags.
      def relationship_columns
        [
          @service.mapping_manager.find_by_flag("related_children_field_mapping", 'children'),
          @service.mapping_manager.find_by_flag("related_parents_field_mapping", 'parents')
        ]
      end

      # File-related columns (file, remote_files) translated through the mappings.
      def file_columns
        CsvTemplate::ColumnDescriptor::COLUMN_DESCRIPTIONS[:files].flat_map do |property_hash|
          property_hash.keys.map do |key|
            @service.mapping_manager.key_to_mapped_column(key)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Bulkrax
  module CsvTemplate
    # Manages column descriptions and metadata
    class ColumnDescriptor
      COLUMN_DESCRIPTIONS = {
        include_first: [
          { "model" => "The work types configured in your repository are listed below.\nIf left blank, your default work type, #{Bulkrax.default_work_type}, is used." },
          { "source_identifier" => "This must be a unique identifier.\nIt can be alphanumeric with some special characters (e.g. hyphens, colons), and URLs are also supported." },
          { "id" => "This column would optionally be included only if it is a re-import, i.e. for updating or deleting records.\nThis is a key identifier used by the system, which you wouldn't have for new imports." },
          { "rights_statement" => "Rights statement URI for the work.\nIf not included, uses the value specified on the bulk import configuration screen." }
        ],
        visibility: [
          { "visibility" => "Uses the value specified on the bulk import configuration screen if not added here.\nValid options: open, authenticated, restricted, embargo, lease" },
          { "embargo_release_date" => "Required for embargo (yyyy-mm-dd)" },
          { "visibility_during_embargo" => "Required for embargo" },
          { "visibility_after_embargo" => "Required for embargo" },
          { "lease_expiration_date" => "Required for lease (yyyy-mm-dd)" },
          { "visibility_during_lease" => "Required for lease" },
          { "visibility_after_lease" => "Required for lease" }
        ],
        files: [
          { "file" => "Use filenames exactly matching those in your files folder.\nZip your CSV and files folder together and attach this to your importer." },
          { "remote_files" => "Use the URLs to remote files to be attached to the work." }
        ],
        relationships: [
          { "parents" => "The source_identifier or id of work or collection to be attached as parent." },
          { "children" => "The source_identifier or id of work or file to be attached as child." }
        ],
        other: [
          { "hide_from_catalog_search" => "Set to 1 to hide the collection from catalog search results." },
          { "show_pdf_download_button" => "Set to 1 to show a PDF download link on the work's page." },
          { "show_pdf_viewer" => "Set to 1 to show a PDF viewer on the work's page." },
          { "video_embed" => "A valid URL to a hosted video that can appear in an iframe, beginning with 'http://' or 'https://'." }
        ]
      }.freeze

      # The column names that every template leads with: the include_first
      # group followed by the visibility group.
      def core_columns
        extract_column_names(:include_first) + extract_column_names(:visibility)
      end

      # Scans every description group for +column+ and returns its description
      # text, or nil when the column is unknown.
      def find_description_for(column)
        COLUMN_DESCRIPTIONS.each_value do |group|
          group.each do |entry|
            return entry[column] if entry.key?(column)
          end
        end
        nil
      end

      private

      # First key of each single-entry hash in the named group.
      def extract_column_names(group)
        COLUMN_DESCRIPTIONS[group].map { |entry| entry.each_key.first }
      end
    end
  end
end
# frozen_string_literal: true

module Bulkrax
  module CsvTemplate
    # Builds CSV content
    class CsvBuilder
      # System-managed properties that never belong in an import template.
      IGNORED_PROPERTIES = %w[
        admin_set_id alternate_ids
        bulkrax_identifier
        collection_type_gid contexts created_at
        date date_modified date_uploaded depositor
        embargo embargo_id
        file_ids
        has_model head
        internal_resource is_child
        lease lease_id
        member_ids member_of_collection_ids modified_date
        new_record
        on_behalf_of owner proxy_depositor
        rendering_ids representative_id
        schema_version split_from_pdf_id state tail
        thumbnail_id
        updated_at
      ].freeze

      # @param service [Object] context consumed by ColumnBuilder and RowBuilder
      def initialize(service)
        @service = service
        @column_builder = CsvTemplate::ColumnBuilder.new(service)
        @row_builder = CsvTemplate::RowBuilder.new(service)
        @header_row = nil
        @required_headings = []
      end

      # Writes the template CSV to +file_path+, creating parent directories.
      def write_to_file(file_path)
        FileUtils.mkdir_p(File.dirname(file_path))
        CSV.open(file_path, "w") { |csv| write_rows(csv) }
      end

      # Returns the template CSV as a String.
      def generate_string
        CSV.generate { |csv| write_rows(csv) }
      end

      private

      def write_rows(csv)
        csv_rows.each { |row| csv << row }
      end

      # Header row, explanation row, then one example row per model —
      # with data-free optional columns stripped out.
      def csv_rows
        @header_row = fill_header_row
        rows = [
          @header_row,
          @row_builder.build_explanation_row(@header_row),
          *@row_builder.build_model_rows(@header_row)
        ]
        remove_empty_columns(rows)
      end

      # Computes the filtered header row and records which of the required
      # headings survived the IGNORED_PROPERTIES filter.
      # (Previously assigned @required_headings twice; the first, unfiltered
      # assignment was dead and cost an extra required_columns computation.)
      def fill_header_row
        filtered = @column_builder.all_columns - IGNORED_PROPERTIES
        @required_headings = @column_builder.required_columns & filtered
        filtered
      end

      # Drops columns that are neither required nor populated in any data row.
      def remove_empty_columns(rows)
        return rows if rows.empty?

        columns = rows.transpose
        non_empty_columns = columns.select { |col| keep_column?(col) }
        non_empty_columns.transpose
      end

      # A column survives when its heading is required, or when any data row
      # (everything below the header and explanation rows) holds a real value.
      def keep_column?(column)
        heading = column[0]
        return true if @required_headings.include?(heading)

        column[2..-1].any? { |value| !value.nil? && value != "" && value != "---" }
      end
    end
  end
end
# frozen_string_literal: true

module Bulkrax
  module CsvTemplate
    # Builds explanations for CSV columns
    class ExplanationBuilder
      # @param service [Object] context exposing #mapping_manager and #field_analyzer
      def initialize(service)
        @service = service
        @descriptor = CsvTemplate::ColumnDescriptor.new
        @split_formatter = CsvTemplate::SplitFormatter.new
      end

      # One { column => explanation } hash per header column.
      def build_explanations(header_row)
        header_row.map { |column| { column => build_explanation(column) } }
      end

      private

      # Joins the column's description, controlled-vocabulary note and
      # split-pattern note (skipping whichever are absent) with newlines.
      def build_explanation(column)
        mapping_key = @service.mapping_manager.mapped_to_key(column)

        description = source_identifier_description(column) || @descriptor.find_description_for(column)
        vocab_note = controlled_vocab_text(mapping_key)
        split_note = split_text(mapping_key, vocab_note)

        [description, vocab_note, split_note].compact.join("\n")
      end

      # Special-case description for the source_identifier column, only when
      # blank identifiers are configured to be auto-filled.
      def source_identifier_description(column)
        return unless @service.mapping_manager.mapped_to_key(column) == 'source_identifier'
        return if Bulkrax.fill_in_blank_source_identifiers.blank?
        "Will be auto-generated if left blank.\nProviding one allows round-tripping and deduplication across imports."
      end

      # Note for controlled-vocabulary fields (based_near is always controlled).
      def controlled_vocab_text(field_name)
        controlled = @service.field_analyzer.controlled_vocab_terms.include?(field_name) ||
                     field_name == 'based_near'
        return unless controlled
        'This property uses a controlled vocabulary.'
      end

      # Split-pattern note; suppressed for controlled-vocabulary fields except
      # location and resource_type.
      def split_text(mapping_key, controlled_vocab_info)
        return nil if controlled_vocab_info.present? && !mapping_key.in?(%w[location resource_type])
        split_value = @service.mapping_manager.split_value_for(mapping_key)
        return nil unless split_value
        @split_formatter.format(split_value)
      end
    end
  end
end