bulkrax 9.3.5 → 9.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/app/assets/javascripts/bulkrax/application.js +2 -1
- data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
- data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
- data/app/assets/javascripts/bulkrax/datatables.js +1 -0
- data/app/assets/javascripts/bulkrax/entries.js +17 -10
- data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
- data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
- data/app/assets/stylesheets/bulkrax/application.css +1 -1
- data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
- data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
- data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
- data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
- data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
- data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
- data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
- data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
- data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
- data/app/controllers/bulkrax/importers_controller.rb +28 -31
- data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
- data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
- data/app/errors/bulkrax/unzip_error.rb +16 -0
- data/app/factories/bulkrax/object_factory.rb +3 -2
- data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
- data/app/jobs/bulkrax/importer_job.rb +42 -4
- data/app/models/bulkrax/csv_entry.rb +27 -7
- data/app/models/bulkrax/entry.rb +4 -0
- data/app/models/bulkrax/importer.rb +27 -10
- data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
- data/app/parsers/bulkrax/application_parser.rb +63 -20
- data/app/parsers/bulkrax/bagit_parser.rb +12 -0
- data/app/parsers/bulkrax/csv_parser.rb +168 -25
- data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
- data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
- data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
- data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
- data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
- data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
- data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
- data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
- data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
- data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
- data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
- data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
- data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
- data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
- data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
- data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
- data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
- data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
- data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
- data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
- data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
- data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
- data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
- data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
- data/app/views/bulkrax/importers/index.html.erb +6 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +17 -1
- data/config/i18n-tasks.yml +195 -0
- data/config/locales/bulkrax.de.yml +508 -0
- data/config/locales/bulkrax.en.yml +463 -233
- data/config/locales/bulkrax.es.yml +508 -0
- data/config/locales/bulkrax.fr.yml +508 -0
- data/config/locales/bulkrax.it.yml +508 -0
- data/config/locales/bulkrax.pt-BR.yml +508 -0
- data/config/locales/bulkrax.zh.yml +507 -0
- data/config/routes.rb +10 -1
- data/lib/bulkrax/data/demo_scenarios.json +2235 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +31 -0
- metadata +56 -16
- data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
- data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
- data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
- data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
- data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
- data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
- data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
- data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
- data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
- data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
- data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
- data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
- data/app/services/bulkrax/sample_csv_service.rb +0 -78
- /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
|
@@ -4,7 +4,10 @@ module Bulkrax
|
|
|
4
4
|
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
|
5
5
|
include ErroredEntries
|
|
6
6
|
include ExportBehavior
|
|
7
|
+
include CsvParser::CsvTemplateGeneration
|
|
8
|
+
include CsvParser::CsvValidation
|
|
7
9
|
attr_writer :collections, :file_sets, :works
|
|
10
|
+
attr_accessor :validation_mode
|
|
8
11
|
|
|
9
12
|
def self.export_supported?
|
|
10
13
|
true
|
|
@@ -14,12 +17,14 @@ module Bulkrax
|
|
|
14
17
|
return @records if @records.present?
|
|
15
18
|
|
|
16
19
|
file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
|
|
17
|
-
# data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
|
|
18
20
|
csv_data = entry_class.read_data(file_for_import)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
+
unless validation_mode
|
|
22
|
+
importer.parser_fields['total'] = csv_data.count
|
|
23
|
+
importer.save
|
|
24
|
+
end
|
|
21
25
|
|
|
22
26
|
@records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
|
27
|
+
@records
|
|
23
28
|
end
|
|
24
29
|
|
|
25
30
|
# rubocop:disable Metrics/AbcSize
|
|
@@ -95,11 +100,11 @@ module Bulkrax
|
|
|
95
100
|
def missing_elements(record)
|
|
96
101
|
keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
|
|
97
102
|
keys = []
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
keys << k if keys_from_record.include?(vf)
|
|
103
|
+
mapping_values = importerexporter.mapping.stringify_keys
|
|
104
|
+
mapping_values.each do |k, v|
|
|
105
|
+
from_values = Array.wrap(v.is_a?(Hash) ? (v['from'] || v[:from]) : nil)
|
|
106
|
+
from_values.each do |vf|
|
|
107
|
+
keys << k if vf.present? && keys_from_record.include?(vf.to_s.strip)
|
|
103
108
|
end
|
|
104
109
|
end
|
|
105
110
|
required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
|
|
@@ -360,8 +365,11 @@ module Bulkrax
|
|
|
360
365
|
else
|
|
361
366
|
Bulkrax.multi_value_element_split_on
|
|
362
367
|
end
|
|
368
|
+
files_dir = path_to_files
|
|
369
|
+
raise StandardError, "Record references local files but no files directory could be resolved from the import path" if files_dir.nil?
|
|
370
|
+
|
|
363
371
|
r[file_mapping].split(split_pattern).map do |f|
|
|
364
|
-
file = File.join(
|
|
372
|
+
file = File.join(files_dir, f.strip.tr(' ', '_'))
|
|
365
373
|
if File.exist?(file) # rubocop:disable Style/GuardClause
|
|
366
374
|
file
|
|
367
375
|
else
|
|
@@ -371,23 +379,161 @@ module Bulkrax
|
|
|
371
379
|
end.flatten.compact.uniq
|
|
372
380
|
end
|
|
373
381
|
|
|
374
|
-
# Retrieve the path where we expect to find the files
|
|
382
|
+
# Retrieve the path where we expect to find the files for this import.
|
|
383
|
+
# After {ImporterJob#unzip_imported_file} runs (zip cases), attachments
|
|
384
|
+
# live under `{importer_unzip_path}/files/`. For a server-path-style
|
|
385
|
+
# import (the user specified a CSV file path with a sibling `files/`
|
|
386
|
+
# directory on disk), resolve relative to the CSV's directory instead.
|
|
387
|
+
#
|
|
388
|
+
# When called with `filename:`, returns the full path to that file if
|
|
389
|
+
# it exists on disk, or `nil` otherwise — callers like
|
|
390
|
+
# `Bulkrax::FileSetEntryBehavior#add_path_to_file` rely on the nil
|
|
391
|
+
# sentinel to fall back to the raw filename in their error messages.
|
|
392
|
+
#
|
|
393
|
+
# When called with no filename, returns the `files/` directory itself
|
|
394
|
+
# (only when that directory exists on disk — else `nil` so callers can
|
|
395
|
+
# raise a clear "no files directory" error).
|
|
375
396
|
def path_to_files(**args)
  requested = args.fetch(:filename, '')
  root = files_dir

  if requested.blank?
    # Directory lookup: hand back the files/ directory only when it is on disk.
    Dir.exist?(root) ? root : nil
  else
    # File lookup: nil tells the caller the attachment could not be found.
    full_path = File.join(root, requested)
    File.exist?(full_path) ? full_path : nil
  end
end
|
|
405
|
+
|
|
406
|
+
# Extracts a zip that contains a primary CSV. The primary CSV lands at
|
|
407
|
+
# the root of {#importer_unzip_path}; every other entry lands under
|
|
408
|
+
# {#importer_unzip_path}/files/, preserving its path relative to the
|
|
409
|
+
# primary CSV's directory.
|
|
410
|
+
#
|
|
411
|
+
# Primary-CSV selection matches the guided-import validator's rule
|
|
412
|
+
# (see {Bulkrax::ImporterFileHandler#locate_csv_entry_in_zip}): the CSV
|
|
413
|
+
# entry at the shallowest directory level. Visible errors are raised on
|
|
414
|
+
# zero CSVs or multiple CSVs at the shallowest level.
|
|
415
|
+
#
|
|
416
|
+
# @param file_to_unzip [String] absolute path to a .zip
|
|
417
|
+
# @raise [Bulkrax::UnzipError] on no CSV or ambiguous CSVs
|
|
418
|
+
def unzip_with_primary_csv(file_to_unzip)
  target_root = importer_unzip_path(mkdir: true)

  Zip::File.open(file_to_unzip) do |archive|
    members = real_zip_entries(archive)
    csv_entry = select_primary_csv!(members)
    csv_dir = File.dirname(csv_entry.name)

    members.each do |member|
      # The primary CSV goes to the root of the unzip path; everything else
      # is placed under files/, relative to the CSV's own directory.
      destination =
        if member == csv_entry
          File.basename(member.name)
        else
          File.join('files', relative_to(csv_dir, member.name))
        end

      extract_to(archive, member, target_root, destination)
    end
  end
end
|
|
377
434
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
435
|
+
# Extracts a zip that accompanies a separately-uploaded CSV. Every
|
|
436
|
+
# entry lands under {#importer_unzip_path}/files/ — including any
|
|
437
|
+
# CSVs inside the zip, which are treated as attachments since the
|
|
438
|
+
# primary CSV was uploaded outside the zip. Strips a single top-level
|
|
439
|
+
# wrapper directory if present, so users can zip either the contents
|
|
440
|
+
# or the enclosing folder.
|
|
441
|
+
#
|
|
442
|
+
# @param file_to_unzip [String] absolute path to a .zip
|
|
443
|
+
def unzip_attachments_only(file_to_unzip)
  target_root = importer_unzip_path(mkdir: true)

  Zip::File.open(file_to_unzip) do |archive|
    members = real_zip_entries(archive)
    wrapper_dir = single_top_level_wrapper(members)
    # nil when the archive has no single enclosing folder to strip.
    wrapper_prefix = wrapper_dir && "#{wrapper_dir}/"

    members.each do |member|
      inner_path = wrapper_prefix ? member.name.delete_prefix(wrapper_prefix) : member.name
      next if inner_path.empty?

      extract_to(archive, member, target_root, File.join('files', inner_path))
    end
  end
end
|
|
382
456
|
|
|
383
|
-
|
|
457
|
+
# File names referenced in CSVs have spaces replaced with underscores.
|
|
458
|
+
# @see #file_paths
|
|
459
|
+
# Renames every entry under `{importer_unzip_path}/files/` whose own name
# contains a space, replacing each space with an underscore.
#
# The walk is recursive: attachments may sit in sub-directories of `files/`,
# and CSV file references have spaces replaced across the whole relative
# path, so both nested files and their directories must be normalised.
#
# @return [Array<String>] the paths that were inspected (pre-rename)
def remove_spaces_from_filenames
  files_root = File.join(importer_unzip_path, 'files')

  # Deepest paths first, so a directory is only renamed after everything
  # inside it has been handled and the collected child paths stay valid.
  deepest_first = Dir.glob(File.join(files_root, '**', '*'))
                     .sort_by { |path| -path.count(File::SEPARATOR) }

  deepest_first.each do |path|
    name = File.basename(path)
    next unless name.include?(' ')

    File.rename(path, File.join(File.dirname(path), name.tr(' ', '_')))
  end
end
|
|
388
470
|
|
|
389
471
|
private
|
|
390
472
|
|
|
473
|
+
# Memoized base directory under which import attachments live. Kept
|
|
474
|
+
# separate from `#path_to_files`' per-filename return value to avoid
|
|
475
|
+
# cross-contamination between directory lookups and file lookups.
|
|
476
|
+
def files_dir
  return @files_dir if @files_dir

  attachments_zip = parser_fields['attachments_zip_path']
  attachments_zipped = attachments_zip.present? && zip_file?(attachments_zip)
  # Zip-based imports keep attachments under the unzip path; otherwise
  # they are expected next to the CSV itself.
  extracted = zip? || attachments_zipped
  parent_dir = extracted ? importer_unzip_path : File.dirname(import_file_path)

  @files_dir = File.join(parent_dir, 'files')
end
|
|
483
|
+
|
|
484
|
+
# Returns zip entries filtered down to real files (no directories, no
|
|
485
|
+
# macOS junk). Raises {Bulkrax::UnzipError} if any entry's name would
|
|
486
|
+
# escape the destination directory (Zip Slip).
|
|
487
|
+
def real_zip_entries(zip_file)
  # Drop anything that is not a regular file, plus macOS junk entries.
  kept = zip_file.entries.reject { |member| !member.file? || macos_junk_entry?(member.name) }
  # Each surviving name is vetted; `each` hands back the vetted list itself.
  kept.each { |member| reject_unsafe_entry!(member.name) }
end
|
|
492
|
+
|
|
493
|
+
# Picks the single primary CSV from zip entries, enforcing the
|
|
494
|
+
# shallowest-level rule. Raises {Bulkrax::UnzipError} on failure.
|
|
495
|
+
def select_primary_csv!(entries)
  csv_entries = entries.select { |member| member.name.end_with?('.csv') }
  raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.no_csv') if csv_entries.empty?

  # Depth is the number of "/" separators in the entry name.
  min_depth = csv_entries.map { |member| member.name.count('/') }.min
  candidates = csv_entries.select { |member| member.name.count('/') == min_depth }
  raise Bulkrax::UnzipError, I18n.t('bulkrax.importer.unzip.errors.multiple_csv') unless candidates.size == 1

  candidates.first
end
|
|
506
|
+
|
|
507
|
+
# If every entry shares a single top-level directory, returns that
|
|
508
|
+
# directory name; otherwise nil.
|
|
509
|
+
def single_top_level_wrapper(entries)
  roots = entries.map { |member| member.name.split('/').first }.uniq
  sole_root = roots.first
  shared = roots.length == 1
  # An entry whose whole name equals the root is a top-level file, which
  # means the shared first segment is not a directory to strip.
  bare_file = shared && entries.any? { |member| member.name == sole_root }

  shared && !bare_file ? sole_root : nil
end
|
|
517
|
+
|
|
518
|
+
# Returns `path` with `prefix/` removed from the front, if present, and
|
|
519
|
+
# a leading `files/` segment also stripped so callers can join under
|
|
520
|
+
# `files/` without doubling when the zip already uses that convention.
|
|
521
|
+
def relative_to(prefix, path)
  # "." or "" means the reference directory is the archive root.
  at_root = prefix == '.' || prefix.empty?
  without_prefix = at_root ? path : path.delete_prefix("#{prefix}/")
  # Avoid files/files/... when the archive already has a files/ folder.
  without_prefix.delete_prefix('files/')
end
|
|
525
|
+
|
|
526
|
+
# Extracts a zip entry to `dest_dir/relative_dest`. Creates intermediate
|
|
527
|
+
# directories and honors the rubyzip 2/3 extract-method signature.
|
|
528
|
+
# The destination path is validated by {#safe_extract_path} — an unsafe
|
|
529
|
+
# `relative_dest` raises {Bulkrax::UnzipError} before any write.
|
|
530
|
+
def extract_to(zip_file, entry, dest_dir, relative_dest)
  target = safe_extract_path(dest_dir, relative_dest)
  FileUtils.mkdir_p(File.dirname(target))
  # A file already present at the target is left untouched.
  extract_zip_entry(zip_file, entry, dest_dir, relative_dest, target) unless File.exist?(target)
end
|
|
536
|
+
|
|
391
537
|
def unique_collection_identifier(collection_hash)
|
|
392
538
|
entry_uid = collection_hash[source_identifier]
|
|
393
539
|
entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
|
|
@@ -402,16 +548,13 @@ module Bulkrax
|
|
|
402
548
|
# Override to return the first CSV in the path, if a zip file is supplied
|
|
403
549
|
# We expect a single CSV at the top level of the zip in the CSVParser
|
|
404
550
|
# but we are willing to go look for it if need be
|
|
551
|
+
# When the user uploaded a zip containing a CSV, the job extracts the
|
|
552
|
+
# primary CSV to the root of `importer_unzip_path` (see
|
|
553
|
+
# {#unzip_with_primary_csv}). Any non-primary CSVs live under `files/`
|
|
554
|
+
# and are treated as attachments, so a shallow glob suffices.
|
|
405
555
|
def real_import_file_path
  if file? && zip?
    # Only the top level of the unzip path is searched for the CSV.
    Dir.glob("#{importer_unzip_path}/*.csv").first
  else
    parser_fields['import_file_path']
  end
end
|
|
410
|
-
|
|
411
|
-
# If there are CSVs that are meant to be attachments in the files directory,
|
|
412
|
-
# we don't want to consider them as the import CSV
|
|
413
|
-
def in_files_dir?(path)
|
|
414
|
-
File.dirname(path).ends_with?('files')
|
|
415
|
-
end
|
|
416
559
|
end
|
|
417
560
|
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
  class CsvParser < ApplicationParser
    # Adds CSV template generation to the including class as a class-level API
    # (see `generate_template`).
    module CsvTemplateGeneration
      extend ActiveSupport::Concern

      # Output formats accepted by `generate_template`. Each one maps to a
      # public `TemplateContext#to_<format>` method.
      OUTPUT_FORMATS = %w[file csv_string].freeze

      class_methods do
        # Generate a CSV template for the specified models.
        #
        # @param models [Array<String>, String] Model names or 'all' for all available models
        # @param output [String] Output format: 'file' or 'csv_string'
        # @param admin_set_id [String, nil] Optional admin set ID for context
        # @param args [Hash] Additional arguments passed to output method (e.g., file_path)
        # @return [String] File path (for 'file' output) or CSV string (for 'csv_string' output)
        # @raise [NameError] when Hyrax is not loaded
        # @raise [ArgumentError] when `output` is not one of OUTPUT_FORMATS
        def generate_template(models: [], output: 'file', admin_set_id: nil, **args)
          raise NameError, "Hyrax is not defined" unless defined?(::Hyrax)

          # Only dispatch to the documented formats: building a method name
          # from an arbitrary caller-supplied value would otherwise allow any
          # `to_*` method (including private ones) to be invoked.
          output_format = output.to_s
          unless OUTPUT_FORMATS.include?(output_format)
            raise ArgumentError, "Unsupported output #{output.inspect}; expected one of: #{OUTPUT_FORMATS.join(', ')}"
          end

          TemplateContext.new(models: models, admin_set_id: admin_set_id).public_send("to_#{output_format}", **args)
        end
      end

      ##
      # Holds state for a single template generation run.
      # Provides the interface expected by CsvTemplate:: components.
      class TemplateContext
        attr_reader :mappings, :all_models, :admin_set_id, :field_analyzer, :mapping_manager

        # @param models [Array<String>, String, nil] wrapped into an array and handed to CsvTemplate::ModelLoader
        # @param admin_set_id [String, nil] passed through to CsvTemplate::FieldAnalyzer
        def initialize(models: nil, admin_set_id: nil)
          @admin_set_id = admin_set_id
          @mapping_manager = CsvTemplate::MappingManager.new
          @mappings = @mapping_manager.mappings
          @field_analyzer = CsvTemplate::FieldAnalyzer.new(@mappings, admin_set_id)
          @all_models = CsvTemplate::ModelLoader.new(Array.wrap(models)).models
          # Built last: the builder receives this context and may read the
          # attributes assigned above.
          @csv_builder = CsvTemplate::CsvBuilder.new(self)
        end

        # Writes the template through the CSV builder.
        #
        # @param file_path [String, nil] destination; defaults to
        #   CsvTemplate::FilePathGenerator.default_path for this admin set
        # @return [String] the path that was written to
        def to_file(file_path: nil)
          file_path ||= CsvTemplate::FilePathGenerator.default_path(@admin_set_id)
          @csv_builder.write_to_file(file_path)
          file_path
        end

        # @return [String] the template as produced by the CSV builder's `generate_string`
        def to_csv_string
          @csv_builder.generate_string
        end

        # Memoized per-model field metadata.
        #
        # @return [Hash] model => { properties:, required_terms:, controlled_vocab_terms: },
        #   each defaulting to an empty array when the analyzer supplies none
        def field_metadata_for_all_models
          @field_metadata ||= @all_models.each_with_object({}) do |model, hash|
            field_list = @field_analyzer.find_or_create_field_list_for(model_name: model)
            hash[model] = {
              properties: field_list.dig(model, "properties") || [],
              required_terms: field_list.dig(model, "required_terms") || [],
              controlled_vocab_terms: field_list.dig(model, "controlled_vocab_terms") || []
            }
          end
        end

        # Memoized list of headers considered valid for the loaded models.
        # If building the column list raises, the error is logged and a
        # fallback list (standard fields plus every model property) is used.
        #
        # @return [Array] valid header names
        def valid_headers_for_models
          @valid_headers ||= begin
            column_builder = CsvTemplate::ColumnBuilder.new(self)
            all_columns = column_builder.all_columns
            all_columns - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
          rescue StandardError => e
            Rails.logger.error("Error building valid headers: #{e.message}")
            standard_fields = %w[model source_identifier parent parents file]
            model_fields = field_metadata_for_all_models.values.flat_map { |m| m[:properties] }
            (standard_fields + model_fields).uniq
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
  class CsvParser < ApplicationParser
    # Class-level CSV validation, exposed on the including class through
    # `validate_csv`.
    module CsvValidation
      extend ActiveSupport::Concern

      included do
        # Minimal stand-in for the CsvTemplate::ColumnBuilder interface, so
        # validation does not have to construct a full template context.
        ValidationContext = Struct.new(:mapping_manager, :field_analyzer, :all_models, :mappings, keyword_init: true)
      end

      class_methods do
        include CsvValidationHelpers

        # Validate a CSV (and optional zip) without a persisted Importer record.
        #
        # @param csv_file [File, ActionDispatch::Http::UploadedFile, String] path or file object
        # @param zip_file [File, ActionDispatch::Http::UploadedFile, nil]
        # @param admin_set_id [String, nil]
        # @return [Hash] validation result compatible with the guided import UI
        def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
          raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer =
            parse_csv_inputs(csv_file, admin_set_id)

          present_ids = csv_data.map { |row| row[:source_identifier] }.compact
          all_ids = present_ids.to_set

          header_issues = check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer)
          # The same array is handed to the later steps, which may add to it.
          missing_required = header_issues[:missing_required]

          notices, row_errors, file_validator, collections, works, file_sets =
            run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id)

          result = assemble_result(
            headers: headers,
            missing_required: missing_required,
            header_issues: header_issues,
            row_errors: row_errors,
            csv_data: csv_data,
            file_validator: file_validator,
            collections: collections,
            works: works,
            file_sets: file_sets,
            notices: notices
          )
          apply_rights_statement_validation_override!(result, missing_required)
          result[:raw_csv_data] = csv_data
          result
        end

        private

        # Collects notices, row-level errors, the file validator and the
        # hierarchy items for an already-parsed CSV.
        #
        # @return [Array] [notices, row_errors, file_validator, collections, works, file_sets]
        def run_validations(csv_data, all_ids, headers, source_id_key, mappings, field_metadata, missing_required, zip_file, admin_set_id) # rubocop:disable Metrics/ParameterLists
          find_record = build_find_record
          notices = []
          models_in_csv = csv_data.map { |row| row[:model] }.compact.uniq

          append_missing_source_id!(missing_required, headers, source_id_key, models_in_csv)
          append_missing_model_notice!(notices, headers, csv_data)

          row_errors = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices)
          file_validator = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
          collections, works, file_sets = extract_hierarchy_items(csv_data, all_ids, find_record, mappings)

          [notices, row_errors, file_validator, collections, works, file_sets]
        end

        # Reads the CSV, resolves the column keys from the field mappings,
        # parses the rows and builds per-model field metadata.
        #
        # @return [Array] [raw_csv, headers, mapping_manager, mappings,
        #   source_id_key, csv_data, field_metadata, field_analyzer]
        def parse_csv_inputs(csv_file, admin_set_id)
          # CsvEntry.read_data keeps header normalisation identical to a real import.
          raw_csv = CsvEntry.read_data(csv_file)
          headers = raw_csv.headers.map(&:to_s)

          mapping_manager = CsvTemplate::MappingManager.new
          mappings = mapping_manager.mappings

          source_id_key = resolve_validation_key(mapping_manager, flag: 'source_identifier', default: :source_identifier)
          parent_key = resolve_validation_key(mapping_manager, flag: 'related_parents_field_mapping', default: :parents)
          children_key = resolve_validation_key(mapping_manager, flag: 'related_children_field_mapping', default: :children)
          file_key = resolve_validation_key(mapping_manager, key: 'file', default: :file)

          csv_data = parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)

          all_models = csv_data.map { |row| row[:model] }.compact.uniq
          # The configured default work type is always analysed as well.
          all_models = all_models | [Bulkrax.default_work_type] if Bulkrax.default_work_type.present?

          field_analyzer = CsvTemplate::FieldAnalyzer.new(mappings, admin_set_id)
          field_metadata = build_validation_field_metadata(all_models, field_analyzer)

          [raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer]
        end

        # Runs every header-level check.
        #
        # @return [Hash] with :missing_required, :unrecognized and :empty_columns
        def check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer) # rubocop:disable Metrics/ParameterLists
          all_models = field_metadata.keys
          base_headers = build_valid_validation_headers(mapping_manager, field_analyzer,
                                                        all_models, mappings, field_metadata)
          # Headers ending in "_<digits>" are accepted as valid as well.
          numbered = headers.select { |header| header.match?(/_\d+\z/) }
          valid_headers = (base_headers + numbered).uniq

          {
            missing_required: find_missing_required_headers(headers, field_metadata, mapping_manager),
            unrecognized: find_unrecognized_validation_headers(headers, valid_headers),
            empty_columns: find_empty_column_positions(headers, raw_csv)
          }
        end

        # Extracts the hierarchy items from the rows; the child split pattern
        # falls back to "|" when the mappings do not provide one.
        def extract_hierarchy_items(csv_data, all_ids, find_record, mappings)
          parent_pattern = resolve_parent_split_pattern(mappings)
          child_pattern = resolve_children_split_pattern(mappings) || '|'

          extract_validation_items(
            csv_data, all_ids, find_record,
            parent_split_pattern: parent_pattern,
            child_split_pattern: child_pattern
          )
        end

        # Calls every validator in Bulkrax.csv_row_validators for every row,
        # sharing one context hash between them.
        #
        # @return [Array] the errors accumulated in the shared context
        def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record, notices = []) # rubocop:disable Metrics/ParameterLists
          context = {
            errors: [],
            warnings: [],
            seen_ids: {},
            all_ids: all_ids,
            source_identifier: source_id_key.to_s,
            parent_split_pattern: resolve_parent_split_pattern(mappings),
            child_split_pattern: resolve_children_split_pattern(mappings),
            parent_column: resolve_relationship_column(mappings, 'related_parents_field_mapping', 'parents'),
            children_column: resolve_relationship_column(mappings, 'related_children_field_mapping', 'children'),
            mappings: mappings,
            field_metadata: field_metadata,
            find_record_by_source_identifier: find_record,
            relationship_graph: build_relationship_graph(csv_data, mappings),
            notices: notices
          }

          # Row numbers start at 2: 1-indexed, plus the header row.
          csv_data.each.with_index(2) do |record, row_number|
            Bulkrax.csv_row_validators.each { |validator| validator.call(record, row_number, context) }
          end

          context[:errors]
        end
      end
    end
  end
end
|