bulkrax 9.3.5 → 9.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -1
  3. data/app/assets/javascripts/bulkrax/application.js +2 -1
  4. data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
  5. data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
  6. data/app/assets/javascripts/bulkrax/datatables.js +1 -0
  7. data/app/assets/javascripts/bulkrax/entries.js +17 -10
  8. data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
  9. data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
  10. data/app/assets/stylesheets/bulkrax/application.css +1 -1
  11. data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
  12. data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
  13. data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
  14. data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
  15. data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
  16. data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
  17. data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
  18. data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
  19. data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
  20. data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
  21. data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
  22. data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
  23. data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
  24. data/app/controllers/bulkrax/importers_controller.rb +28 -31
  25. data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
  26. data/app/controllers/concerns/bulkrax/importer_file_handler.rb +212 -0
  27. data/app/errors/bulkrax/unzip_error.rb +16 -0
  28. data/app/factories/bulkrax/object_factory.rb +3 -2
  29. data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
  30. data/app/jobs/bulkrax/importer_job.rb +42 -4
  31. data/app/models/bulkrax/csv_entry.rb +27 -7
  32. data/app/models/bulkrax/entry.rb +4 -0
  33. data/app/models/bulkrax/importer.rb +27 -10
  34. data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
  36. data/app/parsers/bulkrax/application_parser.rb +63 -20
  37. data/app/parsers/bulkrax/bagit_parser.rb +12 -0
  38. data/app/parsers/bulkrax/csv_parser.rb +168 -25
  39. data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
  40. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
  41. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
  42. data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
  43. data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
  44. data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
  45. data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
  46. data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
  47. data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
  48. data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
  49. data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
  50. data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
  51. data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
  52. data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
  53. data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
  54. data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
  55. data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
  56. data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
  57. data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
  58. data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
  59. data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
  60. data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
  61. data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
  62. data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
  63. data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
  64. data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
  65. data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
  66. data/app/views/bulkrax/importers/index.html.erb +6 -1
  67. data/app/views/bulkrax/importers/new.html.erb +1 -1
  68. data/app/views/bulkrax/importers/show.html.erb +17 -1
  69. data/config/i18n-tasks.yml +195 -0
  70. data/config/locales/bulkrax.de.yml +508 -0
  71. data/config/locales/bulkrax.en.yml +463 -233
  72. data/config/locales/bulkrax.es.yml +508 -0
  73. data/config/locales/bulkrax.fr.yml +508 -0
  74. data/config/locales/bulkrax.it.yml +508 -0
  75. data/config/locales/bulkrax.pt-BR.yml +508 -0
  76. data/config/locales/bulkrax.zh.yml +507 -0
  77. data/config/routes.rb +10 -1
  78. data/lib/bulkrax/data/demo_scenarios.json +2235 -0
  79. data/lib/bulkrax/version.rb +1 -1
  80. data/lib/bulkrax.rb +31 -0
  81. metadata +56 -16
  82. data/app/services/bulkrax/sample_csv_service/column_builder.rb +0 -58
  83. data/app/services/bulkrax/sample_csv_service/column_descriptor.rb +0 -56
  84. data/app/services/bulkrax/sample_csv_service/csv_builder.rb +0 -82
  85. data/app/services/bulkrax/sample_csv_service/explanation_builder.rb +0 -51
  86. data/app/services/bulkrax/sample_csv_service/field_analyzer.rb +0 -54
  87. data/app/services/bulkrax/sample_csv_service/file_path_generator.rb +0 -16
  88. data/app/services/bulkrax/sample_csv_service/mapping_manager.rb +0 -36
  89. data/app/services/bulkrax/sample_csv_service/model_loader.rb +0 -40
  90. data/app/services/bulkrax/sample_csv_service/row_builder.rb +0 -33
  91. data/app/services/bulkrax/sample_csv_service/schema_analyzer.rb +0 -69
  92. data/app/services/bulkrax/sample_csv_service/split_formatter.rb +0 -42
  93. data/app/services/bulkrax/sample_csv_service/value_determiner.rb +0 -67
  94. data/app/services/bulkrax/sample_csv_service.rb +0 -78
  95. /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Analyzes model fields and schemas.
    #
    # For every model it is asked about, it builds (and caches in +field_list+)
    # a single-key hash of the form:
    #
    #   { model_name => { 'properties' => [...],
    #                     'required_terms' => [...],
    #                     'controlled_vocab_terms' => [...] } }
    class FieldAnalyzer
      attr_reader :field_list

      # @param mappings [Hash] field mappings; stored, but not read by this class
      # @param admin_set_id [String, nil] forwarded to the schema lookups
      def initialize(mappings, admin_set_id = nil)
        @mappings = mappings
        @field_list = []
        @admin_set_id = admin_set_id
      end

      # Returns the cached entry for +model_name+, building and caching it on
      # first use.
      #
      # @param model_name [String]
      # @return [Hash] the entry described on the class, or an empty hash when
      #   ModelLoader cannot resolve the model (that miss is not cached)
      def find_or_create_field_list_for(model_name:)
        cached = @field_list.find { |entry| entry.key?(model_name) }
        return cached if cached

        klass = CsvTemplate::ModelLoader.determine_klass_for(model_name)
        return {} if klass.nil?

        build_field_list_entry(model_name, klass).tap { |entry| @field_list << entry }
      end

      # @return [Array] de-duplicated controlled-vocabulary terms across every
      #   entry cached so far
      def controlled_vocab_terms
        @field_list.flat_map do |hash|
          hash.values.flat_map { |data| data["controlled_vocab_terms"] || [] }
        end.uniq
      end

      private

      def build_field_list_entry(model_name, klass)
        schema_analyzer = CsvTemplate::SchemaAnalyzer.new(klass: klass, admin_set_id: @admin_set_id)
        {
          model_name => {
            'properties' => extract_properties(klass),
            'required_terms' => schema_analyzer.required_terms,
            'controlled_vocab_terms' => schema_analyzer.controlled_vocab_terms
          }
        }
      end

      # Property names as strings. Classes exposing +schema+ are delegated to
      # ValkyrieObjectFactory; classes exposing +properties+ use its keys.
      # A class offering neither yields an empty list instead of raising, so
      # one odd model name cannot abort the whole analysis.
      def extract_properties(klass)
        if klass.respond_to?(:schema)
          Bulkrax::ValkyrieObjectFactory.schema_properties(klass: klass, admin_set_id: @admin_set_id).map(&:to_s)
        elsif klass.respond_to?(:properties)
          klass.properties.keys.map(&:to_s)
        else
          []
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Utility class for generating template file paths
    class FilePathGenerator
      TEMPLATE_PREFIX = 'import_template'

      # Anything that is not a letter, digit, underscore or hyphen. Used to
      # keep path separators, dots and whitespace out of generated filenames.
      UNSAFE_FILENAME_CHARS = /[^[:alnum:]_\-]+/.freeze

      # Builds the default template path under <Rails.root>/tmp/imports and
      # makes sure that directory exists.
      #
      # @param admin_set_id [String, nil]
      # @return [Object] whatever Rails.root.join returns for the built path
      def self.default_path(admin_set_id)
        filename = build_filename(load_context(admin_set_id), load_tenant)
        path = Rails.root.join('tmp', 'imports', filename)
        FileUtils.mkdir_p(path.dirname.to_s)
        path
      end

      # @param admin_set_id [String, nil]
      # @return [Object, nil] the first context of the admin set, or nil when
      #   no id is given or the found object does not respond to +contexts+
      def self.load_context(admin_set_id)
        return nil if admin_set_id.blank?

        admin_set = Bulkrax.object_factory.find(admin_set_id)
        admin_set.respond_to?(:contexts) ? admin_set.contexts.first : nil
      end

      # @return [String, nil] name of the Account matching the current
      #   Apartment tenant; nil when either constant is undefined, no tenant
      #   is current, or no Account matches
      def self.load_tenant
        return nil unless defined?(Apartment::Tenant) && defined?(Account)

        tenant_id = Apartment::Tenant.current
        return nil if tenant_id.blank?

        Account.find_by(tenant: tenant_id)&.name
      end

      # Assembles "<prefix>[_context-X][_tenant-Y]_<timestamp>.csv".
      # Context and tenant are sanitised first: they come from stored records,
      # and a value containing "/" or ".." would otherwise change which
      # directory the file lands in.
      #
      # @param context [Object, nil]
      # @param tenant [Object, nil]
      # @return [String]
      def self.build_filename(context, tenant)
        parts = [TEMPLATE_PREFIX]
        parts << "context-#{sanitize_segment(context)}" if context.present?
        parts << "tenant-#{sanitize_segment(tenant)}" if tenant.present?
        parts << timestamp
        "#{parts.join('_')}.csv"
      end

      # @return [String] current UTC time as YYYYMMDD_HHMMSS
      def self.timestamp
        Time.current.utc.strftime('%Y%m%d_%H%M%S')
      end

      # Replaces each run of unsafe characters with a single underscore.
      def self.sanitize_segment(value)
        value.to_s.gsub(UNSAFE_FILENAME_CHARS, '_')
      end
      private_class_method :sanitize_segment
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    ##
    # Validates file references in CSV against zip archive contents.
    #
    # File names are compared by basename only, on both the CSV side and the
    # zip side.
    class FileValidator
      attr_reader :csv_data, :zip_file

      # @param csv_data [Enumerable<Hash>] rows; the +:file+ key is inspected
      # @param zip_file [#path, String, nil] an object responding to +path+, or a path
      # @param admin_set_id [String, nil] stored, but not read by this class
      def initialize(csv_data, zip_file = nil, admin_set_id = nil)
        @csv_data = csv_data
        @zip_file = zip_file
        @admin_set_id = admin_set_id
      end

      # @return [Integer] number of rows whose +:file+ value is present
      def count_references
        @csv_data.count { |item| item[:file].present? }
      end

      # @return [Array<String>] distinct referenced basenames that are not in
      #   the zip; empty when no zip was supplied
      def missing_files
        return [] unless @zip_file

        # uniq keeps this consistent with found_files_count, whose set
        # intersection already counts each name once.
        (referenced_files - zip_file_list).uniq
      end

      # @return [Integer] number of distinct referenced basenames found in the zip
      def found_files_count
        return 0 unless @zip_file

        (referenced_files & zip_file_list).count
      end

      def zip_included?
        @zip_file.present?
      end

      # @return [Boolean] true when files are referenced but no zip was supplied
      def possible_missing_files?
        referenced_files.any? && @zip_file.blank?
      end

      private

      # Basenames of every file referenced by the CSV, split on
      # Bulkrax.multi_value_element_split_on. Segments that are empty after
      # stripping (e.g. from "a.jpg||b.jpg") are dropped so they are not
      # reported as a missing file with an empty name.
      def referenced_files
        @referenced_files ||= @csv_data.flat_map do |item|
          next [] if item[:file].blank?

          item[:file]
            .split(Bulkrax.multi_value_element_split_on)
            .map(&:strip)
            .reject(&:empty?)
            .map { |name| File.basename(name) }
        end
      end

      def zip_file_list
        @zip_file_list ||= read_zip_entries
      end

      # Basenames of the file (non-directory) entries in the zip. Any error
      # while reading is logged and treated as an empty archive.
      def read_zip_entries
        return [] unless @zip_file

        zip_path = @zip_file.respond_to?(:path) ? @zip_file.path : @zip_file
        Zip::File.open(zip_path) do |zip|
          zip.entries.select(&:file?).map { |entry| File.basename(entry.name) }
        end
      rescue StandardError => e
        Rails.logger.error("Error reading zip file: #{e.message}")
        []
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Handles loading and filtering of Bulkrax field mappings.
    #
    # Works on the "Bulkrax::CsvParser" mappings with every entry flagged
    # "generated" => true removed. A mapping's "from" value may be an Array,
    # a single String, or absent; all three are handled.
    class MappingManager
      attr_reader :mappings

      def initialize
        @mappings = load_mappings
      end

      # @param column_str [String] a CSV column name
      # @return [String] the key of the first mapping listing +column_str+ in
      #   its "from" columns, or +column_str+ itself when none does
      def mapped_to_key(column_str)
        @mappings.find { |_key, config| source_columns(config).include?(column_str) }&.first || column_str
      end

      # @param key [String] a mapping key
      # @return [String] the first "from" column for +key+, or +key+ itself
      def key_to_mapped_column(key)
        source_columns(@mappings[key]).first || key
      end

      # @param field_name [String] flag name inside a mapping's config
      # @param default [Object] returned when no mapping has the flag set to true
      # @return [Object] key of the first mapping with the flag, or +default+
      def find_by_flag(field_name, default)
        @mappings.find { |_k, v| v[field_name] == true }&.first || default
      end

      # @return [Object, nil] the "split" setting of the mapping, if any
      def split_value_for(mapping_key)
        @mappings.dig(mapping_key, "split")
      end

      # Resolves CSV column names, trying in order: the mapping carrying
      # +flag+, the mapping named +key+, then +default+.
      #
      # @return [Array] the "from" columns of the first candidate that has
      #   any, otherwise +[default]+, or an empty array without a default
      def resolve_column_name(key: nil, flag: nil, default: nil)
        candidates = []
        candidates << find_by_flag(flag, nil) if flag
        candidates << key if key

        candidates.compact.each do |mapping_key|
          columns = source_columns(@mappings[mapping_key])
          return columns if columns.any?
        end

        default ? [default] : []
      end

      private

      # "from" columns of a mapping config as an Array. Array() turns a
      # missing value into [] and a lone String into a one-element array, so
      # callers neither crash on nil nor substring-match against a String.
      def source_columns(config)
        Array(config.is_a?(Hash) ? config["from"] : nil)
      end

      def load_mappings
        # No CSV parser mappings configured is treated as "no mappings".
        csv_mappings = Bulkrax.field_mappings["Bulkrax::CsvParser"] || {}
        csv_mappings.reject { |_key, value| value["generated"] == true }
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Works out which model names a CSV template should cover.
    class ModelLoader
      attr_reader :models

      # @param models [Array<String>, Object] requested model names; an empty
      #   array, an array containing 'all', or any non-Array value selects
      #   every available model
      def initialize(models)
        @models = load_models(models)
      end

      # Resolves a model name to its class: through the Valkyrie resource
      # class resolver when Bulkrax is configured with ValkyrieObjectFactory,
      # otherwise by constantizing the name.
      #
      # @param model_name [String]
      # @return [Object, nil] the resolved class, or nil when resolution raises
      def self.determine_klass_for(model_name)
        valkyrie = Bulkrax.config.object_factory == Bulkrax::ValkyrieObjectFactory
        valkyrie ? Valkyrie.config.resource_class_resolver.call(model_name) : model_name.constantize
      rescue StandardError
        nil
      end

      private

      # Any error while resolving yields an empty list.
      def load_models(models)
        return all_available_models unless models.is_a?(Array)
        return all_available_models if models.empty?
        return all_available_models if models.include?('all')

        models.filter_map { |candidate| safe_constantize(candidate) }
      rescue StandardError
        []
      end

      # @return [String, nil] the name itself when it constantizes, else nil
      def safe_constantize(model_name)
        resolved = model_name.constantize
        resolved && model_name
      rescue NameError
        nil
      end

      # Names of the Hyrax curation concerns, followed by the Bulkrax
      # collection and file model names when those are configured.
      def all_available_models
        work_names = Hyrax.config.curation_concerns.map(&:name)
        extra_names = [Bulkrax.collection_model_class&.name, Bulkrax.file_model_class&.name].compact
        work_names + extra_names
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Produces the non-header rows of the template: one explanation row and
    # one row per model.
    class RowBuilder
      # @param service [Object] must expose +all_models+ and +field_analyzer+;
      #   it is also handed to ExplanationBuilder and ValueDeterminer
      def initialize(service)
        @service = service
        @explanation_builder = CsvTemplate::ExplanationBuilder.new(service)
        @value_determiner = CsvTemplate::ValueDeterminer.new(service)
      end

      # @param header_row [Array<String>] column names
      # @return [Array<String>] one cell per explanation, made of that
      #   explanation's values joined with a single space
      def build_explanation_row(header_row)
        explanations = @explanation_builder.build_explanations(header_row)
        explanations.map { |explanation| explanation.values.join(" ") }
      end

      # @param header_row [Array<String>] column names
      # @return [Array<Array>] one row per entry in the service's model list
      def build_model_rows(header_row)
        @service.all_models.map do |model_name|
          model_breakdown(model_name, header_row)
        end
      end

      private

      # One cell per header column for the given model; an empty row when the
      # model name cannot be resolved to a class.
      def model_breakdown(model_name, header_row)
        return [] if CsvTemplate::ModelLoader.determine_klass_for(model_name).nil?

        fields = @service.field_analyzer.find_or_create_field_list_for(model_name: model_name)
        header_row.map { |column| @value_determiner.determine_value(column, model_name, fields) }
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Reads a model's schema to find its required fields and its
    # controlled-vocabulary fields.
    class SchemaAnalyzer
      # @param klass [Class] the model class; the schema is loaded eagerly
      # @param admin_set_id [String, nil] when present (and Hyrax offers
      #   +schema_for+) the schema is looked up for that admin set
      def initialize(klass:, admin_set_id: nil)
        @klass = klass
        @admin_set_id = admin_set_id
        @schema = load_schema
      end

      # @return [Array<String>] names of fields whose meta "form" hash has
      #   "required" set to true; empty when the schema is blank or on error
      def required_terms
        return [] if @schema.blank?

        required = @schema.select { |field| required_field?(field) }
        required.map { |field| field.name.to_s }
      rescue StandardError
        []
      end

      # @return [Array<String>] controlled fields declared in the schema; when
      #   the schema declares none, the singularized names of the file-based
      #   authorities in the Qa local registry. Empty without a schema or on error.
      def controlled_vocab_terms
        return [] unless @schema

        declared = extract_controlled_properties
        return declared unless declared.empty?

        registered_controlled_vocab_fields
      rescue StandardError
        []
      end

      private

      # nil when the class has no +schema+ or when loading raises.
      def load_schema
        return nil unless @klass.respond_to?(:schema)

        per_admin_set = @admin_set_id.present? && defined?(Hyrax) && Hyrax.respond_to?(:schema_for)
        return Hyrax.schema_for(klass: @klass, admin_set_id: @admin_set_id) if per_admin_set

        @klass.new.singleton_class.schema || @klass.schema
      rescue StandardError
        nil
      end

      def required_field?(field)
        return false unless field.respond_to?(:meta)

        form = field.meta["form"]
        form.is_a?(Hash) && form["required"] == true
      end

      # Names of properties whose meta lists controlled value sources other
      # than the placeholder 'null'.
      def extract_controlled_properties
        return [] unless @schema

        @schema.each_with_object([]) do |property, names|
          next unless property.respond_to?(:meta)

          sources = property.meta&.dig('controlled_values', 'sources')
          next if sources.nil? || sources == ['null'] || sources == 'null'

          names << property.name.to_s
        end
      end

      def registered_controlled_vocab_fields
        file_based = qa_registry.select do |_name, entry|
          entry.klass == Qa::Authorities::Local::FileBasedAuthority
        end
        file_based.keys.map(&:singularize)
      end

      # NOTE(review): reads the registry's @hash instance variable directly.
      def qa_registry
        @qa_registry ||= Qa::Authorities::Local.registry.instance_variable_get('@hash')
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Formats split pattern descriptions
    class SplitFormatter
      NO_SPLIT_MESSAGE = "Property does not split."

      # Describes how a mapping's "split" setting separates multiple values.
      #
      # @param split_value [nil, Boolean, String, Regexp, Object]
      #   nil / false - the property does not split
      #   true        - the pattern from Bulkrax.multi_value_element_split_on
      #   String      - a regular-expression source string
      #   Regexp      - described through its source
      # @return [String, Object] a human-readable sentence; any other kind of
      #   value is returned unchanged
      def format(split_value)
        case split_value
        when nil, false then NO_SPLIT_MESSAGE
        when true then parse_pattern(pattern_source(Bulkrax.multi_value_element_split_on))
        when String, Regexp then parse_pattern(pattern_source(split_value))
        else split_value
        end
      end

      private

      # Regexp -> its source; anything else -> its string form.
      def pattern_source(pattern)
        pattern.respond_to?(:source) ? pattern.source : pattern.to_s
      end

      def parse_pattern(pattern)
        chars = extract_characters(pattern)
        format_message(chars)
      end

      # Picks the delimiter characters out of a pattern source: the contents
      # of the first [...] character class, else the first backslash-escaped
      # character, else the pattern itself.
      def extract_characters(pattern)
        if (match = pattern.match(/\[([^\]]+)\]/))
          match[1]
        elsif (single = pattern.match(/\\(.)/))
          single[1]
        else
          pattern
        end
      end

      # With several characters, all but the last are joined by spaces and the
      # last is appended after ", or".
      def format_message(chars)
        formatted = chars.chars.then do |c|
          c.length > 1 ? "#{c[0..-2].join(' ')}, or #{c.last}" : c.first
        end
        "Split multiple values with #{formatted}"
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  module CsvTemplate
    # Determines values for CSV cells.
    #
    # The visibility, relationship and file column lists depend only on the
    # service's mappings and ColumnDescriptor, so each is computed once per
    # instance rather than once per cell.
    class ValueDeterminer
      MODEL_KEYS = %w[model work_type].freeze

      # @param service [Object] must expose +mapping_manager+ and +mappings+
      def initialize(service)
        @service = service
        @column_builder = CsvTemplate::ColumnBuilder.new(service)
      end

      # @param column [String] header column name
      # @param model_name [String] key into +field_list+
      # @param field_list [Hash] { model_name => { 'properties' => [...], 'required_terms' => [...] } }
      # @return [String, nil] 'Required', 'Optional' or 'Unknown'; the model
      #   class name for model columns; nil for columns that do not apply
      def determine_value(column, model_name, field_list)
        key = @service.mapping_manager.mapped_to_key(column)
        required_terms = field_list.dig(model_name, 'required_terms')

        if field_list.dig(model_name, "properties")&.include?(key)
          mark_required_or_optional(key, required_terms)
        elsif special_column?(column, key)
          special_value(column, key, model_name, required_terms)
        end
      end

      private

      # Columns that get a value even though they are not model properties.
      def special_column?(column, key)
        MODEL_KEYS.include?(key) ||
          visibility_columns.include?(column) ||
          column == 'source_identifier' ||
          column == 'rights_statement' ||
          relationship_column?(column) ||
          file_column?(column)
      end

      def special_value(column, key, model_name, required_terms)
        return CsvTemplate::ModelLoader.determine_klass_for(model_name).to_s if MODEL_KEYS.include?(key)
        return 'Required' if column == 'source_identifier'
        return mark_required_or_optional(key, required_terms) if column == 'rights_statement'
        # File columns are left empty on the collection model's row.
        return nil if file_column?(column) && collection_model_names.include?(model_name)

        'Optional'
      end

      def mark_required_or_optional(field, required_terms)
        return 'Unknown' unless required_terms

        required_terms.include?(field) ? 'Required' : 'Optional'
      end

      def relationship_column?(column)
        relationship_columns.include?(column)
      end

      def file_column?(column)
        file_columns.include?(column)
      end

      # extract_column_names is private on ColumnDescriptor, hence +send+.
      def visibility_columns
        @visibility_columns ||= CsvTemplate::ColumnDescriptor.new.send(:extract_column_names, :visibility)
      end

      def relationship_columns
        @relationship_columns ||= [
          @service.mapping_manager.find_by_flag("related_children_field_mapping", 'children'),
          @service.mapping_manager.find_by_flag("related_parents_field_mapping", 'parents')
        ]
      end

      # First "from" column of every file-related key that has a mapping.
      def file_columns
        @file_columns ||= CsvTemplate::ColumnDescriptor::COLUMN_DESCRIPTIONS[:files].flat_map do |property_hash|
          property_hash.keys.filter_map do |key|
            @service.mappings.dig(key, "from")&.first
          end
        end
      end

      def collection_model_names
        [Bulkrax.collection_model_class].compact.map(&:to_s)
      end
    end
  end
end