bulkrax 9.3.4 → 9.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/app/assets/javascripts/bulkrax/application.js +2 -1
- data/app/assets/javascripts/bulkrax/bulkrax.js +13 -4
- data/app/assets/javascripts/bulkrax/bulkrax_utils.js +96 -0
- data/app/assets/javascripts/bulkrax/datatables.js +1 -0
- data/app/assets/javascripts/bulkrax/entries.js +17 -10
- data/app/assets/javascripts/bulkrax/importers.js.erb +9 -2
- data/app/assets/javascripts/bulkrax/importers_stepper.js +2420 -0
- data/app/assets/stylesheets/bulkrax/application.css +1 -1
- data/app/assets/stylesheets/bulkrax/import_export.scss +9 -2
- data/app/assets/stylesheets/bulkrax/stepper/_header.scss +83 -0
- data/app/assets/stylesheets/bulkrax/stepper/_mixins.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_navigation.scss +103 -0
- data/app/assets/stylesheets/bulkrax/stepper/_responsive.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper/_review.scss +92 -0
- data/app/assets/stylesheets/bulkrax/stepper/_settings.scss +106 -0
- data/app/assets/stylesheets/bulkrax/stepper/_success.scss +26 -0
- data/app/assets/stylesheets/bulkrax/stepper/_summary.scss +171 -0
- data/app/assets/stylesheets/bulkrax/stepper/_upload.scss +339 -0
- data/app/assets/stylesheets/bulkrax/stepper/_validation.scss +237 -0
- data/app/assets/stylesheets/bulkrax/stepper/_variables.scss +46 -0
- data/app/assets/stylesheets/bulkrax/stepper.scss +32 -0
- data/app/controllers/bulkrax/guided_imports_controller.rb +175 -0
- data/app/controllers/bulkrax/importers_controller.rb +34 -28
- data/app/controllers/concerns/bulkrax/guided_import_demo_scenarios.rb +201 -0
- data/app/controllers/concerns/bulkrax/importer_file_handler.rb +217 -0
- data/app/factories/bulkrax/object_factory.rb +3 -2
- data/app/factories/bulkrax/valkyrie_object_factory.rb +61 -17
- data/app/jobs/bulkrax/export_work_job.rb +1 -3
- data/app/jobs/bulkrax/importer_job.rb +11 -4
- data/app/models/bulkrax/csv_entry.rb +27 -7
- data/app/models/bulkrax/entry.rb +4 -0
- data/app/models/bulkrax/importer.rb +31 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +2 -2
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +6 -5
- data/app/parsers/bulkrax/application_parser.rb +31 -5
- data/app/parsers/bulkrax/csv_parser.rb +42 -10
- data/app/parsers/concerns/bulkrax/csv_parser/csv_template_generation.rb +73 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +133 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb +282 -0
- data/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb +96 -0
- data/app/services/bulkrax/csv_template/column_builder.rb +60 -0
- data/app/services/bulkrax/csv_template/column_descriptor.rb +58 -0
- data/app/services/bulkrax/csv_template/csv_builder.rb +83 -0
- data/app/services/bulkrax/csv_template/explanation_builder.rb +57 -0
- data/app/services/bulkrax/csv_template/field_analyzer.rb +56 -0
- data/app/services/bulkrax/csv_template/file_path_generator.rb +47 -0
- data/app/services/bulkrax/csv_template/file_validator.rb +68 -0
- data/app/services/bulkrax/csv_template/mapping_manager.rb +55 -0
- data/app/services/bulkrax/csv_template/model_loader.rb +50 -0
- data/app/services/bulkrax/csv_template/row_builder.rb +35 -0
- data/app/services/bulkrax/csv_template/schema_analyzer.rb +70 -0
- data/app/services/bulkrax/csv_template/split_formatter.rb +44 -0
- data/app/services/bulkrax/csv_template/value_determiner.rb +68 -0
- data/app/services/bulkrax/stepper_response_formatter.rb +347 -0
- data/app/services/bulkrax/validation_error_csv_builder.rb +99 -0
- data/app/validators/bulkrax/csv_row/child_reference.rb +56 -0
- data/app/validators/bulkrax/csv_row/circular_reference.rb +71 -0
- data/app/validators/bulkrax/csv_row/controlled_vocabulary.rb +74 -0
- data/app/validators/bulkrax/csv_row/duplicate_identifier.rb +63 -0
- data/app/validators/bulkrax/csv_row/missing_source_identifier.rb +31 -0
- data/app/validators/bulkrax/csv_row/parent_reference.rb +59 -0
- data/app/validators/bulkrax/csv_row/required_values.rb +64 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +1 -1
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +1 -1
- data/app/views/bulkrax/entries/show.html.erb +6 -6
- data/app/views/bulkrax/exporters/_form.html.erb +19 -43
- data/app/views/bulkrax/exporters/edit.html.erb +2 -2
- data/app/views/bulkrax/exporters/index.html.erb +5 -5
- data/app/views/bulkrax/exporters/new.html.erb +3 -5
- data/app/views/bulkrax/exporters/show.html.erb +3 -3
- data/app/views/bulkrax/guided_imports/new.html.erb +567 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +9 -9
- data/app/views/bulkrax/importers/_browse_everything.html.erb +1 -1
- data/app/views/bulkrax/importers/_csv_fields.html.erb +11 -11
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +23 -23
- data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +2 -2
- data/app/views/bulkrax/importers/_file_uploader.html.erb +3 -3
- data/app/views/bulkrax/importers/_form.html.erb +4 -5
- data/app/views/bulkrax/importers/_oai_fields.html.erb +8 -18
- data/app/views/bulkrax/importers/_xml_fields.html.erb +13 -13
- data/app/views/bulkrax/importers/edit.html.erb +2 -2
- data/app/views/bulkrax/importers/index.html.erb +19 -14
- data/app/views/bulkrax/importers/new.html.erb +10 -9
- data/app/views/bulkrax/importers/show.html.erb +23 -7
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +6 -6
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +11 -11
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +3 -3
- data/config/i18n-tasks.yml +195 -0
- data/config/locales/bulkrax.de.yml +504 -0
- data/config/locales/bulkrax.en.yml +487 -28
- data/config/locales/bulkrax.es.yml +504 -0
- data/config/locales/bulkrax.fr.yml +504 -0
- data/config/locales/bulkrax.it.yml +504 -0
- data/config/locales/bulkrax.pt-BR.yml +504 -0
- data/config/locales/bulkrax.zh.yml +503 -0
- data/config/routes.rb +10 -0
- data/lib/bulkrax/data/demo_scenarios.json +2235 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +31 -3
- data/lib/tasks/bulkrax_tasks.rake +0 -102
- metadata +55 -3
- /data/{app/services → lib}/wings/custom_queries/find_by_source_identifier.rb +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
class CsvParser < ApplicationParser
|
|
5
|
+
# Hierarchy-building helpers for CsvValidation. Handles extracting and
|
|
6
|
+
# categorising items from parsed CSV data for the guided import tree view.
|
|
7
|
+
module CsvValidationHierarchy
|
|
8
|
+
# Sorts every parsed CSV row into one of three lists: collections, works or file sets.
#
# @param csv_data [Enumerable<Hash>] parsed rows; each is handed to #categorise_validation_item
# @param all_ids [Set] forwarded untouched to #categorise_validation_item
# @param find_record [#call, nil] forwarded untouched to #categorise_validation_item
# @param parent_split_pattern [String, Regexp, nil] delimiter used for parent references
# @param child_split_pattern [String, Regexp] delimiter used for child references
# @return [Array<Array>] `[collections, works, file_sets]`
def extract_validation_items(csv_data, all_ids = Set.new, find_record = nil, parent_split_pattern: nil, child_split_pattern: '|')
  parents_by_child = build_child_to_parents_map(csv_data, child_split_pattern: child_split_pattern)
  # Insertion order of this hash is the order of the returned triple.
  buckets = { collections: [], works: [], file_sets: [] }
  split_patterns = { parent_split_pattern: parent_split_pattern, child_split_pattern: child_split_pattern }

  csv_data.each do |row|
    categorise_validation_item(row, parents_by_child, all_ids, *buckets.values, find_record, **split_patterns)
  end

  buckets.values
end
|
|
21
|
+
|
|
22
|
+
# Builds a reverse lookup from a child identifier to the identifiers of the
# rows that list it in their children column(s).
#
# Rows with a blank :source_identifier are skipped. The returned hash creates
# (and stores) an empty array the first time an unknown key is read.
#
# @param csv_data [Enumerable<Hash>] parsed rows with :source_identifier, :children and :raw_row
# @param child_split_pattern [String, Regexp] delimiter used for child references
# @return [Hash{String => Array}] child id => parent ids
def build_child_to_parents_map(csv_data, child_split_pattern: '|')
  csv_data.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |row, parents_by_child|
    parent_id = row[:source_identifier]
    next if parent_id.blank?

    child_ids = collect_relationship_ids(row[:children], row[:raw_row], 'children', split_pattern: child_split_pattern)
    child_ids.each { |child_id| parents_by_child[child_id] << parent_id }
  end
end
|
|
33
|
+
|
|
34
|
+
# Appends one parsed row to the list matching its model.
#
# The model name is compared case-insensitively: 'collection' and
# 'collectionresource' go to +collections+, 'fileset' and 'hyrax::fileset' go
# to +file_sets+ (as a minimal id/title/type hash), anything else goes to +works+.
#
# @param item [Hash] parsed row with :source_identifier, :model and :raw_row
# @param child_to_parents [Hash] passed through to #build_item_hash
# @param all_ids [Set] passed through to #build_item_hash
# @param collections [Array] receives collection entries
# @param works [Array] receives work entries
# @param file_sets [Array] receives file set entries
# @param find_record [#call, nil] passed through to #build_item_hash
# @param parent_split_pattern [String, Regexp, nil] delimiter for parent references
# @param child_split_pattern [String, Regexp] delimiter for child references
# @return [Array] the list that received the new entry
def categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets, find_record = nil, parent_split_pattern: nil, child_split_pattern: '|') # rubocop:disable Metrics/ParameterLists
  model_name = item[:model].to_s
  named = ->(*candidates) { candidates.any? { |candidate| model_name.casecmp(candidate).zero? } }
  entry_for = lambda do |kind|
    build_item_hash(item, child_to_parents, all_ids,
                    { type: kind, find_record: find_record, parent: parent_split_pattern, child: child_split_pattern })
  end

  if named.call('collection', 'collectionresource')
    collections << entry_for.call('collection')
  elsif named.call('fileset', 'hyrax::fileset')
    identifier = item[:source_identifier]
    # File sets carry no relationship data; the identifier doubles as the title fallback.
    file_sets << { id: identifier, title: item[:raw_row]['title'] || identifier, type: 'file_set' }
  else
    works << entry_for.call('work')
  end
end
|
|
47
|
+
|
|
48
|
+
# Builds the hash describing one collection or work row.
#
# Parent ids combine the row's own parent references with the parents recorded
# for this row in +child_to_parents+; only ids present in +all_ids+ are kept
# in :parentIds / :childIds, while ids outside the CSV that +find_record+
# confirms go to :existingParentIds / :existingChildIds.
#
# @param item [Hash] parsed row with :source_identifier, :parent, :children and :raw_row
# @param child_to_parents [Hash] child id => parent ids
# @param all_ids [Set] identifiers checked with #include?
# @param opts [Hash] :type, :find_record, :parent (split pattern), :child (split pattern, defaults to '|')
# @return [Hash] entry with :id, :title, :type, :existing and the four id lists
def build_item_hash(item, child_to_parents, all_ids, opts = {}) # rubocop:disable Metrics/MethodLength
  lookup = opts[:find_record]
  identifier = item[:source_identifier]
  display_title = item[:raw_row]['title'] || identifier
  parent_refs = collect_relationship_ids(item[:parent], item[:raw_row], 'parents', split_pattern: opts[:parent])
  child_refs = collect_relationship_ids(item[:children], item[:raw_row], 'children', split_pattern: opts[:child] || '|')

  entry = { id: identifier, title: display_title, type: opts[:type] }
  entry[:existing] = lookup&.call(identifier) || false
  declared_parents = resolvable_ids(parent_refs, all_ids)
  inferred_parents = resolvable_ids(child_to_parents[identifier] || [], all_ids)
  entry[:parentIds] = (declared_parents + inferred_parents).uniq
  entry[:childIds] = resolvable_ids(child_refs, all_ids)
  entry[:existingParentIds] = external_ids(parent_refs, all_ids, lookup)
  entry[:existingChildIds] = external_ids(child_refs, all_ids, lookup)
  entry
end
|
|
67
|
+
|
|
68
|
+
# Splits a relationship cell into a list of trimmed, non-blank identifiers.
#
# A nil +split_pattern+ means "do not split": the whole value is returned as a
# single identifier. Passing nil straight to String#split would split on
# whitespace instead, breaking identifiers that contain spaces.
#
# @param value [#to_s, nil] raw cell content
# @param split_pattern [String, Regexp, nil] delimiter between identifiers
# @return [Array<String>] identifiers in their original order; empty for blank input
def parse_relationship_field(value, split_pattern: '|')
  return [] if value.blank?

  text = value.to_s
  pieces = split_pattern.nil? ? [text] : text.split(split_pattern)
  pieces.map(&:strip).reject(&:blank?)
end
|
|
72
|
+
|
|
73
|
+
# Gathers relationship identifiers from a base cell plus any numbered columns
# named "<column>_<digits>" (for example "children_1") in the raw row.
#
# @param base_value [#to_s, nil] content of the un-numbered column
# @param raw_row [Hash] header => cell value for the whole row
# @param column [String] base column name used to match numbered columns
# @param split_pattern [String, Regexp, nil] delimiter applied to +base_value+ only
# @return [Array<String>] unique identifiers, base values first
def collect_relationship_ids(base_value, raw_row, column, split_pattern: '|')
  from_base = parse_relationship_field(base_value, split_pattern: split_pattern)
  numbered_column = /\A#{Regexp.escape(column)}_\d+\z/

  from_numbered = raw_row.each_with_object([]) do |(header, cell), found|
    next unless header.to_s.match?(numbered_column)

    # Numbered cells are taken whole (trimmed), never split.
    trimmed = cell.to_s.strip
    found << trimmed unless trimmed.blank?
  end

  (from_base + from_numbered).uniq
end
|
|
82
|
+
|
|
83
|
+
# Keeps only the ids that +all_ids+ reports as included.
#
# @param ids [Array] candidate identifiers
# @param all_ids [#include?] known identifiers
# @return [Array] the matching ids, in their original order
def resolvable_ids(ids, all_ids)
  ids.select(&all_ids.method(:include?))
end
|
|
86
|
+
|
|
87
|
+
# Picks out the ids that are absent from +all_ids+ but for which +find_record+
# returns a truthy value. Without a +find_record+ callable nothing can be
# confirmed, so the result is empty.
#
# @param ids [Array] candidate identifiers
# @param all_ids [#include?] identifiers to exclude
# @param find_record [#call, nil] lookup invoked once per id not in +all_ids+
# @return [Array] confirmed ids, in their original order
def external_ids(ids, all_ids, find_record)
  return [] if find_record.nil?

  ids.select { |candidate| !all_ids.include?(candidate) && find_record.call(candidate) }
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Assembles the column headers for a CSV template.
#
# The +service+ passed in must respond to #mapping_manager, #field_analyzer
# and #all_models; those are the only members this class calls.
class ColumnBuilder
  def initialize(service)
    @service = service
    @descriptor = CsvTemplate::ColumnDescriptor.new
  end

  # @return [Array<String>] required columns followed by the sorted property columns
  def all_columns
    [*required_columns, *property_columns]
  end

  # @return [Array<String>] core columns, then relationship columns, then file columns
  def required_columns
    [*mapped_core_columns, *relationship_columns, *file_columns]
  end

  private

  # Core column keys from the descriptor, translated to their mapped header.
  def mapped_core_columns
    @descriptor.core_columns.map { |key| @service.mapping_manager.key_to_mapped_column(key) }
  end

  # Every property of every model, mapped to a header, minus anything already
  # among the required columns; sorted.
  def property_columns
    model_entries = @service.all_models.map do |model|
      @service.field_analyzer.find_or_create_field_list_for(model_name: model)
    end

    property_names = model_entries
                     .flat_map(&:values)
                     .flat_map { |config| config["properties"] || [] }
                     .uniq
    headers = property_names.map { |name| @service.mapping_manager.key_to_mapped_column(name) }.uniq

    (headers - required_columns).sort
  end

  # Children column first, then parents; each falls back to its default name.
  def relationship_columns
    mapper = @service.mapping_manager
    children = mapper.find_by_flag("related_children_field_mapping", 'children')
    parents = @service.mapping_manager.find_by_flag("related_parents_field_mapping", 'parents')
    [children, parents]
  end

  # Keys of the descriptor's :files group, translated to their mapped header.
  def file_columns
    CsvTemplate::ColumnDescriptor::COLUMN_DESCRIPTIONS[:files]
      .flat_map(&:keys)
      .map { |key| @service.mapping_manager.key_to_mapped_column(key) }
  end
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Holds the explanatory text for well-known CSV template columns, grouped by
# purpose, and answers lookups against those groups.
class ColumnDescriptor
  # Group name => list of single-pair hashes (column name => description).
  # The "model" description interpolates Bulkrax.default_work_type when this
  # constant is evaluated.
  COLUMN_DESCRIPTIONS = {
    include_first: [
      { "model" => "The work types configured in your repository are listed below.\nIf left blank, your default work type, #{Bulkrax.default_work_type}, is used." },
      { "source_identifier" => "This must be a unique identifier.\nIt can be alphanumeric with some special characters (e.g. hyphens, colons), and URLs are also supported." },
      { "id" => "This column would optionally be included only if it is a re-import, i.e. for updating or deleting records.\nThis is a key identifier used by the system, which you wouldn't have for new imports." },
      { "rights_statement" => "Rights statement URI for the work.\nIf not included, uses the value specified on the bulk import configuration screen." }
    ],
    visibility: [
      { "visibility" => "Uses the value specified on the bulk import configuration screen if not added here.\nValid options: open, authenticated, restricted, embargo, lease" },
      { "embargo_release_date" => "Required for embargo (yyyy-mm-dd)" },
      { "visibility_during_embargo" => "Required for embargo" },
      { "visibility_after_embargo" => "Required for embargo" },
      { "lease_expiration_date" => "Required for lease (yyyy-mm-dd)" },
      { "visibility_during_lease" => "Required for lease" },
      { "visibility_after_lease" => "Required for lease" }
    ],
    files: [
      { "file" => "Use filenames exactly matching those in your files folder.\nZip your CSV and files folder together and attach this to your importer." },
      { "remote_files" => "Use the URLs to remote files to be attached to the work." }
    ],
    relationships: [
      { "parents" => "The source_identifier or id of work or collection to be attached as parent." },
      { "children" => "The source_identifier or id of work or file to be attached as child." }
    ],
    other: [
      { "hide_from_catalog_search" => "Set to 1 to hide the collection from catalog search results." },
      { "show_pdf_download_button" => "Set to 1 to show a PDF download link on the work's page." },
      { "show_pdf_viewer" => "Set to 1 to show a PDF viewer on the work's page." },
      { "video_embed" => "A valid URL to a hosted video that can appear in an iframe, beginning with 'http://' or 'https://'." }
    ]
  }.freeze

  # @return [Array<String>] column names of the :include_first group followed by the :visibility group
  def core_columns
    %i[include_first visibility].flat_map { |group| extract_column_names(group) }
  end

  # Looks the column up across all groups, in the constant's declaration order.
  #
  # @param column [String] column name
  # @return [String, nil] the first matching description, or nil when none is declared
  def find_description_for(column)
    entry = COLUMN_DESCRIPTIONS.values.flatten(1).find { |candidate| candidate.key?(column) }
    entry && entry[column]
  end

  private

  # First key of every entry in the given group.
  def extract_column_names(group)
    COLUMN_DESCRIPTIONS[group].map { |entry| entry.each_key.first }
  end
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Builds the CSV template content: a header row, an explanation row and one
# row per model, with unused optional columns removed.
class CsvBuilder
  # Property names that never appear as template columns.
  IGNORED_PROPERTIES = %w[
    admin_set_id alternate_ids
    bulkrax_identifier
    collection_type_gid contexts created_at
    date date_modified date_uploaded depositor
    embargo embargo_id
    file_ids
    has_model head
    internal_resource is_child
    lease lease_id
    member_ids member_of_collection_ids modified_date
    new_record
    on_behalf_of owner proxy_depositor
    rendering_ids representative_id
    schema_version split_from_pdf_id state tail
    thumbnail_id
    updated_at
  ].freeze

  # @param service [Object] handed unchanged to ColumnBuilder and RowBuilder
  def initialize(service)
    @service = service
    @column_builder = CsvTemplate::ColumnBuilder.new(service)
    @row_builder = CsvTemplate::RowBuilder.new(service)
    @header_row = nil
    @required_headings = []
  end

  # Writes the template to +file_path+, creating the parent directory first.
  #
  # @param file_path [String, Pathname] destination file
  def write_to_file(file_path)
    FileUtils.mkdir_p(File.dirname(file_path))
    CSV.open(file_path, "w") { |csv| write_rows(csv) }
  end

  # @return [String] the template rendered as CSV text
  def generate_string
    CSV.generate { |csv| write_rows(csv) }
  end

  private

  def write_rows(csv)
    csv_rows.each { |row| csv << row }
  end

  # Header row, explanation row, then one row per model, pruned of empty columns.
  def csv_rows
    @header_row = fill_header_row
    # A model that could not be resolved yields an empty row. Left in place it
    # would make the row set ragged and Array#transpose would raise IndexError.
    model_rows = @row_builder.build_model_rows(@header_row).reject { |row| row.nil? || row.empty? }
    rows = [
      @header_row,
      @row_builder.build_explanation_row(@header_row),
      *model_rows
    ]
    remove_empty_columns(rows)
  end

  # Computes the header row and records which of its columns are required.
  #
  # @return [Array<String>] all columns minus IGNORED_PROPERTIES
  def fill_header_row
    required = @column_builder.required_columns
    filtered = @column_builder.all_columns - IGNORED_PROPERTIES
    @required_headings = required & filtered
    filtered
  end

  # Drops every column that is neither required nor populated by any model row.
  def remove_empty_columns(rows)
    return rows if rows.empty?

    rows.transpose.select { |column| keep_column?(column) }.transpose
  end

  # A column stays when its heading is required, or when any cell below the
  # header and explanation rows holds something other than nil, "" or "---".
  def keep_column?(column)
    return true if @required_headings.include?(column.first)

    column.drop(2).any? { |value| !value.nil? && value != "" && value != "---" }
  end
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Produces the explanatory text shown beneath each CSV template column.
#
# The +service+ must respond to #mapping_manager and #field_analyzer.
class ExplanationBuilder
  def initialize(service)
    @service = service
    @descriptor = CsvTemplate::ColumnDescriptor.new
    @split_formatter = CsvTemplate::SplitFormatter.new
  end

  # @param header_row [Array<String>] column headers
  # @return [Array<Hash>] one single-pair hash per column: header => explanation text
  def build_explanations(header_row)
    header_row.map { |column| { column => build_explanation(column) } }
  end

  private

  # Joins, with newlines, whichever of these exist: the column description,
  # the controlled-vocabulary note and the split note.
  def build_explanation(column)
    mapping_key = @service.mapping_manager.mapped_to_key(column)
    description = source_identifier_description(column) || @descriptor.find_description_for(column)
    vocab_note = controlled_vocab_text(mapping_key)
    split_note = split_text(mapping_key, vocab_note)

    [description, vocab_note, split_note].compact.join("\n")
  end

  # Special wording for the source identifier column, used only when
  # Bulkrax.fill_in_blank_source_identifiers is set.
  def source_identifier_description(column)
    is_identifier = @service.mapping_manager.mapped_to_key(column) == 'source_identifier'
    return nil unless is_identifier && Bulkrax.fill_in_blank_source_identifiers.present?

    "Will be auto-generated if left blank.\nProviding one allows round-tripping and deduplication across imports."
  end

  # Note for fields listed by the field analyzer as controlled vocabulary, plus 'based_near'.
  def controlled_vocab_text(field_name)
    known_terms = @service.field_analyzer.controlled_vocab_terms
    return nil unless field_name == 'based_near' || known_terms.include?(field_name)

    'This property uses a controlled vocabulary.'
  end

  # Split note for the field. Controlled-vocabulary fields get none, except
  # 'location' and 'resource_type'.
  def split_text(mapping_key, controlled_vocab_info)
    suppressed = controlled_vocab_info.present? && !%w[location resource_type].include?(mapping_key)
    return nil if suppressed

    split_value = @service.mapping_manager.split_value_for(mapping_key)
    split_value ? @split_formatter.format(split_value) : nil
  end
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Collects, per model name, the model's properties, required terms and
# controlled-vocabulary terms, caching each result in #field_list.
class FieldAnalyzer
  attr_reader :field_list

  # @param mappings [Object] stored as given; not read by any method in this class
  # @param admin_set_id [Object, nil] forwarded to the schema helpers
  def initialize(mappings, admin_set_id = nil)
    @mappings = mappings
    @field_list = []
    @schema = nil
    @admin_set_id = admin_set_id
  end

  # Returns the cached entry for +model_name+, building and caching it on first use.
  #
  # @param model_name [String]
  # @return [Hash] `{ model_name => { 'properties', 'required_terms', 'controlled_vocab_terms' } }`,
  #   or an empty hash (not cached) when the model class cannot be determined
  def find_or_create_field_list_for(model_name:)
    cached = @field_list.find { |entry| entry.key?(model_name) }
    return cached if cached.present?

    klass = CsvTemplate::ModelLoader.determine_klass_for(model_name)
    return {} if klass.nil?

    build_field_list_entry(model_name, klass).tap { |entry| @field_list << entry }
  end

  # @return [Array] unique controlled-vocabulary terms across all cached entries
  def controlled_vocab_terms
    @field_list
      .flat_map(&:values)
      .flat_map { |details| details["controlled_vocab_terms"] || [] }
      .uniq
  end

  private

  def build_field_list_entry(model_name, klass)
    schema_analyzer = CsvTemplate::SchemaAnalyzer.new(klass: klass, admin_set_id: @admin_set_id)
    details = {
      'properties' => extract_properties(klass),
      'required_terms' => schema_analyzer.required_terms,
      'controlled_vocab_terms' => schema_analyzer.controlled_vocab_terms
    }
    { model_name => details }
  end

  # Property names as strings. Classes responding to #schema go through the
  # Valkyrie object factory; others are read from klass.properties.
  def extract_properties(klass)
    names =
      if klass.respond_to?(:schema)
        Bulkrax::ValkyrieObjectFactory.schema_properties(klass: klass, admin_set_id: @admin_set_id)
      else
        klass.properties.keys
      end
    names.map(&:to_s)
  end
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Generates the default file path for an import template under tmp/imports.
class FilePathGenerator
  TEMPLATE_PREFIX = 'import_template'

  class << self
    # Builds the path and makes sure its directory exists.
    #
    # @param admin_set_id [Object, nil] used to look up an optional context
    # @return [Pathname] Rails.root/tmp/imports/<generated filename>
    def default_path(admin_set_id)
      filename = build_filename(load_context(admin_set_id), load_tenant)
      Rails.root.join('tmp', 'imports', filename).tap do |path|
        FileUtils.mkdir_p(path.dirname.to_s)
      end
    end

    # @return [Object, nil] first context of the admin set, or nil when the id is
    #   blank or the found object has no #contexts
    def load_context(admin_set_id)
      return nil if admin_set_id.blank?

      admin_set = Bulkrax.object_factory.find(admin_set_id)
      return nil unless admin_set.respond_to?(:contexts)

      admin_set.contexts.first
    end

    # @return [String, nil] name of the Account matching the current Apartment
    #   tenant; nil when Apartment::Tenant or Account is not defined, or no tenant is current
    def load_tenant
      return nil unless defined?(Apartment::Tenant) && defined?(Account)

      current_tenant = Apartment::Tenant.current
      current_tenant.blank? ? nil : Account.find_by(tenant: current_tenant)&.name
    end

    # Joins prefix, optional context, optional tenant and timestamp with underscores.
    #
    # @return [String] filename ending in ".csv"
    def build_filename(context, tenant)
      segments = [
        TEMPLATE_PREFIX,
        (context.present? ? "context-#{context}" : nil),
        (tenant.present? ? "tenant-#{tenant}" : nil),
        timestamp
      ].compact
      "#{segments.join('_')}.csv"
    end

    # @return [String] current UTC time as YYYYMMDD_HHMMSS
    def timestamp
      Time.current.utc.strftime('%Y%m%d_%H%M%S')
    end
  end
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
##
# Compares the file names referenced in parsed CSV rows with the file names
# found in an optional zip archive. Comparison is by base name only.
class FileValidator
  attr_reader :csv_data, :zip_file

  # @param csv_data [Enumerable<Hash>] parsed rows; only the :file value is read
  # @param zip_file [#path, String, nil] archive, or its path
  # @param admin_set_id [Object, nil] stored; not read by any method in this class
  def initialize(csv_data, zip_file = nil, admin_set_id = nil)
    @csv_data = csv_data
    @zip_file = zip_file
    @admin_set_id = admin_set_id
  end

  # @return [Integer] number of rows with a non-blank :file value
  def count_references
    @csv_data.count { |row| row[:file].present? }
  end

  # @return [Array<String>] referenced base names not present in the zip; empty without a zip
  def missing_files
    @zip_file ? referenced_files - zip_file_list : []
  end

  # @return [Integer] count of distinct referenced base names present in the zip; 0 without a zip
  def found_files_count
    @zip_file ? (referenced_files & zip_file_list).count : 0
  end

  def zip_included?
    @zip_file.present?
  end

  # @return [Boolean] true when files are referenced but no zip was supplied
  def possible_missing_files?
    referenced_files.any? && @zip_file.blank?
  end

  private

  # Base names of every file referenced across all rows; multi-valued cells
  # are split on Bulkrax.multi_value_element_split_on.
  def referenced_files
    @referenced_files ||= @csv_data
                          .map { |row| row[:file] }
                          .reject(&:blank?)
                          .flat_map { |cell| cell.split(Bulkrax.multi_value_element_split_on) }
                          .map { |name| File.basename(name.strip) }
                          .compact
  end

  # Base names of the zip's file entries. The no-zip result is not memoized.
  def zip_file_list
    return @zip_file_list if @zip_file_list
    return [] unless @zip_file

    @zip_file_list = read_zip_entry_names
  end

  # Reads the archive; any StandardError is logged and treated as an empty archive.
  def read_zip_entry_names
    archive_path = @zip_file.respond_to?(:path) ? @zip_file.path : @zip_file
    Zip::File.open(archive_path) do |archive|
      archive.entries.select(&:file?).map { |entry| File.basename(entry.name) }
    end
  rescue StandardError => e
    Rails.logger.error("Error reading zip file: #{e.message}")
    []
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Loads the Bulkrax CSV parser field mappings (minus generated ones) and
# translates between mapping keys and CSV column names.
#
# A mapping's "from" entry is treated as optional everywhere: a missing value
# means "no source columns" and a bare String means one column.
class MappingManager
  attr_reader :mappings

  def initialize
    @mappings = load_mappings
  end

  # @param column_str [String] CSV column name
  # @return [String] key of the first mapping whose "from" lists the column, else the column itself
  def mapped_to_key(column_str)
    match = @mappings.find { |_key, config| source_columns(config).include?(column_str) }
    match ? match.first : column_str
  end

  # @param key [String] mapping key
  # @return [String] first "from" column of that mapping, else the key itself
  def key_to_mapped_column(key)
    source_columns(@mappings[key]).first || key
  end

  # @param field_name [String] flag name inside a mapping
  # @param default [Object] returned when no mapping has the flag set to true
  # @return [Object] key of the first mapping with the flag set to true, else +default+
  def find_by_flag(field_name, default)
    @mappings.find { |_k, v| v[field_name] == true }&.first || default
  end

  # @return [Object, nil] the mapping's "split" setting
  def split_value_for(mapping_key)
    @mappings.dig(mapping_key, "split")
  end

  # Resolves the source columns for a mapping: the flagged mapping is tried
  # first, then the keyed mapping, then +default+.
  #
  # @param key [String, nil] mapping key
  # @param flag [String, nil] flag name used to find a mapping
  # @param default [String, nil] fallback column name
  # @return [Array<String>] source columns, `[default]`, or an empty array
  def resolve_column_name(key: nil, flag: nil, default: nil)
    flagged_key = flag && find_by_flag(flag, nil)

    [flagged_key, key].each do |candidate|
      next unless candidate

      columns = source_columns(@mappings[candidate])
      return columns if columns.any?
    end

    default ? [default] : []
  end

  private

  # Source column list of one mapping. Kernel#Array turns nil into [] and
  # wraps a single String, so callers never hit nil.include? or a substring match.
  def source_columns(config)
    Array(config && config["from"])
  end

  # Falls back to an empty hash when no CSV parser mappings are configured,
  # so the lookup methods return their defaults instead of raising.
  def load_mappings
    (Bulkrax.field_mappings["Bulkrax::CsvParser"] || {}).reject do |_key, value|
      value["generated"] == true
    end
  end
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Resolves which model names a CSV template should cover.
class ModelLoader
  attr_reader :models

  # @param models [Array<String>, Object] requested model names; anything that is
  #   not an Array, an empty Array, or an Array containing 'all' selects every available model
  def initialize(models)
    @models = load_models(models)
  end

  # Resolves a model name to a class: through the Valkyrie resource class
  # resolver when the configured object factory is the Valkyrie one,
  # otherwise by constantizing the name.
  #
  # @param model_name [String]
  # @return [Class, nil] nil when resolution raises any StandardError
  def self.determine_klass_for(model_name)
    uses_valkyrie = Bulkrax.config.object_factory == Bulkrax::ValkyrieObjectFactory
    return model_name.constantize unless uses_valkyrie

    Valkyrie.config.resource_class_resolver.call(model_name)
  rescue StandardError
    nil
  end

  private

  # @return [Array<String>] model names; empty when anything raises a StandardError
  def load_models(models)
    wants_everything = !models.is_a?(Array) || models.empty? || models.include?('all')
    return all_available_models if wants_everything

    models.filter_map { |name| safe_constantize(name) }
  rescue StandardError
    []
  end

  # Returns the name itself when it constantizes; nil when that raises NameError.
  def safe_constantize(model_name)
    resolved = model_name.constantize
    resolved && model_name
  rescue NameError
    nil
  end

  # Hyrax curation concern names plus the Bulkrax collection and file model names, when set.
  def all_available_models
    extra_names = [Bulkrax.collection_model_class&.name, Bulkrax.file_model_class&.name].compact
    Hyrax.config.curation_concerns.map(&:name) + extra_names
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
module CsvTemplate
|
|
5
|
+
# Builds the non-header rows of a CSV template: the explanation row and one
# row per model.
#
# The +service+ must respond to #all_models and #field_analyzer.
class RowBuilder
  def initialize(service)
    @service = service
    @explanation_builder = CsvTemplate::ExplanationBuilder.new(service)
    @value_determiner = CsvTemplate::ValueDeterminer.new(service)
  end

  # @param header_row [Array<String>] column headers
  # @return [Array<String>] one explanation cell per header, in header order
  def build_explanation_row(header_row)
    @explanation_builder.build_explanations(header_row).map { |prop| prop.values.join(" ") }
  end

  # Builds one row per model. Models whose class cannot be determined are
  # skipped, so every returned row has exactly one cell per header; a ragged
  # row set cannot be transposed by a consumer.
  #
  # @param header_row [Array<String>] column headers
  # @return [Array<Array>] model rows, in the service's model order
  def build_model_rows(header_row)
    @service.all_models.filter_map { |model_name| model_breakdown(model_name, header_row) }
  end

  private

  # @return [Array, nil] one value per header, or nil when the model class cannot be determined
  def model_breakdown(model_name, header_row)
    klass = CsvTemplate::ModelLoader.determine_klass_for(model_name)
    return nil if klass.nil?

    field_list = @service.field_analyzer.find_or_create_field_list_for(model_name: model_name)

    header_row.map do |column|
      @value_determiner.determine_value(column, model_name, field_list)
    end
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|