bulkrax 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +205 -0
- data/README.md +202 -0
- data/Rakefile +42 -0
- data/app/assets/config/bulkrax_manifest.js +2 -0
- data/app/assets/javascripts/bulkrax/application.js +14 -0
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/entries.js +15 -0
- data/app/assets/javascripts/bulkrax/exporters.js +60 -0
- data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
- data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
- data/app/assets/stylesheets/bulkrax/application.css +15 -0
- data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
- data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
- data/app/controllers/bulkrax/application_controller.rb +8 -0
- data/app/controllers/bulkrax/entries_controller.rb +44 -0
- data/app/controllers/bulkrax/exporters_controller.rb +125 -0
- data/app/controllers/bulkrax/importers_controller.rb +315 -0
- data/app/controllers/concerns/bulkrax/api.rb +29 -0
- data/app/factories/bulkrax/object_factory.rb +230 -0
- data/app/helpers/bulkrax/application_helper.rb +15 -0
- data/app/helpers/bulkrax/exporters_helper.rb +6 -0
- data/app/helpers/bulkrax/importers_helper.rb +13 -0
- data/app/helpers/bulkrax/validation_helper.rb +153 -0
- data/app/jobs/bulkrax/application_job.rb +6 -0
- data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
- data/app/jobs/bulkrax/delete_work_job.rb +16 -0
- data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
- data/app/jobs/bulkrax/export_work_job.rb +37 -0
- data/app/jobs/bulkrax/exporter_job.rb +14 -0
- data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
- data/app/jobs/bulkrax/import_work_job.rb +32 -0
- data/app/jobs/bulkrax/importer_job.rb +26 -0
- data/app/mailers/bulkrax/application_mailer.rb +8 -0
- data/app/matchers/bulkrax/application_matcher.rb +113 -0
- data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
- data/app/matchers/bulkrax/csv_matcher.rb +6 -0
- data/app/matchers/bulkrax/oai_matcher.rb +6 -0
- data/app/models/bulkrax/application_record.rb +7 -0
- data/app/models/bulkrax/csv_collection_entry.rb +19 -0
- data/app/models/bulkrax/csv_entry.rb +163 -0
- data/app/models/bulkrax/entry.rb +104 -0
- data/app/models/bulkrax/exporter.rb +122 -0
- data/app/models/bulkrax/exporter_run.rb +7 -0
- data/app/models/bulkrax/import_failed.rb +13 -0
- data/app/models/bulkrax/importer.rb +155 -0
- data/app/models/bulkrax/importer_run.rb +8 -0
- data/app/models/bulkrax/oai_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +74 -0
- data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_set_entry.rb +19 -0
- data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
- data/app/models/bulkrax/rdf_entry.rb +90 -0
- data/app/models/bulkrax/status.rb +25 -0
- data/app/models/bulkrax/xml_entry.rb +73 -0
- data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
- data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
- data/app/models/concerns/bulkrax/file_factory.rb +140 -0
- data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
- data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
- data/app/models/concerns/bulkrax/status_info.rb +56 -0
- data/app/parsers/bulkrax/application_parser.rb +299 -0
- data/app/parsers/bulkrax/bagit_parser.rb +157 -0
- data/app/parsers/bulkrax/csv_parser.rb +266 -0
- data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
- data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
- data/app/parsers/bulkrax/xml_parser.rb +103 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/show.html.erb +63 -0
- data/app/views/bulkrax/exporters/_form.html.erb +120 -0
- data/app/views/bulkrax/exporters/edit.html.erb +23 -0
- data/app/views/bulkrax/exporters/index.html.erb +67 -0
- data/app/views/bulkrax/exporters/new.html.erb +23 -0
- data/app/views/bulkrax/exporters/show.html.erb +124 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
- data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
- data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
- data/app/views/bulkrax/importers/_form.html.erb +35 -0
- data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
- data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
- data/app/views/bulkrax/importers/edit.html.erb +20 -0
- data/app/views/bulkrax/importers/index.html.erb +77 -0
- data/app/views/bulkrax/importers/new.html.erb +25 -0
- data/app/views/bulkrax/importers/show.html.erb +175 -0
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
- data/app/views/layouts/bulkrax/application.html.erb +14 -0
- data/config/locales/bulkrax.en.yml +36 -0
- data/config/routes.rb +18 -0
- data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
- data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
- data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
- data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
- data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
- data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
- data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
- data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
- data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
- data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
- data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
- data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
- data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
- data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
- data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
- data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
- data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
- data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
- data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
- data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
- data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
- data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
- data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
- data/lib/bulkrax.rb +161 -0
- data/lib/bulkrax/engine.rb +37 -0
- data/lib/bulkrax/version.rb +5 -0
- data/lib/generators/bulkrax/install_generator.rb +80 -0
- data/lib/generators/bulkrax/templates/README +3 -0
- data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
- data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
- data/lib/generators/bulkrax/templates/bin/importer +140 -0
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
- data/lib/tasks/bulkrax_tasks.rake +6 -0
- metadata +388 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Mixin that serves a single file from disk as an HTTP download.
  #
  # The including class must define `file_path` and supply the controller
  # methods called below: `request`, `response`, `head`, `send_file_headers!`
  # and the `status=`, `content_type=` and `response_body=` writers.
  module DownloadBehavior
    # The following download code is based on
    # https://github.com/samvera/hydra-head/blob/main/hydra-core/app/controllers/concerns/hydra/controller/download_behavior.rb

    # Memoized handle on the file at `file_path`.
    # Opened in binary mode so the payload is read back as raw bytes.
    # @return [File]
    def file
      @file ||= File.open(file_path, 'rb')
    end

    # Override this if you'd like a different filename
    # @return [String] the filename
    def file_name
      File.basename(file_path)
    end

    # @return [String] the MIME type used for the download headers
    def download_content_type
      'application/zip'
    end

    # Entry point: HEAD requests get headers only, every other request gets
    # the file contents.
    def send_content
      response.headers['Accept-Ranges'] = 'bytes'
      if request.head?
        content_head
      else
        send_file_contents
      end
    end

    # Create some headers for the datastream
    # @return [Hash] options handed to `send_file_headers!`
    def content_options
      { disposition: 'inline', type: download_content_type, filename: file_name }
    end

    # render an HTTP HEAD response
    def content_head
      # Header values are Strings, consistent with #prepare_file_headers
      response.headers['Content-Length'] = file.size.to_s
      head :ok, content_type: download_content_type
    end

    # Set a 200 status, write the download headers and use the whole file
    # content as the response body.
    def send_file_contents
      self.status = 200
      prepare_file_headers
      stream_body file.read
    end

    # Populate the response headers describing the file.
    def prepare_file_headers
      send_file_headers! content_options
      response.headers['Content-Type'] = download_content_type
      response.headers['Content-Length'] ||= file.size.to_s
      # Prevent Rack::ETag from calculating a digest over body
      response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
      self.content_type = download_content_type
    end

    # @param iostream [Object] the value assigned as the response body
    def stream_body(iostream)
      self.response_body = iostream
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Writes the entries whose latest status is 'Failed' to a CSV file.
  #
  # Relies on `importerexporter` and `import_fields` being supplied by the
  # including class.
  module ErroredEntries
    extend ActiveSupport::Concern

    # Write a CSV with one header row (`import_fields`) and one row per
    # errored entry. When `@errored_entries` has not been set, the failed
    # entries are looked up through their latest Bulkrax::Status.
    #
    # @return [true, nil] true once the file is written, nil when there are
    #   no errored entries
    def write_errored_entries_file
      if @errored_entries.blank?
        entry_ids = importerexporter.entries.pluck(:id)
        error_statuses = Bulkrax::Status.latest_by_statusable
                                        .includes(:statusable)
                                        .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Failed')
        @errored_entries = error_statuses.map(&:statusable)
      end
      return if @errored_entries.blank?

      file = setup_errored_entries_file
      begin
        headers = import_fields
        file.puts(headers.to_csv)
        @errored_entries.each do |errored_entry|
          file.puts(build_errored_entry_row(headers, errored_entry))
        end
      ensure
        # Always release the handle, even when building a row raises
        file.close
      end
      true
    end

    # Build the CSV line for one errored entry.
    #
    # @param headers [Array] the column names, in output order
    # @param errored_entry [Object] responds to `raw_metadata` with a Hash
    # @return [String] the CSV-encoded row
    def build_errored_entry_row(headers, errored_entry)
      # Ensure each header has a slot (nil until a value is merged in), which
      # also fixes the column order
      row = headers.each_with_object({}) { |header, blank_row| blank_row[:"#{header}"] = nil }
      # Match each value to its corresponding header
      row.merge!(errored_entry.raw_metadata.symbolize_keys)

      row.values.to_csv
    end

    # Create the target directory if needed and open the CSV for writing.
    #
    # @return [File] an open, writable handle; the caller must close it
    def setup_errored_entries_file
      csv_path = importerexporter.errored_entries_csv_path
      FileUtils.mkdir_p(File.dirname(csv_path))
      File.open(csv_path, 'w')
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Bulkrax
  # Shared export logic for entries: builds the export metadata and, for
  # 'full' exports, writes each file set's original file to disk.
  #
  # Relies on `importerexporter`, `identifier` and `status_info` being
  # supplied by the including class.
  module ExportBehavior
    extend ActiveSupport::Concern

    delegate :export_type, :exporter_export_path, to: :importerexporter

    # Build the metadata and (for full, non-Bagit exports) write the files,
    # then record the outcome through `status_info`.
    #
    # @raise [RSolr::Error::Http, CollectionsCreatedError] re-raised untouched
    def build_for_exporter
      build_export_metadata
      write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
    rescue RSolr::Error::Http, CollectionsCreatedError
      raise
    rescue StandardError => e
      status_info(e)
    else
      status_info
    end

    # Must be overridden by the including class.
    # @raise [StandardError] always, in this default implementation
    def build_export_metadata
      raise StandardError, 'not implemented'
    end

    # @return [ActiveFedora::Base] the memoized record found by `identifier`
    def hyrax_record
      @hyrax_record ||= ActiveFedora::Base.find(self.identifier)
    end

    # Copy the original file of every file set into `<export path>/files`.
    # Collections are skipped, as are file sets for which #filename is blank.
    def write_files
      return if hyrax_record.is_a?(Collection)

      require 'open-uri'
      path = File.join(exporter_export_path, 'files')
      hyrax_record.file_sets.each do |fs|
        FileUtils.mkdir_p(path)
        file = filename(fs)
        # Check before touching original_file.uri: #filename is blank exactly
        # when there is no original file to read from
        next if file.blank?

        # Block forms close both the remote stream and the local file
        URI.open(fs.original_file.uri) do |io|
          File.open(File.join(path, file), 'wb') { |f| f.write(io.read) }
        end
      end
    end

    # Append the file_set id to ensure a unique filename
    #
    # An extension derived from the stored mime type is appended when it
    # disagrees with the type implied by the file name.
    #
    # @param file_set [Object] responds to `id` and `original_file`
    # @return [String, nil] nil when the file set has no original file
    def filename(file_set)
      return if file_set.original_file.blank?

      fn = file_set.original_file.file_name.first
      mime = Mime::Type.lookup(file_set.original_file.mime_type)
      ext_mime = MIME::Types.of(file_set.original_file.file_name).first
      base = fn.include?(file_set.id) ? fn : "#{file_set.id}_#{fn}"
      # A nil lookup result cannot supply an extension, so keep the name as-is
      return base if mime.nil? || mime.to_s == ext_mime.to_s

      "#{base}.#{mime.to_sym}"
    end
  end
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # File handling for the object factory: uploading local files, selecting
  # new remote files and replacing the content of existing file sets.
  #
  # Relies on `attributes`, `klass`, `object`, `@user` and `log_deleted_fs`
  # being supplied by the including class.
  module FileFactory
    extend ActiveSupport::Concern

    # Find existing files or upload new files. This assumes a Work will have unique file titles;
    # and that those file titles will not have changed
    # could filter by URIs instead (slower).
    # When an uploaded_file already exists we do not want to pass its id in `file_attributes`
    # otherwise it gets reuploaded by `work_actor`.
    # support multiple files; ensure attributes[:file] is an Array
    #
    # @return [Array] ids of the newly created uploaded files
    def upload_ids
      return [] if klass == Collection

      attributes[:file] = file_paths
      import_files
    end

    # @param update_files [Boolean] when true, existing file sets get the
    #   incoming files as new versions instead of new uploads being created
    # @return [Hash] may contain :uploaded_files and :remote_files
    def file_attributes(update_files = false)
      @update_files = update_files
      hash = {}
      return hash if klass == Collection

      hash[:uploaded_files] = upload_ids if attributes[:file].present?
      hash[:remote_files] = new_remote_files if new_remote_files.present?
      hash
    end

    # It's possible to get just an array of strings here, so we need to make sure they are all hashes
    #
    # @return [Array<Hash>] remote files; unrecognised values are logged and dropped
    def parsed_remote_files
      return @parsed_remote_files if @parsed_remote_files.present?

      @parsed_remote_files = (attributes[:remote_files] || []).map do |file_value|
        case file_value
        when Hash
          file_value
        when String
          { url: file_value }
        else
          Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
          nil
        end
      end.compact
    end

    # Remote files with a valid absolute URL that are not already attached
    # to the object (matched on a file set's import_url).
    #
    # @return [Array<Hash>]
    def new_remote_files
      @new_remote_files ||= begin
        existing_file_sets = (object.present? && object.file_sets.present?) ? object.file_sets : []
        parsed_remote_files.select do |file|
          # is the url valid?
          next false unless file[:url]&.match(URI::ABS_URI)

          # does the file already exist
          existing_file_sets.none? { |fs| fs.import_url && fs.import_url == file[:url] }
        end
      end
    end

    # @return [Array<String>] the paths in attributes[:file] that exist on disk
    def file_paths
      @file_paths ||= Array.wrap(attributes[:file]).select { |file| File.exist?(file) }
    end

    # Retrieve the original filenames for the files to be imported
    #
    # @return [Array, nil] nil when there is no object or it has no file sets
    def work_files_filenames
      object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
    end

    # Retrieve the filenames for the files to be imported
    #
    # @return [Array<String>]
    def import_files_filenames
      file_paths.map { |path| File.basename(path) }
    end

    # Called if #replace_files is true
    # Destroy all file_sets for this object
    # Reload the object to ensure the remaining methods have the most up to date object
    def destroy_existing_files
      return unless object.present? && object.file_sets.present?

      object.file_sets.each do |fs|
        Hyrax::Actors::FileSetActor.new(fs, @user).destroy
      end
      @object = object.reload
      log_deleted_fs(object)
    end

    # Replace the content of every file set still in #local_file_sets with
    # the "removed" placeholder image, as a new version.
    def set_removed_filesets
      local_file_sets.each do |fileset|
        fileset.files.first.create_version
        opts = {
          path: fileset.files.first.id.split('/', 2).last,
          original_name: 'removed.png',
          mime_type: 'image/png'
        }

        # Keep the handle open until after the save, then close it.
        # NOTE(review): assumes the content is fully consumed by #save - confirm
        File.open(Bulkrax.removed_image_path) do |image|
          fileset.add_file(image, opts)
          fileset.save
        end
        ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
      end
    end

    # @return [Array, nil] the memoized ordered file sets of the object
    def local_file_sets
      @local_file_sets ||= object&.ordered_file_sets
    end

    # Import each local file, then blank out any file sets left unmatched.
    #
    # @return [Array] ids of the newly created uploaded files
    def import_files
      paths = file_paths.map { |path| import_file(path) }.compact
      set_removed_filesets if local_file_sets.present?
      paths
    end

    # @param path [String] path of the local file to import
    # @return [Object, nil] see #update_filesets
    def import_file(path)
      u = Hyrax::UploadedFile.new
      u.user_id = @user.id
      u.file = CarrierWave::SanitizedFile.new(path)
      update_filesets(u)
    end

    # When updating files and an existing file set remains, take the next one
    # off #local_file_sets and give it the incoming file as a new version,
    # unless the checksums already match. Otherwise save the uploaded file.
    #
    # @param current_file [Hyrax::UploadedFile]
    # @return [Object, nil] the saved uploaded file's id, or nil when an
    #   existing file set was used instead
    def update_filesets(current_file)
      if @update_files && local_file_sets.present?
        # shift consumes the file set, so later files use the following ones
        fileset = local_file_sets.shift
        return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s

        fileset.files.first.create_version
        opts = {
          path: fileset.files.first.id.split('/', 2).last,
          original_name: current_file.file.file.original_filename,
          mime_type: current_file.file.content_type
        }

        # Keep the handle open until after the save, then close it.
        # NOTE(review): assumes the content is fully consumed by #save - confirm
        File.open(current_file.file.to_s) do |io|
          fileset.add_file(io, opts)
          fileset.save
        end
        ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
        nil
      else
        current_file.save
        current_file.id
      end
    end
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Maps incoming import fields onto `parsed_metadata` using the field
  # mapping and the registered matchers.
  #
  # Relies on `mapping`, `parsed_metadata` and `factory_class` being supplied
  # by the including class.
  module HasMatchers
    extend ActiveSupport::Concern

    included do
      class_attribute :matchers
      self.matchers ||= {}
    end

    class_methods do
      # @return [Class] the class instantiated by .matcher
      def matcher_class
        Bulkrax::ApplicationMatcher
      end

      # Build a matcher for `name` and register it in `matchers`.
      #
      # @param name [String] the field the matcher writes to
      # @param args [Hash] may contain :parsed, :split, :if, :excluded, :nested_type
      # @return [Object] the new matcher
      def matcher(name, args = {})
        matcher = matcher_class.new(
          to: name,
          parsed: args[:parsed],
          split: args[:split],
          if: args[:if],
          excluded: args[:excluded],
          nested_type: args[:nested_type]
        )
        self.matchers[name] = matcher
      end
    end

    # Store `node_content` in `parsed_metadata` under every field that
    # `node_name` maps to (see #field_to).
    #
    # @param node_name [String] the import field name
    # @param node_content [Object] the raw value
    # @param index [Integer, nil] position used when the target is an array of objects
    def add_metadata(node_name, node_content, index = nil)
      field_to(node_name).each do |name|
        matcher = self.class.matcher(name, mapping[name].symbolize_keys) if mapping[name] # the field matched to a pre parsed value in application_matcher.rb
        object_name = get_object_name(name) || false # the "key" of an object property. e.g. { object_name: { alpha: 'beta' } }
        multiple = multiple?(name) # the property has multiple values. e.g. 'letters': ['a', 'b', 'c']
        object_multiple = object_name && multiple?(object_name) # the property's value is an array of object(s)

        next unless field_supported?(name) || (object_name && field_supported?(object_name))

        if object_name
          Rails.logger.info("Bulkrax Column automatically matched object #{node_name}, #{node_content}")
          parsed_metadata[object_name] ||= object_multiple ? [{}] : {}
        end

        value = if matcher
                  result = matcher.result(self, node_content)
                  next unless result
                  matched_metadata(multiple, name, result, object_multiple)
                elsif multiple
                  Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}")
                  multiple_metadata(node_content)
                else
                  Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}")
                  single_metadata(node_content)
                end

        set_parsed_data(object_multiple, object_name, name, index, value) if value
      end
    end

    # Write `value` under `name`, either at the top level of
    # `parsed_metadata`, inside the object stored at `object_name`, or inside
    # the object at `index` of the array stored at `object_name`.
    # Array values are appended to what is already there; any other value
    # replaces it.
    def set_parsed_data(object_multiple, object_name, name, index, value)
      target = if object_multiple
                 parsed_metadata[object_name][index] ||= {}
               elsif object_name
                 parsed_metadata[object_name]
               else
                 parsed_metadata
               end

      target[name] ||= []
      if value.is_a?(Array)
        target[name] += value
      else
        target[name] = value
      end
    end

    # @param content [Object] the raw value
    # @return [String, nil] the stripped string form of `content`, nil when it is nil/false
    def single_metadata(content)
      content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
      return unless content
      Array.wrap(content.to_s.strip).join('; ')
    end

    # @param content [Object] the raw value
    # @return [Array, nil] `content` as an Array, nil when it is nil/false
    def multiple_metadata(content)
      content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
      return unless content
      content.is_a?(Array) ? content : Array.wrap(content.strip)
    end

    # Shape a matcher result as a single or multiple value. Inside an array
    # of objects the mapping's 'nested_type' decides; otherwise `multiple` does.
    def matched_metadata(multiple, name, result, object_multiple)
      as_array = if object_multiple
                   mapping[name]['nested_type'] == 'Array'
                 else
                   multiple
                 end
      as_array ? multiple_metadata(result) : single_metadata(result)
    end

    # @param field [String] the field name; '_attributes' is removed before checking
    # @return [Boolean] whether the field can be written
    def field_supported?(field)
      field = field.gsub('_attributes', '')

      return false if excluded?(field)
      return true if ['collections', 'file', 'remote_files', 'model', 'delete'].include?(field)
      factory_class.method_defined?(field) && factory_class.properties[field].present?
    end

    # @param field [String]
    # @return [Object] truthy when the field holds multiple values
    def multiple?(field)
      return true if field == 'file' || field == 'remote_files' || field == 'collections'
      return false if field == 'model'

      field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
    end

    # @param field [String]
    # @return [Object, nil] the 'object' entry of the field's mapping, if any
    def get_object_name(field)
      mapping&.[](field)&.[]('object')
    end

    # Hyrax field to use for the given import field
    # @param field [String] the importer field name
    # @return [Array] hyrax fields
    def field_to(field)
      fields = mapping&.map do |key, value|
        # `next`, not `return`: a mapping key without a value must only be
        # skipped, not abort the whole lookup with nil
        next unless value

        if value['from'].instance_of?(Array)
          key if value['from'].include?(field) || key == field
        elsif (value['from'] == field) || key == field
          key
        end
      end&.compact

      return [field] if fields.blank?
      fields
    end

    # Check whether a field is explicitly excluded in the mapping
    def excluded?(field)
      return false if mapping[field].blank?
      mapping[field]['excluded'] || false
    end
  end
end
|