bulkrax 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +205 -0
- data/README.md +202 -0
- data/Rakefile +42 -0
- data/app/assets/config/bulkrax_manifest.js +2 -0
- data/app/assets/javascripts/bulkrax/application.js +14 -0
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/entries.js +15 -0
- data/app/assets/javascripts/bulkrax/exporters.js +60 -0
- data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
- data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
- data/app/assets/stylesheets/bulkrax/application.css +15 -0
- data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
- data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
- data/app/controllers/bulkrax/application_controller.rb +8 -0
- data/app/controllers/bulkrax/entries_controller.rb +44 -0
- data/app/controllers/bulkrax/exporters_controller.rb +125 -0
- data/app/controllers/bulkrax/importers_controller.rb +315 -0
- data/app/controllers/concerns/bulkrax/api.rb +29 -0
- data/app/factories/bulkrax/object_factory.rb +230 -0
- data/app/helpers/bulkrax/application_helper.rb +15 -0
- data/app/helpers/bulkrax/exporters_helper.rb +6 -0
- data/app/helpers/bulkrax/importers_helper.rb +13 -0
- data/app/helpers/bulkrax/validation_helper.rb +153 -0
- data/app/jobs/bulkrax/application_job.rb +6 -0
- data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
- data/app/jobs/bulkrax/delete_work_job.rb +16 -0
- data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
- data/app/jobs/bulkrax/export_work_job.rb +37 -0
- data/app/jobs/bulkrax/exporter_job.rb +14 -0
- data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
- data/app/jobs/bulkrax/import_work_job.rb +32 -0
- data/app/jobs/bulkrax/importer_job.rb +26 -0
- data/app/mailers/bulkrax/application_mailer.rb +8 -0
- data/app/matchers/bulkrax/application_matcher.rb +113 -0
- data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
- data/app/matchers/bulkrax/csv_matcher.rb +6 -0
- data/app/matchers/bulkrax/oai_matcher.rb +6 -0
- data/app/models/bulkrax/application_record.rb +7 -0
- data/app/models/bulkrax/csv_collection_entry.rb +19 -0
- data/app/models/bulkrax/csv_entry.rb +163 -0
- data/app/models/bulkrax/entry.rb +104 -0
- data/app/models/bulkrax/exporter.rb +122 -0
- data/app/models/bulkrax/exporter_run.rb +7 -0
- data/app/models/bulkrax/import_failed.rb +13 -0
- data/app/models/bulkrax/importer.rb +155 -0
- data/app/models/bulkrax/importer_run.rb +8 -0
- data/app/models/bulkrax/oai_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +74 -0
- data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_set_entry.rb +19 -0
- data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
- data/app/models/bulkrax/rdf_entry.rb +90 -0
- data/app/models/bulkrax/status.rb +25 -0
- data/app/models/bulkrax/xml_entry.rb +73 -0
- data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
- data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
- data/app/models/concerns/bulkrax/file_factory.rb +140 -0
- data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
- data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
- data/app/models/concerns/bulkrax/status_info.rb +56 -0
- data/app/parsers/bulkrax/application_parser.rb +299 -0
- data/app/parsers/bulkrax/bagit_parser.rb +157 -0
- data/app/parsers/bulkrax/csv_parser.rb +266 -0
- data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
- data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
- data/app/parsers/bulkrax/xml_parser.rb +103 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/show.html.erb +63 -0
- data/app/views/bulkrax/exporters/_form.html.erb +120 -0
- data/app/views/bulkrax/exporters/edit.html.erb +23 -0
- data/app/views/bulkrax/exporters/index.html.erb +67 -0
- data/app/views/bulkrax/exporters/new.html.erb +23 -0
- data/app/views/bulkrax/exporters/show.html.erb +124 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
- data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
- data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
- data/app/views/bulkrax/importers/_form.html.erb +35 -0
- data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
- data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
- data/app/views/bulkrax/importers/edit.html.erb +20 -0
- data/app/views/bulkrax/importers/index.html.erb +77 -0
- data/app/views/bulkrax/importers/new.html.erb +25 -0
- data/app/views/bulkrax/importers/show.html.erb +175 -0
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
- data/app/views/layouts/bulkrax/application.html.erb +14 -0
- data/config/locales/bulkrax.en.yml +36 -0
- data/config/routes.rb +18 -0
- data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
- data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
- data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
- data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
- data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
- data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
- data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
- data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
- data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
- data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
- data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
- data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
- data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
- data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
- data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
- data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
- data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
- data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
- data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
- data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
- data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
- data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
- data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
- data/lib/bulkrax.rb +161 -0
- data/lib/bulkrax/engine.rb +37 -0
- data/lib/bulkrax/version.rb +5 -0
- data/lib/generators/bulkrax/install_generator.rb +80 -0
- data/lib/generators/bulkrax/templates/README +3 -0
- data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
- data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
- data/lib/generators/bulkrax/templates/bin/importer +140 -0
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
- data/lib/tasks/bulkrax_tasks.rake +6 -0
- metadata +388 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
module DownloadBehavior
|
5
|
+
# The following download code is based on
|
6
|
+
# https://github.com/samvera/hydra-head/blob/main/hydra-core/app/controllers/concerns/hydra/controller/download_behavior.rb
|
7
|
+
|
8
|
+
# Lazily opens the file located at +file_path+ and memoizes the handle.
#
# The handle is opened in binary mode so that the bytes are read back
# untranslated and tagged as binary rather than as text in the default
# external encoding (the default download type here is 'application/zip').
#
# @return [File] the memoized, read-only file handle
def file
  @file ||= File.open(file_path, 'rb')
end
|
11
|
+
|
12
|
+
# Override this if you'd like a different filename
|
13
|
+
# @return [String] the filename
|
14
|
+
# Name offered to the client for the downloaded file: the last path
# component of +file_path+. Uses File.basename so that both String and
# Pathname values of +file_path+ are handled.
#
# @return [String] the filename
def file_name
  File.basename(file_path)
end
|
17
|
+
|
18
|
+
# MIME type reported for the download; used for both the Content-Type
# header and the send-file options.
#
# @return [String]
def download_content_type
  'application/zip'
end
|
21
|
+
|
22
|
+
# Entry point for serving the download: advertises byte-range support,
# then answers HEAD requests with headers only and every other request
# with the file contents.
def send_content
  response.headers['Accept-Ranges'] = 'bytes'
  return content_head if request.head?

  send_file_contents
end
|
30
|
+
|
31
|
+
# Create some headers for the datastream
|
32
|
+
# Options handed to +send_file_headers!+ when preparing the response.
#
# @return [Hash] disposition, content type and filename for the download
def content_options
  {
    disposition: 'inline',
    type: download_content_type,
    filename: file_name
  }
end
|
35
|
+
|
36
|
+
# render an HTTP HEAD response
|
37
|
+
# Render an HTTP HEAD response: headers only, no body.
#
# Content-Length is written as a String, matching +prepare_file_headers+;
# HTTP header values are expected to be strings.
def content_head
  response.headers['Content-Length'] = file.size.to_s
  head :ok, content_type: download_content_type
end
|
41
|
+
|
42
|
+
# Send the complete file as the response body with a 200 status.
#
# The file is read fully into memory before being handed to +stream_body+,
# so the memoized handle is closed afterwards instead of being left open
# until garbage collection.
def send_file_contents
  self.status = 200
  prepare_file_headers
  stream_body file.read
ensure
  # @file is the handle memoized by #file; nil if it was never opened.
  @file&.close
end
|
47
|
+
|
48
|
+
# Populate the response headers for a file download: the send-file
# headers, content type, content length (only if not already set) and
# the file's modification time.
def prepare_file_headers
  send_file_headers! content_options
  response_headers = response.headers
  response_headers['Content-Type'] = download_content_type
  response_headers['Content-Length'] ||= file.size.to_s
  # Prevent Rack::ETag from calculating a digest over body
  modified_at = File.mtime(file_path).utc
  response_headers['Last-Modified'] = modified_at.strftime("%a, %d %b %Y %T GMT")
  self.content_type = download_content_type
end
|
56
|
+
|
57
|
+
# Assign the given object as the body of the response.
#
# @param iostream [Object] the content to send (the caller in this
#   module passes the file's contents as a String)
def stream_body(iostream)
  self.response_body = iostream
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
module ErroredEntries
|
5
|
+
extend ActiveSupport::Concern
|
6
|
+
|
7
|
+
# Write a CSV of the entries whose latest status is 'Failed'.
#
# When @errored_entries has not been populated it is looked up from the
# latest statuses of the importer/exporter's entries. The first row holds
# +import_fields+; each following row is built by +build_errored_entry_row+.
# The file handle is closed even if building or writing a row raises.
#
# @return [true, nil] true when a file was written, nil when there are
#   no errored entries
def write_errored_entries_file
  if @errored_entries.blank?
    entry_ids = importerexporter.entries.pluck(:id)
    error_statuses = Bulkrax::Status.latest_by_statusable
                                    .includes(:statusable)
                                    .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Failed')
    @errored_entries = error_statuses.map(&:statusable)
  end
  return if @errored_entries.blank?

  file = setup_errored_entries_file
  begin
    headers = import_fields
    file.puts(headers.to_csv)
    @errored_entries.each do |ee|
      file.puts(build_errored_entry_row(headers, ee))
    end
  ensure
    file.close
  end
  true
end
|
27
|
+
|
28
|
+
# Build one CSV line for an errored entry.
#
# Every header starts with a nil placeholder, then the entry's raw
# metadata (keys symbolized) is merged over it; the resulting values are
# serialized in that order.
#
# @param headers [Array] column names
# @param errored_entry [#raw_metadata] the entry to serialize
# @return [String] a CSV-formatted line
def build_errored_entry_row(headers, errored_entry)
  placeholders = headers.each_with_object({}) do |header, acc|
    acc[:"#{header}"] = nil
  end
  merged = placeholders.merge(errored_entry.raw_metadata.symbolize_keys)
  merged.values.to_csv
end
|
39
|
+
|
40
|
+
# Create the directory for the errored-entries CSV if needed and open the
# file for writing.
#
# @return [File] an open, writable handle; the caller must close it
def setup_errored_entries_file
  csv_path = importerexporter.errored_entries_csv_path
  FileUtils.mkdir_p(File.dirname(csv_path))
  File.open(csv_path, 'w')
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Bulkrax
|
3
|
+
module ExportBehavior
|
4
|
+
extend ActiveSupport::Concern
|
5
|
+
|
6
|
+
delegate :export_type, :exporter_export_path, to: :importerexporter
|
7
|
+
|
8
|
+
# Build the export for this entry and record the outcome.
#
# Metadata is always built; files are written only for 'full' exports
# whose parser class name does not include 'Bagit'. Solr HTTP errors and
# CollectionsCreatedError are re-raised to the caller; any other
# StandardError is recorded through +status_info+. On success
# +status_info+ is called with no arguments.
def build_for_exporter
  build_export_metadata
  full_export = export_type == 'full'
  write_files if full_export && !importerexporter.parser_klass.include?('Bagit')
rescue RSolr::Error::Http, CollectionsCreatedError
  raise
rescue StandardError => e
  status_info(e)
else
  status_info
end
|
18
|
+
|
19
|
+
# Placeholder that always raises; including classes are expected to
# provide their own implementation.
#
# @raise [StandardError] always, with the message 'not implemented'
def build_export_metadata
  raise StandardError, 'not implemented'
end
|
22
|
+
|
23
|
+
# The repository object being exported, looked up by this entry's
# identifier and memoized after the first call.
def hyrax_record
  return @hyrax_record if @hyrax_record

  @hyrax_record = ActiveFedora::Base.find(identifier)
end
|
26
|
+
|
27
|
+
# Copy the original file of each of the record's file sets into the
# 'files' directory under the exporter's export path. Collections are
# skipped.
#
# Differences from a naive open/read/write:
# * file sets for which +filename+ returns blank are skipped *before*
#   anything is fetched, so a file set without an original file no longer
#   raises on +original_file.uri+;
# * the source is opened with URI.open (Kernel#open does not handle URLs
#   on Ruby 3.0+) in block form so the source IO is always closed;
# * contents are streamed to disk rather than read into one String.
def write_files
  return if hyrax_record.is_a?(Collection)

  require 'open-uri' # lazily loaded; only needed for full exports
  path = File.join(exporter_export_path, 'files')
  FileUtils.mkdir_p(path)

  hyrax_record.file_sets.each do |fs|
    file = filename(fs)
    next if file.blank?

    URI.open(fs.original_file.uri) do |io|
      File.open(File.join(path, file), 'wb') { |f| IO.copy_stream(io, f) }
    end
  end
end
|
42
|
+
|
43
|
+
# Append the file_set id to ensure a unique filename
|
44
|
+
# Build the export filename for a file set's original file.
#
# The file set id is prefixed unless the stored name already contains it,
# and an extension derived from the recorded mime type is appended when
# that mime type does not match the one implied by the stored name.
#
# @param file_set [#id, #original_file]
# @return [String, nil] nil when the file set has no original file
def filename(file_set)
  original = file_set.original_file
  return if original.blank?

  fn = original.file_name.first
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first

  base = fn.include?(file_set.id) ? fn : "#{file_set.id}_#{fn}"
  mime.to_s == ext_mime.to_s ? base : "#{base}.#{mime.to_sym}"
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
module FileFactory
|
5
|
+
extend ActiveSupport::Concern
|
6
|
+
|
7
|
+
# Find existing files or upload new files. This assumes a Work will have unique file titles;
|
8
|
+
# and that those file titles will not have changed
|
9
|
+
# could filter by URIs instead (slower).
|
10
|
+
# When an uploaded_file already exists we do not want to pass its id in `file_attributes`
|
11
|
+
# otherwise it gets reuploaded by `work_actor`.
|
12
|
+
# support multiple files; ensure attributes[:file] is an Array
|
13
|
+
# Ids of the uploads created for this object's local files.
#
# Collections never carry files. For anything else, attributes[:file] is
# replaced with the existing paths from +file_paths+ before the files are
# imported.
#
# @return [Array] whatever +import_files+ returns, or [] for collections
def upload_ids
  if klass == Collection
    []
  else
    attributes[:file] = file_paths
    import_files
  end
end
|
18
|
+
|
19
|
+
# Assemble the file-related attributes for the object.
#
# @param update_files [Boolean] remembered in @update_files, which
#   +update_filesets+ consults
# @return [Hash] may contain :uploaded_files and/or :remote_files; always
#   empty for collections
def file_attributes(update_files = false)
  @update_files = update_files
  return {} if klass == Collection

  {}.tap do |attrs|
    attrs[:uploaded_files] = upload_ids if attributes[:file].present?
    attrs[:remote_files] = new_remote_files if new_remote_files.present?
  end
end
|
27
|
+
|
28
|
+
# It's possible to get just an array of strings here, so we need to make sure they are all hashes
|
29
|
+
# Normalize attributes[:remote_files] into an array of hashes.
#
# Hash entries pass through untouched, String entries become
# { url: <string> }, and anything else is logged and dropped. A non-empty
# result is reused on later calls.
#
# @return [Array<Hash>]
def parsed_remote_files
  return @parsed_remote_files if @parsed_remote_files.present?

  @parsed_remote_files = attributes[:remote_files] || []
  normalized = @parsed_remote_files.map do |file_value|
    case file_value
    when Hash then file_value
    when String then { url: file_value }
    else
      Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
      nil
    end
  end
  @parsed_remote_files = normalized.compact
end
|
45
|
+
|
46
|
+
# Remote files that still need to be fetched: entries of
# +parsed_remote_files+ whose :url is an absolute URI and, when the object
# already has file sets, whose url is not the import_url of any of them.
# The result is memoized.
#
# @return [Array<Hash>]
def new_remote_files
  return @new_remote_files if @new_remote_files

  has_file_sets = object.present? && object.file_sets.present?
  @new_remote_files = parsed_remote_files.select do |file|
    # is the url valid?
    next false unless file[:url]&.match(URI::ABS_URI)

    # does the file already exist
    already_imported = has_file_sets &&
                       object.file_sets.any? { |fs| fs.import_url && fs.import_url == file[:url] }
    !already_imported
  end
end
|
61
|
+
|
62
|
+
# Paths from attributes[:file] (a single value or a list) that exist on
# disk. Memoized.
#
# @return [Array]
def file_paths
  @file_paths ||= Array.wrap(attributes[:file]).select { |candidate| File.exist?(candidate) }
end
|
65
|
+
|
66
|
+
# Retrieve the original filenames of the files already attached to the object
|
67
|
+
# Names of the original files of the object's existing file sets.
#
# File sets that have no original file contribute nothing instead of
# raising NoMethodError.
#
# @return [Array, nil] nil when there is no object or it has no file sets
def work_files_filenames
  return unless object.present? && object.file_sets.present?

  object.file_sets.flat_map { |file_set| (file_set.original_file&.file_name).to_a }
end
|
70
|
+
|
71
|
+
# Retrieve the filenames for the files to be imported
|
72
|
+
# Base names of the local files to be imported (see +file_paths+).
# File.basename is used so String and Pathname entries both work.
#
# @return [Array<String>]
def import_files_filenames
  file_paths.map { |path| File.basename(path) }
end
|
75
|
+
|
76
|
+
# Called if #replace_files is true
|
77
|
+
# Destroy all file_sets for this object
|
78
|
+
# Reload the object to ensure the remaining methods have the most up to date object
|
79
|
+
# Destroy every file set of the object through Hyrax's FileSetActor, then
# reload the object into @object and log the deletion. Does nothing when
# there is no object or it has no file sets.
def destroy_existing_files
  return if object.blank? || object.file_sets.blank?

  object.file_sets.each do |file_set|
    Hyrax::Actors::FileSetActor.new(file_set, @user).destroy
  end
  @object = object.reload
  log_deleted_fs(object)
end
|
87
|
+
|
88
|
+
# Replace the first file of each remaining local file set with the
# 'removed.png' placeholder image, versioning the previous content first,
# and schedule derivative creation one minute later.
#
# The placeholder is opened in binary mode inside a block that spans both
# +add_file+ and +save+, so the handle is closed once the file set has
# been saved instead of being left open for every file set processed.
def set_removed_filesets
  local_file_sets.each do |fileset|
    fileset.files.first.create_version
    opts = {
      path: fileset.files.first.id.split('/', 2).last,
      original_name: 'removed.png',
      mime_type: 'image/png'
    }

    File.open(Bulkrax.removed_image_path, 'rb') do |image|
      fileset.add_file(image, opts)
      fileset.save
    end
    ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
  end
end
|
101
|
+
|
102
|
+
# The object's ordered file sets, memoized (nil when there is no object).
# Note that +update_filesets+ shifts entries off this memoized value.
def local_file_sets
  return @local_file_sets if @local_file_sets

  @local_file_sets = object&.ordered_file_sets
end
|
105
|
+
|
106
|
+
# Import every path in +file_paths+ and collect the non-nil results of
# +import_file+. If any local file sets are still present afterwards they
# are handed to +set_removed_filesets+.
#
# @return [Array] non-nil results of +import_file+
def import_files
  uploaded = file_paths.map { |file_path| import_file(file_path) }
  uploaded.compact!
  set_removed_filesets if local_file_sets.present?
  uploaded
end
|
111
|
+
|
112
|
+
# Wrap a local path in an unsaved Hyrax::UploadedFile owned by @user and
# pass it to +update_filesets+.
#
# @param path [String] path of the file to import
# @return whatever +update_filesets+ returns
def import_file(path)
  upload = Hyrax::UploadedFile.new.tap do |uploaded_file|
    uploaded_file.user_id = @user.id
    uploaded_file.file = CarrierWave::SanitizedFile.new(path)
  end
  update_filesets(upload)
end
|
118
|
+
|
119
|
+
# Either replace the content of the next existing file set with the given
# upload, or save the upload so it can be attached as a new file.
#
# When @update_files is set and local file sets remain, the next one is
# shifted off +local_file_sets+. If its first file's checksum equals the
# SHA1 of the upload nothing is changed; otherwise the current content is
# versioned, the new content is added and saved, and derivative creation
# is scheduled one minute later. The replacement file is opened in binary
# mode inside a block spanning +add_file+ and +save+ so the handle is
# closed afterwards rather than leaked.
#
# @param current_file [Hyrax::UploadedFile] unsaved upload wrapping the file
# @return [Object, nil] the saved upload's id when a new upload was
#   created; nil when an existing file set was reused or left unchanged
def update_filesets(current_file)
  if @update_files && local_file_sets.present?
    fileset = local_file_sets.shift
    return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s

    fileset.files.first.create_version
    opts = {
      path: fileset.files.first.id.split('/', 2).last,
      original_name: current_file.file.file.original_filename,
      mime_type: current_file.file.content_type
    }

    File.open(current_file.file.to_s, 'rb') do |replacement|
      fileset.add_file(replacement, opts)
      fileset.save
    end
    ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
    nil
  else
    current_file.save
    current_file.id
  end
end
|
139
|
+
end
|
140
|
+
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
module HasMatchers
|
5
|
+
extend ActiveSupport::Concern
|
6
|
+
|
7
|
+
included do
  # Class-level registry of matchers, keyed by field name; initialized to
  # an empty hash unless a value is already present.
  class_attribute :matchers
  self.matchers ||= {}
end
|
11
|
+
|
12
|
+
class_methods do
  # Class used to build matchers; override to supply a different one.
  def matcher_class
    Bulkrax::ApplicationMatcher
  end

  # Build a matcher for +name+ from the recognized options in +args+ and
  # register it in +matchers+ under that name.
  #
  # @param name [String] the field the matcher maps to
  # @param args [Hash] may carry :parsed, :split, :if, :excluded and
  #   :nested_type; other keys are ignored
  # @return the newly built matcher
  def matcher(name, args = {})
    options = {
      to: name,
      parsed: args[:parsed],
      split: args[:split],
      if: args[:if],
      excluded: args[:excluded],
      nested_type: args[:nested_type]
    }
    built = matcher_class.new(**options)
    self.matchers[name] = built
  end
end
|
29
|
+
|
30
|
+
# Map one source field onto every target field it is mapped to and store
# the resulting value(s) in +parsed_metadata+.
#
# For each target name from +field_to+: unsupported fields are skipped; a
# field with a mapping entry is run through a matcher (a falsy matcher
# result skips the field); unmapped fields are taken as-is, as a list when
# the property is multiple and as a single value otherwise.
#
# @param node_name [String] name of the source field
# @param node_content [Object] raw content of the source field
# @param index [Integer, nil] position used when the target is a list of objects
def add_metadata(node_name, node_content, index = nil)
  field_to(node_name).each do |name|
    # the field matched to a pre parsed value in application_matcher.rb
    matcher = self.class.matcher(name, mapping[name].symbolize_keys) if mapping[name]
    # the "key" of an object property. e.g. { object_name: { alpha: 'beta' } }
    object_name = get_object_name(name) || false
    # the property has multiple values. e.g. 'letters': ['a', 'b', 'c']
    multiple = multiple?(name)
    # the property's value is an array of object(s)
    object_multiple = object_name && multiple?(object_name)

    supported = field_supported?(name) || (object_name && field_supported?(object_name))
    next unless supported

    if object_name
      Rails.logger.info("Bulkrax Column automatically matched object #{node_name}, #{node_content}")
      parsed_metadata[object_name] ||= object_multiple ? [{}] : {}
    end

    value = if matcher
              result = matcher.result(self, node_content)
              next unless result
              matched_metadata(multiple, name, result, object_multiple)
            else
              Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}")
              multiple ? multiple_metadata(node_content) : single_metadata(node_content)
            end

    set_parsed_data(object_multiple, object_name, name, index, value) if value
  end
end
|
59
|
+
|
60
|
+
# Store +value+ under +name+ in the right place inside +parsed_metadata+:
# in the hash at +index+ of a list of objects, in a single nested object,
# or at the top level. Array values are appended to what is already
# stored; any other value replaces it.
#
# @param object_multiple [Boolean] the target is a list of objects
# @param object_name [String, false] key of the nested object, if any
# @param name [String] the field name
# @param index [Integer, nil] position within the list of objects
# @param value [Object] the parsed value
def set_parsed_data(object_multiple, object_name, name, index, value)
  target = if object_multiple
             parsed_metadata[object_name][index] ||= {}
           elsif object_name
             parsed_metadata[object_name]
           else
             parsed_metadata
           end

  target[name] ||= []
  if value.is_a?(Array)
    target[name] += value
  else
    target[name] = value
  end
end
|
85
|
+
|
86
|
+
# Reduce +content+ to a single stripped String.
#
# A list of values is joined with '; ' (each element stripped) rather than
# being stringified as a whole, which would yield the Array's inspect
# output. Scalars are converted with +to_s+ and stripped.
#
# @param content [Object] a scalar, an Array, or a Nokogiri node set
# @return [String, nil] nil when content is nil or false
def single_metadata(content)
  content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
  return unless content

  Array.wrap(content).map { |value| value.to_s.strip }.join('; ')
end
|
91
|
+
|
92
|
+
# Coerce +content+ into a list of values: Arrays are returned unchanged,
# anything else is stripped and wrapped in a one-element Array.
#
# @param content [Object] a String, an Array, or a Nokogiri node set
# @return [Array, nil] nil when content is nil or false
def multiple_metadata(content)
  content = content.content if content.is_a?(Nokogiri::XML::NodeSet)
  return unless content
  return content if content.is_a?(Array)

  Array.wrap(content.strip)
end
|
97
|
+
|
98
|
+
# Shape a matcher result as a list or as a single value.
#
# Inside a list of objects the choice follows the mapping's 'nested_type'
# ('Array' means list); otherwise it follows +multiple+.
#
# @param multiple [Boolean] the target property holds multiple values
# @param name [String] field name, used to look up 'nested_type' in the mapping
# @param result [Object] the matcher's result
# @param object_multiple [Boolean] the target lives in a list of objects
def matched_metadata(multiple, name, result, object_multiple)
  as_list = if object_multiple
              mapping[name]['nested_type'] == 'Array'
            else
              multiple
            end

  as_list ? multiple_metadata(result) : single_metadata(result)
end
|
111
|
+
|
112
|
+
# Whether +field+ (with any '_attributes' text removed) can be imported.
#
# Excluded fields are never supported; a fixed set of special fields
# always is; anything else must be both a method and a declared property
# of +factory_class+.
#
# @param field [String]
# @return [Boolean]
def field_supported?(field)
  field = field.gsub('_attributes', '')
  return false if excluded?(field)
  return true if %w[collections file remote_files model delete].include?(field)

  factory_class.method_defined?(field) && factory_class.properties[field].present?
end
|
119
|
+
|
120
|
+
# Whether +field+ holds multiple values.
#
# 'file', 'remote_files' and 'collections' always do, 'model' never does;
# any other field must be supported and flagged 'multiple' in the
# properties of +factory_class+.
#
# @param field [String]
# @return [Object] truthy when the field is multi-valued
def multiple?(field)
  case field
  when 'file', 'remote_files', 'collections' then return true
  when 'model' then return false
  end

  field_supported?(field) && factory_class&.properties&.[](field)&.[]('multiple')
end
|
126
|
+
|
127
|
+
# The 'object' setting of the mapping entry for +field+, if any.
#
# @param field [String]
# @return [Object, nil] nil when there is no mapping, no entry for the
#   field, or no 'object' setting
def get_object_name(field)
  return nil if mapping.nil?

  entry = mapping[field]
  entry.nil? ? nil : entry['object']
end
|
130
|
+
|
131
|
+
# Hyrax field to use for the given import field
|
132
|
+
# @param field [String] the importer field name
|
133
|
+
# @return [Array] hyrax fields
|
134
|
+
# Hyrax field(s) to use for the given import field.
#
# A mapping key is selected when +field+ equals the key itself or appears
# in the key's 'from' setting (a single value or a list). Mapping entries
# without a value are skipped; a `return` in that position would exit the
# whole method with nil and break callers that iterate the result.
#
# @param field [String] the importer field name
# @return [Array] hyrax fields; [field] when nothing in the mapping matches
def field_to(field)
  fields = mapping&.map do |key, value|
    next unless value

    if value['from'].instance_of?(Array)
      key if value['from'].include?(field) || key == field
    elsif (value['from'] == field) || key == field
      key
    end
  end&.compact

  return [field] if fields.nil? || fields.empty?

  fields
end
|
148
|
+
|
149
|
+
# Check whether a field is explicitly excluded in the mapping
|
150
|
+
# Check whether a field is explicitly excluded in the mapping.
#
# @param field [String]
# @return [Object] the entry's 'excluded' setting when truthy, otherwise false
def excluded?(field)
  entry = mapping[field]
  return false if entry.blank?

  entry['excluded'] || false
end
|
154
|
+
end
|
155
|
+
end
|