bulkrax 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +205 -0
- data/README.md +202 -0
- data/Rakefile +42 -0
- data/app/assets/config/bulkrax_manifest.js +2 -0
- data/app/assets/javascripts/bulkrax/application.js +14 -0
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/entries.js +15 -0
- data/app/assets/javascripts/bulkrax/exporters.js +60 -0
- data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
- data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
- data/app/assets/stylesheets/bulkrax/application.css +15 -0
- data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
- data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
- data/app/controllers/bulkrax/application_controller.rb +8 -0
- data/app/controllers/bulkrax/entries_controller.rb +44 -0
- data/app/controllers/bulkrax/exporters_controller.rb +125 -0
- data/app/controllers/bulkrax/importers_controller.rb +315 -0
- data/app/controllers/concerns/bulkrax/api.rb +29 -0
- data/app/factories/bulkrax/object_factory.rb +230 -0
- data/app/helpers/bulkrax/application_helper.rb +15 -0
- data/app/helpers/bulkrax/exporters_helper.rb +6 -0
- data/app/helpers/bulkrax/importers_helper.rb +13 -0
- data/app/helpers/bulkrax/validation_helper.rb +153 -0
- data/app/jobs/bulkrax/application_job.rb +6 -0
- data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
- data/app/jobs/bulkrax/delete_work_job.rb +16 -0
- data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
- data/app/jobs/bulkrax/export_work_job.rb +37 -0
- data/app/jobs/bulkrax/exporter_job.rb +14 -0
- data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
- data/app/jobs/bulkrax/import_work_job.rb +32 -0
- data/app/jobs/bulkrax/importer_job.rb +26 -0
- data/app/mailers/bulkrax/application_mailer.rb +8 -0
- data/app/matchers/bulkrax/application_matcher.rb +113 -0
- data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
- data/app/matchers/bulkrax/csv_matcher.rb +6 -0
- data/app/matchers/bulkrax/oai_matcher.rb +6 -0
- data/app/models/bulkrax/application_record.rb +7 -0
- data/app/models/bulkrax/csv_collection_entry.rb +19 -0
- data/app/models/bulkrax/csv_entry.rb +163 -0
- data/app/models/bulkrax/entry.rb +104 -0
- data/app/models/bulkrax/exporter.rb +122 -0
- data/app/models/bulkrax/exporter_run.rb +7 -0
- data/app/models/bulkrax/import_failed.rb +13 -0
- data/app/models/bulkrax/importer.rb +155 -0
- data/app/models/bulkrax/importer_run.rb +8 -0
- data/app/models/bulkrax/oai_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +74 -0
- data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_set_entry.rb +19 -0
- data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
- data/app/models/bulkrax/rdf_entry.rb +90 -0
- data/app/models/bulkrax/status.rb +25 -0
- data/app/models/bulkrax/xml_entry.rb +73 -0
- data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
- data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
- data/app/models/concerns/bulkrax/file_factory.rb +140 -0
- data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
- data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
- data/app/models/concerns/bulkrax/status_info.rb +56 -0
- data/app/parsers/bulkrax/application_parser.rb +299 -0
- data/app/parsers/bulkrax/bagit_parser.rb +157 -0
- data/app/parsers/bulkrax/csv_parser.rb +266 -0
- data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
- data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
- data/app/parsers/bulkrax/xml_parser.rb +103 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/show.html.erb +63 -0
- data/app/views/bulkrax/exporters/_form.html.erb +120 -0
- data/app/views/bulkrax/exporters/edit.html.erb +23 -0
- data/app/views/bulkrax/exporters/index.html.erb +67 -0
- data/app/views/bulkrax/exporters/new.html.erb +23 -0
- data/app/views/bulkrax/exporters/show.html.erb +124 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
- data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
- data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
- data/app/views/bulkrax/importers/_form.html.erb +35 -0
- data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
- data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
- data/app/views/bulkrax/importers/edit.html.erb +20 -0
- data/app/views/bulkrax/importers/index.html.erb +77 -0
- data/app/views/bulkrax/importers/new.html.erb +25 -0
- data/app/views/bulkrax/importers/show.html.erb +175 -0
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
- data/app/views/layouts/bulkrax/application.html.erb +14 -0
- data/config/locales/bulkrax.en.yml +36 -0
- data/config/routes.rb +18 -0
- data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
- data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
- data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
- data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
- data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
- data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
- data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
- data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
- data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
- data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
- data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
- data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
- data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
- data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
- data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
- data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
- data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
- data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
- data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
- data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
- data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
- data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
- data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
- data/lib/bulkrax.rb +161 -0
- data/lib/bulkrax/engine.rb +37 -0
- data/lib/bulkrax/version.rb +5 -0
- data/lib/generators/bulkrax/install_generator.rb +80 -0
- data/lib/generators/bulkrax/templates/README +3 -0
- data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
- data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
- data/lib/generators/bulkrax/templates/bin/importer +140 -0
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
- data/lib/tasks/bulkrax_tasks.rake +6 -0
- metadata +388 -0
@@ -0,0 +1,128 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Raised when a child Work/Collection cannot be found in the repository yet.
  class ChildWorksError < RuntimeError; end

  # Adds parent/child membership between the object behind a parent entry and
  # the objects behind its child entries.
  #
  # Job arguments (positional, see #perform):
  #   args[0] - id of the parent Bulkrax::Entry
  #   args[1] - ids of the child Bulkrax::Entry records
  #   args[2] - id of the ImporterRun whose child counters are updated
  class ChildRelationshipsJob < ApplicationJob
    queue_as :import

    # Dispatches on the parent's factory class. When any child object is
    # missing (ChildWorksError) the job re-enqueues itself with the same
    # arguments instead of failing.
    def perform(*args)
      @args = args

      entry.factory_class == Collection ? collection_membership : work_membership
    rescue Bulkrax::ChildWorksError
      # Not all of the Works/Collections exist yet; reschedule
      reschedule(args[0], args[1], args[2])
    end

    # Parent is a Collection: each child work is attached individually, then
    # all child collections are attached in a single factory run.
    def collection_membership
      collection_ids, work_ids = child_works_hash.keys.partition do |id|
        child_works_hash[id][:class_name] == 'Collection'
      end

      # add collection to works
      work_ids.each { |work_id| work_child_collection_parent(work_id) }

      # add collections to collection
      collection_parent_collection_child(collection_ids) if collection_ids.present?
    end

    # Parent is a Work: only non-Collection children are attached.
    def work_membership
      # reject any Collections, they can't be children of Works
      work_ids = child_works_hash.reject { |_id, details| details[:class_name] == 'Collection' }.keys
      discarded = child_entries.length - work_ids.length
      if discarded.positive?
        Rails.logger.warn("Cannot add collections as children of works: #{discarded} collections were discarded for parent entry #{entry.id} (of #{child_entries.length})")
      end

      # add works to work
      work_parent_work_child(work_ids) if work_ids.present?
    end

    # @return [Bulkrax::Entry] the parent entry (args[0]), memoized
    def entry
      @entry ||= Bulkrax::Entry.find(@args[0])
    end

    # @return [Array<Bulkrax::Entry>] the child entries (args[1]), memoized
    def child_entries
      @child_entries ||= @args[1].map { |child_id| Bulkrax::Entry.find(child_id) }
    end

    # Maps the id of every child object to its class name and the child
    # entry's identifier (stored under the parser's source_identifier key).
    #
    # @raise [ChildWorksError] if any child object cannot be found
    def child_works_hash
      @child_works_hash ||= child_entries.each_with_object({}) do |child_entry, found|
        record = child_entry.factory.find
        # If we can't find the Work/Collection, raise a custom error
        raise ChildWorksError if record.blank?

        found[record.id] = {
          class_name: record.class.to_s,
          entry.parser.source_identifier => child_entry.identifier
        }
      end
    end

    # @return the ImporterRun id passed as args[2]
    def importer_run_id
      @args[2]
    end

    # @return the user of the parent entry's importer/exporter, memoized
    def user
      @user ||= entry.importerexporter.user
    end

    private

    # rubocop:disable Rails/SkipsModelValidations

    # Runs the given block and bumps :processed_children on the importer run.
    # Any StandardError raised by the block is recorded on the parent entry
    # via status_info and counted under :failed_children instead.
    def track_children
      yield
      ImporterRun.find(importer_run_id).increment!(:processed_children)
    rescue StandardError => e
      entry.status_info(e)
      ImporterRun.find(importer_run_id).increment!(:failed_children)
    end

    # Work-Collection membership is added to the child as member_of_collection_ids
    # This is adding the reverse relationship, from the child to the parent
    def work_child_collection_parent(work_id)
      track_children do
        child = child_works_hash[work_id]
        attrs = { id: work_id, collections: [{ id: entry&.factory&.find&.id }] }
        Bulkrax::ObjectFactory.new(attributes: attrs,
                                   source_identifier_value: child[entry.parser.source_identifier],
                                   work_identifier: entry.parser.work_identifier,
                                   replace_files: false,
                                   user: user,
                                   klass: child[:class_name].constantize).run
      end
    end

    # Collection-Collection membership is added to the parent as member_ids
    def collection_parent_collection_child(member_ids)
      track_children do
        attrs = { id: entry&.factory&.find&.id, children: member_ids }
        Bulkrax::ObjectFactory.new(attributes: attrs,
                                   source_identifier_value: entry.identifier,
                                   work_identifier: entry.parser.work_identifier,
                                   replace_files: false,
                                   user: user,
                                   klass: entry.factory_class).run
      end
    end

    # Work-Work membership is added to the parent as member_ids
    def work_parent_work_child(member_ids)
      track_children do
        # build work_members_attributes, keyed by position
        members = member_ids.each_with_index.map { |member, index| [index, { id: member }] }.to_h
        attrs = { id: entry&.factory&.find&.id, work_members_attributes: members }
        Bulkrax::ObjectFactory.new(attributes: attrs,
                                   source_identifier_value: entry.identifier,
                                   work_identifier: entry.parser.work_identifier,
                                   replace_files: false,
                                   user: user,
                                   klass: entry.factory_class).run
      end
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Re-enqueues this job with the same arguments after a ten minute wait.
    def reschedule(entry_id, child_entry_ids, importer_run_id)
      ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Deletes the object found through an entry's factory and updates the
  # counters on the given importer run.
  class DeleteWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # @param entry the entry whose factory is used to look up the object
    # @param importer_run the run whose :deleted_records is incremented and
    #   :enqueued_records decremented
    def perform(entry, importer_run)
      # find may return nil; in that case nothing is deleted but the
      # counters are still updated
      entry.factory.find&.delete
      importer_run.increment!(:deleted_records)
      importer_run.decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Downloads a cloud file selected through browse_everything.
  class DownloadCloudFileJob < ApplicationJob
    queue_as :import

    # Retrieve cloud file and write to the imports directory
    # Note: if using the file system, the mounted directory in
    # browse_everything MUST be shared by web and worker servers
    #
    # @param file the browse_everything file spec to retrieve
    # @param target_file the destination handed to the retriever
    def perform(file, target_file)
      retriever = BrowseEverything::Retriever.new
      # The block is intentionally empty and its arguments are unused, hence
      # the underscore prefixes. It is still the place to hook in progress
      # reporting; note the first argument is the filename, not a chunk of data.
      retriever.download(file, target_file) do |_filename, _retrieved, _total|
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Builds and saves a single export entry, keeps the ExporterRun counters in
  # step, and marks the exporter complete once nothing is left enqueued.
  #
  # Job arguments (positional):
  #   args[0] - id of the Entry to export
  #   args[1] - id of the ExporterRun being updated
  class ExportWorkJob < ApplicationJob
    queue_as :export

    # @raise re-raises any StandardError from build/save; when the entry ends
    #   up failed, raises the error class recorded on its current status
    def perform(*args)
      entry = Entry.find(args[0])
      run_id = args[1]

      begin
        entry.build
        entry.save
      rescue StandardError
        record_outcome(run_id, :failed_records)
        raise
      end

      if entry.failed?
        record_outcome(run_id, :failed_records)
        raise entry.reload.current_status.error_class.constantize
      end

      record_outcome(run_id, :processed_records)
    end

    private

    # Bumps the given counter, takes the entry off the enqueued count and then
    # re-evaluates the exporter status. The status check runs for failures as
    # well: previously it was skipped whenever the job raised, so an export
    # whose last entry failed never reached 'Complete (with failures)'.
    # The run is looked up again for each update, as the original code did.
    # rubocop:disable Rails/SkipsModelValidations
    def record_outcome(run_id, counter)
      ExporterRun.find(run_id).increment!(counter)
      ExporterRun.find(run_id).decrement!(:enqueued_records)
      update_exporter_status(run_id)
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Marks the exporter as complete once no records remain enqueued.
    def update_exporter_status(run_id)
      exporter_run = ExporterRun.find(run_id)
      return if exporter_run.enqueued_records.positive?

      status = exporter_run.failed_records.positive? ? 'Complete (with failures)' : 'Complete'
      exporter_run.exporter.status_info(status)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Builds and saves a collection entry, grants the importing user manage
  # access on the collection, and updates the ImporterRun counters.
  #
  # Job arguments (positional):
  #   args[0] - id of the Entry to import
  #   args[1] - id of the ImporterRun being updated
  class ImportWorkCollectionJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # @raise re-raises any StandardError after counting the collection as failed
    def perform(*args)
      entry = Entry.find(args[0])
      begin
        entry.build
        entry.save
        add_user_to_permission_template!(entry)
        ImporterRun.find(args[1]).increment!(:processed_collections)
        ImporterRun.find(args[1]).decrement!(:enqueued_records)
      rescue StandardError
        ImporterRun.find(args[1]).increment!(:failed_collections)
        ImporterRun.find(args[1]).decrement!(:enqueued_records)
        # bare raise re-raises the current exception unchanged
        raise
      end
    end
    # rubocop:enable Rails/SkipsModelValidations

    private

    # Gives the importer's user 'manage' access on the collection's permission
    # template, then resets the collection's access controls.
    def add_user_to_permission_template!(entry)
      user = ::User.find(entry.importerexporter.user_id)
      collection = entry.factory.find
      permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)

      # find_or_create_by! keeps re-imports idempotent: importing the same
      # collection again must not add (or fail on) a duplicate access grant.
      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: permission_template.id,
        agent_id: user.user_key,
        agent_type: 'user',
        access: 'manage'
      )

      collection.reset_access_controls!
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Builds a single work entry and records the outcome on the ImporterRun.
  #
  # Job arguments (positional):
  #   args[0] - id of the Entry to import
  #   args[1] - id of the ImporterRun being updated
  class ImportWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # A Bulkrax::CollectionsCreatedError raised along the way re-enqueues the
    # job rather than failing it.
    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)
      entry.build

      # A non-complete entry is counted as failed and not retried here: whatever
      # parse error kept the work from being created will likely keep doing so.
      counter = entry.status == "Complete" ? :processed_records : :failed_records
      ImporterRun.find(run_id).increment!(counter)
      # never let the enqueued count drop below zero
      ImporterRun.find(run_id).decrement!(:enqueued_records) if ImporterRun.find(run_id).enqueued_records.positive?

      entry.save!
      entry.importer.current_run = ImporterRun.find(run_id)
      entry.importer.record_status
    rescue Bulkrax::CollectionsCreatedError
      reschedule(args[0], args[1])
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Re-enqueues this job for the same entry and run after one minute.
    def reschedule(entry_id, run_id)
      ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Runs an importer and, when the importer is schedulable, enqueues the next run.
  class ImporterJob < ApplicationJob
    queue_as :import

    # @param importer_id id of the Importer to run
    # @param only_updates_since_last_import [Boolean] forwarded to the importer
    #   as only_updates
    def perform(importer_id, only_updates_since_last_import = false)
      importer = Importer.find(importer_id)
      # return value unused; presumably called to set up the run - confirm
      # against Importer#current_run
      importer.current_run
      import(importer, only_updates_since_last_import)
      return unless importer.schedulable?

      schedule(importer)
    end

    # Imports collections, then works, then (unless the importer is
    # validate-only) parent/child relationships. Does nothing beyond setting
    # only_updates when the import is not valid.
    def import(importer, only_updates_since_last_import)
      # coerce nil to false
      importer.only_updates = only_updates_since_last_import || false
      return unless importer.valid_import?

      importer.import_collections
      importer.import_works
      return if importer.validate_only

      importer.create_parent_child_relationships
    end

    # Enqueues the next run at importer.next_import_at, importing updates only.
    def schedule(importer)
      ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true)
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'language_list'
|
4
|
+
|
5
|
+
module Bulkrax
  # Turns the raw content of one source field into the value stored on the
  # target property +to+: whitespace is normalised, the value is optionally
  # split into several values, and optionally passed through a field-specific
  # +parse_<field>+ method.
  class ApplicationMatcher
    # Target fields that have a +parse_<field>+ method.
    # New parse methods will need to be added here.
    PARSED_FIELDS = %w[remote_files language subject types model resource_type format_original].freeze

    attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type

    # @param args [Hash] attribute name => value; every key must match one of
    #   the accessors above
    def initialize(args)
      args.each { |key, value| send("#{key}=", value) }
    end

    # @param _parser unused
    # @param content the raw field value
    # @return [String, Array, Hash, nil] the processed value; nil when the
    #   matcher is excluded, targets a reserved property, has a malformed +if+
    #   condition, or the +if+ condition does not hold for +content+
    def result(_parser, content)
      return nil if excluded == true || Bulkrax.reserved_properties.include?(to)

      if self.if
        # A condition is only usable as a two-element [method_name, pattern]
        # pair. The previous check joined the two tests with && and so let
        # through arrays of any other size and two-character strings.
        return nil unless valid_condition?
        return unless content.send(self.if[0], Regexp.new(self.if[1]))
      end

      @result = content.to_s.gsub(/\s/, ' ') # remove any line feeds and tabs
      @result.strip!
      process_split
      @result = @result[0] if @result.is_a?(Array) && @result.size == 1
      process_parse
      @result
    end

    # Splits @result when +split+ is set: +true+ uses the default separators
    # (: ; |), any other truthy value is compiled into a Regexp.
    def process_split
      if split.is_a?(TrueClass)
        @result = @result.split(/\s*[:;|]\s*/) # default split by : ; |
      elsif split
        @result = @result.split(Regexp.new(split)).map(&:strip)
      end
    end

    # Applies the matching +parse_<field>+ method to @result (element-wise for
    # arrays, dropping nils) when +parsed+ is set.
    def process_parse
      # include? rather than == accounts for prefixed matchers
      field = PARSED_FIELDS.find { |name| to&.include?(name) }
      parse_method = "parse_#{field}"
      return unless parsed && respond_to?(parse_method)

      if @result.is_a?(Array)
        @result.each_with_index do |value, index|
          @result[index] = send(parse_method, value.strip)
        end
        @result.delete(nil)
      else
        @result = send(parse_method, @result)
      end
    end

    # @return [Hash, nil] +{ url: ... }+ for a present value, otherwise nil
    def parse_remote_files(src)
      { url: src.strip } if src.present?
    end

    # @return the language name found by LanguageList, or +src+ unchanged
    def parse_language(src)
      language = ::LanguageList::LanguageInfo.find(src.strip)
      language ? language.name : src
    end

    # @return [String, nil] the value in sentence case, nil when blank
    def parse_subject(src)
      sentence_case(src)
    end

    # @return [String] the stripped value, titleized
    def parse_types(src)
      src.to_s.strip.titleize
    end

    # Allow for mapping a model field to the work type or collection
    # @return the first extractable model for an Array, else the extracted model
    def parse_model(src)
      return extract_model(src) unless src.is_a?(Array)

      src.map { |candidate| extract_model(candidate) }.compact.first
    end

    # @return the last path segment when +src+ matches URI::ABS_URI, otherwise
    #   +src+ itself; nil if anything raises
    def extract_model(src)
      src&.match(URI::ABS_URI) ? src.split('/').last : src
    rescue StandardError
      nil
    end

    # Only add valid resource types
    # @return the label from Hyrax::ResourceTypesService, nil on KeyError
    def parse_resource_type(src)
      Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
    rescue KeyError
      nil
    end

    # @return [String, nil] the value in sentence case, nil when blank
    def parse_format_original(src)
      sentence_case(src)
    end

    private

    # True when +if+ is a two-element Array.
    def valid_condition?
      self.if.is_a?(Array) && self.if.length == 2
    end

    # Drops the case completely, then upcases the first letter.
    def sentence_case(src)
      string = src.to_s.strip.downcase
      return if string.blank?

      string.slice(0, 1).capitalize + string.slice(1..-1)
    end
  end
end
|