bulkrax 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +205 -0
- data/README.md +202 -0
- data/Rakefile +42 -0
- data/app/assets/config/bulkrax_manifest.js +2 -0
- data/app/assets/javascripts/bulkrax/application.js +14 -0
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/entries.js +15 -0
- data/app/assets/javascripts/bulkrax/exporters.js +60 -0
- data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
- data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
- data/app/assets/stylesheets/bulkrax/application.css +15 -0
- data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
- data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
- data/app/controllers/bulkrax/application_controller.rb +8 -0
- data/app/controllers/bulkrax/entries_controller.rb +44 -0
- data/app/controllers/bulkrax/exporters_controller.rb +125 -0
- data/app/controllers/bulkrax/importers_controller.rb +315 -0
- data/app/controllers/concerns/bulkrax/api.rb +29 -0
- data/app/factories/bulkrax/object_factory.rb +230 -0
- data/app/helpers/bulkrax/application_helper.rb +15 -0
- data/app/helpers/bulkrax/exporters_helper.rb +6 -0
- data/app/helpers/bulkrax/importers_helper.rb +13 -0
- data/app/helpers/bulkrax/validation_helper.rb +153 -0
- data/app/jobs/bulkrax/application_job.rb +6 -0
- data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
- data/app/jobs/bulkrax/delete_work_job.rb +16 -0
- data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
- data/app/jobs/bulkrax/export_work_job.rb +37 -0
- data/app/jobs/bulkrax/exporter_job.rb +14 -0
- data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
- data/app/jobs/bulkrax/import_work_job.rb +32 -0
- data/app/jobs/bulkrax/importer_job.rb +26 -0
- data/app/mailers/bulkrax/application_mailer.rb +8 -0
- data/app/matchers/bulkrax/application_matcher.rb +113 -0
- data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
- data/app/matchers/bulkrax/csv_matcher.rb +6 -0
- data/app/matchers/bulkrax/oai_matcher.rb +6 -0
- data/app/models/bulkrax/application_record.rb +7 -0
- data/app/models/bulkrax/csv_collection_entry.rb +19 -0
- data/app/models/bulkrax/csv_entry.rb +163 -0
- data/app/models/bulkrax/entry.rb +104 -0
- data/app/models/bulkrax/exporter.rb +122 -0
- data/app/models/bulkrax/exporter_run.rb +7 -0
- data/app/models/bulkrax/import_failed.rb +13 -0
- data/app/models/bulkrax/importer.rb +155 -0
- data/app/models/bulkrax/importer_run.rb +8 -0
- data/app/models/bulkrax/oai_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +74 -0
- data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_set_entry.rb +19 -0
- data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
- data/app/models/bulkrax/rdf_entry.rb +90 -0
- data/app/models/bulkrax/status.rb +25 -0
- data/app/models/bulkrax/xml_entry.rb +73 -0
- data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
- data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
- data/app/models/concerns/bulkrax/file_factory.rb +140 -0
- data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
- data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
- data/app/models/concerns/bulkrax/status_info.rb +56 -0
- data/app/parsers/bulkrax/application_parser.rb +299 -0
- data/app/parsers/bulkrax/bagit_parser.rb +157 -0
- data/app/parsers/bulkrax/csv_parser.rb +266 -0
- data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
- data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
- data/app/parsers/bulkrax/xml_parser.rb +103 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/show.html.erb +63 -0
- data/app/views/bulkrax/exporters/_form.html.erb +120 -0
- data/app/views/bulkrax/exporters/edit.html.erb +23 -0
- data/app/views/bulkrax/exporters/index.html.erb +67 -0
- data/app/views/bulkrax/exporters/new.html.erb +23 -0
- data/app/views/bulkrax/exporters/show.html.erb +124 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
- data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
- data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
- data/app/views/bulkrax/importers/_form.html.erb +35 -0
- data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
- data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
- data/app/views/bulkrax/importers/edit.html.erb +20 -0
- data/app/views/bulkrax/importers/index.html.erb +77 -0
- data/app/views/bulkrax/importers/new.html.erb +25 -0
- data/app/views/bulkrax/importers/show.html.erb +175 -0
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
- data/app/views/layouts/bulkrax/application.html.erb +14 -0
- data/config/locales/bulkrax.en.yml +36 -0
- data/config/routes.rb +18 -0
- data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
- data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
- data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
- data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
- data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
- data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
- data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
- data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
- data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
- data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
- data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
- data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
- data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
- data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
- data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
- data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
- data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
- data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
- data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
- data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
- data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
- data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
- data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
- data/lib/bulkrax.rb +161 -0
- data/lib/bulkrax/engine.rb +37 -0
- data/lib/bulkrax/version.rb +5 -0
- data/lib/generators/bulkrax/install_generator.rb +80 -0
- data/lib/generators/bulkrax/templates/README +3 -0
- data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
- data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
- data/lib/generators/bulkrax/templates/bin/importer +140 -0
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
- data/lib/tasks/bulkrax_tasks.rake +6 -0
- metadata +388 -0
@@ -0,0 +1,128 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Raised when a child Work/Collection cannot be found in the repository yet.
  class ChildWorksError < RuntimeError; end

  # Links the repository object of a parent entry to the objects of its child entries.
  #
  # Positional job arguments (kept as +*args+ so existing enqueued jobs still deserialize):
  #   args[0] - id of the parent Bulkrax::Entry
  #   args[1] - Array of child Bulkrax::Entry ids
  #   args[2] - id of the ImporterRun whose child counters are updated
  class ChildRelationshipsJob < ApplicationJob
    queue_as :import

    # Dispatches on the parent's factory class; if any child object is missing
    # (ChildWorksError) the whole job is rescheduled instead of failing.
    def perform(*args)
      @args = args

      if entry.factory_class == Collection
        collection_membership
      else
        work_membership
      end
    # Not all of the Works/Collections exist yet; reschedule
    rescue Bulkrax::ChildWorksError
      reschedule(args[0], args[1], args[2])
    end

    # Parent is a Collection: attach child works one by one, then child collections in one call.
    def collection_membership
      child_collection_ids, child_work_ids = child_works_hash.keys.partition { |id| child_collection?(id) }

      # add collection to works
      child_work_ids.each { |work_id| work_child_collection_parent(work_id) }

      # add collections to collection
      collection_parent_collection_child(child_collection_ids) if child_collection_ids.present?
    end

    # Parent is a Work: attach child works only.
    def work_membership
      # reject any Collections, they can't be children of Works
      members_works = child_works_hash.keys.reject { |id| child_collection?(id) }
      discarded = child_entries.length - members_works.length
      if discarded.positive?
        Rails.logger.warn("Cannot add collections as children of works: #{discarded} collections were discarded for parent entry #{entry.id} (of #{child_entries.length})")
      end
      work_parent_work_child(members_works) if members_works.present?
    end

    # @return [Bulkrax::Entry] the parent entry (memoized)
    def entry
      @entry ||= Bulkrax::Entry.find(@args[0])
    end

    # @return [Array<Bulkrax::Entry>] the child entries (memoized)
    def child_entries
      @child_entries ||= @args[1].map { |e| Bulkrax::Entry.find(e) }
    end

    # Maps each child object's id to its class name and source identifier (memoized).
    #
    # @raise [Bulkrax::ChildWorksError] when a child entry has no object yet
    def child_works_hash
      @child_works_hash ||= child_entries.each_with_object({}) do |child_entry, hash|
        work = child_entry.factory.find
        # If we can't find the Work/Collection, raise a custom error
        raise ChildWorksError if work.blank?
        hash[work.id] = { class_name: work.class.to_s, entry.parser.source_identifier => child_entry.identifier }
      end
    end

    def importer_run_id
      @args[2]
    end

    def user
      @user ||= entry.importerexporter.user
    end

    private

    # True when the child object stored under +id+ was recorded with class name 'Collection'.
    def child_collection?(id)
      child_works_hash[id][:class_name] == 'Collection'
    end

    # Id of the parent's repository object, or nil when it cannot be found.
    def parent_object_id
      entry&.factory&.find&.id
    end

    # Builds the ObjectFactory shared by all three relationship kinds.
    def relationship_factory(attrs, source_identifier_value, klass)
      Bulkrax::ObjectFactory.new(attributes: attrs,
                                 source_identifier_value: source_identifier_value,
                                 work_identifier: entry.parser.work_identifier,
                                 replace_files: false,
                                 user: user,
                                 klass: klass)
    end

    # rubocop:disable Rails/SkipsModelValidations
    # Runs the block and bumps :processed_children on success. Any StandardError
    # (including one raised while preparing the attributes inside the block) is
    # recorded on the parent entry and counted in :failed_children instead.
    def track_child_result
      yield
      ImporterRun.find(importer_run_id).increment!(:processed_children)
    rescue StandardError => e
      entry.status_info(e)
      ImporterRun.find(importer_run_id).increment!(:failed_children)
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Work-Collection membership is added to the child as member_of_collection_ids
    # This is adding the reverse relationship, from the child to the parent
    def work_child_collection_parent(work_id)
      track_child_result do
        child = child_works_hash[work_id]
        attrs = { id: work_id, collections: [{ id: parent_object_id }] }
        relationship_factory(attrs,
                             child[entry.parser.source_identifier],
                             child[:class_name].constantize).run
      end
    end

    # Collection-Collection membership is added to the parent as member_ids
    def collection_parent_collection_child(member_ids)
      track_child_result do
        attrs = { id: parent_object_id, children: member_ids }
        relationship_factory(attrs, entry.identifier, entry.factory_class).run
      end
    end

    # Work-Work membership is added to the parent as member_ids
    def work_parent_work_child(member_ids)
      track_child_result do
        # build work_members_attributes, keyed by position
        members = member_ids.each_with_index.each_with_object({}) do |(member, index), ids|
          ids[index] = { id: member }
        end
        attrs = { id: parent_object_id, work_members_attributes: members }
        relationship_factory(attrs, entry.identifier, entry.factory_class).run
      end
    end

    # Re-enqueues this job in 10 minutes with the same arguments.
    # NOTE(review): there is no retry limit here, so a child that never appears
    # keeps this job rescheduling indefinitely.
    def reschedule(entry_id, child_entry_ids, importer_run_id)
      ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Deletes the repository object belonging to an entry and updates the run counters.
  class DeleteWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # @param entry the entry whose factory is used to look up the object
    # @param importer_run the run whose :deleted_records / :enqueued_records counters change
    def perform(entry, importer_run)
      # Nothing is deleted when the factory finds no object; counters change either way.
      entry.factory.find&.delete

      importer_run.increment!(:deleted_records)
      importer_run.decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Downloads one cloud file through BrowseEverything.
  class DownloadCloudFileJob < ApplicationJob
    queue_as :import

    # Retrieve cloud file and write to the imports directory
    # Note: if using the file system, the mounted directory in
    # browse_everything MUST be shared by web and worker servers
    #
    # @param file passed straight to BrowseEverything::Retriever#download
    # @param target_file destination passed straight to the retriever
    def perform(file, target_file)
      BrowseEverything::Retriever.new.download(file, target_file) do |_filename, _retrieved, _total|
        # Deliberately empty. The block could report progress; note that its
        # first argument is the filename rather than a chunk of data.
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Builds and saves one export entry, updates the ExporterRun counters, and
  # marks the exporter complete once no records remain enqueued.
  class ExportWorkJob < ApplicationJob
    queue_as :export

    # @param args [Array] args[0] is the Entry id, args[1] is the ExporterRun id
    # @raise re-raises any StandardError from build/save, or raises the error
    #   class stored on the entry's current status when the entry failed
    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)

      begin
        entry.build
        entry.save
      rescue StandardError
        record_outcome(run_id, :failed_records)
        raise
      end

      if entry.failed?
        record_outcome(run_id, :failed_records)
        raise entry.reload.current_status.error_class.constantize
      end
      record_outcome(run_id, :processed_records)

      # NOTE(review): both raise paths above skip this completion check, so a run
      # whose last job fails is never moved to 'Complete (with failures)' here.
      exporter_run = ExporterRun.find(run_id)
      return if exporter_run.enqueued_records.positive?

      final_status = exporter_run.failed_records.positive? ? 'Complete (with failures)' : 'Complete'
      exporter_run.exporter.status_info(final_status)
    end

    private

    # rubocop:disable Rails/SkipsModelValidations
    # Bumps +counter+ and takes one record off the enqueued count. The run is
    # looked up fresh each time so counts changed by other jobs are seen, and
    # enqueued_records is never pushed below zero (same guard as ImportWorkJob).
    def record_outcome(run_id, counter)
      ExporterRun.find(run_id).increment!(counter)
      run = ExporterRun.find(run_id)
      run.decrement!(:enqueued_records) unless run.enqueued_records <= 0
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Builds and saves one collection entry, grants the importing user manage
  # access on the collection, and updates the ImporterRun counters.
  class ImportWorkCollectionJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # @param args [Array] args[0] is the Entry id, args[1] is the ImporterRun id
    # @raise re-raises any StandardError after counting the collection as failed
    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)
      begin
        entry.build
        entry.save
        add_user_to_permission_template!(entry)
        ImporterRun.find(run_id).increment!(:processed_collections)
        decrement_enqueued(run_id)
      rescue StandardError
        ImporterRun.find(run_id).increment!(:failed_collections)
        decrement_enqueued(run_id)
        raise
      end
    end

    private

    # Takes one record off the enqueued count without going below zero
    # (same guard as ImportWorkJob).
    def decrement_enqueued(run_id)
      run = ImporterRun.find(run_id)
      run.decrement!(:enqueued_records) unless run.enqueued_records <= 0
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Gives the importer's user 'manage' access through the collection's
    # permission template, then resets the collection's access controls.
    def add_user_to_permission_template!(entry)
      user = ::User.find(entry.importerexporter.user_id)
      collection = entry.factory.find
      permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)

      # find_or_create_by! rather than create!: re-importing the same collection
      # must not add a second identical access grant.
      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: permission_template.id,
        agent_id: user.user_key,
        agent_type: 'user',
        access: 'manage'
      )

      collection.reset_access_controls!
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Builds one work entry and records the outcome on the ImporterRun.
  class ImportWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # @param args [Array] args[0] is the Entry id, args[1] is the ImporterRun id
    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)
      entry.build

      # A failed build is counted, not retried: whatever parse error kept the
      # work from being created will likely keep preventing it.
      outcome = entry.status == "Complete" ? :processed_records : :failed_records
      ImporterRun.find(run_id).increment!(outcome)
      # Fresh lookups so the guard sees the current count; never go below zero.
      ImporterRun.find(run_id).decrement!(:enqueued_records) unless ImporterRun.find(run_id).enqueued_records <= 0

      entry.save!
      entry.importer.current_run = ImporterRun.find(run_id)
      entry.importer.record_status
    rescue Bulkrax::CollectionsCreatedError
      reschedule(args[0], args[1])
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Re-enqueues this job in one minute with the same arguments.
    def reschedule(entry_id, run_id)
      ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # Runs a full import for one importer and, when the importer is schedulable,
  # enqueues the next run.
  class ImporterJob < ApplicationJob
    queue_as :import

    # @param importer_id id of the Importer to run
    # @param only_updates_since_last_import forwarded to the importer as only_updates
    def perform(importer_id, only_updates_since_last_import = false)
      record = Importer.find(importer_id)
      # Called for its side effect only; the return value is not used here.
      record.current_run
      import(record, only_updates_since_last_import)
      return unless record.schedulable?

      schedule(record)
    end

    # Imports collections, then works, then (unless validate_only) relationships.
    # Does nothing beyond setting only_updates when the import is not valid.
    def import(importer, only_updates_since_last_import)
      importer.only_updates = only_updates_since_last_import || false
      if importer.valid_import?
        importer.import_collections
        importer.import_works
        importer.create_parent_child_relationships unless importer.validate_only
      end
    end

    # Enqueues the next run at the importer's next_import_at, as an updates-only import.
    def schedule(importer)
      next_run_at = importer.next_import_at
      ImporterJob.set(wait_until: next_run_at).perform_later(importer.id, true)
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'language_list'
|
4
|
+
|
5
|
+
module Bulkrax
  # Converts one raw metadata value into the value stored for a mapped field:
  # optional conditional filtering, whitespace normalisation, splitting, and
  # field-specific parsing through the public parse_* methods.
  class ApplicationMatcher
    # Field-name fragments that have a matching parse_<name> method.
    # New parse methods will need to be added here.
    PARSED_FIELDS = %w[remote_files language subject types model resource_type format_original].freeze

    attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type

    # @param args [Hash] option name => value; each key must be one of the accessors above
    def initialize(args)
      args.each do |k, v|
        send("#{k}=", v)
      end
    end

    # @param _parser unused
    # @param content the raw value; it is converted with to_s before cleaning
    # @return [String, Array, Hash, nil] the processed value, or nil when the
    #   field is excluded/reserved, the +if+ option is malformed, or the
    #   condition does not match
    def result(_parser, content)
      return nil if excluded == true || Bulkrax.reserved_properties.include?(to)

      condition = self.if
      if condition
        # +if+ must be exactly [method_name, pattern]. The previous check joined
        # the two tests with && and so let wrong-length Arrays and 2-character
        # Strings through.
        return nil unless condition.is_a?(Array) && condition.length == 2
        # NOTE(review): the method name comes from the field mapping and is
        # invoked with send; keep mappings trusted.
        return unless content.send(condition[0], Regexp.new(condition[1]))
      end

      @result = content.to_s.gsub(/\s/, ' ') # remove any line feeds and tabs
      @result.strip!
      process_split
      @result = @result[0] if @result.is_a?(Array) && @result.size == 1
      process_parse
      @result
    end

    # Splits @result when +split+ is set: +true+ uses the default separators,
    # any other truthy value is compiled into a Regexp.
    def process_split
      if split.is_a?(TrueClass)
        @result = @result.split(/\s*[:;|]\s*/) # default split by : ; |
      elsif split
        @result = @result.split(Regexp.new(split)).map(&:strip)
      end
    end

    # Applies parse_<field> to @result (element-wise for Arrays, dropping nil
    # results) when +parsed+ is set and +to+ contains a known field name.
    def process_parse
      return unless parsed

      # Substring match, which accounts for prefixed matchers
      parser = PARSED_FIELDS.find { |field| to&.include?(field) }
      # respond_to? only sees public methods, so the parse_* methods must stay public
      return unless respond_to?("parse_#{parser}")

      @result = if @result.is_a?(Array)
                  @result.map { |res| send("parse_#{parser}", res.strip) }.compact
                else
                  send("parse_#{parser}", @result)
                end
    end

    def parse_remote_files(src)
      { url: src.strip } if src.present?
    end

    # Returns the language name LanguageList knows for +src+, else +src+ unchanged.
    def parse_language(src)
      l = ::LanguageList::LanguageInfo.find(src.strip)
      l ? l.name : src
    end

    def parse_subject(src)
      sentence_case(src)
    end

    def parse_types(src)
      src.to_s.strip.titleize
    end

    # Allow for mapping a model field to the work type or collection
    def parse_model(src)
      return extract_model(src) unless src.is_a?(Array)

      # first usable model wins; nil when none is
      src.map { |m| extract_model(m) }.compact.first
    end

    # Returns the last path segment when +src+ matches URI::ABS_URI, otherwise
    # +src+ itself; any StandardError yields nil.
    def extract_model(src)
      if src&.match(URI::ABS_URI)
        src.split('/').last
      else
        src
      end
    rescue StandardError
      nil
    end

    # Only add valid resource types
    def parse_resource_type(src)
      Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
    rescue KeyError
      nil
    end

    def parse_format_original(src)
      sentence_case(src)
    end

    private

    # Drops the case completely, then upcases the first letter; nil for blank input.
    def sentence_case(src)
      string = src.to_s.strip.downcase
      return if string.blank?

      string.slice(0, 1).capitalize + string.slice(1..-1)
    end
  end
end
|