bulkrax 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +205 -0
  3. data/README.md +202 -0
  4. data/Rakefile +42 -0
  5. data/app/assets/config/bulkrax_manifest.js +2 -0
  6. data/app/assets/javascripts/bulkrax/application.js +14 -0
  7. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  8. data/app/assets/javascripts/bulkrax/entries.js +15 -0
  9. data/app/assets/javascripts/bulkrax/exporters.js +60 -0
  10. data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
  11. data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
  12. data/app/assets/stylesheets/bulkrax/application.css +15 -0
  13. data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
  14. data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
  15. data/app/controllers/bulkrax/application_controller.rb +8 -0
  16. data/app/controllers/bulkrax/entries_controller.rb +44 -0
  17. data/app/controllers/bulkrax/exporters_controller.rb +125 -0
  18. data/app/controllers/bulkrax/importers_controller.rb +315 -0
  19. data/app/controllers/concerns/bulkrax/api.rb +29 -0
  20. data/app/factories/bulkrax/object_factory.rb +230 -0
  21. data/app/helpers/bulkrax/application_helper.rb +15 -0
  22. data/app/helpers/bulkrax/exporters_helper.rb +6 -0
  23. data/app/helpers/bulkrax/importers_helper.rb +13 -0
  24. data/app/helpers/bulkrax/validation_helper.rb +153 -0
  25. data/app/jobs/bulkrax/application_job.rb +6 -0
  26. data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
  27. data/app/jobs/bulkrax/delete_work_job.rb +16 -0
  28. data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
  29. data/app/jobs/bulkrax/export_work_job.rb +37 -0
  30. data/app/jobs/bulkrax/exporter_job.rb +14 -0
  31. data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
  32. data/app/jobs/bulkrax/import_work_job.rb +32 -0
  33. data/app/jobs/bulkrax/importer_job.rb +26 -0
  34. data/app/mailers/bulkrax/application_mailer.rb +8 -0
  35. data/app/matchers/bulkrax/application_matcher.rb +113 -0
  36. data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
  37. data/app/matchers/bulkrax/csv_matcher.rb +6 -0
  38. data/app/matchers/bulkrax/oai_matcher.rb +6 -0
  39. data/app/models/bulkrax/application_record.rb +7 -0
  40. data/app/models/bulkrax/csv_collection_entry.rb +19 -0
  41. data/app/models/bulkrax/csv_entry.rb +163 -0
  42. data/app/models/bulkrax/entry.rb +104 -0
  43. data/app/models/bulkrax/exporter.rb +122 -0
  44. data/app/models/bulkrax/exporter_run.rb +7 -0
  45. data/app/models/bulkrax/import_failed.rb +13 -0
  46. data/app/models/bulkrax/importer.rb +155 -0
  47. data/app/models/bulkrax/importer_run.rb +8 -0
  48. data/app/models/bulkrax/oai_dc_entry.rb +6 -0
  49. data/app/models/bulkrax/oai_entry.rb +74 -0
  50. data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
  51. data/app/models/bulkrax/oai_set_entry.rb +19 -0
  52. data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
  53. data/app/models/bulkrax/rdf_entry.rb +90 -0
  54. data/app/models/bulkrax/status.rb +25 -0
  55. data/app/models/bulkrax/xml_entry.rb +73 -0
  56. data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
  57. data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
  58. data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
  59. data/app/models/concerns/bulkrax/file_factory.rb +140 -0
  60. data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
  61. data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
  62. data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
  63. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
  64. data/app/models/concerns/bulkrax/status_info.rb +56 -0
  65. data/app/parsers/bulkrax/application_parser.rb +299 -0
  66. data/app/parsers/bulkrax/bagit_parser.rb +157 -0
  67. data/app/parsers/bulkrax/csv_parser.rb +266 -0
  68. data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
  69. data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
  70. data/app/parsers/bulkrax/xml_parser.rb +103 -0
  71. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
  72. data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
  73. data/app/views/bulkrax/entries/show.html.erb +63 -0
  74. data/app/views/bulkrax/exporters/_form.html.erb +120 -0
  75. data/app/views/bulkrax/exporters/edit.html.erb +23 -0
  76. data/app/views/bulkrax/exporters/index.html.erb +67 -0
  77. data/app/views/bulkrax/exporters/new.html.erb +23 -0
  78. data/app/views/bulkrax/exporters/show.html.erb +124 -0
  79. data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
  80. data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
  81. data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
  82. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
  83. data/app/views/bulkrax/importers/_form.html.erb +35 -0
  84. data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
  85. data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
  86. data/app/views/bulkrax/importers/edit.html.erb +20 -0
  87. data/app/views/bulkrax/importers/index.html.erb +77 -0
  88. data/app/views/bulkrax/importers/new.html.erb +25 -0
  89. data/app/views/bulkrax/importers/show.html.erb +175 -0
  90. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
  91. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
  92. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
  93. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
  94. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
  95. data/app/views/layouts/bulkrax/application.html.erb +14 -0
  96. data/config/locales/bulkrax.en.yml +36 -0
  97. data/config/routes.rb +18 -0
  98. data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
  99. data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
  100. data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
  101. data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
  102. data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
  103. data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
  104. data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
  105. data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
  106. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
  107. data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
  108. data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
  109. data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
  110. data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
  111. data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
  112. data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
  113. data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
  114. data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
  115. data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
  116. data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
  117. data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
  118. data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
  119. data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
  120. data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
  121. data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
  122. data/lib/bulkrax.rb +161 -0
  123. data/lib/bulkrax/engine.rb +37 -0
  124. data/lib/bulkrax/version.rb +5 -0
  125. data/lib/generators/bulkrax/install_generator.rb +80 -0
  126. data/lib/generators/bulkrax/templates/README +3 -0
  127. data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
  128. data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
  129. data/lib/generators/bulkrax/templates/bin/importer +140 -0
  130. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
  131. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
  132. data/lib/tasks/bulkrax_tasks.rake +6 -0
  133. metadata +388 -0
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Common ActiveJob superclass for the engine's background jobs.
  # It adds no behaviour of its own.
  class ApplicationJob < ActiveJob::Base; end
end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Raised when a child entry's Work/Collection cannot be found by its factory.
  class ChildWorksError < RuntimeError; end

  # Links a parent entry's object to the objects of its child entries.
  #
  # Positional job arguments (read back through the accessors below):
  #   args[0] - id of the parent Bulkrax::Entry
  #   args[1] - array of child Bulkrax::Entry ids
  #   args[2] - id of the ImporterRun whose child counters are updated
  class ChildRelationshipsJob < ApplicationJob
    queue_as :import

    # Dispatches on the parent's factory class. When any child object cannot
    # be found (ChildWorksError) the job is re-enqueued with the same arguments.
    def perform(*args)
      @args = args

      if entry.factory_class == Collection
        collection_membership
      else
        work_membership
      end
    rescue Bulkrax::ChildWorksError
      # Not all of the Works/Collections exist yet; reschedule
      reschedule(args[0], args[1], args[2])
    end

    # Parent is a Collection: each non-Collection child is linked individually,
    # then all Collection children are attached to the parent in one call.
    def collection_membership
      collection_pairs, work_pairs = child_works_hash.partition { |_id, info| info[:class_name] == 'Collection' }

      # add collection to works
      work_pairs.each { |work_id, _info| work_child_collection_parent(work_id) }

      # add collections to collection
      members_collections = collection_pairs.map(&:first)
      collection_parent_collection_child(members_collections) if members_collections.present?
    end

    # Parent is a Work: attach all non-Collection children as members.
    def work_membership
      # reject any Collections, they can't be children of Works
      members_works = child_works_hash.reject { |_id, info| info[:class_name] == 'Collection' }.keys
      discarded = child_entries.length - members_works.length
      if discarded.positive?
        Rails.logger.warn("Cannot add collections as children of works: #{discarded} collections were discarded for parent entry #{entry.id} (of #{child_entries.length})")
      end
      work_parent_work_child(members_works) if members_works.present?
    end

    # Parent entry, loaded once from args[0].
    def entry
      @entry ||= Bulkrax::Entry.find(@args[0])
    end

    # Child entries, loaded once from the ids in args[1].
    def child_entries
      @child_entries ||= @args[1].map { |e| Bulkrax::Entry.find(e) }
    end

    # Maps each child object's id to its class name and source identifier.
    #
    # @raise [ChildWorksError] when a child entry's object cannot be found
    def child_works_hash
      @child_works_hash ||= child_entries.each_with_object({}) do |child_entry, hash|
        work = child_entry.factory.find
        # If we can't find the Work/Collection, raise a custom error
        raise ChildWorksError if work.blank?
        hash[work.id] = { class_name: work.class.to_s, entry.parser.source_identifier => child_entry.identifier }
      end
    end

    def importer_run_id
      @args[2]
    end

    def user
      @user ||= entry.importerexporter.user
    end

    private

    # Work-Collection membership is added to the child as member_of_collection_ids
    # This is adding the reverse relationship, from the child to the parent
    def work_child_collection_parent(work_id)
      record_child_result do
        child = child_works_hash[work_id]
        run_factory(attributes: { id: work_id, collections: [{ id: parent_record_id }] },
                    source_identifier_value: child[entry.parser.source_identifier],
                    klass: child[:class_name].constantize)
      end
    end

    # Collection-Collection membership is added to the parent as member_ids
    def collection_parent_collection_child(member_ids)
      record_child_result do
        run_factory(attributes: { id: parent_record_id, children: member_ids },
                    source_identifier_value: entry.identifier,
                    klass: entry.factory_class)
      end
    end

    # Work-Work membership is added to the parent as member_ids
    def work_parent_work_child(member_ids)
      record_child_result do
        # build work_members_attributes, keyed by position: { 0 => { id: ... }, ... }
        members = member_ids.each_with_index.map { |member, index| [index, { id: member }] }.to_h
        run_factory(attributes: { id: parent_record_id, work_members_attributes: members },
                    source_identifier_value: entry.identifier,
                    klass: entry.factory_class)
      end
    end

    # Id of the object behind the parent entry, or nil when it cannot be found.
    def parent_record_id
      entry&.factory&.find&.id
    end

    # Runs an ObjectFactory with the options shared by every relationship update.
    def run_factory(attributes:, source_identifier_value:, klass:)
      Bulkrax::ObjectFactory.new(attributes: attributes,
                                 source_identifier_value: source_identifier_value,
                                 work_identifier: entry.parser.work_identifier,
                                 replace_files: false,
                                 user: user,
                                 klass: klass).run
    end

    # rubocop:disable Rails/SkipsModelValidations
    # Yields, then bumps processed_children on the importer run. Any
    # StandardError is recorded on the parent entry and counted in
    # failed_children instead of propagating.
    def record_child_result
      yield
      ImporterRun.find(importer_run_id).increment!(:processed_children)
    rescue StandardError => e
      entry.status_info(e)
      ImporterRun.find(importer_run_id).increment!(:failed_children)
    end
    # rubocop:enable Rails/SkipsModelValidations

    def reschedule(entry_id, child_entry_ids, importer_run_id)
      ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Deletes the object behind an entry (when one is found) and updates the
  # importer run's deleted/enqueued counters.
  class DeleteWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    def perform(entry, importer_run)
      # A missing object is not an error; the counters are updated either way.
      entry.factory.find&.delete
      importer_run.increment!(:deleted_records)
      importer_run.decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Fetches a cloud file through BrowseEverything into the imports directory.
  class DownloadCloudFileJob < ApplicationJob
    queue_as :import

    # Retrieve cloud file and write to the imports directory
    # Note: if using the file system, the mounted directory in
    # browse_everything MUST be shared by web and worker servers
    def perform(file, target_file)
      BrowseEverything::Retriever.new.download(file, target_file) do |_filename, _retrieved, _total|
        # Intentionally empty. The block could report progress; note that the
        # first argument is the filename rather than a chunk of data.
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Builds and saves a single export entry, keeps the exporter run's counters
  # up to date, and marks the exporter complete once nothing is left enqueued.
  #
  # Positional job arguments: args[0] is the Entry id, args[1] the ExporterRun id.
  class ExportWorkJob < ApplicationJob
    queue_as :export

    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)

      begin
        entry.build
        entry.save
      rescue StandardError
        settle(run_id, :failed_records)
        raise
      end

      if entry.failed?
        settle(run_id, :failed_records)
        # Surface the recorded failure as an exception of the stored class
        raise entry.reload.current_status.error_class.constantize
      end
      settle(run_id, :processed_records)

      run = ExporterRun.find(run_id)
      return if run.enqueued_records.positive?

      final_status = run.failed_records.positive? ? 'Complete (with failures)' : 'Complete'
      run.exporter.status_info(final_status)
    end

    private

    # rubocop:disable Rails/SkipsModelValidations
    # Bumps the given outcome counter and takes one off the enqueued count.
    def settle(run_id, outcome_counter)
      ExporterRun.find(run_id).increment!(outcome_counter)
      ExporterRun.find(run_id).decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Runs an exporter end to end: export first, then write the output.
  class ExporterJob < ApplicationJob
    queue_as :export

    # Always returns true once both steps finish without raising.
    def perform(exporter_id)
      Exporter.find(exporter_id).tap do |exporter|
        exporter.export
        exporter.write
      end
      true
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Builds and saves a collection entry, grants the importing user manage
  # access on the resulting collection, and updates the importer run counters.
  #
  # Positional job arguments: args[0] is the Entry id, args[1] the ImporterRun id.
  class ImportWorkCollectionJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # Any StandardError is counted in failed_collections and then re-raised.
    def perform(*args)
      entry = Entry.find(args[0])
      begin
        entry.build
        entry.save
        add_user_to_permission_template!(entry)
        ImporterRun.find(args[1]).increment!(:processed_collections)
        ImporterRun.find(args[1]).decrement!(:enqueued_records)
      rescue StandardError
        ImporterRun.find(args[1]).increment!(:failed_collections)
        ImporterRun.find(args[1]).decrement!(:enqueued_records)
        raise
      end
    end
    # rubocop:enable Rails/SkipsModelValidations

    private

    # Gives the importer's user 'manage' access on the collection's permission
    # template and resets the collection's access controls.
    def add_user_to_permission_template!(entry)
      user = ::User.find(entry.importerexporter.user_id)
      collection = entry.factory.find
      permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)

      # find_or_create_by! keeps this idempotent: running the job again for the
      # same collection must not attempt a second, duplicate grant.
      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: permission_template.id,
        agent_id: user.user_key,
        agent_type: 'user',
        access: 'manage'
      )

      collection.reset_access_controls!
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Builds a single work entry and records the outcome on the importer run.
  #
  # Positional job arguments: args[0] is the Entry id, args[1] the ImporterRun id.
  class ImportWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)
      entry.build

      # A non-complete entry is counted as failed and not retried: whatever
      # parse error kept the work from being created will likely do so again.
      outcome_counter = entry.status == "Complete" ? :processed_records : :failed_records
      ImporterRun.find(run_id).increment!(outcome_counter)
      # Only decrement while the enqueued count is still above zero
      ImporterRun.find(run_id).decrement!(:enqueued_records) if ImporterRun.find(run_id).enqueued_records.positive?

      entry.save!
      entry.importer.current_run = ImporterRun.find(run_id)
      entry.importer.record_status
    rescue Bulkrax::CollectionsCreatedError
      reschedule(args[0], args[1])
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Re-enqueues this job for the same entry and run one minute from now.
    def reschedule(entry_id, run_id)
      ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Runs an importer and, when it is schedulable, enqueues its next run.
  class ImporterJob < ApplicationJob
    queue_as :import

    def perform(importer_id, only_updates_since_last_import = false)
      importer = Importer.find(importer_id)
      importer.current_run
      import(importer, only_updates_since_last_import)
      return unless importer.schedulable?

      schedule(importer)
    end

    # Imports collections, then works, then parent/child relationships
    # (skipped for validate-only importers). Does nothing when the import
    # is not valid.
    def import(importer, only_updates_since_last_import)
      importer.only_updates = only_updates_since_last_import || false
      if importer.valid_import?
        importer.import_collections
        importer.import_works
        importer.create_parent_child_relationships unless importer.validate_only
      end
    end

    # Enqueues the follow-up run at next_import_at, restricted to updates only.
    def schedule(importer)
      ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Base mailer for the engine: sets the layout and a default sender address.
  class ApplicationMailer < ActionMailer::Base
    layout 'mailer'
    default from: 'from@example.com'
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'language_list'
4
+
5
module Bulkrax
  # Turns one raw metadata value into its mapped form: optionally filtered by
  # a condition, whitespace-normalised, split into several values and passed
  # through a field-specific parse_* method.
  class ApplicationMatcher
    # Fields that have a parse_* method below. New parse methods will need to
    # be added here.
    PARSED_FIELDS = %w[remote_files language subject types model resource_type format_original].freeze

    attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type

    # @param args [Hash] attribute name => value; each key must have a writer
    def initialize(args)
      args.each do |k, v|
        send("#{k}=", v)
      end
    end

    # @param _parser [Object] unused
    # @param content [Object] raw value; converted with to_s before processing
    # @return [String, Array, Object, nil] the processed value. nil when the
    #   matcher is excluded, maps to a reserved property, has a malformed
    #   +if+ condition, or the condition is not satisfied.
    def result(_parser, content)
      return nil if excluded == true || Bulkrax.reserved_properties.include?(to)

      if self.if
        # The condition must be a two-element array: [method_name, pattern].
        # Anything else is treated as "no match" rather than raising below.
        return nil unless self.if.is_a?(Array) && self.if.length == 2
        return unless content.send(self.if[0], Regexp.new(self.if[1]))
      end

      @result = content.to_s.gsub(/\s/, ' ').strip # remove any line feeds and tabs
      process_split
      @result = @result[0] if @result.is_a?(Array) && @result.size == 1
      process_parse
      @result
    end

    # Splits @result on the default separators (: ; |) when +split+ is true,
    # or on +split+ compiled as a regular expression when it is another
    # truthy value. Leaves @result untouched otherwise.
    def process_split
      if split.is_a?(TrueClass)
        @result = @result.split(/\s*[:;|]\s*/) # default split by : ; |
      elsif split
        @result = @result.split(Regexp.new(split)).map(&:strip)
      end
    end

    # Applies the parse_* method for the target field when +parsed+ is set.
    # Array results are parsed element by element and nil results dropped.
    def process_parse
      return unless parsed

      # Substring match: this accounts for prefixed matchers
      field = PARSED_FIELDS.find { |candidate| to&.include?(candidate) }
      handler = "parse_#{field}"
      return unless respond_to?(handler)

      @result = if @result.is_a?(Array)
                  @result.map { |value| send(handler, value.strip) }.compact
                else
                  send(handler, @result)
                end
    end

    def parse_remote_files(src)
      { url: src.strip } if src.present?
    end

    # Returns the language name known to LanguageList, or the input unchanged.
    def parse_language(src)
      l = ::LanguageList::LanguageInfo.find(src.strip)
      l ? l.name : src
    end

    def parse_subject(src)
      sentence_case(src)
    end

    def parse_types(src)
      src.to_s.strip.titleize
    end

    # Allow for mapping a model field to the work type or collection.
    # For an array, the first non-nil extracted model is used.
    def parse_model(src)
      return extract_model(src) unless src.is_a?(Array)

      src.map { |m| extract_model(m) }.compact.first
    end

    # Returns the last path segment when src is an absolute URI, otherwise src
    # itself; nil if anything raises.
    def extract_model(src)
      if src&.match(URI::ABS_URI)
        src.split('/').last
      else
        src
      end
    rescue StandardError
      nil
    end

    # Only add valid resource types
    def parse_resource_type(src)
      Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
    rescue KeyError
      nil
    end

    def parse_format_original(src)
      sentence_case(src)
    end

    private

    # Drops the case completely then upcases the first letter.
    # Returns nil for blank input.
    def sentence_case(src)
      string = src.to_s.strip.downcase
      return if string.blank?

      string.slice(0, 1).capitalize + string.slice(1..-1)
    end
  end
end