bulkrax 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +205 -0
  3. data/README.md +202 -0
  4. data/Rakefile +42 -0
  5. data/app/assets/config/bulkrax_manifest.js +2 -0
  6. data/app/assets/javascripts/bulkrax/application.js +14 -0
  7. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  8. data/app/assets/javascripts/bulkrax/entries.js +15 -0
  9. data/app/assets/javascripts/bulkrax/exporters.js +60 -0
  10. data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
  11. data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
  12. data/app/assets/stylesheets/bulkrax/application.css +15 -0
  13. data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
  14. data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
  15. data/app/controllers/bulkrax/application_controller.rb +8 -0
  16. data/app/controllers/bulkrax/entries_controller.rb +44 -0
  17. data/app/controllers/bulkrax/exporters_controller.rb +125 -0
  18. data/app/controllers/bulkrax/importers_controller.rb +315 -0
  19. data/app/controllers/concerns/bulkrax/api.rb +29 -0
  20. data/app/factories/bulkrax/object_factory.rb +230 -0
  21. data/app/helpers/bulkrax/application_helper.rb +15 -0
  22. data/app/helpers/bulkrax/exporters_helper.rb +6 -0
  23. data/app/helpers/bulkrax/importers_helper.rb +13 -0
  24. data/app/helpers/bulkrax/validation_helper.rb +153 -0
  25. data/app/jobs/bulkrax/application_job.rb +6 -0
  26. data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
  27. data/app/jobs/bulkrax/delete_work_job.rb +16 -0
  28. data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
  29. data/app/jobs/bulkrax/export_work_job.rb +37 -0
  30. data/app/jobs/bulkrax/exporter_job.rb +14 -0
  31. data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
  32. data/app/jobs/bulkrax/import_work_job.rb +32 -0
  33. data/app/jobs/bulkrax/importer_job.rb +26 -0
  34. data/app/mailers/bulkrax/application_mailer.rb +8 -0
  35. data/app/matchers/bulkrax/application_matcher.rb +113 -0
  36. data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
  37. data/app/matchers/bulkrax/csv_matcher.rb +6 -0
  38. data/app/matchers/bulkrax/oai_matcher.rb +6 -0
  39. data/app/models/bulkrax/application_record.rb +7 -0
  40. data/app/models/bulkrax/csv_collection_entry.rb +19 -0
  41. data/app/models/bulkrax/csv_entry.rb +163 -0
  42. data/app/models/bulkrax/entry.rb +104 -0
  43. data/app/models/bulkrax/exporter.rb +122 -0
  44. data/app/models/bulkrax/exporter_run.rb +7 -0
  45. data/app/models/bulkrax/import_failed.rb +13 -0
  46. data/app/models/bulkrax/importer.rb +155 -0
  47. data/app/models/bulkrax/importer_run.rb +8 -0
  48. data/app/models/bulkrax/oai_dc_entry.rb +6 -0
  49. data/app/models/bulkrax/oai_entry.rb +74 -0
  50. data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
  51. data/app/models/bulkrax/oai_set_entry.rb +19 -0
  52. data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
  53. data/app/models/bulkrax/rdf_entry.rb +90 -0
  54. data/app/models/bulkrax/status.rb +25 -0
  55. data/app/models/bulkrax/xml_entry.rb +73 -0
  56. data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
  57. data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
  58. data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
  59. data/app/models/concerns/bulkrax/file_factory.rb +140 -0
  60. data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
  61. data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
  62. data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
  63. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
  64. data/app/models/concerns/bulkrax/status_info.rb +56 -0
  65. data/app/parsers/bulkrax/application_parser.rb +299 -0
  66. data/app/parsers/bulkrax/bagit_parser.rb +157 -0
  67. data/app/parsers/bulkrax/csv_parser.rb +266 -0
  68. data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
  69. data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
  70. data/app/parsers/bulkrax/xml_parser.rb +103 -0
  71. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
  72. data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
  73. data/app/views/bulkrax/entries/show.html.erb +63 -0
  74. data/app/views/bulkrax/exporters/_form.html.erb +120 -0
  75. data/app/views/bulkrax/exporters/edit.html.erb +23 -0
  76. data/app/views/bulkrax/exporters/index.html.erb +67 -0
  77. data/app/views/bulkrax/exporters/new.html.erb +23 -0
  78. data/app/views/bulkrax/exporters/show.html.erb +124 -0
  79. data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
  80. data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
  81. data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
  82. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
  83. data/app/views/bulkrax/importers/_form.html.erb +35 -0
  84. data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
  85. data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
  86. data/app/views/bulkrax/importers/edit.html.erb +20 -0
  87. data/app/views/bulkrax/importers/index.html.erb +77 -0
  88. data/app/views/bulkrax/importers/new.html.erb +25 -0
  89. data/app/views/bulkrax/importers/show.html.erb +175 -0
  90. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
  91. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
  92. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
  93. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
  94. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
  95. data/app/views/layouts/bulkrax/application.html.erb +14 -0
  96. data/config/locales/bulkrax.en.yml +36 -0
  97. data/config/routes.rb +18 -0
  98. data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
  99. data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
  100. data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
  101. data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
  102. data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
  103. data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
  104. data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
  105. data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
  106. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
  107. data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
  108. data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
  109. data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
  110. data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
  111. data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
  112. data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
  113. data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
  114. data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
  115. data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
  116. data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
  117. data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
  118. data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
  119. data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
  120. data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
  121. data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
  122. data/lib/bulkrax.rb +161 -0
  123. data/lib/bulkrax/engine.rb +37 -0
  124. data/lib/bulkrax/version.rb +5 -0
  125. data/lib/generators/bulkrax/install_generator.rb +80 -0
  126. data/lib/generators/bulkrax/templates/README +3 -0
  127. data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
  128. data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
  129. data/lib/generators/bulkrax/templates/bin/importer +140 -0
  130. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
  131. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
  132. data/lib/tasks/bulkrax_tasks.rake +6 -0
  133. metadata +388 -0
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Common superclass for the engine's background jobs. It declares no
  # behaviour of its own beyond what ActiveJob::Base provides.
  class ApplicationJob < ActiveJob::Base; end
end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Raised when the object behind a child entry cannot be found.
  class ChildWorksError < RuntimeError; end

  # Attaches the objects of a set of child entries to the object of a parent
  # entry, then records the outcome on an ImporterRun.
  #
  # Positional job arguments:
  #   args[0] - id of the parent Bulkrax::Entry
  #   args[1] - ids of the child Bulkrax::Entry records
  #   args[2] - id of the ImporterRun whose child counters are updated
  class ChildRelationshipsJob < ApplicationJob
    queue_as :import

    # Dispatches on the parent's factory class. When any child object is
    # missing (ChildWorksError) the job re-enqueues itself with the same
    # arguments instead of failing.
    def perform(*args)
      @args = args

      entry.factory_class == Collection ? collection_membership : work_membership
    rescue Bulkrax::ChildWorksError
      # Not all of the Works/Collections exist yet; reschedule
      reschedule(args[0], args[1], args[2])
    end

    # Parent is a Collection: link each child work individually, then link
    # all child collections in a single call.
    def collection_membership
      lookup = child_works_hash
      collection_ids, work_ids = lookup.keys.partition { |id| lookup[id][:class_name] == 'Collection' }

      # add collection to works
      work_ids.each { |work| work_child_collection_parent(work) }

      # add collections to collection
      collection_parent_collection_child(collection_ids) if collection_ids.present?
    end

    # Parent is a Work: link child works only.
    def work_membership
      # reject any Collections, they can't be children of Works
      members_works = child_works_hash.reject { |_id, details| details[:class_name] == 'Collection' }.keys
      discarded = child_entries.length - members_works.length
      if discarded.positive?
        Rails.logger.warn("Cannot add collections as children of works: #{discarded} collections were discarded for parent entry #{entry.id} (of #{child_entries.length})")
      end
      # add works to work
      work_parent_work_child(members_works) if members_works.present?
    end

    # The parent entry (memoised).
    def entry
      @entry ||= Bulkrax::Entry.find(@args[0])
    end

    # The child entries, in the order their ids were given (memoised).
    def child_entries
      @child_entries ||= @args[1].map { |child_id| Bulkrax::Entry.find(child_id) }
    end

    # Maps each child object's id to a hash holding its class name (under
    # :class_name) and the child entry's identifier (under the parser's
    # source_identifier key). Memoised.
    #
    # Raises ChildWorksError as soon as one child object cannot be found.
    def child_works_hash
      @child_works_hash ||= child_entries.map do |child_entry|
        found = child_entry.factory.find
        # If we can't find the Work/Collection, raise a custom error
        raise ChildWorksError if found.blank?

        [found.id, { class_name: found.class.to_s, entry.parser.source_identifier => child_entry.identifier }]
      end.to_h
    end

    def importer_run_id
      @args[2]
    end

    # The user attached to the parent entry's importer/exporter (memoised).
    def user
      @user ||= entry.importerexporter.user
    end

    private

    # rubocop:disable Rails/SkipsModelValidations
    # Runs the given block and bumps :processed_children on the run. Any
    # StandardError raised by the block (or by the bump) is recorded on the
    # parent entry and counted under :failed_children instead.
    def tally_children
      yield
      ImporterRun.find(importer_run_id).increment!(:processed_children)
    rescue StandardError => e
      entry.status_info(e)
      ImporterRun.find(importer_run_id).increment!(:failed_children)
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Work-Collection membership is added to the child as member_of_collection_ids
    # This is adding the reverse relationship, from the child to the parent
    def work_child_collection_parent(work_id)
      tally_children do
        attrs = { id: work_id, collections: [{ id: entry&.factory&.find&.id }] }
        Bulkrax::ObjectFactory.new(attributes: attrs,
                                   source_identifier_value: child_works_hash[work_id][entry.parser.source_identifier],
                                   work_identifier: entry.parser.work_identifier,
                                   replace_files: false,
                                   user: user,
                                   klass: child_works_hash[work_id][:class_name].constantize).run
      end
    end

    # Collection-Collection membership is added to the parent as children
    def collection_parent_collection_child(member_ids)
      tally_children do
        attrs = { id: entry&.factory&.find&.id, children: member_ids }
        Bulkrax::ObjectFactory.new(attributes: attrs,
                                   source_identifier_value: entry.identifier,
                                   work_identifier: entry.parser.work_identifier,
                                   replace_files: false,
                                   user: user,
                                   klass: entry.factory_class).run
      end
    end

    # Work-Work membership is added to the parent as work_members_attributes
    def work_parent_work_child(member_ids)
      tally_children do
        # build work_members_attributes, keyed by position: { 0 => { id: ... }, ... }
        indexed_members = member_ids.each_with_index.map { |member, index| [index, { id: member }] }.to_h
        attrs = { id: entry&.factory&.find&.id, work_members_attributes: indexed_members }
        Bulkrax::ObjectFactory.new(attributes: attrs,
                                   source_identifier_value: entry.identifier,
                                   work_identifier: entry.parser.work_identifier,
                                   replace_files: false,
                                   user: user,
                                   klass: entry.factory_class).run
      end
    end

    # Re-enqueues this job with the same arguments, ten minutes from now.
    def reschedule(entry_id, child_entry_ids, importer_run_id)
      ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Deletes the object an entry points at and updates the run's counters.
  class DeleteWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # @param entry the entry whose factory is used to look the object up
    # @param importer_run the run whose :deleted_records is incremented and
    #   :enqueued_records decremented
    #
    # A missing object is tolerated (safe navigation); the counters are
    # updated either way.
    def perform(entry, importer_run)
      entry.factory.find&.delete
      importer_run.increment!(:deleted_records)
      importer_run.decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Fetches a cloud file through BrowseEverything.
  class DownloadCloudFileJob < ApplicationJob
    queue_as :import

    # Retrieve cloud file and write to the imports directory
    # Note: if using the file system, the mounted directory in
    # browse_everything MUST be shared by web and worker servers
    #
    # @param file passed through to BrowseEverything::Retriever#download
    # @param target_file passed through as the download target
    def perform(file, target_file)
      BrowseEverything::Retriever.new.download(file, target_file) do |_filename, _retrieved, _total|
        # Deliberately empty. The block is still useful for showing progress,
        # but the first argument is the filename instead of a chunk of data.
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Builds and saves one export entry, keeps the ExporterRun counters in
  # step, and marks the exporter complete once nothing is left enqueued.
  #
  # Positional job arguments:
  #   args[0] - id of the Entry to export
  #   args[1] - id of the ExporterRun to update
  class ExportWorkJob < ApplicationJob
    queue_as :export

    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)

      begin
        entry.build
        entry.save
      rescue StandardError
        # Count the failure, then let the original error propagate.
        settle(run_id, :failed_records)
        raise
      end

      if entry.failed?
        settle(run_id, :failed_records)
        # Surface the failure recorded on the entry as an exception of the stored class.
        raise entry.reload.current_status.error_class.constantize
      end
      settle(run_id, :processed_records)

      # Re-read the run so the check sees the counters as currently stored.
      finished_run = ExporterRun.find(run_id)
      return if finished_run.enqueued_records.positive?

      final_status = finished_run.failed_records.positive? ? 'Complete (with failures)' : 'Complete'
      finished_run.exporter.status_info(final_status)
    end

    private

    # rubocop:disable Rails/SkipsModelValidations
    # Increments +counter+ and decrements :enqueued_records on the run,
    # looking the run up afresh for each update.
    def settle(run_id, counter)
      ExporterRun.find(run_id).increment!(counter)
      ExporterRun.find(run_id).decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class ExporterJob < ApplicationJob
5
+ queue_as :export
6
+
7
+ def perform(exporter_id)
8
+ exporter = Exporter.find(exporter_id)
9
+ exporter.export
10
+ exporter.write
11
+ true
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Builds and saves a collection entry, grants the importing user manage
  # access to the resulting collection, and updates the ImporterRun counters.
  #
  # Positional job arguments:
  #   args[0] - id of the Entry to import
  #   args[1] - id of the ImporterRun to update
  class ImportWorkCollectionJob < ApplicationJob
    queue_as :import

    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)
      begin
        entry.build
        entry.save
        add_user_to_permission_template!(entry)
        settle(run_id, :processed_collections)
      rescue StandardError
        # Count the failure, then re-raise the same error.
        settle(run_id, :failed_collections)
        raise
      end
    end

    private

    # rubocop:disable Rails/SkipsModelValidations
    # Increments +counter+ and decrements :enqueued_records on the run,
    # looking the run up afresh for each update.
    def settle(run_id, counter)
      ImporterRun.find(run_id).increment!(counter)
      ImporterRun.find(run_id).decrement!(:enqueued_records)
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Creates a 'manage' PermissionTemplateAccess for the importer's user on
    # the collection's permission template (creating the template when none
    # exists), then resets the collection's access controls.
    def add_user_to_permission_template!(entry)
      owner = ::User.find(entry.importerexporter.user_id)
      collection = entry.factory.find
      template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)

      Hyrax::PermissionTemplateAccess.create!(permission_template_id: template.id,
                                              agent_id: owner.user_key,
                                              agent_type: 'user',
                                              access: 'manage')

      collection.reset_access_controls!
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Builds one work entry and records the outcome on its ImporterRun.
  #
  # Positional job arguments:
  #   args[0] - id of the Entry to import
  #   args[1] - id of the ImporterRun to update
  class ImportWorkJob < ApplicationJob
    queue_as :import

    # rubocop:disable Rails/SkipsModelValidations
    # A Bulkrax::CollectionsCreatedError raised anywhere in the body causes
    # the job to be re-enqueued rather than failed.
    def perform(*args)
      entry_id, run_id = args
      entry = Entry.find(entry_id)
      entry.build

      # do not retry on a non-"Complete" status because whatever parse error kept
      # you from creating a work will likely keep preventing you from doing so.
      counter = entry.status == "Complete" ? :processed_records : :failed_records
      ImporterRun.find(run_id).increment!(counter)
      # Never push the enqueued count below zero.
      ImporterRun.find(run_id).decrement!(:enqueued_records) if ImporterRun.find(run_id).enqueued_records.positive?

      entry.save!
      entry.importer.current_run = ImporterRun.find(run_id)
      entry.importer.record_status
    rescue Bulkrax::CollectionsCreatedError
      reschedule(args[0], args[1])
    end
    # rubocop:enable Rails/SkipsModelValidations

    # Re-enqueues this job with the same arguments, one minute from now.
    def reschedule(entry_id, run_id)
      ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Runs an importer and, when the importer is schedulable, queues its next run.
  class ImporterJob < ApplicationJob
    queue_as :import

    # @param importer_id id of the Importer to run
    # @param only_updates_since_last_import forwarded to #import; scheduled
    #   follow-up runs pass true
    def perform(importer_id, only_updates_since_last_import = false)
      importer = Importer.find(importer_id)
      # Return value unused; called for its effect on the importer
      # (presumably sets up the current run - confirm in Importer).
      importer.current_run
      import(importer, only_updates_since_last_import)
      return unless importer.schedulable?

      schedule(importer)
    end

    # Imports collections, then works, then (unless the importer is
    # validate-only) parent/child relationships. Does nothing after setting
    # only_updates when the importer reports an invalid import.
    def import(importer, only_updates_since_last_import)
      importer.only_updates = only_updates_since_last_import || false
      if importer.valid_import?
        importer.import_collections
        importer.import_works
        importer.create_parent_child_relationships unless importer.validate_only
      end
    end

    # Enqueues an updates-only run at the importer's next_import_at.
    def schedule(importer)
      ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Base mailer for the engine: sets the default sender address and the
  # layout used by mailers that inherit from it.
  class ApplicationMailer < ActionMailer::Base
    default(from: 'from@example.com')
    layout('mailer')
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'language_list'
4
+
5
module Bulkrax
  # Converts one raw source value into the value stored for a mapped field.
  #
  # A matcher is configured from a hash whose keys correspond to the
  # accessors below:
  #   to          - name of the target field
  #   from        - source field name(s); not read by this class
  #   parsed      - when truthy, run the matching parse_<field> method
  #   if          - two-element Array [method_name, pattern]; the value is
  #                 kept only when content.send(method_name, Regexp) is truthy
  #   split       - true for the default ":", ";", "|" separators, or a
  #                 pattern (String/Regexp) to split on
  #   excluded    - when true, the matcher always yields nil
  #   nested_type - not read by this class
  class ApplicationMatcher
    # Fields that have a dedicated parse_<field> method.
    # New parse methods will need to be added here
    PARSED_FIELDS = %w[remote_files language subject types model resource_type format_original].freeze

    attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type

    # @param args [Hash] accessor name => value; every key must have a writer
    def initialize(args)
      args.each { |key, value| send("#{key}=", value) }
    end

    # Normalises, optionally splits and optionally parses +content+.
    #
    # @param _parser unused
    # @param content the raw value; converted with #to_s
    # @return [String, Array, Hash, nil] nil when the matcher is excluded,
    #   the target is a reserved property, the +if+ option is malformed, or
    #   the +if+ condition is not met
    def result(_parser, content)
      return nil if excluded == true || Bulkrax.reserved_properties.include?(to)

      condition = self.if
      if condition
        # The condition must be exactly [method_name, pattern]. Anything else
        # (a bare String, a one-element Array, ...) is treated as malformed
        # and yields nil instead of raising further down.
        return nil unless condition.is_a?(Array) && condition.length == 2
        return unless content.send(condition[0], Regexp.new(condition[1]))
      end

      @result = content.to_s.gsub(/\s/, ' ').strip # remove any line feeds and tabs
      process_split
      # A split that produced a single item collapses back to a scalar.
      @result = @result[0] if @result.is_a?(Array) && @result.size == 1
      process_parse
      @result
    end

    # Splits @result according to the +split+ option; a no-op when it is unset.
    def process_split
      if split.is_a?(TrueClass)
        @result = @result.split(/\s*[:;|]\s*/) # default split by : ; |
      elsif split
        @result = @result.split(Regexp.new(split)).map(&:strip)
      end
    end

    # Applies the parse_<field> method selected by +to+ when +parsed+ is set.
    # Array results are parsed element by element and nil results dropped.
    def process_parse
      return unless parsed

      # Substring match: this accounts for prefixed matchers
      field = PARSED_FIELDS.find { |name| to&.include?(name) }
      parse_method = "parse_#{field}"
      return unless respond_to?(parse_method)

      if @result.is_a?(Array)
        @result.map! { |item| send(parse_method, item.strip) }
        @result.compact!
      else
        @result = send(parse_method, @result)
      end
    end

    # @return [Hash, nil] { url: ... } for a present value, otherwise nil
    def parse_remote_files(src)
      { url: src.strip } if src.present?
    end

    # @return [String] the name LanguageList reports for +src+, or +src+
    #   itself when no language is found
    def parse_language(src)
      language = ::LanguageList::LanguageInfo.find(src.strip)
      language ? language.name : src
    end

    # @return [String, nil] +src+ lower-cased with the first letter upcased;
    #   nil when blank
    def parse_subject(src)
      sentence_case(src)
    end

    def parse_types(src)
      src.to_s.strip.titleize
    end

    # Allow for mapping a model field to the work type or collection
    #
    # @param src [String, Array] one candidate, or several of which the first
    #   non-nil extraction wins
    def parse_model(src)
      return extract_model(src) unless src.is_a?(Array)

      src.map { |candidate| extract_model(candidate) }.compact.first
    end

    # @return the last path segment when +src+ matches URI::ABS_URI, +src+
    #   unchanged when it does not, nil when anything raises
    def extract_model(src)
      src&.match(URI::ABS_URI) ? src.split('/').last : src
    rescue StandardError
      nil
    end

    # Only add valid resource types
    #
    # @return the label from Hyrax::ResourceTypesService, or nil when the
    #   lookup raises KeyError
    def parse_resource_type(src)
      Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
    rescue KeyError
      nil
    end

    # @return [String, nil] +src+ lower-cased with the first letter upcased;
    #   nil when blank
    def parse_format_original(src)
      sentence_case(src)
    end

    private

    # drop the case completely then upcase the first letter
    def sentence_case(src)
      string = src.to_s.strip.downcase
      return if string.blank?

      string.slice(0, 1).capitalize + string.slice(1..-1)
    end
  end
end