bulkrax 4.3.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/exporters.js +12 -0
  3. data/app/controllers/bulkrax/entries_controller.rb +5 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +5 -0
  5. data/app/controllers/bulkrax/importers_controller.rb +9 -1
  6. data/app/factories/bulkrax/object_factory.rb +87 -11
  7. data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
  8. data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
  9. data/app/jobs/bulkrax/import_work_job.rb +23 -13
  10. data/app/matchers/bulkrax/application_matcher.rb +5 -3
  11. data/app/models/bulkrax/csv_entry.rb +20 -8
  12. data/app/models/bulkrax/entry.rb +2 -1
  13. data/app/models/bulkrax/importer.rb +20 -15
  14. data/app/models/bulkrax/oai_entry.rb +42 -9
  15. data/app/models/bulkrax/rdf_entry.rb +1 -1
  16. data/app/models/bulkrax/xml_entry.rb +54 -12
  17. data/app/models/concerns/bulkrax/file_factory.rb +9 -3
  18. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -1
  19. data/app/models/concerns/bulkrax/import_behavior.rb +27 -19
  20. data/app/parsers/bulkrax/application_parser.rb +90 -13
  21. data/app/parsers/bulkrax/csv_parser.rb +13 -6
  22. data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
  23. data/app/parsers/bulkrax/xml_parser.rb +6 -0
  24. data/app/services/bulkrax/remove_relationships_for_importer.rb +107 -0
  25. data/app/views/bulkrax/exporters/_form.html.erb +3 -3
  26. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +3 -3
  27. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +9 -5
  28. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +18 -7
  29. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +1 -1
  30. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +1 -1
  31. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +1 -1
  32. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +1 -1
  33. data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +1 -1
  34. data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +1 -1
  35. data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +1 -1
  36. data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +1 -1
  37. data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +1 -1
  38. data/lib/bulkrax/version.rb +1 -1
  39. data/lib/bulkrax.rb +85 -11
  40. data/lib/generators/bulkrax/install_generator.rb +20 -0
  41. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +13 -1
  42. data/lib/tasks/reset.rake +65 -0
  43. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c636e3c53228a9cd6f2d45356f10b2339265f5f3f11b32bd6e523a01388b60b
4
- data.tar.gz: 6eeb6679cc8e93f77926baa30f91b09c9cea173d7d4135c2be9e865b558d5056
3
+ metadata.gz: 49a1f0ccc806cf73a7872a634c8c2819dac62f98f0a9dc163e7ada8931d9b1fe
4
+ data.tar.gz: ecf5c8ad3e4864110665cfb5c6c5f6329e3b868414815bba163646db839400e7
5
5
  SHA512:
6
- metadata.gz: 389a8059749407dc1a6d9326e60131934b9729aaf33f1a4802c72255550fdf7e71ad874c88969e244dccf81e80ad95eb09e30179425fb1313fc2383d7f7adc7a
7
- data.tar.gz: c58567026f654f0577377babd2311f537c3155f6c71833255d13730dc32eed51ec9a0708f0283b925e09945b59d338a14ffdb9945e0375de5ff7be120ed62559
6
+ metadata.gz: df394e4fbbc6ca0a71eb595c075c5181d24098b1ab889ff185da10991ec01c2eaf60239658564838b67df4c0a421306de923d2eab4bbeffafb31321acb9ba1ff
7
+ data.tar.gz: 35b691e96d0e59f83efadc35364283239f7f741ab4bf9d859756dacc8483e380090512cd10bf3b6ec4ce81936d00c88eb4d1ed96742c384e0f3082028ffdcf45
data/app/assets/javascripts/bulkrax/exporters.js CHANGED
@@ -1,10 +1,12 @@
1
1
  function hideUnhide(field) {
2
2
  var allSources = $('body').find('.export-source-option')
3
+ removeRequired(allSources)
3
4
  hide(allSources)
4
5
 
5
6
  if (field.length > 0) {
6
7
  var selectedSource = $('.' + field)
7
8
  unhideSelected(selectedSource)
9
+ addRequired(selectedSource)
8
10
  }
9
11
 
10
12
  if (field === 'collection') {
@@ -12,6 +14,16 @@ function hideUnhide(field) {
12
14
  }
13
15
  };
14
16
 
17
+ function addRequired(selectedSource) {
18
+ selectedSource.addClass('required').attr('required', 'required');
19
+ selectedSource.parent().addClass('required');
20
+ }
21
+
22
+ function removeRequired(allSources) {
23
+ allSources.removeClass('required').removeAttr('required');
24
+ allSources.parent().removeClass('required').removeAttr('required')
25
+ };
26
+
15
27
  // hide all export_source
16
28
  function hide(allSources) {
17
29
  allSources.addClass('hidden');
data/app/controllers/bulkrax/entries_controller.rb CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
7
7
  class EntriesController < ApplicationController
8
8
  include Hyrax::ThemedLayoutController
9
9
  before_action :authenticate_user!
10
+ before_action :check_permissions
10
11
  with_themed_layout 'dashboard'
11
12
 
12
13
  def show
@@ -40,5 +41,9 @@ module Bulkrax
40
41
  add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
41
42
  add_breadcrumb @entry.id
42
43
  end
44
+
45
+ def check_permissions
46
+ raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
47
+ end
43
48
  end
44
49
  end
data/app/controllers/bulkrax/exporters_controller.rb CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
7
7
  include Hyrax::ThemedLayoutController
8
8
  include Bulkrax::DownloadBehavior
9
9
  before_action :authenticate_user!
10
+ before_action :check_permissions
10
11
  before_action :set_exporter, only: [:show, :edit, :update, :destroy]
11
12
  with_themed_layout 'dashboard'
12
13
 
@@ -131,5 +132,9 @@ module Bulkrax
131
132
  def file_path
132
133
  "#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
133
134
  end
135
+
136
+ def check_permissions
137
+ raise CanCan::AccessDenied unless current_ability.can_export_works?
138
+ end
134
139
  end
135
140
  end
data/app/controllers/bulkrax/importers_controller.rb CHANGED
@@ -14,6 +14,7 @@ module Bulkrax
14
14
  protect_from_forgery unless: -> { api_request? }
15
15
  before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
16
16
  before_action :authenticate_user!, unless: -> { api_request? }
17
+ before_action :check_permissions
17
18
  before_action :set_importer, only: [:show, :edit, :update, :destroy]
18
19
  with_themed_layout 'dashboard'
19
20
 
@@ -76,6 +77,9 @@ module Bulkrax
76
77
  @importer = Importer.new(importer_params)
77
78
  field_mapping_params
78
79
  @importer.validate_only = true if params[:commit] == 'Create and Validate'
80
+ # the following line is needed to handle updating remote files of a FileSet
81
+ # on a new import otherwise it only gets updated during the update path
82
+ @importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
79
83
  if @importer.save
80
84
  files_for_import(file, cloud_files)
81
85
  if params[:commit] == 'Create and Import'
@@ -276,7 +280,7 @@ module Bulkrax
276
280
  def setup_client(url)
277
281
  return false if url.nil?
278
282
  headers = { from: Bulkrax.server_name }
279
- @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml', metadata_prefix: 'oai_dc')
283
+ @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml')
280
284
  end
281
285
 
282
286
  # Download methods
@@ -316,6 +320,10 @@ module Bulkrax
316
320
  end
317
321
  @importer.save
318
322
  end
323
+
324
+ def check_permissions
325
+ raise CanCan::AccessDenied unless current_ability.can_import_works?
326
+ end
319
327
  end
320
328
  # rubocop:enable Metrics/ClassLength
321
329
  end
data/app/factories/bulkrax/object_factory.rb CHANGED
@@ -6,6 +6,27 @@ module Bulkrax
6
6
  include Bulkrax::FileFactory
7
7
  include DynamicRecordLookup
8
8
 
9
+ # @api private
10
+ #
11
+ # These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
12
+ # have in addition to their specific attributes.
13
+ #
14
+ # @return [Array<Symbol>]
15
+ # @see #permitted_attributes
16
+ class_attribute :base_permitted_attributes,
17
+ default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
18
+
19
+ # @return [Boolean]
20
+ #
21
+ # @example
22
+ # Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
23
+ #
24
+ # @see #transform_attributes
25
+ # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
26
+ # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
27
+ # concerning default behavior.
28
+ class_attribute :transformation_removes_blank_hash_values, default: false
29
+
9
30
  define_model_callbacks :save, :create
10
31
  attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id
11
32
 
@@ -58,7 +79,7 @@ module Bulkrax
58
79
  elsif klass == FileSet
59
80
  update_file_set(attrs)
60
81
  else
61
- work_actor.update(environment(attrs))
82
+ update_work(attrs)
62
83
  end
63
84
  end
64
85
  object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
@@ -104,7 +125,7 @@ module Bulkrax
104
125
  elsif klass == FileSet
105
126
  create_file_set(attrs)
106
127
  else
107
- work_actor.create(environment(attrs))
128
+ create_work(attrs)
108
129
  end
109
130
  end
110
131
  end
@@ -139,6 +160,14 @@ module Bulkrax
139
160
  Hyrax::CurationConcern.actor
140
161
  end
141
162
 
163
+ def create_work(attrs)
164
+ work_actor.create(environment(attrs))
165
+ end
166
+
167
+ def update_work(attrs)
168
+ work_actor.update(environment(attrs))
169
+ end
170
+
142
171
  def create_collection(attrs)
143
172
  attrs = clean_attrs(attrs)
144
173
  attrs = collection_type(attrs)
@@ -159,28 +188,55 @@ module Bulkrax
159
188
  file_set_attrs = attrs.slice(*object.attributes.keys)
160
189
  object.assign_attributes(file_set_attrs)
161
190
 
162
- attrs['uploaded_files'].each do |uploaded_file_id|
191
+ attrs['uploaded_files']&.each do |uploaded_file_id|
163
192
  uploaded_file = ::Hyrax::UploadedFile.find(uploaded_file_id)
164
193
  next if uploaded_file.file_set_uri.present?
165
194
 
166
- actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
167
- uploaded_file.update(file_set_uri: actor.file_set.uri)
168
- actor.file_set.permissions_attributes = work_permissions
169
- actor.create_metadata
170
- actor.create_content(uploaded_file)
171
- actor.attach_to_work(work)
195
+ create_file_set_actor(attrs, work, work_permissions, uploaded_file)
196
+ end
197
+ attrs['remote_files']&.each do |remote_file|
198
+ create_file_set_actor(attrs, work, work_permissions, nil, remote_file)
172
199
  end
173
200
 
174
201
  object.save!
175
202
  end
176
203
 
204
+ def create_file_set_actor(attrs, work, work_permissions, uploaded_file, remote_file = nil)
205
+ actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
206
+ uploaded_file&.update(file_set_uri: actor.file_set.uri)
207
+ actor.file_set.permissions_attributes = work_permissions
208
+ actor.create_metadata(attrs)
209
+ actor.create_content(uploaded_file) if uploaded_file
210
+ actor.attach_to_work(work, attrs)
211
+ handle_remote_file(remote_file: remote_file, actor: actor, update: false) if remote_file
212
+ end
213
+
177
214
  def update_file_set(attrs)
178
215
  file_set_attrs = attrs.slice(*object.attributes.keys)
179
216
  actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
180
-
217
+ attrs['remote_files']&.each do |remote_file|
218
+ handle_remote_file(remote_file: remote_file, actor: actor, update: true)
219
+ end
181
220
  actor.update_metadata(file_set_attrs)
182
221
  end
183
222
 
223
+ def handle_remote_file(remote_file:, actor:, update: false)
224
+ actor.file_set.label = remote_file['file_name']
225
+ actor.file_set.import_url = remote_file['url']
226
+
227
+ url = remote_file['url']
228
+ tmp_file = Tempfile.new(remote_file['file_name'].split('.').first)
229
+ tmp_file.binmode
230
+
231
+ URI.open(url) do |url_file|
232
+ tmp_file.write(url_file.read)
233
+ end
234
+
235
+ tmp_file.rewind
236
+ update == true ? actor.update_content(tmp_file) : actor.create_content(tmp_file, from_url: true)
237
+ tmp_file.close
238
+ end
239
+
184
240
  def clean_attrs(attrs)
185
241
  # avoid the "ArgumentError: Identifier must be a string of size > 0 in order to be treeified" error
186
242
  # when setting object.attributes
@@ -200,12 +256,32 @@ module Bulkrax
200
256
  def transform_attributes(update: false)
201
257
  @transform_attributes = attributes.slice(*permitted_attributes)
202
258
  @transform_attributes.merge!(file_attributes(update_files)) if with_files
259
+ @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
203
260
  update ? @transform_attributes.except(:id) : @transform_attributes
204
261
  end
205
262
 
206
263
  # Regardless of what the Parser gives us, these are the properties we are prepared to accept.
207
264
  def permitted_attributes
208
- klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
265
+ klass.properties.keys.map(&:to_sym) + base_permitted_attributes
266
+ end
267
+
268
+ # Return a copy of the given attributes, such that all values that are empty or an array of all
269
+ # empty values are fully emptied. (See implementation details)
270
+ #
271
+ # @param attributes [Hash]
272
+ # @return [Hash]
273
+ #
274
+ # @see https://github.com/emory-libraries/dlp-curate/issues/1973
275
+ def remove_blank_hash_values(attributes)
276
+ dupe = attributes.dup
277
+ dupe.each do |key, values|
278
+ if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
279
+ dupe[key] = []
280
+ elsif values.is_a?(String) && values.empty?
281
+ dupe[key] = nil
282
+ end
283
+ end
284
+ dupe
209
285
  end
210
286
  end
211
287
  end
data/app/jobs/bulkrax/create_relationships_job.rb CHANGED
@@ -81,7 +81,7 @@ module Bulkrax
81
81
  # This is adding the reverse relationship, from the child to the parent
82
82
  def collection_parent_work_child
83
83
  child_work_ids = child_records[:works].map(&:id)
84
- parent_record.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
84
+ parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
85
85
 
86
86
  parent_record.add_member_objects(child_work_ids)
87
87
  ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations
data/app/jobs/bulkrax/import_file_set_job.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class MissingParentError < ::StandardError; end
5
+
5
6
  class ImportFileSetJob < ApplicationJob
6
7
  include DynamicRecordLookup
7
8
 
data/app/jobs/bulkrax/import_work_job.rb CHANGED
@@ -5,30 +5,40 @@ module Bulkrax
5
5
  queue_as :import
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
- def perform(*args)
9
- entry = Entry.find(args[0])
8
+ def perform(entry_id, run_id, time_to_live = 3, *)
9
+ entry = Entry.find(entry_id)
10
+ importer_run = ImporterRun.find(run_id)
10
11
  entry.build
11
12
  if entry.status == "Complete"
12
- ImporterRun.find(args[1]).increment!(:processed_records)
13
- ImporterRun.find(args[1]).increment!(:processed_works)
14
- ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
13
+ importer_run.increment!(:processed_records)
14
+ importer_run.increment!(:processed_works)
15
15
  else
16
16
  # do not retry here because whatever parse error kept you from creating a work will likely
17
17
  # keep preventing you from doing so.
18
- ImporterRun.find(args[1]).increment!(:failed_records)
19
- ImporterRun.find(args[1]).increment!(:failed_works)
20
- ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
18
+ importer_run.increment!(:failed_records)
19
+ importer_run.increment!(:failed_works)
21
20
  end
21
+ # Regardless of completion or not, we want to decrement the enqueued records.
22
+ importer_run.decrement!(:enqueued_records) unless importer_run.enqueued_records <= 0
23
+
22
24
  entry.save!
23
- entry.importer.current_run = ImporterRun.find(args[1])
25
+ entry.importer.current_run = importer_run
24
26
  entry.importer.record_status
25
- rescue Bulkrax::CollectionsCreatedError
26
- reschedule(args[0], args[1])
27
+ rescue Bulkrax::CollectionsCreatedError => e
28
+ Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
29
+ # You get 3 attempts at the above perform before we have the import exception cascade into
30
+ # the Sidekiq retry ecosystem.
31
+ # rubocop:disable Style/IfUnlessModifier
32
+ if time_to_live <= 1
33
+ raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
34
+ end
35
+ # rubocop:enable Style/IfUnlessModifier
36
+ reschedule(entry_id, run_id, time_to_live)
27
37
  end
28
38
  # rubocop:enable Rails/SkipsModelValidations
29
39
 
30
- def reschedule(entry_id, run_id)
31
- ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
40
+ def reschedule(entry_id, run_id, time_to_live)
41
+ ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
32
42
  end
33
43
  end
34
44
  end
data/app/matchers/bulkrax/application_matcher.rb CHANGED
@@ -6,6 +6,10 @@ module Bulkrax
6
6
  class ApplicationMatcher
7
7
  attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
8
8
 
9
+ # New parse methods will need to be added here; you'll also want to define a corresponding
10
+ # "parse_#{field}" method.
11
+ class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
12
+
9
13
  def initialize(args)
10
14
  args.each do |k, v|
11
15
  send("#{k}=", v)
@@ -30,7 +34,7 @@ module Bulkrax
30
34
 
31
35
  def process_split
32
36
  if self.split.is_a?(TrueClass)
33
- @result = @result.split(/\s*[:;|]\s*/) # default split by : ; |
37
+ @result = @result.split(Bulkrax.multi_value_element_split_on)
34
38
  elsif self.split
35
39
  result = @result.split(Regexp.new(self.split))
36
40
  @result = result.map(&:strip)
@@ -38,8 +42,6 @@ module Bulkrax
38
42
  end
39
43
 
40
44
  def process_parse
41
- # New parse methods will need to be added here
42
- parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
43
45
  # This accounts for prefixed matchers
44
46
  parser = parsed_fields.find { |field| to&.include? field }
45
47
 
data/app/models/bulkrax/csv_entry.rb CHANGED
@@ -7,7 +7,7 @@ module Bulkrax
7
7
  # We do too much in these entry classes. We need to extract the common logic from the various
8
8
  # entry models into a module that can be shared between them.
9
9
  class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
10
- serialize :raw_metadata, JSON
10
+ serialize :raw_metadata, Bulkrax::NormalizedJson
11
11
 
12
12
  def self.fields_from_data(data)
13
13
  data.headers.flatten.compact.uniq
@@ -18,7 +18,7 @@ module Bulkrax
18
18
  raise StandardError, 'CSV path empty' if path.blank?
19
19
  CSV.read(path,
20
20
  headers: true,
21
- header_converters: :symbol,
21
+ header_converters: ->(h) { h.to_sym },
22
22
  encoding: 'utf-8')
23
23
  end
24
24
 
@@ -36,10 +36,14 @@ module Bulkrax
36
36
 
37
37
  def build_metadata
38
38
  raise StandardError, 'Record not found' if record.nil?
39
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
39
+ unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
40
+ raise StandardError,
41
+ "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
42
+ end
40
43
 
41
44
  self.parsed_metadata = {}
42
45
  add_identifier
46
+ establish_factory_class
43
47
  add_ingested_metadata
44
48
  # TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
45
49
  add_collections
@@ -56,6 +60,12 @@ module Bulkrax
56
60
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
57
61
  end
58
62
 
63
+ def establish_factory_class
64
+ parser.model_field_mappings.each do |key|
65
+ add_metadata('model', record[key]) if record.key?(key)
66
+ end
67
+ end
68
+
59
69
  def add_metadata_for_model
60
70
  if factory_class == Collection
61
71
  add_collection_type_gid
@@ -81,7 +91,7 @@ module Bulkrax
81
91
  def add_file
82
92
  self.parsed_metadata['file'] ||= []
83
93
  if record['file']&.is_a?(String)
84
- self.parsed_metadata['file'] = record['file'].split(/\s*[;|]\s*/)
94
+ self.parsed_metadata['file'] = record['file'].split(Bulkrax.multi_value_element_split_on)
85
95
  elsif record['file'].is_a?(Array)
86
96
  self.parsed_metadata['file'] = record['file']
87
97
  end
@@ -107,7 +117,9 @@ module Bulkrax
107
117
  # Metadata required by Bulkrax for round-tripping
108
118
  def build_system_metadata
109
119
  self.parsed_metadata['id'] = hyrax_record.id
110
- self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
120
+ source_id = hyrax_record.send(work_identifier)
121
+ source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
122
+ self.parsed_metadata[source_identifier] = source_id
111
123
  self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
112
124
  end
113
125
 
@@ -176,7 +188,7 @@ module Bulkrax
176
188
  data = hyrax_record.send(key.to_s)
177
189
  if data.is_a?(ActiveTriples::Relation)
178
190
  if value['join']
179
- self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(' | ').to_s # TODO: make split char dynamic
191
+ self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(Bulkrax.multi_value_element_join_on).to_s
180
192
  else
181
193
  data.each_with_index do |d, i|
182
194
  self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
@@ -236,7 +248,7 @@ module Bulkrax
236
248
 
237
249
  def handle_join_on_export(key, values, join)
238
250
  if join
239
- parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
251
+ parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
240
252
  else
241
253
  values.each_with_index do |value, i|
242
254
  parsed_metadata["#{key}_#{i + 1}"] = value
@@ -260,7 +272,7 @@ module Bulkrax
260
272
  return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?
261
273
 
262
274
  identifiers = []
263
- split_references = record[parent_field_mapping].split(/\s*[;|]\s*/)
275
+ split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
264
276
  split_references.each do |c_reference|
265
277
  matching_collection_entries = importerexporter.entries.select do |e|
266
278
  (e.raw_metadata&.[](source_identifier) == c_reference) &&
data/app/models/bulkrax/entry.rb CHANGED
@@ -4,6 +4,7 @@ module Bulkrax
4
4
  # Custom error class for collections_created?
5
5
  class CollectionsCreatedError < RuntimeError; end
6
6
  class OAIError < RuntimeError; end
7
+
7
8
  class Entry < ApplicationRecord
8
9
  include Bulkrax::HasMatchers
9
10
  include Bulkrax::ImportBehavior
@@ -15,7 +16,7 @@ module Bulkrax
15
16
  alias importer importerexporter
16
17
  alias exporter importerexporter
17
18
 
18
- serialize :parsed_metadata, JSON
19
+ serialize :parsed_metadata, Bulkrax::NormalizedJson
19
20
  # Do not serialize raw_metadata as so we can support xml or other formats
20
21
  serialize :collection_ids, Array
21
22
 
data/app/models/bulkrax/importer.rb CHANGED
@@ -58,17 +58,26 @@ module Bulkrax
58
58
 
59
59
  # If field_mapping is empty, setup a default based on the export_properties
60
60
  def mapping
61
+ # rubocop:disable Style/IfUnlessModifier
61
62
  @mapping ||= if self.field_mapping.blank? || self.field_mapping == [{}]
62
63
  if parser.import_fields.present? || self.field_mapping == [{}]
63
- ActiveSupport::HashWithIndifferentAccess.new(
64
- parser.import_fields.reject(&:nil?).map do |m|
65
- Bulkrax.default_field_mapping.call(m)
66
- end.inject(:merge)
67
- )
64
+ default_field_mapping
68
65
  end
69
66
  else
70
- self.field_mapping
67
+ default_field_mapping.merge(self.field_mapping)
71
68
  end
69
+
70
+ # rubocop:enable Style/IfUnlessModifier
71
+ end
72
+
73
+ def default_field_mapping
74
+ return self.field_mapping if parser.import_fields.nil?
75
+
76
+ ActiveSupport::HashWithIndifferentAccess.new(
77
+ parser.import_fields.reject(&:nil?).map do |m|
78
+ Bulkrax.default_field_mapping.call(m)
79
+ end.inject(:merge)
80
+ )
72
81
  end
73
82
 
74
83
  def parser_fields
@@ -143,17 +152,13 @@ module Bulkrax
143
152
  import_objects(['relationship'])
144
153
  end
145
154
 
155
+ DEFAULT_OBJECT_TYPES = %w[collection work file_set relationship].freeze
156
+
146
157
  def import_objects(types_array = nil)
147
158
  self.only_updates ||= false
148
- types = types_array || %w[collection work file_set relationship]
149
- if parser.class == Bulkrax::CsvParser
150
- parser.create_objects(types)
151
- else
152
- types.each do |object_type|
153
- self.save if self.new_record? # Object needs to be saved for statuses
154
- parser.send("create_#{object_type.pluralize}")
155
- end
156
- end
159
+ self.save if self.new_record? # Object needs to be saved for statuses
160
+ types = types_array || DEFAULT_OBJECT_TYPES
161
+ parser.create_objects(types)
157
162
  rescue StandardError => e
158
163
  status_info(e)
159
164
  end
data/app/models/bulkrax/oai_entry.rb CHANGED
@@ -5,7 +5,7 @@ require 'ostruct'
5
5
 
6
6
  module Bulkrax
7
7
  class OaiEntry < Entry
8
- serialize :raw_metadata, JSON
8
+ serialize :raw_metadata, Bulkrax::NormalizedJson
9
9
 
10
10
  delegate :record, to: :raw_record
11
11
 
@@ -28,13 +28,16 @@ module Bulkrax
28
28
  def build_metadata
29
29
  self.parsed_metadata = {}
30
30
  self.parsed_metadata[work_identifier] = [record.header.identifier]
31
+ self.raw_metadata = { xml: record.metadata.to_s }
31
32
 
32
- record.metadata.children.each do |child|
33
- child.children.each do |node|
34
- add_metadata(node.name, node.content)
35
- end
36
- end
37
- add_metadata('thumbnail_url', thumbnail_url)
33
+ # We need to establish the #factory_class before we proceed with the metadata. See
34
+ # https://github.com/samvera-labs/bulkrax/issues/702 for further details.
35
+ #
36
+ # tl;dr - if we don't have the right factory_class we might skip properties that are
37
+ # specifically assigned to the factory class
38
+ establish_factory_class
39
+ add_metadata_from_record
40
+ add_thumbnail_url
38
41
 
39
42
  add_visibility
40
43
  add_rights_statement
@@ -53,18 +56,48 @@ module Bulkrax
53
56
  end
54
57
  end
55
58
 
59
+ # To ensure we capture the correct parse data, we first need to establish the factory_class.
60
+ # @see https://github.com/samvera-labs/bulkrax/issues/702
61
+ def establish_factory_class
62
+ model_field_names = parser.model_field_mappings
63
+
64
+ each_candidate_metadata_node do |node|
65
+ next unless model_field_names.include?(node.name)
66
+ add_metadata(node.name, node.content)
67
+ end
68
+ end
69
+
70
+ def add_metadata_from_record
71
+ each_candidate_metadata_node do |node|
72
+ add_metadata(node.name, node.content)
73
+ end
74
+ end
75
+
76
+ # A method that you could override to better handle the shape of the record's metadata.
77
+ # @yieldparam node [Object<#name, #content>]
78
+ def each_candidate_metadata_node
79
+ record.metadata.children.each do |child|
80
+ child.children.each do |node|
81
+ yield(node)
82
+ end
83
+ end
84
+ end
85
+
86
+ def add_thumbnail_url
87
+ add_metadata('thumbnail_url', thumbnail_url)
88
+ end
89
+
56
90
  # Retrieve list of collections for the entry; add to collection_ids
57
91
  # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
58
92
  # in this case, if 'All' is selected, records will not be added to a collection.
59
93
  def find_collection_ids
60
94
  return self.collection_ids if collections_created?
61
95
  if sets.blank? || parser.collection_name != 'all'
62
- # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
63
96
  collection = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
64
97
  self.collection_ids << collection.id if collection.present? && !self.collection_ids.include?(collection.id)
65
98
  else # All - collections should exist for all sets
66
99
  sets.each do |set|
67
- c = Collection.find_by(work_identifier => importerexporter.unique_collection_identifier(set.content))
100
+ c = find_collection(importerexporter.unique_collection_identifier(set.content))
68
101
  self.collection_ids << c.id if c.present? && !self.collection_ids.include?(c.id)
69
102
  end
70
103
  end
data/app/models/bulkrax/rdf_entry.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'rdf'
4
4
  module Bulkrax
5
5
  class RdfEntry < Entry
6
- serialize :raw_metadata, JSON
6
+ serialize :raw_metadata, Bulkrax::NormalizedJson
7
7
 
8
8
  def self.read_data(path)
9
9
  RDF::Reader.open(path)