bulkrax 4.3.0 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/exporters.js +12 -0
  3. data/app/controllers/bulkrax/entries_controller.rb +5 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +5 -0
  5. data/app/controllers/bulkrax/importers_controller.rb +9 -1
  6. data/app/factories/bulkrax/object_factory.rb +87 -11
  7. data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
  8. data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
  9. data/app/jobs/bulkrax/import_work_job.rb +23 -13
  10. data/app/matchers/bulkrax/application_matcher.rb +5 -3
  11. data/app/models/bulkrax/csv_entry.rb +20 -8
  12. data/app/models/bulkrax/entry.rb +2 -1
  13. data/app/models/bulkrax/importer.rb +20 -15
  14. data/app/models/bulkrax/oai_entry.rb +42 -9
  15. data/app/models/bulkrax/rdf_entry.rb +1 -1
  16. data/app/models/bulkrax/xml_entry.rb +54 -12
  17. data/app/models/concerns/bulkrax/file_factory.rb +9 -3
  18. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -1
  19. data/app/models/concerns/bulkrax/import_behavior.rb +27 -19
  20. data/app/parsers/bulkrax/application_parser.rb +90 -13
  21. data/app/parsers/bulkrax/csv_parser.rb +13 -6
  22. data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
  23. data/app/parsers/bulkrax/xml_parser.rb +6 -0
  24. data/app/services/bulkrax/remove_relationships_for_importer.rb +107 -0
  25. data/app/views/bulkrax/exporters/_form.html.erb +3 -3
  26. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +3 -3
  27. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +9 -5
  28. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +18 -7
  29. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +1 -1
  30. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +1 -1
  31. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +1 -1
  32. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +1 -1
  33. data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +1 -1
  34. data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +1 -1
  35. data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +1 -1
  36. data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +1 -1
  37. data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +1 -1
  38. data/lib/bulkrax/version.rb +1 -1
  39. data/lib/bulkrax.rb +85 -11
  40. data/lib/generators/bulkrax/install_generator.rb +20 -0
  41. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +13 -1
  42. data/lib/tasks/reset.rake +65 -0
  43. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c636e3c53228a9cd6f2d45356f10b2339265f5f3f11b32bd6e523a01388b60b
4
- data.tar.gz: 6eeb6679cc8e93f77926baa30f91b09c9cea173d7d4135c2be9e865b558d5056
3
+ metadata.gz: 49a1f0ccc806cf73a7872a634c8c2819dac62f98f0a9dc163e7ada8931d9b1fe
4
+ data.tar.gz: ecf5c8ad3e4864110665cfb5c6c5f6329e3b868414815bba163646db839400e7
5
5
  SHA512:
6
- metadata.gz: 389a8059749407dc1a6d9326e60131934b9729aaf33f1a4802c72255550fdf7e71ad874c88969e244dccf81e80ad95eb09e30179425fb1313fc2383d7f7adc7a
7
- data.tar.gz: c58567026f654f0577377babd2311f537c3155f6c71833255d13730dc32eed51ec9a0708f0283b925e09945b59d338a14ffdb9945e0375de5ff7be120ed62559
6
+ metadata.gz: df394e4fbbc6ca0a71eb595c075c5181d24098b1ab889ff185da10991ec01c2eaf60239658564838b67df4c0a421306de923d2eab4bbeffafb31321acb9ba1ff
7
+ data.tar.gz: 35b691e96d0e59f83efadc35364283239f7f741ab4bf9d859756dacc8483e380090512cd10bf3b6ec4ce81936d00c88eb4d1ed96742c384e0f3082028ffdcf45
data/app/assets/javascripts/bulkrax/exporters.js CHANGED
@@ -1,10 +1,12 @@
1
1
  function hideUnhide(field) {
2
2
  var allSources = $('body').find('.export-source-option')
3
+ removeRequired(allSources)
3
4
  hide(allSources)
4
5
 
5
6
  if (field.length > 0) {
6
7
  var selectedSource = $('.' + field)
7
8
  unhideSelected(selectedSource)
9
+ addRequired(selectedSource)
8
10
  }
9
11
 
10
12
  if (field === 'collection') {
@@ -12,6 +14,16 @@ function hideUnhide(field) {
12
14
  }
13
15
  };
14
16
 
17
+ function addRequired(selectedSource) {
18
+ selectedSource.addClass('required').attr('required', 'required');
19
+ selectedSource.parent().addClass('required');
20
+ }
21
+
22
+ function removeRequired(allSources) {
23
+ allSources.removeClass('required').removeAttr('required');
24
+ allSources.parent().removeClass('required').removeAttr('required')
25
+ };
26
+
15
27
  // hide all export_source
16
28
  function hide(allSources) {
17
29
  allSources.addClass('hidden');
data/app/controllers/bulkrax/entries_controller.rb CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
7
7
  class EntriesController < ApplicationController
8
8
  include Hyrax::ThemedLayoutController
9
9
  before_action :authenticate_user!
10
+ before_action :check_permissions
10
11
  with_themed_layout 'dashboard'
11
12
 
12
13
  def show
@@ -40,5 +41,9 @@ module Bulkrax
40
41
  add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
41
42
  add_breadcrumb @entry.id
42
43
  end
44
+
45
+ def check_permissions
46
+ raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
47
+ end
43
48
  end
44
49
  end
data/app/controllers/bulkrax/exporters_controller.rb CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
7
7
  include Hyrax::ThemedLayoutController
8
8
  include Bulkrax::DownloadBehavior
9
9
  before_action :authenticate_user!
10
+ before_action :check_permissions
10
11
  before_action :set_exporter, only: [:show, :edit, :update, :destroy]
11
12
  with_themed_layout 'dashboard'
12
13
 
@@ -131,5 +132,9 @@ module Bulkrax
131
132
  def file_path
132
133
  "#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
133
134
  end
135
+
136
+ def check_permissions
137
+ raise CanCan::AccessDenied unless current_ability.can_export_works?
138
+ end
134
139
  end
135
140
  end
data/app/controllers/bulkrax/importers_controller.rb CHANGED
@@ -14,6 +14,7 @@ module Bulkrax
14
14
  protect_from_forgery unless: -> { api_request? }
15
15
  before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
16
16
  before_action :authenticate_user!, unless: -> { api_request? }
17
+ before_action :check_permissions
17
18
  before_action :set_importer, only: [:show, :edit, :update, :destroy]
18
19
  with_themed_layout 'dashboard'
19
20
 
@@ -76,6 +77,9 @@ module Bulkrax
76
77
  @importer = Importer.new(importer_params)
77
78
  field_mapping_params
78
79
  @importer.validate_only = true if params[:commit] == 'Create and Validate'
80
+ # the following line is needed to handle updating remote files of a FileSet
81
+ # on a new import otherwise it only gets updated during the update path
82
+ @importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
79
83
  if @importer.save
80
84
  files_for_import(file, cloud_files)
81
85
  if params[:commit] == 'Create and Import'
@@ -276,7 +280,7 @@ module Bulkrax
276
280
  def setup_client(url)
277
281
  return false if url.nil?
278
282
  headers = { from: Bulkrax.server_name }
279
- @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml', metadata_prefix: 'oai_dc')
283
+ @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml')
280
284
  end
281
285
 
282
286
  # Download methods
@@ -316,6 +320,10 @@ module Bulkrax
316
320
  end
317
321
  @importer.save
318
322
  end
323
+
324
+ def check_permissions
325
+ raise CanCan::AccessDenied unless current_ability.can_import_works?
326
+ end
319
327
  end
320
328
  # rubocop:enable Metrics/ClassLength
321
329
  end
data/app/factories/bulkrax/object_factory.rb CHANGED
@@ -6,6 +6,27 @@ module Bulkrax
6
6
  include Bulkrax::FileFactory
7
7
  include DynamicRecordLookup
8
8
 
9
+ # @api private
10
+ #
11
+ # These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
12
+ # have in addition to their specific attributes.
13
+ #
14
+ # @return [Array<Symbol>]
15
+ # @see #permitted_attributes
16
+ class_attribute :base_permitted_attributes,
17
+ default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
18
+
19
+ # @return [Boolean]
20
+ #
21
+ # @example
22
+ # Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
23
+ #
24
+ # @see #transform_attributes
25
+ # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
26
+ # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
27
+ # concerning default behavior.
28
+ class_attribute :transformation_removes_blank_hash_values, default: false
29
+
9
30
  define_model_callbacks :save, :create
10
31
  attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id
11
32
 
@@ -58,7 +79,7 @@ module Bulkrax
58
79
  elsif klass == FileSet
59
80
  update_file_set(attrs)
60
81
  else
61
- work_actor.update(environment(attrs))
82
+ update_work(attrs)
62
83
  end
63
84
  end
64
85
  object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
@@ -104,7 +125,7 @@ module Bulkrax
104
125
  elsif klass == FileSet
105
126
  create_file_set(attrs)
106
127
  else
107
- work_actor.create(environment(attrs))
128
+ create_work(attrs)
108
129
  end
109
130
  end
110
131
  end
@@ -139,6 +160,14 @@ module Bulkrax
139
160
  Hyrax::CurationConcern.actor
140
161
  end
141
162
 
163
+ def create_work(attrs)
164
+ work_actor.create(environment(attrs))
165
+ end
166
+
167
+ def update_work(attrs)
168
+ work_actor.update(environment(attrs))
169
+ end
170
+
142
171
  def create_collection(attrs)
143
172
  attrs = clean_attrs(attrs)
144
173
  attrs = collection_type(attrs)
@@ -159,28 +188,55 @@ module Bulkrax
159
188
  file_set_attrs = attrs.slice(*object.attributes.keys)
160
189
  object.assign_attributes(file_set_attrs)
161
190
 
162
- attrs['uploaded_files'].each do |uploaded_file_id|
191
+ attrs['uploaded_files']&.each do |uploaded_file_id|
163
192
  uploaded_file = ::Hyrax::UploadedFile.find(uploaded_file_id)
164
193
  next if uploaded_file.file_set_uri.present?
165
194
 
166
- actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
167
- uploaded_file.update(file_set_uri: actor.file_set.uri)
168
- actor.file_set.permissions_attributes = work_permissions
169
- actor.create_metadata
170
- actor.create_content(uploaded_file)
171
- actor.attach_to_work(work)
195
+ create_file_set_actor(attrs, work, work_permissions, uploaded_file)
196
+ end
197
+ attrs['remote_files']&.each do |remote_file|
198
+ create_file_set_actor(attrs, work, work_permissions, nil, remote_file)
172
199
  end
173
200
 
174
201
  object.save!
175
202
  end
176
203
 
204
+ def create_file_set_actor(attrs, work, work_permissions, uploaded_file, remote_file = nil)
205
+ actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
206
+ uploaded_file&.update(file_set_uri: actor.file_set.uri)
207
+ actor.file_set.permissions_attributes = work_permissions
208
+ actor.create_metadata(attrs)
209
+ actor.create_content(uploaded_file) if uploaded_file
210
+ actor.attach_to_work(work, attrs)
211
+ handle_remote_file(remote_file: remote_file, actor: actor, update: false) if remote_file
212
+ end
213
+
177
214
  def update_file_set(attrs)
178
215
  file_set_attrs = attrs.slice(*object.attributes.keys)
179
216
  actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
180
-
217
+ attrs['remote_files']&.each do |remote_file|
218
+ handle_remote_file(remote_file: remote_file, actor: actor, update: true)
219
+ end
181
220
  actor.update_metadata(file_set_attrs)
182
221
  end
183
222
 
223
+ def handle_remote_file(remote_file:, actor:, update: false)
224
+ actor.file_set.label = remote_file['file_name']
225
+ actor.file_set.import_url = remote_file['url']
226
+
227
+ url = remote_file['url']
228
+ tmp_file = Tempfile.new(remote_file['file_name'].split('.').first)
229
+ tmp_file.binmode
230
+
231
+ URI.open(url) do |url_file|
232
+ tmp_file.write(url_file.read)
233
+ end
234
+
235
+ tmp_file.rewind
236
+ update == true ? actor.update_content(tmp_file) : actor.create_content(tmp_file, from_url: true)
237
+ tmp_file.close
238
+ end
239
+
184
240
  def clean_attrs(attrs)
185
241
  # avoid the "ArgumentError: Identifier must be a string of size > 0 in order to be treeified" error
186
242
  # when setting object.attributes
@@ -200,12 +256,32 @@ module Bulkrax
200
256
  def transform_attributes(update: false)
201
257
  @transform_attributes = attributes.slice(*permitted_attributes)
202
258
  @transform_attributes.merge!(file_attributes(update_files)) if with_files
259
+ @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
203
260
  update ? @transform_attributes.except(:id) : @transform_attributes
204
261
  end
205
262
 
206
263
  # Regardless of what the Parser gives us, these are the properties we are prepared to accept.
207
264
  def permitted_attributes
208
- klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
265
+ klass.properties.keys.map(&:to_sym) + base_permitted_attributes
266
+ end
267
+
268
+ # Return a copy of the given attributes, such that all values that are empty or an array of all
269
+ # empty values are fully emptied. (See implementation details)
270
+ #
271
+ # @param attributes [Hash]
272
+ # @return [Hash]
273
+ #
274
+ # @see https://github.com/emory-libraries/dlp-curate/issues/1973
275
+ def remove_blank_hash_values(attributes)
276
+ dupe = attributes.dup
277
+ dupe.each do |key, values|
278
+ if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
279
+ dupe[key] = []
280
+ elsif values.is_a?(String) && values.empty?
281
+ dupe[key] = nil
282
+ end
283
+ end
284
+ dupe
209
285
  end
210
286
  end
211
287
  end
data/app/jobs/bulkrax/create_relationships_job.rb CHANGED
@@ -81,7 +81,7 @@ module Bulkrax
81
81
  # This is adding the reverse relationship, from the child to the parent
82
82
  def collection_parent_work_child
83
83
  child_work_ids = child_records[:works].map(&:id)
84
- parent_record.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
84
+ parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
85
85
 
86
86
  parent_record.add_member_objects(child_work_ids)
87
87
  ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations
data/app/jobs/bulkrax/import_file_set_job.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class MissingParentError < ::StandardError; end
5
+
5
6
  class ImportFileSetJob < ApplicationJob
6
7
  include DynamicRecordLookup
7
8
 
data/app/jobs/bulkrax/import_work_job.rb CHANGED
@@ -5,30 +5,40 @@ module Bulkrax
5
5
  queue_as :import
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
- def perform(*args)
9
- entry = Entry.find(args[0])
8
+ def perform(entry_id, run_id, time_to_live = 3, *)
9
+ entry = Entry.find(entry_id)
10
+ importer_run = ImporterRun.find(run_id)
10
11
  entry.build
11
12
  if entry.status == "Complete"
12
- ImporterRun.find(args[1]).increment!(:processed_records)
13
- ImporterRun.find(args[1]).increment!(:processed_works)
14
- ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
13
+ importer_run.increment!(:processed_records)
14
+ importer_run.increment!(:processed_works)
15
15
  else
16
16
  # do not retry here because whatever parse error kept you from creating a work will likely
17
17
  # keep preventing you from doing so.
18
- ImporterRun.find(args[1]).increment!(:failed_records)
19
- ImporterRun.find(args[1]).increment!(:failed_works)
20
- ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
18
+ importer_run.increment!(:failed_records)
19
+ importer_run.increment!(:failed_works)
21
20
  end
21
+ # Regardless of completion or not, we want to decrement the enqueued records.
22
+ importer_run.decrement!(:enqueued_records) unless importer_run.enqueued_records <= 0
23
+
22
24
  entry.save!
23
- entry.importer.current_run = ImporterRun.find(args[1])
25
+ entry.importer.current_run = importer_run
24
26
  entry.importer.record_status
25
- rescue Bulkrax::CollectionsCreatedError
26
- reschedule(args[0], args[1])
27
+ rescue Bulkrax::CollectionsCreatedError => e
28
+ Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
29
+ # You get 3 attempts at the above perform before we have the import exception cascade into
30
+ # the Sidekiq retry ecosystem.
31
+ # rubocop:disable Style/IfUnlessModifier
32
+ if time_to_live <= 1
33
+ raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
34
+ end
35
+ # rubocop:enable Style/IfUnlessModifier
36
+ reschedule(entry_id, run_id, time_to_live)
27
37
  end
28
38
  # rubocop:enable Rails/SkipsModelValidations
29
39
 
30
- def reschedule(entry_id, run_id)
31
- ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
40
+ def reschedule(entry_id, run_id, time_to_live)
41
+ ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
32
42
  end
33
43
  end
34
44
  end
data/app/matchers/bulkrax/application_matcher.rb CHANGED
@@ -6,6 +6,10 @@ module Bulkrax
6
6
  class ApplicationMatcher
7
7
  attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
8
8
 
9
+ # New parse methods will need to be added here; you'll also want to define a corresponding
10
+ # "parse_#{field}" method.
11
+ class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
12
+
9
13
  def initialize(args)
10
14
  args.each do |k, v|
11
15
  send("#{k}=", v)
@@ -30,7 +34,7 @@ module Bulkrax
30
34
 
31
35
  def process_split
32
36
  if self.split.is_a?(TrueClass)
33
- @result = @result.split(/\s*[:;|]\s*/) # default split by : ; |
37
+ @result = @result.split(Bulkrax.multi_value_element_split_on)
34
38
  elsif self.split
35
39
  result = @result.split(Regexp.new(self.split))
36
40
  @result = result.map(&:strip)
@@ -38,8 +42,6 @@ module Bulkrax
38
42
  end
39
43
 
40
44
  def process_parse
41
- # New parse methods will need to be added here
42
- parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
43
45
  # This accounts for prefixed matchers
44
46
  parser = parsed_fields.find { |field| to&.include? field }
45
47
 
data/app/models/bulkrax/csv_entry.rb CHANGED
@@ -7,7 +7,7 @@ module Bulkrax
7
7
  # We do too much in these entry classes. We need to extract the common logic from the various
8
8
  # entry models into a module that can be shared between them.
9
9
  class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
10
- serialize :raw_metadata, JSON
10
+ serialize :raw_metadata, Bulkrax::NormalizedJson
11
11
 
12
12
  def self.fields_from_data(data)
13
13
  data.headers.flatten.compact.uniq
@@ -18,7 +18,7 @@ module Bulkrax
18
18
  raise StandardError, 'CSV path empty' if path.blank?
19
19
  CSV.read(path,
20
20
  headers: true,
21
- header_converters: :symbol,
21
+ header_converters: ->(h) { h.to_sym },
22
22
  encoding: 'utf-8')
23
23
  end
24
24
 
@@ -36,10 +36,14 @@ module Bulkrax
36
36
 
37
37
  def build_metadata
38
38
  raise StandardError, 'Record not found' if record.nil?
39
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
39
+ unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
40
+ raise StandardError,
41
+ "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
42
+ end
40
43
 
41
44
  self.parsed_metadata = {}
42
45
  add_identifier
46
+ establish_factory_class
43
47
  add_ingested_metadata
44
48
  # TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
45
49
  add_collections
@@ -56,6 +60,12 @@ module Bulkrax
56
60
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
57
61
  end
58
62
 
63
+ def establish_factory_class
64
+ parser.model_field_mappings.each do |key|
65
+ add_metadata('model', record[key]) if record.key?(key)
66
+ end
67
+ end
68
+
59
69
  def add_metadata_for_model
60
70
  if factory_class == Collection
61
71
  add_collection_type_gid
@@ -81,7 +91,7 @@ module Bulkrax
81
91
  def add_file
82
92
  self.parsed_metadata['file'] ||= []
83
93
  if record['file']&.is_a?(String)
84
- self.parsed_metadata['file'] = record['file'].split(/\s*[;|]\s*/)
94
+ self.parsed_metadata['file'] = record['file'].split(Bulkrax.multi_value_element_split_on)
85
95
  elsif record['file'].is_a?(Array)
86
96
  self.parsed_metadata['file'] = record['file']
87
97
  end
@@ -107,7 +117,9 @@ module Bulkrax
107
117
  # Metadata required by Bulkrax for round-tripping
108
118
  def build_system_metadata
109
119
  self.parsed_metadata['id'] = hyrax_record.id
110
- self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
120
+ source_id = hyrax_record.send(work_identifier)
121
+ source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
122
+ self.parsed_metadata[source_identifier] = source_id
111
123
  self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
112
124
  end
113
125
 
@@ -176,7 +188,7 @@ module Bulkrax
176
188
  data = hyrax_record.send(key.to_s)
177
189
  if data.is_a?(ActiveTriples::Relation)
178
190
  if value['join']
179
- self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(' | ').to_s # TODO: make split char dynamic
191
+ self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(Bulkrax.multi_value_element_join_on).to_s
180
192
  else
181
193
  data.each_with_index do |d, i|
182
194
  self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
@@ -236,7 +248,7 @@ module Bulkrax
236
248
 
237
249
  def handle_join_on_export(key, values, join)
238
250
  if join
239
- parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
251
+ parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
240
252
  else
241
253
  values.each_with_index do |value, i|
242
254
  parsed_metadata["#{key}_#{i + 1}"] = value
@@ -260,7 +272,7 @@ module Bulkrax
260
272
  return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?
261
273
 
262
274
  identifiers = []
263
- split_references = record[parent_field_mapping].split(/\s*[;|]\s*/)
275
+ split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
264
276
  split_references.each do |c_reference|
265
277
  matching_collection_entries = importerexporter.entries.select do |e|
266
278
  (e.raw_metadata&.[](source_identifier) == c_reference) &&
data/app/models/bulkrax/entry.rb CHANGED
@@ -4,6 +4,7 @@ module Bulkrax
4
4
  # Custom error class for collections_created?
5
5
  class CollectionsCreatedError < RuntimeError; end
6
6
  class OAIError < RuntimeError; end
7
+
7
8
  class Entry < ApplicationRecord
8
9
  include Bulkrax::HasMatchers
9
10
  include Bulkrax::ImportBehavior
@@ -15,7 +16,7 @@ module Bulkrax
15
16
  alias importer importerexporter
16
17
  alias exporter importerexporter
17
18
 
18
- serialize :parsed_metadata, JSON
19
+ serialize :parsed_metadata, Bulkrax::NormalizedJson
19
20
  # Do not serialize raw_metadata as so we can support xml or other formats
20
21
  serialize :collection_ids, Array
21
22
 
data/app/models/bulkrax/importer.rb CHANGED
@@ -58,17 +58,26 @@ module Bulkrax
58
58
 
59
59
  # If field_mapping is empty, setup a default based on the export_properties
60
60
  def mapping
61
+ # rubocop:disable Style/IfUnlessModifier
61
62
  @mapping ||= if self.field_mapping.blank? || self.field_mapping == [{}]
62
63
  if parser.import_fields.present? || self.field_mapping == [{}]
63
- ActiveSupport::HashWithIndifferentAccess.new(
64
- parser.import_fields.reject(&:nil?).map do |m|
65
- Bulkrax.default_field_mapping.call(m)
66
- end.inject(:merge)
67
- )
64
+ default_field_mapping
68
65
  end
69
66
  else
70
- self.field_mapping
67
+ default_field_mapping.merge(self.field_mapping)
71
68
  end
69
+
70
+ # rubocop:enable Style/IfUnlessModifier
71
+ end
72
+
73
+ def default_field_mapping
74
+ return self.field_mapping if parser.import_fields.nil?
75
+
76
+ ActiveSupport::HashWithIndifferentAccess.new(
77
+ parser.import_fields.reject(&:nil?).map do |m|
78
+ Bulkrax.default_field_mapping.call(m)
79
+ end.inject(:merge)
80
+ )
72
81
  end
73
82
 
74
83
  def parser_fields
@@ -143,17 +152,13 @@ module Bulkrax
143
152
  import_objects(['relationship'])
144
153
  end
145
154
 
155
+ DEFAULT_OBJECT_TYPES = %w[collection work file_set relationship].freeze
156
+
146
157
  def import_objects(types_array = nil)
147
158
  self.only_updates ||= false
148
- types = types_array || %w[collection work file_set relationship]
149
- if parser.class == Bulkrax::CsvParser
150
- parser.create_objects(types)
151
- else
152
- types.each do |object_type|
153
- self.save if self.new_record? # Object needs to be saved for statuses
154
- parser.send("create_#{object_type.pluralize}")
155
- end
156
- end
159
+ self.save if self.new_record? # Object needs to be saved for statuses
160
+ types = types_array || DEFAULT_OBJECT_TYPES
161
+ parser.create_objects(types)
157
162
  rescue StandardError => e
158
163
  status_info(e)
159
164
  end
data/app/models/bulkrax/oai_entry.rb CHANGED
@@ -5,7 +5,7 @@ require 'ostruct'
5
5
 
6
6
  module Bulkrax
7
7
  class OaiEntry < Entry
8
- serialize :raw_metadata, JSON
8
+ serialize :raw_metadata, Bulkrax::NormalizedJson
9
9
 
10
10
  delegate :record, to: :raw_record
11
11
 
@@ -28,13 +28,16 @@ module Bulkrax
28
28
  def build_metadata
29
29
  self.parsed_metadata = {}
30
30
  self.parsed_metadata[work_identifier] = [record.header.identifier]
31
+ self.raw_metadata = { xml: record.metadata.to_s }
31
32
 
32
- record.metadata.children.each do |child|
33
- child.children.each do |node|
34
- add_metadata(node.name, node.content)
35
- end
36
- end
37
- add_metadata('thumbnail_url', thumbnail_url)
33
+ # We need to establish the #factory_class before we proceed with the metadata. See
34
+ # https://github.com/samvera-labs/bulkrax/issues/702 for further details.
35
+ #
36
+ # tl;dr - if we don't have the right factory_class we might skip properties that are
37
+ # specifically assigned to the factory class
38
+ establish_factory_class
39
+ add_metadata_from_record
40
+ add_thumbnail_url
38
41
 
39
42
  add_visibility
40
43
  add_rights_statement
@@ -53,18 +56,48 @@ module Bulkrax
53
56
  end
54
57
  end
55
58
 
59
+ # To ensure we capture the correct parse data, we first need to establish the factory_class.
60
+ # @see https://github.com/samvera-labs/bulkrax/issues/702
61
+ def establish_factory_class
62
+ model_field_names = parser.model_field_mappings
63
+
64
+ each_candidate_metadata_node do |node|
65
+ next unless model_field_names.include?(node.name)
66
+ add_metadata(node.name, node.content)
67
+ end
68
+ end
69
+
70
+ def add_metadata_from_record
71
+ each_candidate_metadata_node do |node|
72
+ add_metadata(node.name, node.content)
73
+ end
74
+ end
75
+
76
+ # A method that you could override to better handle the shape of the record's metadata.
77
+ # @yieldparam node [Object<#name, #content>]
78
+ def each_candidate_metadata_node
79
+ record.metadata.children.each do |child|
80
+ child.children.each do |node|
81
+ yield(node)
82
+ end
83
+ end
84
+ end
85
+
86
+ def add_thumbnail_url
87
+ add_metadata('thumbnail_url', thumbnail_url)
88
+ end
89
+
56
90
  # Retrieve list of collections for the entry; add to collection_ids
57
91
  # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
58
92
  # in this case, if 'All' is selected, records will not be added to a collection.
59
93
  def find_collection_ids
60
94
  return self.collection_ids if collections_created?
61
95
  if sets.blank? || parser.collection_name != 'all'
62
- # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
63
96
  collection = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
64
97
  self.collection_ids << collection.id if collection.present? && !self.collection_ids.include?(collection.id)
65
98
  else # All - collections should exist for all sets
66
99
  sets.each do |set|
67
- c = Collection.find_by(work_identifier => importerexporter.unique_collection_identifier(set.content))
100
+ c = find_collection(importerexporter.unique_collection_identifier(set.content))
68
101
  self.collection_ids << c.id if c.present? && !self.collection_ids.include?(c.id)
69
102
  end
70
103
  end
data/app/models/bulkrax/rdf_entry.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'rdf'
4
4
  module Bulkrax
5
5
  class RdfEntry < Entry
6
- serialize :raw_metadata, JSON
6
+ serialize :raw_metadata, Bulkrax::NormalizedJson
7
7
 
8
8
  def self.read_data(path)
9
9
  RDF::Reader.open(path)