bulkrax 7.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/concerns/loggable.rb +25 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  5. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  6. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  7. data/app/factories/bulkrax/object_factory.rb +135 -163
  8. data/app/factories/bulkrax/object_factory_interface.rb +483 -0
  9. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  10. data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
  11. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  12. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  13. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  14. data/app/jobs/bulkrax/delete_job.rb +3 -2
  15. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  16. data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
  17. data/app/jobs/bulkrax/importer_job.rb +18 -2
  18. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  19. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  20. data/app/models/bulkrax/csv_entry.rb +7 -6
  21. data/app/models/bulkrax/entry.rb +7 -11
  22. data/app/models/bulkrax/exporter.rb +2 -2
  23. data/app/models/bulkrax/importer.rb +1 -3
  24. data/app/models/bulkrax/oai_entry.rb +0 -3
  25. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  26. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  27. data/app/models/bulkrax/rdf_entry.rb +70 -69
  28. data/app/models/bulkrax/xml_entry.rb +0 -1
  29. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  30. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/file_factory.rb +178 -118
  32. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  33. data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
  34. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  36. data/app/parsers/bulkrax/application_parser.rb +31 -7
  37. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  38. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  41. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  42. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  43. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  44. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  45. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  46. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  47. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  48. data/app/views/bulkrax/entries/show.html.erb +9 -8
  49. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  50. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  51. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  52. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  53. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  54. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  55. data/app/views/bulkrax/importers/new.html.erb +1 -1
  56. data/app/views/bulkrax/importers/show.html.erb +1 -1
  57. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  58. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  59. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  60. data/config/locales/bulkrax.en.yml +7 -0
  61. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  62. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  63. data/lib/bulkrax/engine.rb +23 -6
  64. data/lib/bulkrax/version.rb +1 -1
  65. data/lib/bulkrax.rb +54 -52
  66. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  67. data/lib/tasks/bulkrax_tasks.rake +1 -0
  68. data/lib/tasks/reset.rake +4 -4
  69. metadata +25 -7
  70. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  71. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  72. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,483 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ ##
5
+ # @abstract
6
+ #
7
+ # The purpose of the object factory is to provide an interface for interacting
8
+ # with the underlying data repository's storage. Each application that mounts
9
+ # Bulkrax should configure the appropriate object factory (via
10
+ # `Bulkrax.object_factory=`).
11
+ #
12
+ # The class methods are for issuing queries/commands to the underlying
13
+ # repository.
14
+ #
15
+ # The instance methods are for mapping a {Bulkrax::Entry} to a corresponding
16
+ # data repository object (e.g. a Fedora Commons record or a Postgresql record
17
+ # via ActiveFedora::Base and/or Valkyrie).
18
+ #
19
+ # rubocop:disable Metrics/ClassLength
20
+ class ObjectFactoryInterface
21
+ extend ActiveModel::Callbacks
22
+ include DynamicRecordLookup
23
+ include Loggable
24
+
25
+ # We're inheriting from an ActiveRecord exception as that is something we
26
+ # know will be here; and something that the main_app will be expected to be
27
+ # able to handle.
28
+ class ObjectNotFoundError < ActiveRecord::RecordNotFound
29
+ end
30
+
31
+ # We're inheriting from an ActiveRecord exception as that is something
32
+ # we know will be here; and something that the main_app will be expected to be
33
+ # able to handle.
34
+ class RecordInvalid < ActiveRecord::RecordInvalid
35
+ end
36
+
37
+ ##
38
+ # @note This does not save either object. We need to do that in another
39
+ # loop. Why? Because we might be adding many items to the parent.
40
+ def self.add_child_to_parent_work(parent:, child:)
41
+ raise NotImplementedError, "#{self}.#{__method__}"
42
+ end
43
+
44
+ def self.add_resource_to_collection(collection:, resource:, user:)
45
+ raise NotImplementedError, "#{self}.#{__method__}"
46
+ end
47
+
48
+ ##
49
+ # Add the user to the collection; assuming the given collection is a
50
+ # Collection. This is also only something we use in Hyrax.
51
+ #
52
+ # @param collection [#id]
53
+ # @param user [User]
54
+ # @see Bulkrax.collection_model_class
55
+ def self.add_user_to_collection_permissions(collection:, user:)
56
+ return unless collection.is_a?(Bulkrax.collection_model_class)
57
+ return unless defined?(Hyrax)
58
+
59
+ permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)
60
+
61
+ # NOTE: Should we extract the specific logic here? Also, does it make
62
+ # sense to apply permissions to the permission template (and then update)
63
+ # instead of applying permissions directly to the collection?
64
+ Hyrax::PermissionTemplateAccess.find_or_create_by!(
65
+ permission_template_id: permission_template.id,
66
+ agent_id: user.user_key,
67
+ agent_type: 'user',
68
+ access: 'manage'
69
+ )
70
+
71
+ # NOTE: This is a bit surprising that we'd add admin as a group.
72
+ Hyrax::PermissionTemplateAccess.find_or_create_by!(
73
+ permission_template_id: permission_template.id,
74
+ agent_id: 'admin',
75
+ agent_type: 'group',
76
+ access: 'manage'
77
+ )
78
+
79
+ if permission_template.respond_to?(:reset_access_controls_for)
80
+ # Hyrax 4+
81
+ # must pass interpret_visibility: true to avoid clobbering provided visibility
82
+ permission_template.reset_access_controls_for(collection: collection, interpret_visibility: true)
83
+ elsif collection.respond_to?(:reset_access_controls!)
84
+ # Hyrax 3 or earlier
85
+ collection.reset_access_controls!
86
+ else
87
+ raise "Unable to reset access controls for #{collection.class} ID=#{collection.id}"
88
+ end
89
+ end
90
+
91
+ ##
92
+ # @yield when Rails application is running in test environment.
93
+ def self.clean!
94
+ return true unless Rails.env.test?
95
+ yield
96
+ end
97
+
98
+ ##
99
+ # @return [String]
100
+ def self.default_admin_set_id
101
+ if defined?(Hyrax::AdminSetCreateService::DEFAULT_ID)
102
+ return Hyrax::AdminSetCreateService::DEFAULT_ID
103
+ elsif defined?(AdminSet::DEFAULT_ID)
104
+ return AdminSet::DEFAULT_ID
105
+ else
106
+ return 'admin_set/default'
107
+ end
108
+ end
109
+
110
+ ##
111
+ # @return [Object] when we have an existing admin set.
112
+ # @return [NilClass] when the default admin set does not exist.
113
+ #
114
+ # @see .find_or_nil
115
+ def self.default_admin_set_or_nil
116
+ find_or_nil(default_admin_set_id)
117
+ end
118
+
119
+ ##
120
+ # @return [Array<String>]
121
+ def self.export_properties
122
+ raise NotImplementedError, "#{self}.#{__method__}"
123
+ end
124
+
125
+ ##
126
+ # @param field [String]
127
+ # @param model [Class]
128
+ #
129
+ # @return [TrueClass] when the given :field is a valid property on the given
130
+ # :model.
131
+ #
132
+ # @return [FalseClass] when the given :field is **not** a valid property on
133
+ # the given :model.
134
+ def self.field_supported?(field:, model:)
135
+ raise NotImplementedError, "#{self}.#{__method__}"
136
+ end
137
+
138
+ ##
139
+ # @param field [String]
140
+ # @param model [Class]
141
+ #
142
+ # @return [TrueClass] when the given :field is a multi-value property on the
143
+ # given :model.
144
+ # @return [FalseClass] when the given :field is a scalar (not
145
+ # multi-value) property on the given :model.
146
+ def self.field_multi_value?(field:, model:)
147
+ raise NotImplementedError, "#{self}.#{__method__}"
148
+ end
149
+
150
+ def self.find_or_create_default_admin_set
151
+ raise NotImplementedError, "#{self}.#{__method__}"
152
+ end
153
+
154
+ ##
155
+ # @param resource [Object]
156
+ #
157
+ # @return [Array<Object>] interrogate the given :resource and return an array
157
+ # of that resource's file sets. When the resource is a file set, return that
159
+ # file set as an Array of one element.
160
+ def self.file_sets_for(resource:)
161
+ raise NotImplementedError, "#{self}.#{__method__}"
162
+ end
163
+
164
+ ##
165
+ # @see ActiveFedora::Base.find
166
+ def self.find(id)
167
+ raise NotImplementedError, "#{self}.#{__method__}"
168
+ end
169
+
170
+ def self.find_or_nil(id)
171
+ find(id)
172
+ rescue NotImplementedError => e
173
+ raise e
174
+ rescue
175
+ nil
176
+ end
177
+
178
+ def self.publish(event:, **kwargs)
179
+ raise NotImplementedError, "#{self}.#{__method__}"
180
+ end
181
+
182
+ def self.query(q, **kwargs)
183
+ raise NotImplementedError, "#{self}.#{__method__}"
184
+ end
185
+
186
+ def self.save!(resource:, user:)
187
+ raise NotImplementedError, "#{self}.#{__method__}"
188
+ end
189
+
190
+ # rubocop:disable Metrics/ParameterLists
191
+ def self.search_by_property(value:, klass:, field: nil, search_field: nil, name_field: nil, verify_property: false)
192
+ raise NotImplementedError, "#{self}.#{__method__}"
193
+ end
194
+
195
+ def self.solr_name(field_name)
196
+ raise NotImplementedError, "#{self}.#{__method__}"
197
+ end
198
+
199
+ ##
200
+ # @param resources [Array<Object>]
201
+ def self.update_index(resources: [])
202
+ raise NotImplementedError, "#{self}.#{__method__}"
203
+ end
204
+
205
+ ##
206
+ # @param resource [Object] something that *might* have file_sets members.
207
+ def self.update_index_for_file_sets_of(resource:)
208
+ raise NotImplementedError, "#{self}.#{__method__}"
209
+ end
210
+ # rubocop:enable Metrics/ParameterLists
211
+
212
+ ##
213
+ # @api private
214
+ #
215
+ # These are the attributes that we assume all "work type" classes (e.g. the
216
+ # given :klass) will have in addition to their specific attributes.
217
+ #
218
+ # @return [Array<Symbol>]
219
+ # @see #permitted_attributes
220
+ class_attribute :base_permitted_attributes,
221
+ default: %i[
222
+ admin_set_id
223
+ edit_groups
224
+ edit_users
225
+ id
226
+ read_groups
227
+ visibility
228
+ visibility_during_embargo
229
+ embargo_release_date
230
+ visibility_after_embargo
231
+ visibility_during_lease
232
+ lease_expiration_date
233
+ visibility_after_lease
234
+ work_members_attributes
235
+ ]
236
+
237
+ # @return [Boolean]
238
+ #
239
+ # @example
240
+ # Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
241
+ #
242
+ # @see #transform_attributes
243
+ # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
244
+ # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
245
+ # concerning default behavior.
246
+ class_attribute :transformation_removes_blank_hash_values, default: false
247
+
248
+ define_model_callbacks :save, :create
249
+ attr_reader(
250
+ :attributes,
251
+ :importer_run_id,
252
+ :klass,
253
+ :object,
254
+ :related_parents_parsed_mapping,
255
+ :replace_files,
256
+ :source_identifier_value,
257
+ :update_files,
258
+ :user,
259
+ :work_identifier,
260
+ :work_identifier_search_field
261
+ )
262
+
263
+ # rubocop:disable Metrics/ParameterLists
264
+ def initialize(attributes:, source_identifier_value:, work_identifier:, work_identifier_search_field:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, importer_run_id: nil, update_files: false)
265
+ @attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
266
+ @replace_files = replace_files
267
+ @update_files = update_files
268
+ @user = user || User.batch_user
269
+ @work_identifier = work_identifier
270
+ @work_identifier_search_field = work_identifier_search_field
271
+ @related_parents_parsed_mapping = related_parents_parsed_mapping
272
+ @source_identifier_value = source_identifier_value
273
+ @klass = klass || Bulkrax.default_work_type.constantize
274
+ @importer_run_id = importer_run_id
275
+ end
276
+ # rubocop:enable Metrics/ParameterLists
277
+
278
+ ##
279
+ # NOTE: There has been a long-standing implementation where we might reset
280
+ # the @update_files when we call #file_attributes. As we refactor
281
+ # towards extracting a class, this attr_writer preserves the behavior.
282
+ #
283
+ # Jeremy here, I think the behavior of setting the instance variable when
284
+ # calling file_attributes is wrong, but now is not the time to untwine.
285
+ attr_writer :update_files
286
+
287
+ alias update_files? update_files
288
+
289
+ # An ActiveFedora bug when there are many habtm <-> has_many associations
290
+ # means they won't all get saved.
291
+ # https://github.com/projecthydra/active_fedora/issues/874 9+ years later,
292
+ # still open!
293
+ def create
294
+ attrs = transform_attributes
295
+ @object = klass.new
296
+ conditionally_set_reindex_extent
297
+ run_callbacks :save do
298
+ run_callbacks :create do
299
+ if klass == Bulkrax.collection_model_class
300
+ create_collection(attrs)
301
+ elsif klass == Bulkrax.file_model_class
302
+ create_file_set(attrs)
303
+ else
304
+ create_work(attrs)
305
+ end
306
+ end
307
+ end
308
+
309
+ apply_depositor_metadata
310
+ log_created(object)
311
+ end
312
+
313
+ def delete(_user)
314
+ raise NotImplementedError, "#{self.class}##{__method__}"
315
+ end
316
+
317
+ ##
318
+ # @api public
319
+ #
320
+ # @return [Object] when we've found the object by the entry's :id or by its
321
+ # source_identifier
322
+ # @return [FalseClass] when we cannot find the object.
323
+ def find
324
+ find_by_id || search_by_identifier || false
325
+ end
326
+
327
+ ##
328
+ # @abstract
329
+ #
330
+ # @return [Object] when we've found the object by the entry's :id or by its
331
+ # source_identifier
332
+ # @return [FalseClass] when we cannot find the object.
333
+ def find_by_id
334
+ raise NotImplementedError, "#{self.class}##{__method__}"
335
+ end
336
+
337
+ ##
338
+ # @return [Object] either the one found in persistence or the one created
339
+ # via the run method.
340
+ # @see .save!
341
+ def find_or_create
342
+ # Do we need to call save! This was how we previously did this but it
343
+ # seems odd that we'd not find it. Also, why not simply call create.
344
+ find || self.class.save!(object: run, user: @user)
345
+ end
346
+
347
+ def run
348
+ arg_hash = { id: attributes[:id], name: 'UPDATE', klass: klass }
349
+
350
+ @object = find
351
+ if object
352
+ conditionally_set_reindex_extent
353
+ ActiveSupport::Notifications.instrument('import.importer', arg_hash) { update }
354
+ else
355
+ ActiveSupport::Notifications.instrument('import.importer', arg_hash.merge(name: 'CREATE')) { create }
356
+ end
357
+ yield(object) if block_given?
358
+ object
359
+ end
360
+
361
+ def run!
362
+ self.run
363
+ # Create the error exception if the object is not validly saved for some
364
+ # reason
365
+ raise ObjectFactoryInterface::RecordInvalid, object if !object.persisted? || object.changed?
366
+ object
367
+ end
368
+
369
+ ##
370
+ # @return [FalseClass] when :source_identifier_value is blank or is not
371
+ # found via {.search_by_property} query.
372
+ # @return [Object] when we have a source_identifier_value value and we can
373
+ # find it in the data store.
374
+ def search_by_identifier
375
+ return false if source_identifier_value.blank?
376
+
377
+ self.class.search_by_property(
378
+ klass: klass,
379
+ search_field: work_identifier_search_field,
380
+ value: source_identifier_value,
381
+ name_field: work_identifier
382
+ )
383
+ end
384
+
385
+ def update
386
+ raise "Object doesn't exist" unless object
387
+ conditionally_destroy_existing_files
388
+
389
+ attrs = transform_attributes(update: true)
390
+ run_callbacks :save do
391
+ if klass == Bulkrax.collection_model_class
392
+ update_collection(attrs)
393
+ elsif klass == Bulkrax.file_model_class
394
+ update_file_set(attrs)
395
+ else
396
+ update_work(attrs)
397
+ end
398
+ end
399
+ apply_depositor_metadata
400
+ log_updated(object)
401
+ end
402
+
403
+ def add_user_to_collection_permissions(*args)
404
+ arguments = args.first
405
+ self.class.add_user_to_collection_permissions(**arguments)
406
+ end
407
+
408
+ private
409
+
410
+ def apply_depositor_metadata
411
+ object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
412
+ end
413
+
414
+ def clean_attrs(attrs)
415
+ # avoid the "ArgumentError: Identifier must be a string of size > 0 in
416
+ # order to be treeified" error when setting object.attributes
417
+ attrs.delete('id') if attrs['id'].blank?
418
+ attrs
419
+ end
420
+
421
+ def collection_type(attrs)
422
+ return attrs if attrs['collection_type_gid'].present?
423
+
424
+ attrs['collection_type_gid'] = Hyrax::CollectionType.find_or_create_default_collection_type.to_global_id.to_s
425
+ attrs
426
+ end
427
+
428
+ def conditionally_set_reindex_extent
429
+ return unless defined?(Hyrax::Adapters::NestingIndexAdapter)
430
+ return unless object.respond_to?(:reindex_extent)
431
+ object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
432
+ end
433
+
434
+ def conditionally_destroy_existing_files
435
+ return unless @replace_files
436
+
437
+ return if [Bulkrax.collection_model_class, Bulkrax.file_model_class].include?(klass)
438
+
439
+ destroy_existing_files
440
+ end
441
+
442
+ # Regardless of what the Parser gives us, these are the properties we are
443
+ # prepared to accept.
444
+ def permitted_attributes
445
+ klass.properties.keys.map(&:to_sym) + base_permitted_attributes
446
+ end
447
+
448
+ # Return a copy of the given attributes, such that all values that are empty
449
+ # or an array of all empty values are fully emptied. (See implementation
450
+ # details)
451
+ #
452
+ # @param attributes [Hash]
453
+ # @return [Hash]
454
+ #
455
+ # @see https://github.com/emory-libraries/dlp-curate/issues/1973
456
+ def remove_blank_hash_values(attributes)
457
+ dupe = attributes.dup
458
+ dupe.each do |key, values|
459
+ if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
460
+ dupe[key] = []
461
+ elsif values.is_a?(String) && values.empty?
462
+ dupe[key] = nil
463
+ end
464
+ end
465
+ dupe
466
+ end
467
+
468
+ # Override if we need to map the attributes from the parser in
469
+ # a way that is compatible with how the factory needs them.
470
+ def transform_attributes(update: false)
471
+ @transform_attributes = attributes.slice(*permitted_attributes)
472
+ @transform_attributes.merge!(file_attributes(update_files?)) if with_files
473
+ @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
474
+ update ? @transform_attributes.except(:id) : @transform_attributes
475
+ end
476
+
477
+ # update files is set, replace files is set or this is a create
478
+ def with_files
479
+ update_files || replace_files || !object
480
+ end
481
+ end
482
+ # rubocop:enable Metrics/ClassLength
483
+ end