bulkrax 7.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,491 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  ##
  # @abstract
  #
  # The purpose of the object factory is to provide an interface for interacting
  # with the underlying data repository's storage.  Each application that mounts
  # Bulkrax should configure the appropriate object factory (via
  # `Bulkrax.object_factory=`).
  #
  # The class methods are for issuing queries/commands to the underlying
  # repository.  Most of them raise NotImplementedError here and must be
  # overridden by a concrete factory.
  #
  # The instance methods are for mapping a {Bulkrax::Entry} to a corresponding
  # data repository object (e.g. a Fedora Commons record or a Postgresql record
  # via ActiveFedora::Base and/or Valkyrie).
  #
  # NOTE(review): the instance methods call #create_collection,
  # #create_file_set, #create_work, #update_collection, #update_file_set,
  # #update_work, #destroy_existing_files and #file_attributes, none of which
  # are defined in this class.  Presumably the concrete factories (or an
  # included module) supply them -- confirm against the subclasses.
  #
  # rubocop:disable Metrics/ClassLength
  class ObjectFactoryInterface
    extend ActiveModel::Callbacks
    include DynamicRecordLookup

    # We're inheriting from an ActiveRecord exception as that is something we
    # know will be here; and something that the main_app will be expected to be
    # able to handle.
    class ObjectNotFoundError < ActiveRecord::RecordNotFound
    end

    # We're inheriting from an ActiveRecord exception as that is something we
    # know will be here; and something that the main_app will be expected to be
    # able to handle.
    class RecordInvalid < ActiveRecord::RecordInvalid
    end

    ##
    # @abstract
    #
    # @note This does not save either object.  We need to do that in another
    #       loop.  Why?  Because we might be adding many items to the parent.
    #
    # @raise [NotImplementedError] always, in this interface class.
    def self.add_child_to_parent_work(parent:, child:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    def self.add_resource_to_collection(collection:, resource:, user:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # Add the user to the collection; assuming the given collection is a
    # Collection.  This is also only something we use in Hyrax.
    #
    # Grants 'manage' access on the collection's permission template to the
    # given user and to the 'admin' group, then resets the collection's access
    # controls.  Does nothing (returns nil) when the collection is not a
    # {Bulkrax.collection_model_class} or when Hyrax is not defined.
    #
    # @param collection [#id]
    # @param user [User]
    #
    # @raise [RuntimeError] when neither the permission template nor the
    #        collection offers a way to reset access controls.
    # @see Bulkrax.collection_model_class
    def self.add_user_to_collection_permissions(collection:, user:)
      return unless collection.is_a?(Bulkrax.collection_model_class)
      return unless defined?(Hyrax)

      permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)

      # NOTE: Should we extract the specific logic here?  Also, does it make
      # sense to apply permissions to the permission template (and then update)
      # instead of applying permissions directly to the collection?
      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: permission_template.id,
        agent_id: user.user_key,
        agent_type: 'user',
        access: 'manage'
      )

      # NOTE: This is a bit surprising that we'd add admin as a group.
      Hyrax::PermissionTemplateAccess.find_or_create_by!(
        permission_template_id: permission_template.id,
        agent_id: 'admin',
        agent_type: 'group',
        access: 'manage'
      )

      if permission_template.respond_to?(:reset_access_controls_for)
        # Hyrax 4+
        # must pass interpret_visibility: true to avoid clobbering provided visibility
        permission_template.reset_access_controls_for(collection: collection, interpret_visibility: true)
      elsif collection.respond_to?(:reset_access_controls!)
        # Hyrax 3 or earlier
        collection.reset_access_controls!
      else
        raise "Unable to reset access controls for #{collection.class} ID=#{collection.id}"
      end
    end

    ##
    # Guard for destructive clean-up: the given block only runs in the Rails
    # test environment.
    #
    # @yield when Rails application is running in test environment.
    # @return [TrueClass] when not in the test environment (the block is not
    #         called); otherwise the block's return value.
    def self.clean!
      return true unless Rails.env.test?
      yield
    end

    ##
    # Resolves the default admin set's ID, preferring the constant that newer
    # Hyrax versions define, then the AdminSet constant, then a literal
    # fallback.
    #
    # @return [String]
    def self.default_admin_set_id
      if defined?(Hyrax::AdminSetCreateService::DEFAULT_ID)
        Hyrax::AdminSetCreateService::DEFAULT_ID
      elsif defined?(AdminSet::DEFAULT_ID)
        AdminSet::DEFAULT_ID
      else
        'admin_set/default'
      end
    end

    ##
    # @return [Object] when we have an existing admin set.
    # @return [NilClass] when the default admin set does not exist.
    #
    # @see .find_or_nil
    def self.default_admin_set_or_nil
      find_or_nil(default_admin_set_id)
    end

    ##
    # @abstract
    #
    # @return [Array<String>]
    # @raise [NotImplementedError] always, in this interface class.
    def self.export_properties
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @param field [String]
    # @param model [Class]
    #
    # @return [TrueClass] when the given :field is a valid property on the given
    #         :model.
    # @return [FalseClass] when the given :field is **not** a valid property on
    #         the given :model.
    # @raise [NotImplementedError] always, in this interface class.
    def self.field_supported?(field:, model:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @param field [String]
    # @param model [Class]
    #
    # @return [TrueClass] when the given :field is a multi-value property on the
    #         given :model.
    # @return [FalseClass] when the given :field is a scalar (not multi-value)
    #         property on the given :model.
    # @raise [NotImplementedError] always, in this interface class.
    def self.field_multi_value?(field:, model:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    def self.find_or_create_default_admin_set
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @param resource [Object]
    #
    # @return [Array<Object>] interrogate the given :resource and return an
    #         array of its file sets.  When the resource is a file set, return
    #         that file set as an Array of one element.
    # @raise [NotImplementedError] always, in this interface class.
    def self.file_sets_for(resource:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @see ActiveFedora::Base.find
    # @raise [NotImplementedError] always, in this interface class.
    def self.find(id)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # Like {.find}, but returns nil instead of raising when the lookup fails.
    #
    # @param id [Object] forwarded to {.find}.
    # @return [Object, NilClass]
    # @raise [NotImplementedError] when {.find} has not been implemented.
    def self.find_or_nil(id)
      find(id)
    rescue NotImplementedError
      # A missing implementation is a programming error, never a "not found".
      # (NotImplementedError descends from ScriptError rather than
      # StandardError, so the catch-all below would not swallow it in any case;
      # the explicit clause keeps that intent obvious.)
      raise
    rescue StandardError
      nil
    end

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    def self.publish(event:, **kwargs)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    def self.query(q, **kwargs)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @param resource [Object] the repository object to persist.
    # @param user [User]
    # @raise [NotImplementedError] always, in this interface class.
    def self.save!(resource:, user:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    #
    # rubocop:disable Metrics/ParameterLists
    def self.search_by_property(value:, klass:, field: nil, search_field: nil, name_field: nil, verify_property: false)
      raise NotImplementedError, "#{self}.#{__method__}"
    end
    # rubocop:enable Metrics/ParameterLists

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    def self.solr_name(field_name)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @param resources [Array<Object>]
    # @raise [NotImplementedError] always, in this interface class.
    def self.update_index(resources: [])
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @abstract
    #
    # @param resource [Object] something that *might* have file_sets members.
    # @raise [NotImplementedError] always, in this interface class.
    def self.update_index_for_file_sets_of(resource:)
      raise NotImplementedError, "#{self}.#{__method__}"
    end

    ##
    # @api private
    #
    # These are the attributes that we assume all "work type" classes (e.g. the
    # given :klass) will have in addition to their specific attributes.
    #
    # @return [Array<Symbol>]
    # @see #permitted_attributes
    class_attribute :base_permitted_attributes,
                    default: %i[
                      admin_set_id
                      edit_groups
                      edit_users
                      id
                      read_groups
                      visibility
                      work_members_attributes
                    ]

    # @return [Boolean]
    #
    # @example
    #   Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
    #
    # @see #transform_attributes
    # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
    # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
    #      concerning default behavior.
    class_attribute :transformation_removes_blank_hash_values, default: false

    define_model_callbacks :save, :create
    attr_reader(
      :attributes,
      :importer_run_id,
      :klass,
      :object,
      :related_parents_parsed_mapping,
      :replace_files,
      :source_identifier_value,
      :update_files,
      :user,
      :work_identifier,
      :work_identifier_search_field
    )

    ##
    # @param attributes [Hash] wrapped in a HashWithIndifferentAccess.
    # @param source_identifier_value [Object] value used by
    #        {#search_by_identifier}.
    # @param work_identifier [Object] passed as :name_field when searching, and
    #        used as the key into {#attributes} for log messages.
    # @param work_identifier_search_field [Object] passed as :search_field when
    #        searching.
    # @param related_parents_parsed_mapping [Object, nil]
    # @param replace_files [Boolean]
    # @param user [User, nil] defaults to `User.batch_user`.
    # @param klass [Class, nil] defaults to the constantized
    #        `Bulkrax.default_work_type`.
    # @param importer_run_id [Object, nil]
    # @param update_files [Boolean]
    #
    # rubocop:disable Metrics/ParameterLists
    def initialize(attributes:, source_identifier_value:, work_identifier:, work_identifier_search_field:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, importer_run_id: nil, update_files: false)
      @attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
      @replace_files = replace_files
      @update_files = update_files
      @user = user || User.batch_user
      @work_identifier = work_identifier
      @work_identifier_search_field = work_identifier_search_field
      @related_parents_parsed_mapping = related_parents_parsed_mapping
      @source_identifier_value = source_identifier_value
      @klass = klass || Bulkrax.default_work_type.constantize
      @importer_run_id = importer_run_id
    end
    # rubocop:enable Metrics/ParameterLists

    ##
    # NOTE: There has been a long-standing implementation where we might reset
    #       the @update_files when we call #file_attributes.  As we refactor
    #       towards extracting a class, this attr_writer preserves the behavior.
    #
    # Jeremy here, I think the behavior of setting the instance variable when
    # calling file_attributes is wrong, but now is not the time to untwine.
    attr_writer :update_files

    alias update_files? update_files

    ##
    # Instantiates a new {#klass}, then dispatches to the collection, file set
    # or work creation hook inside the :save and :create callbacks.  Afterwards
    # applies depositor metadata and logs the creation.  Read the result from
    # {#object}; the return value is whatever the logging call returns.
    #
    # An ActiveFedora bug when there are many habtm <-> has_many associations
    # means they won't all get saved.
    # https://github.com/projecthydra/active_fedora/issues/874 9+ years later,
    # still open!
    def create
      attrs = transform_attributes
      @object = klass.new
      conditionally_set_reindex_extent
      run_callbacks :save do
        run_callbacks :create do
          if klass == Bulkrax.collection_model_class
            create_collection(attrs)
          elsif klass == Bulkrax.file_model_class
            create_file_set(attrs)
          else
            create_work(attrs)
          end
        end
      end

      apply_depositor_metadata
      log_created(object)
    end

    ##
    # @abstract
    #
    # @raise [NotImplementedError] always, in this interface class.
    def delete(_user)
      raise NotImplementedError, "#{self.class}##{__method__}"
    end

    ##
    # @api public
    #
    # @return [Object] when we've found the object by the entry's :id or by its
    #         source_identifier
    # @return [FalseClass] when we cannot find the object.
    def find
      find_by_id || search_by_identifier || false
    end

    ##
    # @abstract
    #
    # The first lookup attempted by {#find}; a falsy result makes {#find} fall
    # back to {#search_by_identifier}.
    #
    # @return [Object] when we've found the object by the entry's :id
    # @return [FalseClass, NilClass] when we cannot find the object.
    # @raise [NotImplementedError] always, in this interface class.
    def find_by_id
      raise NotImplementedError, "#{self.class}##{__method__}"
    end

    ##
    # @return [Object] either the one found in persistence or the one created
    #         via the run method.
    # @see .save!
    def find_or_create
      # Do we need to call save!  This was how we previously did this but it
      # seems odd that we'd not find it.  Also, why not simply call create.
      #
      # The keyword must be :resource to match the .save! signature declared by
      # this interface; passing :object raises ArgumentError.
      find || self.class.save!(resource: run, user: @user)
    end

    ##
    # Finds the object and updates it, or creates it when it cannot be found.
    # Either path is wrapped in an 'import.importer' notification whose :name
    # is 'UPDATE' or 'CREATE' respectively.
    #
    # @yieldparam object [Object] the updated or created object, when a block
    #             is given.
    # @return [Object] the updated or created object.
    def run
      arg_hash = { id: attributes[:id], name: 'UPDATE', klass: klass }

      @object = find
      if object
        conditionally_set_reindex_extent
        ActiveSupport::Notifications.instrument('import.importer', arg_hash) { update }
      else
        ActiveSupport::Notifications.instrument('import.importer', arg_hash.merge(name: 'CREATE')) { create }
      end
      yield(object) if block_given?
      object
    end

    ##
    # Same as {#run}, but insists that the object ended up persisted with no
    # pending changes.
    #
    # @return [Object]
    # @raise [ObjectFactoryInterface::RecordInvalid] when the object is not
    #        persisted or still reports changes.
    def run!
      run
      # Create the error exception if the object is not validly saved for some
      # reason
      raise ObjectFactoryInterface::RecordInvalid, object if !object.persisted? || object.changed?
      object
    end

    ##
    # @return [FalseClass] when :source_identifier_value is blank or is not
    #         found via {.search_by_property} query.
    # @return [Object] when we have a source_identifier_value value and we can
    #         find it in the data store.
    def search_by_identifier
      return false if source_identifier_value.blank?

      self.class.search_by_property(
        klass: klass,
        search_field: work_identifier_search_field,
        value: source_identifier_value,
        name_field: work_identifier
      )
    end

    ##
    # Updates the already-found {#object}: optionally destroys its existing
    # files, then dispatches to the collection, file set or work update hook
    # inside the :save callbacks.  Afterwards applies depositor metadata and
    # logs the update.
    #
    # @raise [RuntimeError] when {#object} has not been set.
    def update
      raise "Object doesn't exist" unless object
      conditionally_destroy_existing_files

      attrs = transform_attributes(update: true)
      run_callbacks :save do
        if klass == Bulkrax.collection_model_class
          update_collection(attrs)
        elsif klass == Bulkrax.file_model_class
          update_file_set(attrs)
        else
          update_work(attrs)
        end
      end
      apply_depositor_metadata
      log_updated(object)
    end

    ##
    # Instance-level convenience that forwards to
    # {.add_user_to_collection_permissions}.
    #
    # @param args [Array<Hash>] the first element is splatted as the keyword
    #        arguments (:collection and :user) of the class method.
    def add_user_to_collection_permissions(*args)
      arguments = args.first
      self.class.add_user_to_collection_permissions(**arguments)
    end

    # @param obj [#id] the object that was just created.
    def log_created(obj)
      msg = "Created #{klass.model_name.human} #{obj.id}"
      Rails.logger.info("#{msg} (#{Array(attributes[work_identifier]).first})")
    end

    # @param obj [#id] the object that was just updated.
    def log_updated(obj)
      msg = "Updated #{klass.model_name.human} #{obj.id}"
      Rails.logger.info("#{msg} (#{Array(attributes[work_identifier]).first})")
    end

    # @param obj [#id] the object whose files were just deleted.
    def log_deleted_fs(obj)
      msg = "Deleted All Files from #{obj.id}"
      Rails.logger.info("#{msg} (#{Array(attributes[work_identifier]).first})")
    end

    private

    # Assigns the depositor (and saves) only when the object has none yet.
    def apply_depositor_metadata
      object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
    end

    # Drops a blank 'id' entry from the given attributes (mutating them).
    #
    # @param attrs [Hash]
    # @return [Hash] the same attrs object.
    def clean_attrs(attrs)
      # avoid the "ArgumentError: Identifier must be a string of size > 0 in
      # order to be treeified" error when setting object.attributes
      attrs.delete('id') if attrs['id'].blank?
      attrs
    end

    # Ensures the attributes carry a 'collection_type_gid', falling back to
    # Hyrax's default collection type (mutating the given attrs).
    #
    # @param attrs [Hash]
    # @return [Hash] the same attrs object.
    def collection_type(attrs)
      return attrs if attrs['collection_type_gid'].present?

      attrs['collection_type_gid'] = Hyrax::CollectionType.find_or_create_default_collection_type.to_global_id.to_s
      attrs
    end

    # Asks for a limited reindex, but only when Hyrax's nesting index adapter
    # is loaded and the object supports #reindex_extent.
    def conditionally_set_reindex_extent
      return unless defined?(Hyrax::Adapters::NestingIndexAdapter)
      return unless object.respond_to?(:reindex_extent)
      object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
    end

    # Destroys the object's existing files when :replace_files was requested;
    # collections and file sets are skipped.
    def conditionally_destroy_existing_files
      return unless @replace_files

      return if [Bulkrax.collection_model_class, Bulkrax.file_model_class].include?(klass)

      destroy_existing_files
    end

    # Regardless of what the Parser gives us, these are the properties we are
    # prepared to accept.
    #
    # @return [Array<Symbol>]
    def permitted_attributes
      klass.properties.keys.map(&:to_sym) + base_permitted_attributes
    end

    # Return a copy of the given attributes, such that all values that are empty
    # or an array of all empty values are fully emptied.  (See implementation
    # details)
    #
    # An Array made up solely of empty Strings (including an empty Array)
    # becomes `[]`; an empty String becomes `nil`; everything else is kept.
    #
    # @param attributes [Hash]
    # @return [Hash]
    #
    # @see https://github.com/emory-libraries/dlp-curate/issues/1973
    def remove_blank_hash_values(attributes)
      dupe = attributes.dup
      dupe.each do |key, values|
        if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
          dupe[key] = []
        elsif values.is_a?(String) && values.empty?
          dupe[key] = nil
        end
      end
      dupe
    end

    # Override if we need to map the attributes from the parser in
    # a way that is compatible with how the factory needs them.
    #
    # @param update [Boolean] when true, the :id key is left out of the result.
    # @return [Hash] the permitted attributes, plus file attributes when
    #         {#with_files} is truthy.
    def transform_attributes(update: false)
      @transform_attributes = attributes.slice(*permitted_attributes)
      @transform_attributes.merge!(file_attributes(update_files?)) if with_files
      @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
      update ? @transform_attributes.except(:id) : @transform_attributes
    end

    # update files is set, replace files is set or this is a create
    def with_files
      update_files || replace_files || !object
    end
  end
  # rubocop:enable Metrics/ClassLength
end