bulkrax 7.0.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,491 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ ##
5
+ # @abstract
6
+ #
7
+ # The purpose of the object factory is to provide an interface for interacting
8
+ # with the underlying data repository's storage. Each application that mounts
9
+ # Bulkrax should configure the appropriate object factory (via
10
+ # `Bulkrax.object_factory=`).
11
+ #
12
+ # The class methods are for issuing queries/commands to the underlying
13
+ # repository.
14
+ #
15
+ # The instance methods are for mapping a {Bulkrax::Entry} to a corresponding
16
+ # data repository object (e.g. a Fedora Commons record or a Postgresql record
17
+ # via ActiveFedora::Base and/or Valkyrie).
18
+ #
19
+ # rubocop:disable Metrics/ClassLength
20
+ class ObjectFactoryInterface
21
+ extend ActiveModel::Callbacks
22
+ include DynamicRecordLookup
23
+
24
+ # We're inheriting from an ActiveRecord exception as that is something we
25
+ # know will be here; and something that the main_app will be expected to be
26
+ # able to handle.
27
+ class ObjectNotFoundError < ActiveRecord::RecordNotFound
28
+ end
29
+
30
+ # We're inheriting from an ActiveRecord exception as that is something
31
+ # we know will be here; and something that the main_app will be expected to be
32
+ # able to handle.
33
+ class RecordInvalid < ActiveRecord::RecordInvalid
34
+ end
35
+
36
+ ##
37
+ # @note This does not save either object. We need to do that in another
38
+ # loop. Why? Because we might be adding many items to the parent.
39
+ def self.add_child_to_parent_work(parent:, child:)
40
+ raise NotImplementedError, "#{self}.#{__method__}"
41
+ end
42
+
43
+ def self.add_resource_to_collection(collection:, resource:, user:)
44
+ raise NotImplementedError, "#{self}.#{__method__}"
45
+ end
46
+
47
+ ##
48
+ # Add the user to the collection; assuming the given collection is a
49
+ # Collection. This is also only something we use in Hyrax.
50
+ #
51
+ # @param collection [#id]
52
+ # @param user [User]
53
+ # @see Bulkrax.collection_model_class
54
+ def self.add_user_to_collection_permissions(collection:, user:)
55
+ return unless collection.is_a?(Bulkrax.collection_model_class)
56
+ return unless defined?(Hyrax)
57
+
58
+ permission_template = Hyrax::PermissionTemplate.find_or_create_by!(source_id: collection.id)
59
+
60
+ # NOTE: Should we extract the specific logic here? Also, does it make
61
+ # sense to apply permissions to the permission template (and then update)
62
+ # instead of applying permissions directly to the collection?
63
+ Hyrax::PermissionTemplateAccess.find_or_create_by!(
64
+ permission_template_id: permission_template.id,
65
+ agent_id: user.user_key,
66
+ agent_type: 'user',
67
+ access: 'manage'
68
+ )
69
+
70
+ # NOTE: This is a bit surprising that we'd add admin as a group.
71
+ Hyrax::PermissionTemplateAccess.find_or_create_by!(
72
+ permission_template_id: permission_template.id,
73
+ agent_id: 'admin',
74
+ agent_type: 'group',
75
+ access: 'manage'
76
+ )
77
+
78
+ if permission_template.respond_to?(:reset_access_controls_for)
79
+ # Hyrax 4+
80
+ # must pass interpret_visibility: true to avoid clobbering provided visibility
81
+ permission_template.reset_access_controls_for(collection: collection, interpret_visibility: true)
82
+ elsif collection.respond_to?(:reset_access_controls!)
83
+ # Hyrax 3 or earlier
84
+ collection.reset_access_controls!
85
+ else
86
+ raise "Unable to reset access controls for #{collection.class} ID=#{collection.id}"
87
+ end
88
+ end
89
+
90
+ ##
91
+ # @yield when Rails application is running in test environment.
92
+ def self.clean!
93
+ return true unless Rails.env.test?
94
+ yield
95
+ end
96
+
97
+ ##
98
+ # @return [String]
99
+ def self.default_admin_set_id
100
+ if defined?(Hyrax::AdminSetCreateService::DEFAULT_ID)
101
+ return Hyrax::AdminSetCreateService::DEFAULT_ID
102
+ elsif defined?(AdminSet::DEFAULT_ID)
103
+ return AdminSet::DEFAULT_ID
104
+ else
105
+ return 'admin_set/default'
106
+ end
107
+ end
108
+
109
+ ##
110
+ # @return [Object] when we have an existing admin set.
111
+ # @return [NilClass] when the default admin set does not exist.
112
+ #
113
+ # @see .find_or_nil
114
+ def self.default_admin_set_or_nil
115
+ find_or_nil(default_admin_set_id)
116
+ end
117
+
118
+ ##
119
+ # @return [Array<String>]
120
+ def self.export_properties
121
+ raise NotImplementedError, "#{self}.#{__method__}"
122
+ end
123
+
124
+ ##
125
+ # @param field [String]
126
+ # @param model [Class]
127
+ #
128
+ # @return [TrueClass] when the given :field is a valid property on the given
129
+ # :model.
130
+ #
131
+ # @return [FalseClass] when the given :field is **not** a valid property on
132
+ # the given :model.
133
+ def self.field_supported?(field:, model:)
134
+ raise NotImplementedError, "#{self}.#{__method__}"
135
+ end
136
+
137
+ ##
138
+ # @param field [String]
139
+ # @param model [Class]
140
+ #
141
+ # @return [TrueClass] when the given :field is a multi-value property on the
142
+ # given :model.
143
+ # @return [FalseClass] when the given :field is a scalar (not
143
+ # multi-value) property on the given :model.
145
+ def self.field_multi_value?(field:, model:)
146
+ raise NotImplementedError, "#{self}.#{__method__}"
147
+ end
148
+
149
+ def self.find_or_create_default_admin_set
150
+ raise NotImplementedError, "#{self}.#{__method__}"
151
+ end
152
+
153
+ ##
154
+ # @param resource [Object]
155
+ #
156
+ # @return [Array<Object>] interrogate the given :resource and return an array
157
+ # of the resource's file sets. When the resource is a file set, return that
158
+ # file set as an Array of one element.
159
+ def self.file_sets_for(resource:)
160
+ raise NotImplementedError, "#{self}.#{__method__}"
161
+ end
162
+
163
+ ##
164
+ # @see ActiveFedora::Base.find
165
+ def self.find(id)
166
+ raise NotImplementedError, "#{self}.#{__method__}"
167
+ end
168
+
169
+ def self.find_or_nil(id)
170
+ find(id)
171
+ rescue NotImplementedError => e
172
+ raise e
173
+ rescue
174
+ nil
175
+ end
176
+
177
+ def self.publish(event:, **kwargs)
178
+ raise NotImplementedError, "#{self}.#{__method__}"
179
+ end
180
+
181
+ def self.query(q, **kwargs)
182
+ raise NotImplementedError, "#{self}.#{__method__}"
183
+ end
184
+
185
+ def self.save!(resource:, user:)
186
+ raise NotImplementedError, "#{self}.#{__method__}"
187
+ end
188
+
189
+ # rubocop:disable Metrics/ParameterLists
190
+ def self.search_by_property(value:, klass:, field: nil, search_field: nil, name_field: nil, verify_property: false)
191
+ raise NotImplementedError, "#{self}.#{__method__}"
192
+ end
193
+
194
+ def self.solr_name(field_name)
195
+ raise NotImplementedError, "#{self}.#{__method__}"
196
+ end
197
+
198
+ ##
199
+ # @param resources [Array<Object>]
200
+ def self.update_index(resources: [])
201
+ raise NotImplementedError, "#{self}.#{__method__}"
202
+ end
203
+
204
+ ##
205
+ # @param resource [Object] something that *might* have file_sets members.
206
+ def self.update_index_for_file_sets_of(resource:)
207
+ raise NotImplementedError, "#{self}.#{__method__}"
208
+ end
209
+ # rubocop:enable Metrics/ParameterLists
210
+
211
+ ##
212
+ # @api private
213
+ #
214
+ # These are the attributes that we assume all "work type" classes (e.g. the
215
+ # given :klass) will have in addition to their specific attributes.
216
+ #
217
+ # @return [Array<Symbol>]
218
+ # @see #permitted_attributes
219
+ class_attribute :base_permitted_attributes,
220
+ default: %i[
221
+ admin_set_id
222
+ edit_groups
223
+ edit_users
224
+ id
225
+ read_groups
226
+ visibility
227
+ work_members_attributes
228
+ ]
229
+
230
+ # @return [Boolean]
231
+ #
232
+ # @example
233
+ # Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
234
+ #
235
+ # @see #transform_attributes
236
+ # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
237
+ # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
238
+ # concerning default behavior.
239
+ class_attribute :transformation_removes_blank_hash_values, default: false
240
+
241
+ define_model_callbacks :save, :create
242
+ attr_reader(
243
+ :attributes,
244
+ :importer_run_id,
245
+ :klass,
246
+ :object,
247
+ :related_parents_parsed_mapping,
248
+ :replace_files,
249
+ :source_identifier_value,
250
+ :update_files,
251
+ :user,
252
+ :work_identifier,
253
+ :work_identifier_search_field
254
+ )
255
+
256
+ # rubocop:disable Metrics/ParameterLists
257
+ def initialize(attributes:, source_identifier_value:, work_identifier:, work_identifier_search_field:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, importer_run_id: nil, update_files: false)
258
+ @attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
259
+ @replace_files = replace_files
260
+ @update_files = update_files
261
+ @user = user || User.batch_user
262
+ @work_identifier = work_identifier
263
+ @work_identifier_search_field = work_identifier_search_field
264
+ @related_parents_parsed_mapping = related_parents_parsed_mapping
265
+ @source_identifier_value = source_identifier_value
266
+ @klass = klass || Bulkrax.default_work_type.constantize
267
+ @importer_run_id = importer_run_id
268
+ end
269
+ # rubocop:enable Metrics/ParameterLists
270
+
271
+ ##
272
+ # NOTE: There has been a long-standing implementation where we might reset
273
+ # the @update_files when we call #file_attributes. As we refactor
274
+ # towards extracting a class, this attr_writer preserves the behavior.
275
+ #
276
+ # Jeremy here, I think the behavior of setting the instance variable when
277
+ # calling file_attributes is wrong, but now is not the time to untwine.
278
+ attr_writer :update_files
279
+
280
+ alias update_files? update_files
281
+
282
+ # An ActiveFedora bug when there are many habtm <-> has_many associations
283
+ # means they won't all get saved.
284
+ # https://github.com/projecthydra/active_fedora/issues/874 9+ years later,
285
+ # still open!
286
+ def create
287
+ attrs = transform_attributes
288
+ @object = klass.new
289
+ conditionally_set_reindex_extent
290
+ run_callbacks :save do
291
+ run_callbacks :create do
292
+ if klass == Bulkrax.collection_model_class
293
+ create_collection(attrs)
294
+ elsif klass == Bulkrax.file_model_class
295
+ create_file_set(attrs)
296
+ else
297
+ create_work(attrs)
298
+ end
299
+ end
300
+ end
301
+
302
+ apply_depositor_metadata
303
+ log_created(object)
304
+ end
305
+
306
+ def delete(_user)
307
+ raise NotImplementedError, "#{self.class}##{__method__}"
308
+ end
309
+
310
+ ##
311
+ # @api public
312
+ #
313
+ # @return [Object] when we've found the object by the entry's :id or by its
314
+ # source_identifier
315
+ # @return [FalseClass] when we cannot find the object.
316
+ def find
317
+ find_by_id || search_by_identifier || false
318
+ end
319
+
320
+ ##
321
+ # @abstract
322
+ #
323
+ # @return [Object] when we've found the object by the entry's :id or by its
324
+ # source_identifier
325
+ # @return [FalseClass] when we cannot find the object.
326
+ def find_by_id
327
+ raise NotImplementedError, "#{self.class}##{__method__}"
328
+ end
329
+
330
+ ##
331
+ # @return [Object] either the one found in persistence or the one created
332
+ # via the run method.
333
+ # @see .save!
334
+ def find_or_create
335
+ # Do we need to call save! This was how we previously did this but it
336
+ # seems odd that we'd not find it. Also, why not simply call create.
337
+ find || self.class.save!(object: run, user: @user)
338
+ end
339
+
340
+ def run
341
+ arg_hash = { id: attributes[:id], name: 'UPDATE', klass: klass }
342
+
343
+ @object = find
344
+ if object
345
+ conditionally_set_reindex_extent
346
+ ActiveSupport::Notifications.instrument('import.importer', arg_hash) { update }
347
+ else
348
+ ActiveSupport::Notifications.instrument('import.importer', arg_hash.merge(name: 'CREATE')) { create }
349
+ end
350
+ yield(object) if block_given?
351
+ object
352
+ end
353
+
354
+ def run!
355
+ self.run
356
+ # Create the error exception if the object is not validly saved for some
357
+ # reason
358
+ raise ObjectFactoryInterface::RecordInvalid, object if !object.persisted? || object.changed?
359
+ object
360
+ end
361
+
362
+ ##
363
+ # @return [FalseClass] when :source_identifier_value is blank or is not
364
+ # found via {.search_by_property} query.
365
+ # @return [Object] when we have a source_identifier_value value and we can
366
+ # find it in the data store.
367
+ def search_by_identifier
368
+ return false if source_identifier_value.blank?
369
+
370
+ self.class.search_by_property(
371
+ klass: klass,
372
+ search_field: work_identifier_search_field,
373
+ value: source_identifier_value,
374
+ name_field: work_identifier
375
+ )
376
+ end
377
+
378
+ def update
379
+ raise "Object doesn't exist" unless object
380
+ conditionally_destroy_existing_files
381
+
382
+ attrs = transform_attributes(update: true)
383
+ run_callbacks :save do
384
+ if klass == Bulkrax.collection_model_class
385
+ update_collection(attrs)
386
+ elsif klass == Bulkrax.file_model_class
387
+ update_file_set(attrs)
388
+ else
389
+ update_work(attrs)
390
+ end
391
+ end
392
+ apply_depositor_metadata
393
+ log_updated(object)
394
+ end
395
+
396
+ def add_user_to_collection_permissions(*args)
397
+ arguments = args.first
398
+ self.class.add_user_to_collection_permissions(**arguments)
399
+ end
400
+
401
+ def log_created(obj)
402
+ msg = "Created #{klass.model_name.human} #{obj.id}"
403
+ Rails.logger.info("#{msg} (#{Array(attributes[work_identifier]).first})")
404
+ end
405
+
406
+ def log_updated(obj)
407
+ msg = "Updated #{klass.model_name.human} #{obj.id}"
408
+ Rails.logger.info("#{msg} (#{Array(attributes[work_identifier]).first})")
409
+ end
410
+
411
+ def log_deleted_fs(obj)
412
+ msg = "Deleted All Files from #{obj.id}"
413
+ Rails.logger.info("#{msg} (#{Array(attributes[work_identifier]).first})")
414
+ end
415
+
416
+ private
417
+
418
+ def apply_depositor_metadata
419
+ object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
420
+ end
421
+
422
+ def clean_attrs(attrs)
423
+ # avoid the "ArgumentError: Identifier must be a string of size > 0 in
424
+ # order to be treeified" error when setting object.attributes
425
+ attrs.delete('id') if attrs['id'].blank?
426
+ attrs
427
+ end
428
+
429
+ def collection_type(attrs)
430
+ return attrs if attrs['collection_type_gid'].present?
431
+
432
+ attrs['collection_type_gid'] = Hyrax::CollectionType.find_or_create_default_collection_type.to_global_id.to_s
433
+ attrs
434
+ end
435
+
436
+ def conditionally_set_reindex_extent
437
+ return unless defined?(Hyrax::Adapters::NestingIndexAdapter)
438
+ return unless object.respond_to?(:reindex_extent)
439
+ object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
440
+ end
441
+
442
+ def conditionally_destroy_existing_files
443
+ return unless @replace_files
444
+
445
+ return if [Bulkrax.collection_model_class, Bulkrax.file_model_class].include?(klass)
446
+
447
+ destroy_existing_files
448
+ end
449
+
450
+ # Regardless of what the Parser gives us, these are the properties we are
451
+ # prepared to accept.
452
+ def permitted_attributes
453
+ klass.properties.keys.map(&:to_sym) + base_permitted_attributes
454
+ end
455
+
456
+ # Return a copy of the given attributes, such that all values that are empty
457
+ # or an array of all empty values are fully emptied. (See implementation
458
+ # details)
459
+ #
460
+ # @param attributes [Hash]
461
+ # @return [Hash]
462
+ #
463
+ # @see https://github.com/emory-libraries/dlp-curate/issues/1973
464
+ def remove_blank_hash_values(attributes)
465
+ dupe = attributes.dup
466
+ dupe.each do |key, values|
467
+ if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
468
+ dupe[key] = []
469
+ elsif values.is_a?(String) && values.empty?
470
+ dupe[key] = nil
471
+ end
472
+ end
473
+ dupe
474
+ end
475
+
476
+ # Override if we need to map the attributes from the parser in
477
+ # a way that is compatible with how the factory needs them.
478
+ def transform_attributes(update: false)
479
+ @transform_attributes = attributes.slice(*permitted_attributes)
480
+ @transform_attributes.merge!(file_attributes(update_files?)) if with_files
481
+ @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
482
+ update ? @transform_attributes.except(:id) : @transform_attributes
483
+ end
484
+
485
+ # update files is set, replace files is set or this is a create
486
+ def with_files
487
+ update_files || replace_files || !object
488
+ end
489
+ end
490
+ # rubocop:enable Metrics/ClassLength
491
+ end