bulkrax 7.0.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/concerns/loggable.rb +25 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  5. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  6. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  7. data/app/factories/bulkrax/object_factory.rb +135 -163
  8. data/app/factories/bulkrax/object_factory_interface.rb +483 -0
  9. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  10. data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
  11. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  12. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  13. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  14. data/app/jobs/bulkrax/delete_job.rb +3 -2
  15. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  16. data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
  17. data/app/jobs/bulkrax/importer_job.rb +18 -2
  18. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  19. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  20. data/app/models/bulkrax/csv_entry.rb +7 -6
  21. data/app/models/bulkrax/entry.rb +7 -11
  22. data/app/models/bulkrax/exporter.rb +2 -2
  23. data/app/models/bulkrax/importer.rb +1 -3
  24. data/app/models/bulkrax/oai_entry.rb +0 -3
  25. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  26. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  27. data/app/models/bulkrax/rdf_entry.rb +70 -69
  28. data/app/models/bulkrax/xml_entry.rb +0 -1
  29. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  30. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/file_factory.rb +178 -118
  32. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  33. data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
  34. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  36. data/app/parsers/bulkrax/application_parser.rb +31 -7
  37. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  38. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  41. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  42. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  43. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  44. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  45. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  46. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  47. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  48. data/app/views/bulkrax/entries/show.html.erb +9 -8
  49. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  50. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  51. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  52. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  53. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  54. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  55. data/app/views/bulkrax/importers/new.html.erb +1 -1
  56. data/app/views/bulkrax/importers/show.html.erb +1 -1
  57. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  58. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  59. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  60. data/config/locales/bulkrax.en.yml +7 -0
  61. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  62. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  63. data/lib/bulkrax/engine.rb +23 -6
  64. data/lib/bulkrax/version.rb +1 -1
  65. data/lib/bulkrax.rb +54 -52
  66. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  67. data/lib/tasks/bulkrax_tasks.rake +1 -0
  68. data/lib/tasks/reset.rake +4 -4
  69. metadata +25 -7
  70. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  71. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  72. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,402 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ # rubocop:disable Metrics/ClassLength
5
+ class ValkyrieObjectFactory < ObjectFactoryInterface
6
+ class FileFactoryInnerWorkings < Bulkrax::FileFactory::InnerWorkings
7
+ def remove_file_set(file_set:)
8
+ file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
9
+ raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata
10
+
11
+ Hyrax::VersioningService.create(file_metadata, user, File.new(Bulkrax.removed_image_path))
12
+
13
+ ::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
14
+ end
15
+
16
+ ##
17
+ # Replace an existing :file_set's file with the :uploaded file.
18
+ #
19
+ # @param file_set [Hyrax::FileSet, Object]
20
+ # @param uploaded [Hyrax::UploadedFile]
21
+ #
22
+ # @return [NilClass]
23
+ def update_file_set(file_set:, uploaded:)
24
+ file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
25
+ raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata
26
+
27
+ uploaded_file = uploaded.file
28
+
29
+ # TODO: Is this accurate? We'll need to interrogate the file_metadata
30
+ # object. Should it be `file_metadata.checksum.first.to_s` Or something
31
+ # else?
32
+ return nil if file_metadata.checksum.first == Digest::SHA1.file(uploaded_file.path).to_s
33
+
34
+ Hyrax::VersioningService.create(file_metadata, user, uploaded_file)
35
+
36
+ ::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
37
+ nil
38
+ end
39
+ end
40
+
41
+ # TODO: the following module needs revisiting for Valkyrie work.
42
+ # proposal is to create Bulkrax::ValkyrieFileFactory.
43
+ include Bulkrax::FileFactory
44
+
45
+ self.file_set_factory_inner_workings_class = Bulkrax::ValkyrieObjectFactory::FileFactoryInnerWorkings
46
+
47
+ ##
48
+ # When you want a different set of transactions you can change the
49
+ # container.
50
+ #
51
+ # @note Within {Bulkrax::ValkyrieObjectFactory} there are several calls to
52
+ # transactions; so you'll need your container to register those
53
+ # transactions.
54
+ def self.transactions
55
+ @transactions || Hyrax::Transactions::Container
56
+ end
57
+
58
+ def transactions
59
+ self.class.transactions
60
+ end
61
+
62
+ ##
63
+ # @!group Class Method Interface
64
+
65
+ ##
66
+ # @note The intent is to defer saving to another loop (we might be adding many
67
+ # items to the parent), but this currently calls `parent.save` when a child is added.
68
+ def self.add_child_to_parent_work(parent:, child:)
69
+ return true if parent.member_ids.include?(child.id)
70
+
71
+ parent.member_ids << child.id
72
+ parent.save
73
+ end
74
+
75
+ def self.add_resource_to_collection(collection:, resource:, user:)
76
+ resource.member_of_collection_ids << collection.id
77
+ save!(resource: resource, user: user)
78
+ end
79
+
80
+ def self.field_multi_value?(field:, model:)
81
+ return false unless field_supported?(field: field, model: model)
82
+
83
+ if model.respond_to?(:schema)
84
+ dry_type = model.schema.key(field.to_sym)
85
+ return true if dry_type.respond_to?(:primitive) && dry_type.primitive == Array
86
+
87
+ false
88
+ else
89
+ Bulkrax::ObjectFactory.field_multi_value?(field: field, model: model)
90
+ end
91
+ end
92
+
93
+ def self.field_supported?(field:, model:)
94
+ if model.respond_to?(:schema)
95
+ schema_properties(model).include?(field)
96
+ else
97
+ # We *might* have a Fedora object, so we need to consider that approach as
98
+ # well.
99
+ Bulkrax::ObjectFactory.field_supported?(field: field, model: model)
100
+ end
101
+ end
102
+
103
+ def self.file_sets_for(resource:)
104
+ return [] if resource.blank?
105
+ return [resource] if resource.is_a?(Bulkrax.file_model_class)
106
+
107
+ Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
108
+ end
109
+
110
+ def self.find(id)
111
+ Hyrax.query_service.find_by(id: id)
112
+ # Because Hyrax is not a hard dependency, we need to transform the Hyrax exception into a
113
+ # common exception so that callers can handle a generalized exception.
114
+ rescue Hyrax::ObjectNotFoundError => e
115
+ raise ObjectFactoryInterface::ObjectNotFoundError, e.message
116
+ end
117
+
118
+ def self.find_or_create_default_admin_set
119
+ Hyrax::AdminSetCreateService.find_or_create_default_admin_set
120
+ end
121
+
122
+ def self.solr_name(field_name)
123
+ # It's a bit unclear what this should be if we can't rely on Hyrax.
124
+ raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
125
+ Hyrax.config.index_field_mapper.solr_name(field_name)
126
+ end
127
+
128
+ def self.publish(event:, **kwargs)
129
+ Hyrax.publisher.publish(event, **kwargs)
130
+ end
131
+
132
+ def self.query(q, **kwargs)
133
+ # Someone could choose ActiveFedora::SolrService. But I think we're
134
+ # assuming Valkyrie is specifically working for Hyrax. Someone could make
135
+ # another object factory.
136
+ raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
137
+ Hyrax::SolrService.query(q, **kwargs)
138
+ end
139
+
140
+ def self.save!(resource:, user:)
141
+ if resource.respond_to?(:save!)
142
+ resource.save!
143
+ else
144
+ result = Hyrax.persister.save(resource: resource)
145
+ raise Valkyrie::Persistence::ObjectNotFoundError unless result
146
+ Hyrax.index_adapter.save(resource: result)
147
+ if result.collection?
148
+ publish('collection.metadata.updated', collection: result, user: user)
149
+ else
150
+ publish('object.metadata.updated', object: result, user: user)
151
+ end
152
+ resource
153
+ end
154
+ end
155
+
156
+ def self.update_index(resources:)
157
+ Array(resources).each do |resource|
158
+ Hyrax.index_adapter.save(resource: resource)
159
+ end
160
+ end
161
+
162
+ def self.update_index_for_file_sets_of(resource:)
163
+ file_sets = Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
164
+ update_index(resources: file_sets)
165
+ end
166
+
167
+ ##
168
+ # @param value [String]
169
+ # @param klass [Class, #where]
170
+ # @param field [String, Symbol] A convenience parameter where we pass the
171
+ # same value to search_field and name_field.
172
+ # @param name_field [String] the ActiveFedora::Base property name
173
+ # (e.g. "title")
174
+ # @return [NilClass] when no object is found.
175
+ # @return [Valkyrie::Resource] when a match is found, an instance of given
176
+ # :klass
177
+ # rubocop:disable Metrics/ParameterLists
178
+ def self.search_by_property(value:, klass:, field: nil, name_field: nil, **)
179
+ name_field ||= field
180
+ raise "Expected named_field or field got nil" if name_field.blank?
181
+ return if value.blank?
182
+
183
+ # Return nil or a single object.
184
+ Hyrax.query_service.custom_query.find_by_model_and_property_value(model: klass, property: name_field, value: value)
185
+ end
186
+ # rubocop:enable Metrics/ParameterLists
187
+
188
+ ##
189
+ # Retrieve properties from M3 model
190
+ # @param klass the model
191
+ # @return [Array<String>]
192
+ def self.schema_properties(klass)
193
+ @schema_properties_map ||= {}
194
+
195
+ klass_key = klass.name
196
+ @schema_properties_map[klass_key] = klass.schema.map { |k| k.name.to_s } unless @schema_properties_map.key?(klass_key)
197
+
198
+ @schema_properties_map[klass_key]
199
+ end
200
+
201
+ def self.ordered_file_sets_for(object)
202
+ return [] if object.blank?
203
+
204
+ Hyrax.custom_queries.find_child_file_sets(resource: object)
205
+ end
206
+
207
+ def delete(user)
208
+ obj = find
209
+ return false unless obj
210
+
211
+ Hyrax.persister.delete(resource: obj)
212
+ Hyrax.index_adapter.delete(resource: obj)
213
+ self.class.publish(event: 'object.deleted', object: obj, user: user)
214
+ end
215
+
216
+ def run!
217
+ run
218
+ # reload the object
219
+ object = find
220
+ return object if object.persisted?
221
+
222
+ raise(ObjectFactoryInterface::RecordInvalid, object)
223
+ end
224
+
225
+ private
226
+
227
+ def apply_depositor_metadata
228
+ return if object.depositor.present?
229
+
230
+ object.depositor = @user.email
231
+ object = Hyrax.persister.save(resource: object)
232
+ self.class.publish(event: "object.metadata.updated", object: object, user: @user)
233
+ object
234
+ end
235
+
236
+ def conditionall_apply_depositor_metadata
237
+ # We handle this in transactions
238
+ nil
239
+ end
240
+
241
+ def conditionally_set_reindex_extent
242
+ # Valkyrie does not concern itself with the reindex extent; no nesting
243
+ # indexers here!
244
+ nil
245
+ end
246
+
247
+ def create_file_set(attrs)
248
+ # TODO: Make it work for Valkyrie
249
+ end
250
+
251
+ def create_work(attrs)
252
+ # NOTE: We do not add relationships here; that is part of the create
253
+ # relationships job.
254
+ perform_transaction_for(object: object, attrs: attrs) do
255
+ transactions["change_set.create_work"]
256
+ .with_step_args(
257
+ 'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
258
+ "change_set.set_user_as_depositor" => { user: @user },
259
+ "work_resource.change_depositor" => { user: @user },
260
+ 'work_resource.save_acl' => { permissions_params: [attrs['visibility'] || 'open'].compact }
261
+ )
262
+ end
263
+ end
264
+
265
+ def create_collection(attrs)
266
+ # TODO: Handle Collection Type
267
+ #
268
+ # NOTE: We do not add relationships here; that is part of the create
269
+ # relationships job.
270
+ perform_transaction_for(object: object, attrs: attrs) do
271
+ transactions['change_set.create_collection']
272
+ .with_step_args(
273
+ 'change_set.set_user_as_depositor' => { user: @user },
274
+ 'collection_resource.apply_collection_type_permissions' => { user: @user }
275
+ )
276
+ end
277
+ end
278
+
279
+ def find_by_id
280
+ Hyrax.query_service.find_by(id: attributes[:id]) if attributes.key? :id
281
+ end
282
+
283
+ ##
284
+ # @param object [Valkyrie::Resource]
285
+ # @param attrs [Valkyrie::Resource]
286
+ # @return [Valkyrie::Resource] when we successfully processed the
287
+ # transaction (e.g. the transaction's data was valid according to
288
+ # the derived form)
289
+ #
290
+ # @yield the returned value of the yielded block should be a
291
+ # {Hyrax::Transactions::Transaction}. We yield because we first
292
+ # want to check if the attributes are valid. And if so, then process
293
+ # the transaction, which is something that could trigger expensive
294
+ # operations. Put another way, don't do something expensive if the
295
+ # data is invalid.
296
+ #
297
+ # TODO What do we return when the calculated form fails?
298
+ # @raise [StandardError] when there was a failure calling the transaction.
299
+ def perform_transaction_for(object:, attrs:)
300
+ form = Hyrax::Forms::ResourceForm.for(object).prepopulate!
301
+
302
+ # TODO: Handle validations
303
+ form.validate(attrs)
304
+
305
+ transaction = yield
306
+
307
+ result = transaction.call(form)
308
+
309
+ result.value_or do
310
+ msg = result.failure[0].to_s
311
+ msg += " - #{result.failure[1].full_messages.join(',')}" if result.failure[1].respond_to?(:full_messages)
312
+ raise StandardError, msg, result.trace
313
+ end
314
+ end
315
+
316
+ ##
317
+ # We accept attributes based on the model schema
318
+ #
319
+ # @return [Array<Symbols>]
320
+ def permitted_attributes
321
+ @permitted_attributes ||= (
322
+ base_permitted_attributes + if klass.respond_to?(:schema)
323
+ Bulkrax::ValkyrieObjectFactory.schema_properties(klass)
324
+ else
325
+ klass.properties.keys.map(&:to_sym)
326
+ end
327
+ ).uniq
328
+ end
329
+
330
+ def update_work(attrs)
331
+ perform_transaction_for(object: object, attrs: attrs) do
332
+ transactions["change_set.update_work"]
333
+ .with_step_args(
334
+ 'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
335
+ 'work_resource.save_acl' => { permissions_params: [attrs.try('visibility') || 'open'].compact }
336
+ )
337
+ end
338
+ end
339
+
340
+ def update_collection(attrs)
341
+ # NOTE: We do not add relationships here; that is part of the create
342
+ # relationships job.
343
+ perform_transaction_for(object: object, attrs: attrs) do
344
+ transactions['change_set.update_collection']
345
+ end
346
+ end
347
+
348
+ def update_file_set(attrs)
349
+ # TODO: Make it work
350
+ end
351
+
352
+ def uploaded_files_from(attrs)
353
+ uploaded_local_files(uploaded_files: attrs[:uploaded_files]) + uploaded_s3_files(remote_files: attrs[:remote_files])
354
+ end
355
+
356
+ def uploaded_local_files(uploaded_files: [])
357
+ Array.wrap(uploaded_files).map do |file_id|
358
+ Hyrax::UploadedFile.find(file_id)
359
+ end
360
+ end
361
+
362
+ def uploaded_s3_files(remote_files: {})
363
+ return [] if remote_files.blank?
364
+
365
+ s3_bucket_name = ENV.fetch("STAGING_AREA_S3_BUCKET", "comet-staging-area-#{Rails.env}")
366
+ s3_bucket = Rails.application.config.staging_area_s3_connection
367
+ .directories.get(s3_bucket_name)
368
+
369
+ remote_files.map { |r| r["url"] }.map do |key|
370
+ s3_bucket.files.get(key)
371
+ end.compact
372
+ end
373
+
374
+ # @Override Destroy existing files with Hyrax::Transactions
375
+ def destroy_existing_files
376
+ existing_files = Hyrax.custom_queries.find_child_file_sets(resource: object)
377
+
378
+ existing_files.each do |fs|
379
+ transactions["file_set.destroy"]
380
+ .with_step_args("file_set.remove_from_work" => { user: @user },
381
+ "file_set.delete" => { user: @user })
382
+ .call(fs)
383
+ .value!
384
+ end
385
+
386
+ @object.member_ids = @object.member_ids.reject { |m| existing_files.detect { |f| f.id == m } }
387
+ @object.rendering_ids = []
388
+ @object.representative_id = nil
389
+ @object.thumbnail_id = nil
390
+ end
391
+
392
+ def transform_attributes(update: false)
393
+ attrs = super.merge(alternate_ids: [source_identifier_value])
394
+ .symbolize_keys
395
+
396
+ attrs[:title] = [''] if attrs[:title].blank?
397
+ attrs[:creator] = [''] if attrs[:creator].blank?
398
+ attrs
399
+ end
400
+ end
401
+ # rubocop:enable Metrics/ClassLength
402
+ end
@@ -0,0 +1,19 @@
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": "../../../../../hyku"
5
+ },
6
+ {
7
+ "path": "../../../../hyrax"
8
+ },
9
+ {
10
+ "path": "../../.."
11
+ },
12
+ {
13
+ "path": "../../../../iiif_print"
14
+ }
15
+ ],
16
+ "settings": {
17
+ "github.copilot.inlineSuggest.enable": true
18
+ }
19
+ }
@@ -6,7 +6,7 @@ module Bulkrax
6
6
  def available_admin_sets
7
7
  # Restrict available_admin_sets to only those current user can deposit to.
8
8
  @available_admin_sets ||= Hyrax::Collections::PermissionsService.source_ids_for_deposit(ability: current_ability, source_type: 'admin_set').map do |admin_set_id|
9
- [AdminSet.find(admin_set_id).title.first, admin_set_id]
9
+ [Bulkrax.object_factory.find_or_nil(admin_set_id)&.title&.first || admin_set_id, admin_set_id]
10
10
  end
11
11
  end
12
12
  end
@@ -20,14 +20,14 @@ module Bulkrax
20
20
  return unless defined?(::Hyrax)
21
21
 
22
22
  if params[:importer][:admin_set_id].blank?
23
- params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
23
+ params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
24
24
  else
25
- AdminSet.find(params[:importer][:admin_set_id])
25
+ Bulkrax.object_factory.find(params[:importer][:admin_set_id])
26
26
  end
27
27
  return true
28
- rescue ActiveFedora::ObjectNotFoundError
28
+ rescue ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
29
29
  logger.warn("AdminSet #{params[:importer][:admin_set_id]} not found. Using default admin set.")
30
- params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
30
+ params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
31
31
  return true
32
32
  end
33
33
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
+ ##
4
5
  # Responsible for creating parent-child relationships between Works and Collections.
5
6
  #
6
7
  # Handles three kinds of relationships:
@@ -42,6 +43,7 @@ module Bulkrax
42
43
 
43
44
  queue_as Bulkrax.config.ingest_queue_name
44
45
 
46
+ ##
45
47
  # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
46
48
  # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
47
49
  #
@@ -53,7 +55,7 @@ module Bulkrax
53
55
  #
54
56
  # rubocop:disable Metrics/MethodLength
55
57
  def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
56
- importer_run = Bulkrax::ImporterRun.find(importer_run_id)
58
+ @importer_run = Bulkrax::ImporterRun.find(importer_run_id)
57
59
  ability = Ability.new(importer_run.user)
58
60
 
59
61
  parent_entry, parent_record = find_record(parent_identifier, importer_run_id)
@@ -79,9 +81,9 @@ module Bulkrax
79
81
 
80
82
  # save record if members were added
81
83
  if @parent_record_members_added
82
- parent_record.save!
83
- # Ensure that the new relationship gets indexed onto the children
84
- @child_members_added.each(&:update_index)
84
+ Bulkrax.object_factory.save!(resource: parent_record, user: importer_run.user)
85
+ Bulkrax.object_factory.publish(event: 'object.membership.updated', object: parent_record)
86
+ Bulkrax.object_factory.update_index(resources: @child_members_added)
85
87
  end
86
88
  end
87
89
  else
@@ -104,7 +106,7 @@ module Bulkrax
104
106
  parent_entry&.set_status_info(errors.last, importer_run)
105
107
 
106
108
  # TODO: This can create an infinite job cycle, consider a time to live tracker.
107
- reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
109
+ reschedule(parent_identifier: parent_identifier, importer_run_id: importer_run_id)
108
110
  return false # stop current job from continuing to run after rescheduling
109
111
  else
110
112
  # rubocop:disable Rails/SkipsModelValidations
@@ -114,6 +116,8 @@ module Bulkrax
114
116
  end
115
117
  # rubocop:enable Metrics/MethodLength
116
118
 
119
+ attr_reader :importer_run
120
+
117
121
  private
118
122
 
119
123
  ##
@@ -151,25 +155,32 @@ module Bulkrax
151
155
  # We could do this outside of the loop, but that could lead to odd counter failures.
152
156
  ability.authorize!(:edit, parent_record)
153
157
 
154
- parent_record.is_a?(Collection) ? add_to_collection(child_record, parent_record) : add_to_work(child_record, parent_record)
158
+ if parent_record.is_a?(Bulkrax.collection_model_class)
159
+ add_to_collection(child_record, parent_record)
160
+ else
161
+ add_to_work(child_record, parent_record)
162
+ end
163
+
164
+ Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
155
165
 
156
- child_record.file_sets.each(&:update_index) if update_child_records_works_file_sets? && child_record.respond_to?(:file_sets)
157
166
  relationship.destroy
158
167
  end
159
168
 
160
169
  def add_to_collection(child_record, parent_record)
161
- parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) if
162
- defined?(Hyrax::Adapters::NestingIndexAdapter)
163
- child_record.member_of_collections << parent_record
164
- child_record.save!
170
+ Bulkrax.object_factory.add_resource_to_collection(
171
+ collection: parent_record,
172
+ resource: child_record,
173
+ user: importer_run.user
174
+ )
165
175
  end
166
176
 
167
177
  def add_to_work(child_record, parent_record)
168
- return true if parent_record.ordered_members.to_a.include?(child_record)
169
-
170
- parent_record.ordered_members << child_record
171
- @parent_record_members_added = true
172
- @child_members_added << child_record
178
+ # NOTE: The .add_child_to_parent_work should not persist changes to the
179
+ # child nor parent. We'll do that elsewhere in this loop.
180
+ Bulkrax.object_factory.add_child_to_parent_work(
181
+ parent: parent_record,
182
+ child: child_record
183
+ )
173
184
  end
174
185
 
175
186
  def reschedule(parent_identifier:, importer_run_id:)
@@ -5,8 +5,9 @@ module Bulkrax
5
5
  queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(entry, importer_run)
8
- obj = entry.factory.find
9
- obj&.delete
8
+ user = importer_run.importer.user
9
+ entry.factory.delete(user)
10
+
10
11
  # rubocop:disable Rails/SkipsModelValidations
11
12
  ImporterRun.increment_counter(:deleted_records, importer_run.id)
12
13
  ImporterRun.decrement_counter(:enqueued_records, importer_run.id)
@@ -1,18 +1,31 @@
1
1
  # frozen_string_literal: true
2
-
3
2
  module Bulkrax
4
3
  class DownloadCloudFileJob < ApplicationJob
5
4
  queue_as Bulkrax.config.ingest_queue_name
6
5
 
6
+ include ActionView::Helpers::NumberHelper
7
+
7
8
  # Retrieve cloud file and write to the imports directory
8
9
  # Note: if using the file system, the mounted directory in
9
10
  # browse_everything MUST be shared by web and worker servers
10
11
  def perform(file, target_file)
11
12
  retriever = BrowseEverything::Retriever.new
13
+ last_logged_time = Time.zone.now
14
+ log_interval = 3.seconds
15
+
12
16
  retriever.download(file, target_file) do |filename, retrieved, total|
13
- # The block is still useful for showing progress, but the
14
- # first argument is the filename instead of a chunk of data.
17
+ percentage = (retrieved.to_f / total.to_f) * 100
18
+ current_time = Time.zone.now
19
+
20
+ if (current_time - last_logged_time) >= log_interval
21
+ # Use number_to_human_size for formatting
22
+ readable_retrieved = number_to_human_size(retrieved)
23
+ readable_total = number_to_human_size(total)
24
+ Rails.logger.info "Downloaded #{readable_retrieved} of #{readable_total}, #{filename}: #{percentage.round}% complete"
25
+ last_logged_time = current_time
26
+ end
15
27
  end
28
+ Rails.logger.info "Download complete: #{file['url']} to #{target_file}"
16
29
  end
17
30
  end
18
31
  end
@@ -16,7 +16,12 @@ module Bulkrax
16
16
  # e.g. "parents" or "parents_1"
17
17
  parent_identifier = (entry.raw_metadata[entry.related_parents_raw_mapping] || entry.raw_metadata["#{entry.related_parents_raw_mapping}_1"])&.strip
18
18
 
19
- validate_parent!(parent_identifier)
19
+ begin
20
+ validate_parent!(parent_identifier)
21
+ rescue MissingParentError => e
22
+ handle_retry(entry, importer_run_id, e)
23
+ return
24
+ end
20
25
 
21
26
  entry.build
22
27
  if entry.succeeded?
@@ -32,17 +37,6 @@ module Bulkrax
32
37
  entry.save!
33
38
  entry.importer.current_run = ImporterRun.find(importer_run_id)
34
39
  entry.importer.record_status
35
-
36
- rescue MissingParentError => e
37
- # try waiting for the parent record to be created
38
- entry.import_attempts += 1
39
- entry.save!
40
- if entry.import_attempts < 5
41
- ImportFileSetJob.set(wait: (entry.import_attempts + 1).minutes).perform_later(entry_id, importer_run_id)
42
- else
43
- ImporterRun.decrement_counter(:enqueued_records, importer_run_id) # rubocop:disable Rails/SkipsModelValidations
44
- entry.set_status_info(e)
45
- end
46
40
  end
47
41
 
48
42
  private
@@ -54,21 +48,31 @@ module Bulkrax
54
48
  return if parent_identifier.blank?
55
49
 
56
50
  find_parent_record(parent_identifier)
57
- check_parent_exists!(parent_identifier)
58
51
  check_parent_is_a_work!(parent_identifier)
59
52
  end
60
53
 
61
- def check_parent_exists!(parent_identifier)
62
- raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.nil?
63
- end
64
-
65
54
  def check_parent_is_a_work!(parent_identifier)
66
- error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
67
- raise ::StandardError, error_msg unless curation_concern?(parent_record)
55
+ case parent_record
56
+ when Bulkrax.collection_model_class, Bulkrax.file_model_class
57
+ error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
58
+ raise ::StandardError, error_msg
59
+ end
68
60
  end
69
61
 
70
62
  def find_parent_record(parent_identifier)
71
63
  _, @parent_record = find_record(parent_identifier, importer_run_id)
64
+ raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") unless parent_record
65
+ end
66
+
67
+ def handle_retry(entry, importer_run_id, e)
68
+ entry.import_attempts += 1
69
+ entry.save!
70
+ if entry.import_attempts < 5
71
+ ImportFileSetJob.set(wait: (entry.import_attempts + 1).minutes).perform_later(entry.id, importer_run_id)
72
+ else
73
+ ImporterRun.decrement_counter(:enqueued_records, importer_run_id) # rubocop:disable Rails/SkipsModelValidations
74
+ entry.set_status_info(e)
75
+ end
72
76
  end
73
77
  end
74
78
  end