bulkrax 7.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,402 @@
1
# frozen_string_literal: true

module Bulkrax
  ##
  # Object factory that creates, updates, finds and deletes records through
  # Hyrax's Valkyrie-facing services (query service, persister, index adapter,
  # publisher and transactions container).
  #
  # rubocop:disable Metrics/ClassLength
  class ValkyrieObjectFactory < ObjectFactoryInterface
    ##
    # Valkyrie flavoured steps for replacing or removing the file that backs a
    # file set.
    class FileFactoryInnerWorkings < Bulkrax::FileFactory::InnerWorkings
      ##
      # Create a new version of the :file_set's first file metadata record from
      # the file at `Bulkrax.removed_image_path`, then schedule derivative
      # creation one minute later.
      #
      # @param file_set [Hyrax::FileSet, Object]
      #
      # @raise [RuntimeError] when the file set has no file metadata records.
      def remove_file_set(file_set:)
        file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
        raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata

        # NOTE(review): this File handle is never explicitly closed; confirm
        # whether the versioning service consumes it synchronously before
        # wrapping it in a File.open block.
        Hyrax::VersioningService.create(file_metadata, user, File.new(Bulkrax.removed_image_path))

        ::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
      end

      ##
      # Replace an existing :file_set's file with the :uploaded file.
      #
      # Nothing is written when the stored checksum already equals the SHA1
      # digest of the uploaded file.
      #
      # @param file_set [Hyrax::FileSet, Object]
      # @param uploaded [Hyrax::UploadedFile]
      #
      # @return [NilClass]
      # @raise [RuntimeError] when the file set has no file metadata records.
      def update_file_set(file_set:, uploaded:)
        file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
        raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata

        uploaded_file = uploaded.file

        # TODO: Is this accurate? We'll need to interrogate the file_metadata
        # object. Should it be `file_metadata.checksum.first.to_s` or something
        # else?
        return nil if file_metadata.checksum.first == Digest::SHA1.file(uploaded_file.path).to_s

        Hyrax::VersioningService.create(file_metadata, user, uploaded_file)

        ::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
        nil
      end
    end

    # TODO: the following module needs revisiting for Valkyrie work.
    #       proposal is to create Bulkrax::ValkyrieFileFactory.
    include Bulkrax::FileFactory

    self.file_set_factory_inner_workings_class = Bulkrax::ValkyrieObjectFactory::FileFactoryInnerWorkings

    ##
    # When you want a different set of transactions you can change the
    # container.
    #
    # @note Within {Bulkrax::ValkyrieObjectFactory} there are several calls to
    #       transactions; so you'll need your container to register those
    #       transactions.
    #
    # @return [Object] the `@transactions` class instance variable when set,
    #         otherwise `Hyrax::Transactions::Container`.
    def self.transactions
      @transactions || Hyrax::Transactions::Container
    end

    ##
    # @return [Object] the container configured on the class.
    # @see .transactions
    def transactions
      self.class.transactions
    end

    ##
    # @!group Class Method Interface

    ##
    # Append the child's id to the parent's member ids unless it is already
    # present.
    #
    # @note NOTE(review): the original comment here (and the comment at the
    #       call site in the relationships job) says this method must not
    #       persist either object, yet the code calls `parent.save`. The call is
    #       kept so behavior is unchanged; confirm which one is intended.
    #
    # @param parent [Object] responds to `member_ids` and `save`
    # @param child [Object] responds to `id`
    #
    # @return [TrueClass] when the child is already a member.
    # @return [Object] otherwise, whatever `parent.save` returns.
    def self.add_child_to_parent_work(parent:, child:)
      return true if parent.member_ids.include?(child.id)

      parent.member_ids << child.id
      parent.save
    end

    ##
    # Record the collection on the resource and save the resource.
    #
    # The id is only appended when missing (mirroring the guard in
    # {.add_child_to_parent_work}) so repeated runs do not accumulate duplicate
    # ids; the resource is saved either way.
    #
    # @param collection [Object] responds to `id`
    # @param resource [Object] responds to `member_of_collection_ids`
    # @param user [Object] forwarded to {.save!}
    def self.add_resource_to_collection(collection:, resource:, user:)
      collection_ids = resource.member_of_collection_ids
      collection_ids << collection.id unless collection_ids.include?(collection.id)
      save!(resource: resource, user: user)
    end

    ##
    # @param field [String, Symbol]
    # @param model [Class]
    #
    # @return [Boolean] true when the model supports the field and the field's
    #         schema type has an Array primitive. Models without a schema are
    #         delegated to {Bulkrax::ObjectFactory}.
    def self.field_multi_value?(field:, model:)
      return false unless field_supported?(field: field, model: model)

      if model.respond_to?(:schema)
        dry_type = model.schema.key(field.to_sym)
        return true if dry_type.respond_to?(:primitive) && dry_type.primitive == Array

        false
      else
        Bulkrax::ObjectFactory.field_multi_value?(field: field, model: model)
      end
    end

    ##
    # @param field [String, Symbol]
    # @param model [Class]
    #
    # @return [Boolean] whether the model declares the given field.
    def self.field_supported?(field:, model:)
      if model.respond_to?(:schema)
        # schema_properties returns Strings; normalise so Symbol fields match
        # (field_multi_value? already accepts both via #to_sym).
        schema_properties(model).include?(field.to_s)
      else
        # We *might* have a Fedora object, so we need to consider that approach as
        # well.
        Bulkrax::ObjectFactory.field_supported?(field: field, model: model)
      end
    end

    ##
    # @param resource [Object, nil]
    #
    # @return [Array] empty when the resource is blank; the resource itself
    #         (wrapped) when it is a file set.
    # @return [Enumerable] otherwise, the resource's child file sets.
    def self.file_sets_for(resource:)
      return [] if resource.blank?
      return [resource] if resource.is_a?(Bulkrax.file_model_class)

      Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
    end

    ##
    # @param id [Object]
    #
    # @return [Object] the resource with the given id.
    # @raise [ObjectFactoryInterface::ObjectNotFoundError] when nothing is found.
    def self.find(id)
      Hyrax.query_service.find_by(id: id)
    rescue Hyrax::ObjectNotFoundError, Valkyrie::Persistence::ObjectNotFoundError => e
      # Because Hyrax is not a hard dependency, we need to transform the
      # not-found exceptions into a common exception so that callers can handle
      # a generalized exception.
      raise ObjectFactoryInterface::ObjectNotFoundError, e.message
    end

    ##
    # @return [Object] the result of
    #         `Hyrax::AdminSetCreateService.find_or_create_default_admin_set`.
    def self.find_or_create_default_admin_set
      Hyrax::AdminSetCreateService.find_or_create_default_admin_set
    end

    ##
    # @param field_name [String, Symbol]
    #
    # @return [String] the solr field name per Hyrax's index field mapper.
    # @raise [NotImplementedError] when Hyrax is not defined.
    def self.solr_name(field_name)
      # It's a bit unclear what this should be if we can't rely on Hyrax.
      raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
      Hyrax.config.index_field_mapper.solr_name(field_name)
    end

    ##
    # @param event [String] the name of the event to publish
    # @param kwargs [Hash] the payload handed to the publisher
    def self.publish(event:, **kwargs)
      Hyrax.publisher.publish(event, **kwargs)
    end

    ##
    # @param q [String] the solr query
    # @param kwargs [Hash] forwarded to `Hyrax::SolrService.query`
    #
    # @raise [NotImplementedError] when Hyrax is not defined.
    def self.query(q, **kwargs)
      # Someone could choose ActiveFedora::SolrService. But I think we're
      # assuming Valkyrie is specifically working for Hyrax. Someone could make
      # another object factory.
      raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
      Hyrax::SolrService.query(q, **kwargs)
    end

    ##
    # Persist the resource. Objects that respond to `save!` save themselves;
    # everything else is saved through the Hyrax persister, indexed, and a
    # metadata-updated event is published.
    #
    # @param resource [Object]
    # @param user [Object] included in the published event payload
    #
    # @return [Object] the result of `resource.save!`, or the given resource
    #         (not the persister's return value) on the persister path.
    # @raise [Valkyrie::Persistence::ObjectNotFoundError] when the persister
    #        returns a falsey value.
    def self.save!(resource:, user:)
      return resource.save! if resource.respond_to?(:save!)

      result = Hyrax.persister.save(resource: resource)
      raise Valkyrie::Persistence::ObjectNotFoundError unless result
      Hyrax.index_adapter.save(resource: result)
      # .publish only accepts the event name via the `event:` keyword.
      if result.collection?
        publish(event: 'collection.metadata.updated', collection: result, user: user)
      else
        publish(event: 'object.metadata.updated', object: result, user: user)
      end
      resource
    end

    ##
    # @param resources [Object, Array<Object>] one or many resources to index
    def self.update_index(resources:)
      Array(resources).each do |resource|
        Hyrax.index_adapter.save(resource: resource)
      end
    end

    ##
    # Index each child file set of the given resource.
    #
    # @param resource [Object]
    def self.update_index_for_file_sets_of(resource:)
      file_sets = Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
      update_index(resources: file_sets)
    end

    ##
    # @param value [String]
    # @param klass [Class, #where]
    # @param field [String, Symbol] A convenience parameter; used as the
    #        name_field when no name_field is given.
    # @param name_field [String] the property name (e.g. "title")
    #
    # @return [NilClass] when the value is blank or no object is found.
    # @return [Valkyrie::Resource] when a match is found, an instance of given
    #         :klass
    # @raise [RuntimeError] when neither name_field nor field is given.
    # rubocop:disable Metrics/ParameterLists
    def self.search_by_property(value:, klass:, field: nil, name_field: nil, **)
      name_field ||= field
      raise "Expected name_field or field got nil" if name_field.blank?
      return if value.blank?

      # Return nil or a single object.
      Hyrax.query_service.custom_queries.find_by_model_and_property_value(model: klass, property: name_field, value: value)
    end
    # rubocop:enable Metrics/ParameterLists

    ##
    # Retrieve properties from M3 model. Results are memoized per class name.
    #
    # @param klass the model
    # @return [Array<String>]
    def self.schema_properties(klass)
      @schema_properties_map ||= {}

      klass_key = klass.name
      @schema_properties_map[klass_key] = klass.schema.map { |k| k.name.to_s } unless @schema_properties_map.key?(klass_key)

      @schema_properties_map[klass_key]
    end

    ##
    # @param object [Object, nil]
    #
    # @return [Array] empty when the object is blank.
    # @return [Enumerable] otherwise, the object's child file sets.
    def self.ordered_file_sets_for(object)
      return [] if object.blank?

      Hyrax.custom_queries.find_child_file_sets(resource: object)
    end

    ##
    # Delete the found object from persistence and from the index, then publish
    # an 'object.deleted' event.
    #
    # @param user [Object] included in the published event payload
    #
    # @return [FalseClass] when no object is found.
    def delete(user)
      obj = find
      return false unless obj

      Hyrax.persister.delete(resource: obj)
      Hyrax.index_adapter.delete(resource: obj)
      self.class.publish(event: 'object.deleted', object: obj, user: user)
    end

    ##
    # Run the factory, then look the object up again.
    #
    # @return [Object] the found object when it is persisted.
    # @raise [ObjectFactoryInterface::RecordInvalid] when the object cannot be
    #        found afterwards or is not persisted.
    def run!
      run
      # reload the object; #find may come back empty (see #delete), so guard
      # against nil instead of raising NoMethodError.
      reloaded = find
      return reloaded if reloaded&.persisted?

      raise(ObjectFactoryInterface::RecordInvalid, reloaded)
    end

    private

    ##
    # Set the depositor to the user's email when the object has none, persist
    # the object and publish an 'object.metadata.updated' event.
    #
    # @return [NilClass] when a depositor is already present.
    # @return [Object] otherwise, the value returned by the persister.
    def apply_depositor_metadata
      return if object.depositor.present?

      object.depositor = @user.email
      # Use a distinct local name: writing `object = ...save(resource: object)`
      # would make the right-hand `object` refer to the new (nil) local
      # variable rather than the reader method.
      saved = Hyrax.persister.save(resource: object)
      self.class.publish(event: "object.metadata.updated", object: saved, user: @user)
      saved
    end

    # NOTE(review): the name is spelled "conditionall_" whereas the sibling
    # below is "conditionally_"; if this is meant to override a parent method,
    # confirm the expected name. Left as-is because the parent class is not
    # visible here.
    def conditionall_apply_depositor_metadata
      # We handle this in transactions
      nil
    end

    def conditionally_set_reindex_extent
      # Valkyrie does not concern itself with the reindex extent; no nesting
      # indexers here!
      nil
    end

    def create_file_set(attrs)
      # TODO: Make it work for Valkyrie
    end

    ##
    # @param attrs [Hash] the attributes for the new work
    #
    # @return [Valkyrie::Resource]
    # @see #perform_transaction_for
    def create_work(attrs)
      # NOTE: We do not add relationships here; that is part of the create
      # relationships job.
      #
      # NOTE(review): visibility is read with a String key while
      # #uploaded_files_from reads Symbol keys; confirm which key style attrs
      # carries here.
      perform_transaction_for(object: object, attrs: attrs) do
        transactions["change_set.create_work"]
          .with_step_args(
            'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
            "change_set.set_user_as_depositor" => { user: @user },
            "work_resource.change_depositor" => { user: @user },
            'work_resource.save_acl' => { permissions_params: [attrs['visibility'] || 'open'].compact }
          )
      end
    end

    ##
    # @param attrs [Hash] the attributes for the new collection
    #
    # @return [Valkyrie::Resource]
    # @see #perform_transaction_for
    def create_collection(attrs)
      # TODO: Handle Collection Type
      #
      # NOTE: We do not add relationships here; that is part of the create
      # relationships job.
      perform_transaction_for(object: object, attrs: attrs) do
        transactions['change_set.create_collection']
          .with_step_args(
            'change_set.set_user_as_depositor' => { user: @user },
            'collection_resource.apply_collection_type_permissions' => { user: @user }
          )
      end
    end

    ##
    # @return [Object, nil] the resource for `attributes[:id]`; nil when the
    #         attributes carry no :id key.
    def find_by_id
      Hyrax.query_service.find_by(id: attributes[:id]) if attributes.key? :id
    end

    ##
    # @param object [Valkyrie::Resource]
    # @param attrs [Hash] the attributes validated against the resource's form
    # @return [Valkyrie::Resource] when we successfully processed the
    #         transaction (e.g. the transaction's data was valid according to
    #         the derived form)
    #
    # @yield the returned value of the yielded block should be a
    #        {Hyrax::Transactions::Transaction}. We yield because we first
    #        want to check if the attributes are valid. And if so, then process
    #        the transaction, which is something that could trigger expensive
    #        operations. Put another way, don't do something expensive if the
    #        data is invalid.
    #
    # TODO What do we return when the calculated form fails?
    # @raise [StandardError] when there was a failure calling the transaction.
    def perform_transaction_for(object:, attrs:)
      form = Hyrax::Forms::ResourceForm.for(object).prepopulate!

      # TODO: Handle validations (the result of #validate is currently ignored)
      form.validate(attrs)

      transaction = yield

      result = transaction.call(form)

      result.value_or do
        msg = result.failure[0].to_s
        msg += " - #{result.failure[1].full_messages.join(',')}" if result.failure[1].respond_to?(:full_messages)
        raise StandardError, msg, result.trace
      end
    end

    ##
    # We accept attributes based on the model schema
    #
    # NOTE(review): the schema branch contributes Strings while the fallback
    # branch contributes Symbols; confirm that consumers tolerate both.
    #
    # @return [Array<Symbol, String>]
    def permitted_attributes
      @permitted_attributes ||= (
        base_permitted_attributes + if klass.respond_to?(:schema)
                                      Bulkrax::ValkyrieObjectFactory.schema_properties(klass)
                                    else
                                      klass.properties.keys.map(&:to_sym)
                                    end
      ).uniq
    end

    ##
    # @param attrs [Hash] the attributes to apply to the existing work
    #
    # @return [Valkyrie::Resource]
    # @see #perform_transaction_for
    def update_work(attrs)
      # NOTE(review): `attrs.try('visibility')` invokes a `visibility` method
      # on attrs (when it responds to one) rather than reading a key, unlike
      # #create_work. Confirm the intent before changing.
      perform_transaction_for(object: object, attrs: attrs) do
        transactions["change_set.update_work"]
          .with_step_args(
            'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
            'work_resource.save_acl' => { permissions_params: [attrs.try('visibility') || 'open'].compact }
          )
      end
    end

    ##
    # @param attrs [Hash] the attributes to apply to the existing collection
    #
    # @return [Valkyrie::Resource]
    # @see #perform_transaction_for
    def update_collection(attrs)
      # NOTE: We do not add relationships here; that is part of the create
      # relationships job.
      perform_transaction_for(object: object, attrs: attrs) do
        transactions['change_set.update_collection']
      end
    end

    def update_file_set(attrs)
      # TODO: Make it work
    end

    ##
    # @param attrs [Hash] read for :uploaded_files and :remote_files
    #
    # @return [Array] the local uploaded files followed by the S3 files.
    def uploaded_files_from(attrs)
      uploaded_local_files(uploaded_files: attrs[:uploaded_files]) + uploaded_s3_files(remote_files: attrs[:remote_files])
    end

    ##
    # @param uploaded_files [Array, Object, nil] ids of Hyrax::UploadedFile
    #
    # @return [Array<Hyrax::UploadedFile>]
    def uploaded_local_files(uploaded_files: [])
      Array.wrap(uploaded_files).map do |file_id|
        Hyrax::UploadedFile.find(file_id)
      end
    end

    ##
    # Fetch each remote file (by its "url" entry) from the staging area bucket.
    #
    # NOTE(review): the default bucket name and the
    # `staging_area_s3_connection` application config look specific to a
    # particular host application; confirm before relying on them elsewhere.
    #
    # @param remote_files [Array<Hash>, nil]
    #
    # @return [Array] the files found; entries the bucket returned nil for are
    #         dropped.
    def uploaded_s3_files(remote_files: {})
      return [] if remote_files.blank?

      s3_bucket_name = ENV.fetch("STAGING_AREA_S3_BUCKET", "comet-staging-area-#{Rails.env}")
      s3_bucket = Rails.application.config.staging_area_s3_connection
                       .directories.get(s3_bucket_name)

      remote_files.map { |remote_file| s3_bucket.files.get(remote_file["url"]) }.compact
    end

    # @Override Destroy existing files with Hyrax::Transactions
    #
    # After destroying each child file set, the destroyed ids are dropped from
    # the object's members and its rendering, representative and thumbnail
    # references are cleared (in memory; nothing is saved here).
    def destroy_existing_files
      existing_files = Hyrax.custom_queries.find_child_file_sets(resource: object)

      existing_files.each do |fs|
        transactions["file_set.destroy"]
          .with_step_args("file_set.remove_from_work" => { user: @user },
                          "file_set.delete" => { user: @user })
          .call(fs)
          .value!
      end

      @object.member_ids = @object.member_ids.reject { |m| existing_files.detect { |f| f.id == m } }
      @object.rendering_ids = []
      @object.representative_id = nil
      @object.thumbnail_id = nil
    end

    ##
    # Extend the inherited attributes with the source identifier as an
    # alternate id, symbolize the keys, and default a blank title or creator to
    # `['']`.
    #
    # @param update [Boolean] passed through to the inherited implementation
    #
    # @return [Hash{Symbol => Object}]
    def transform_attributes(update: false)
      attrs = super.merge(alternate_ids: [source_identifier_value])
                   .symbolize_keys

      attrs[:title] = [''] if attrs[:title].blank?
      attrs[:creator] = [''] if attrs[:creator].blank?
      attrs
    end
  end
  # rubocop:enable Metrics/ClassLength
end
@@ -6,7 +6,7 @@ module Bulkrax
6
6
  def available_admin_sets
7
7
  # Restrict available_admin_sets to only those current user can deposit to.
8
8
  @available_admin_sets ||= Hyrax::Collections::PermissionsService.source_ids_for_deposit(ability: current_ability, source_type: 'admin_set').map do |admin_set_id|
9
- [AdminSet.find(admin_set_id).title.first, admin_set_id]
9
+ [Bulkrax.object_factory.find_or_nil(admin_set_id)&.title&.first || admin_set_id, admin_set_id]
10
10
  end
11
11
  end
12
12
  end
@@ -20,14 +20,14 @@ module Bulkrax
20
20
  return unless defined?(::Hyrax)
21
21
 
22
22
  if params[:importer][:admin_set_id].blank?
23
- params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
23
+ params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
24
24
  else
25
- AdminSet.find(params[:importer][:admin_set_id])
25
+ Bulkrax.object_factory.find(params[:importer][:admin_set_id])
26
26
  end
27
27
  return true
28
- rescue ActiveFedora::ObjectNotFoundError
28
+ rescue ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
29
29
  logger.warn("AdminSet #{params[:importer][:admin_set_id]} not found. Using default admin set.")
30
- params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
30
+ params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
31
31
  return true
32
32
  end
33
33
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
+ ##
4
5
  # Responsible for creating parent-child relationships between Works and Collections.
5
6
  #
6
7
  # Handles three kinds of relationships:
@@ -42,6 +43,7 @@ module Bulkrax
42
43
 
43
44
  queue_as Bulkrax.config.ingest_queue_name
44
45
 
46
+ ##
45
47
  # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
46
48
  # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
47
49
  #
@@ -53,7 +55,7 @@ module Bulkrax
53
55
  #
54
56
  # rubocop:disable Metrics/MethodLength
55
57
  def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
56
- importer_run = Bulkrax::ImporterRun.find(importer_run_id)
58
+ @importer_run = Bulkrax::ImporterRun.find(importer_run_id)
57
59
  ability = Ability.new(importer_run.user)
58
60
 
59
61
  parent_entry, parent_record = find_record(parent_identifier, importer_run_id)
@@ -79,9 +81,9 @@ module Bulkrax
79
81
 
80
82
  # save record if members were added
81
83
  if @parent_record_members_added
82
- parent_record.save!
83
- # Ensure that the new relationship gets indexed onto the children
84
- @child_members_added.each(&:update_index)
84
+ Bulkrax.object_factory.save!(resource: parent_record, user: importer_run.user)
85
+ Bulkrax.object_factory.publish(event: 'object.membership.updated', object: parent_record)
86
+ Bulkrax.object_factory.update_index(resources: @child_members_added)
85
87
  end
86
88
  end
87
89
  else
@@ -104,7 +106,7 @@ module Bulkrax
104
106
  parent_entry&.set_status_info(errors.last, importer_run)
105
107
 
106
108
  # TODO: This can create an infinite job cycle, consider a time to live tracker.
107
- reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
109
+ reschedule(parent_identifier: parent_identifier, importer_run_id: importer_run_id)
108
110
  return false # stop current job from continuing to run after rescheduling
109
111
  else
110
112
  # rubocop:disable Rails/SkipsModelValidations
@@ -114,6 +116,8 @@ module Bulkrax
114
116
  end
115
117
  # rubocop:enable Metrics/MethodLength
116
118
 
119
+ attr_reader :importer_run
120
+
117
121
  private
118
122
 
119
123
  ##
@@ -151,25 +155,32 @@ module Bulkrax
151
155
  # We could do this outside of the loop, but that could lead to odd counter failures.
152
156
  ability.authorize!(:edit, parent_record)
153
157
 
154
- parent_record.is_a?(Collection) ? add_to_collection(child_record, parent_record) : add_to_work(child_record, parent_record)
158
+ if parent_record.is_a?(Bulkrax.collection_model_class)
159
+ add_to_collection(child_record, parent_record)
160
+ else
161
+ add_to_work(child_record, parent_record)
162
+ end
163
+
164
+ Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
155
165
 
156
- child_record.file_sets.each(&:update_index) if update_child_records_works_file_sets? && child_record.respond_to?(:file_sets)
157
166
  relationship.destroy
158
167
  end
159
168
 
160
169
  def add_to_collection(child_record, parent_record)
161
- parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) if
162
- defined?(Hyrax::Adapters::NestingIndexAdapter)
163
- child_record.member_of_collections << parent_record
164
- child_record.save!
170
+ Bulkrax.object_factory.add_resource_to_collection(
171
+ collection: parent_record,
172
+ resource: child_record,
173
+ user: importer_run.user
174
+ )
165
175
  end
166
176
 
167
177
  def add_to_work(child_record, parent_record)
168
- return true if parent_record.ordered_members.to_a.include?(child_record)
169
-
170
- parent_record.ordered_members << child_record
171
- @parent_record_members_added = true
172
- @child_members_added << child_record
178
+ # NOTE: The .add_child_to_parent_work should not persist changes to the
179
+ # child nor parent. We'll do that elsewhere in this loop.
180
+ Bulkrax.object_factory.add_child_to_parent_work(
181
+ parent: parent_record,
182
+ child: child_record
183
+ )
173
184
  end
174
185
 
175
186
  def reschedule(parent_identifier:, importer_run_id:)
@@ -5,8 +5,9 @@ module Bulkrax
5
5
  queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(entry, importer_run)
8
- obj = entry.factory.find
9
- obj&.delete
8
+ user = importer_run.importer.user
9
+ entry.factory.delete(user)
10
+
10
11
  # rubocop:disable Rails/SkipsModelValidations
11
12
  ImporterRun.increment_counter(:deleted_records, importer_run.id)
12
13
  ImporterRun.decrement_counter(:enqueued_records, importer_run.id)
@@ -1,18 +1,31 @@
1
1
  # frozen_string_literal: true
2
-
3
2
  module Bulkrax
4
3
  class DownloadCloudFileJob < ApplicationJob
5
4
  queue_as Bulkrax.config.ingest_queue_name
6
5
 
6
+ include ActionView::Helpers::NumberHelper
7
+
7
8
  # Retrieve cloud file and write to the imports directory
8
9
  # Note: if using the file system, the mounted directory in
9
10
  # browse_everything MUST be shared by web and worker servers
10
11
  def perform(file, target_file)
11
12
  retriever = BrowseEverything::Retriever.new
13
+ last_logged_time = Time.zone.now
14
+ log_interval = 3.seconds
15
+
12
16
  retriever.download(file, target_file) do |filename, retrieved, total|
13
- # The block is still useful for showing progress, but the
14
- # first argument is the filename instead of a chunk of data.
17
+ percentage = (retrieved.to_f / total.to_f) * 100
18
+ current_time = Time.zone.now
19
+
20
+ if (current_time - last_logged_time) >= log_interval
21
+ # Use number_to_human_size for formatting
22
+ readable_retrieved = number_to_human_size(retrieved)
23
+ readable_total = number_to_human_size(total)
24
+ Rails.logger.info "Downloaded #{readable_retrieved} of #{readable_total}, #{filename}: #{percentage.round}% complete"
25
+ last_logged_time = current_time
26
+ end
15
27
  end
28
+ Rails.logger.info "Download complete: #{file['url']} to #{target_file}"
16
29
  end
17
30
  end
18
31
  end
@@ -63,8 +63,11 @@ module Bulkrax
63
63
  end
64
64
 
65
65
  def check_parent_is_a_work!(parent_identifier)
66
- error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
67
- raise ::StandardError, error_msg unless curation_concern?(parent_record)
66
+ case parent_record
67
+ when Bulkrax.collection_model_class, Bulkrax.file_model_class
68
+ error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
69
+ raise ::StandardError, error_msg
70
+ end
68
71
  end
69
72
 
70
73
  def find_parent_record(parent_identifier)
@@ -6,6 +6,7 @@ module Bulkrax
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
9
+ return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
9
10
 
10
11
  importer.current_run
11
12
  unzip_imported_file(importer.parser)
@@ -16,6 +17,8 @@ module Bulkrax
16
17
  importer.set_status_info(e)
17
18
  end
18
19
 
20
+ private
21
+
19
22
  def import(importer, only_updates_since_last_import)
20
23
  importer.only_updates = only_updates_since_last_import || false
21
24
  return unless importer.valid_import?
@@ -36,8 +39,21 @@ module Bulkrax
36
39
  importer.current_run.save!
37
40
  end
38
41
 
39
- def schedule(importer)
40
- ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true)
42
+ def schedule(importer, wait_until = importer.next_import_at, message = nil)
43
+ Rails.logger.info message if message
44
+ ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
45
+ end
46
+
47
+ # checks the file sizes of the download files to match the original files
48
+ def all_files_completed?(importer)
49
+ cloud_files = importer.parser_fields['cloud_file_paths']
50
+ original_files = importer.parser_fields['original_file_paths']
51
+ return true unless cloud_files.present? && original_files.present?
52
+
53
+ imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
54
+ original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
55
+
56
+ original_file_sizes == imported_file_sizes
41
57
  end
42
58
  end
43
59
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'language_list'
4
-
5
3
  module Bulkrax
6
4
  class ApplicationMatcher
7
5
  attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvCollectionEntry < CsvEntry
5
- self.default_work_type = "Collection"
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
6
6
 
7
7
  # Use identifier set by CsvParser#unique_collection_identifier, which falls back
8
8
  # on the Collection's first title if record[source_identifier] is not present