bulkrax 7.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +24 -8
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,402 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
# rubocop:disable Metrics/ClassLength
|
5
|
+
class ValkyrieObjectFactory < ObjectFactoryInterface
|
6
|
+
class FileFactoryInnerWorkings < Bulkrax::FileFactory::InnerWorkings
|
7
|
+
def remove_file_set(file_set:)
|
8
|
+
file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
|
9
|
+
raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata
|
10
|
+
|
11
|
+
Hyrax::VersioningService.create(file_metadata, user, File.new(Bulkrax.removed_image_path))
|
12
|
+
|
13
|
+
::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Replace an existing :file_set's file with the :uploaded file.
|
18
|
+
#
|
19
|
+
# @param file_set [Hyrax::FileSet, Object]
|
20
|
+
# @param uploaded [Hyrax::UploadedFile]
|
21
|
+
#
|
22
|
+
# @return [NilClass]
|
23
|
+
def update_file_set(file_set:, uploaded:)
|
24
|
+
file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
|
25
|
+
raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata
|
26
|
+
|
27
|
+
uploaded_file = uploaded.file
|
28
|
+
|
29
|
+
# TODO: Is this accurate? We'll need to interrogate the file_metadata
|
30
|
+
# object. Should it be `file_metadata.checksum.first.to_s` Or something
|
31
|
+
# else?
|
32
|
+
return nil if file_metadata.checksum.first == Digest::SHA1.file(uploaded_file.path).to_s
|
33
|
+
|
34
|
+
Hyrax::VersioningService.create(file_metadata, user, uploaded_file)
|
35
|
+
|
36
|
+
::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
|
37
|
+
nil
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# TODO: the following module needs revisiting for Valkyrie work.
|
42
|
+
# proposal is to create Bulkrax::ValkyrieFileFactory.
|
43
|
+
include Bulkrax::FileFactory
|
44
|
+
|
45
|
+
self.file_set_factory_inner_workings_class = Bulkrax::ValkyrieObjectFactory::FileFactoryInnerWorkings
|
46
|
+
|
47
|
+
##
|
48
|
+
# When you want a different set of transactions you can change the
|
49
|
+
# container.
|
50
|
+
#
|
51
|
+
# @note Within {Bulkrax::ValkyrieObjectFactory} there are several calls to
|
52
|
+
# transactions; so you'll need your container to register those
|
53
|
+
# transactions.
|
54
|
+
def self.transactions
|
55
|
+
@transactions || Hyrax::Transactions::Container
|
56
|
+
end
|
57
|
+
|
58
|
+
def transactions
|
59
|
+
self.class.transactions
|
60
|
+
end
|
61
|
+
|
62
|
+
##
|
63
|
+
# @!group Class Method Interface
|
64
|
+
|
65
|
+
##
|
66
|
+
# @note This does not save either object. We need to do that in another
|
67
|
+
# loop. Why? Because we might be adding many items to the parent.
|
68
|
+
def self.add_child_to_parent_work(parent:, child:)
|
69
|
+
return true if parent.member_ids.include?(child.id)
|
70
|
+
|
71
|
+
parent.member_ids << child.id
|
72
|
+
parent.save
|
73
|
+
end
|
74
|
+
|
75
|
+
def self.add_resource_to_collection(collection:, resource:, user:)
|
76
|
+
resource.member_of_collection_ids << collection.id
|
77
|
+
save!(resource: resource, user: user)
|
78
|
+
end
|
79
|
+
|
80
|
+
def self.field_multi_value?(field:, model:)
|
81
|
+
return false unless field_supported?(field: field, model: model)
|
82
|
+
|
83
|
+
if model.respond_to?(:schema)
|
84
|
+
dry_type = model.schema.key(field.to_sym)
|
85
|
+
return true if dry_type.respond_to?(:primitive) && dry_type.primitive == Array
|
86
|
+
|
87
|
+
false
|
88
|
+
else
|
89
|
+
Bulkrax::ObjectFactory.field_multi_value?(field: field, model: model)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def self.field_supported?(field:, model:)
|
94
|
+
if model.respond_to?(:schema)
|
95
|
+
schema_properties(model).include?(field)
|
96
|
+
else
|
97
|
+
# We *might* have a Fedora object, so we need to consider that approach as
|
98
|
+
# well.
|
99
|
+
Bulkrax::ObjectFactory.field_supported?(field: field, model: model)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def self.file_sets_for(resource:)
|
104
|
+
return [] if resource.blank?
|
105
|
+
return [resource] if resource.is_a?(Bulkrax.file_model_class)
|
106
|
+
|
107
|
+
Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
|
108
|
+
end
|
109
|
+
|
110
|
+
def self.find(id)
|
111
|
+
Hyrax.query_service.find_by(id: id)
|
112
|
+
# Because Hyrax is not a hard dependency, we need to transform the Hyrax exception into a
|
113
|
+
# common exception so that callers can handle a generalized exception.
|
114
|
+
rescue Hyrax::ObjectNotFoundError => e
|
115
|
+
raise ObjectFactoryInterface::ObjectNotFoundError, e.message
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.find_or_create_default_admin_set
|
119
|
+
Hyrax::AdminSetCreateService.find_or_create_default_admin_set
|
120
|
+
end
|
121
|
+
|
122
|
+
def self.solr_name(field_name)
|
123
|
+
# It's a bit unclear what this should be if we can't rely on Hyrax.
|
124
|
+
raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
|
125
|
+
Hyrax.config.index_field_mapper.solr_name(field_name)
|
126
|
+
end
|
127
|
+
|
128
|
+
def self.publish(event:, **kwargs)
|
129
|
+
Hyrax.publisher.publish(event, **kwargs)
|
130
|
+
end
|
131
|
+
|
132
|
+
def self.query(q, **kwargs)
|
133
|
+
# Someone could choose ActiveFedora::SolrService. But I think we're
|
134
|
+
# assuming Valkyrie is specifically working for Hyrax. Someone could make
|
135
|
+
# another object factory.
|
136
|
+
raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
|
137
|
+
Hyrax::SolrService.query(q, **kwargs)
|
138
|
+
end
|
139
|
+
|
140
|
+
def self.save!(resource:, user:)
|
141
|
+
if resource.respond_to?(:save!)
|
142
|
+
resource.save!
|
143
|
+
else
|
144
|
+
result = Hyrax.persister.save(resource: resource)
|
145
|
+
raise Valkyrie::Persistence::ObjectNotFoundError unless result
|
146
|
+
Hyrax.index_adapter.save(resource: result)
|
147
|
+
if result.collection?
|
148
|
+
publish('collection.metadata.updated', collection: result, user: user)
|
149
|
+
else
|
150
|
+
publish('object.metadata.updated', object: result, user: user)
|
151
|
+
end
|
152
|
+
resource
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def self.update_index(resources:)
|
157
|
+
Array(resources).each do |resource|
|
158
|
+
Hyrax.index_adapter.save(resource: resource)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
def self.update_index_for_file_sets_of(resource:)
|
163
|
+
file_sets = Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
|
164
|
+
update_index(resources: file_sets)
|
165
|
+
end
|
166
|
+
|
167
|
+
##
|
168
|
+
# @param value [String]
|
169
|
+
# @param klass [Class, #where]
|
170
|
+
# @param field [String, Symbol] A convenience parameter where we pass the
|
171
|
+
# same value to search_field and name_field.
|
172
|
+
# @param name_field [String] the ActiveFedora::Base property name
|
173
|
+
# (e.g. "title")
|
174
|
+
# @return [NilClass] when no object is found.
|
175
|
+
# @return [Valkyrie::Resource] when a match is found, an instance of given
|
176
|
+
# :klass
|
177
|
+
# rubocop:disable Metrics/ParameterLists
|
178
|
+
def self.search_by_property(value:, klass:, field: nil, name_field: nil, **)
|
179
|
+
name_field ||= field
|
180
|
+
raise "Expected named_field or field got nil" if name_field.blank?
|
181
|
+
return if value.blank?
|
182
|
+
|
183
|
+
# Return nil or a single object.
|
184
|
+
Hyrax.query_service.custom_query.find_by_model_and_property_value(model: klass, property: name_field, value: value)
|
185
|
+
end
|
186
|
+
# rubocop:enable Metrics/ParameterLists
|
187
|
+
|
188
|
+
##
|
189
|
+
# Retrieve properties from M3 model
|
190
|
+
# @param klass the model
|
191
|
+
# @return [Array<String>]
|
192
|
+
def self.schema_properties(klass)
|
193
|
+
@schema_properties_map ||= {}
|
194
|
+
|
195
|
+
klass_key = klass.name
|
196
|
+
@schema_properties_map[klass_key] = klass.schema.map { |k| k.name.to_s } unless @schema_properties_map.key?(klass_key)
|
197
|
+
|
198
|
+
@schema_properties_map[klass_key]
|
199
|
+
end
|
200
|
+
|
201
|
+
def self.ordered_file_sets_for(object)
|
202
|
+
return [] if object.blank?
|
203
|
+
|
204
|
+
Hyrax.custom_queries.find_child_file_sets(resource: object)
|
205
|
+
end
|
206
|
+
|
207
|
+
def delete(user)
|
208
|
+
obj = find
|
209
|
+
return false unless obj
|
210
|
+
|
211
|
+
Hyrax.persister.delete(resource: obj)
|
212
|
+
Hyrax.index_adapter.delete(resource: obj)
|
213
|
+
self.class.publish(event: 'object.deleted', object: obj, user: user)
|
214
|
+
end
|
215
|
+
|
216
|
+
def run!
|
217
|
+
run
|
218
|
+
# reload the object
|
219
|
+
object = find
|
220
|
+
return object if object.persisted?
|
221
|
+
|
222
|
+
raise(ObjectFactoryInterface::RecordInvalid, object)
|
223
|
+
end
|
224
|
+
|
225
|
+
private
|
226
|
+
|
227
|
+
def apply_depositor_metadata
|
228
|
+
return if object.depositor.present?
|
229
|
+
|
230
|
+
object.depositor = @user.email
|
231
|
+
object = Hyrax.persister.save(resource: object)
|
232
|
+
self.class.publish(event: "object.metadata.updated", object: object, user: @user)
|
233
|
+
object
|
234
|
+
end
|
235
|
+
|
236
|
+
def conditionall_apply_depositor_metadata
|
237
|
+
# We handle this in transactions
|
238
|
+
nil
|
239
|
+
end
|
240
|
+
|
241
|
+
def conditionally_set_reindex_extent
|
242
|
+
# Valkyrie does not concern itself with the reindex extent; no nesting
|
243
|
+
# indexers here!
|
244
|
+
nil
|
245
|
+
end
|
246
|
+
|
247
|
+
def create_file_set(attrs)
|
248
|
+
# TODO: Make it work for Valkyrie
|
249
|
+
end
|
250
|
+
|
251
|
+
def create_work(attrs)
|
252
|
+
# NOTE: We do not add relationships here; that is part of the create
|
253
|
+
# relationships job.
|
254
|
+
perform_transaction_for(object: object, attrs: attrs) do
|
255
|
+
transactions["change_set.create_work"]
|
256
|
+
.with_step_args(
|
257
|
+
'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
|
258
|
+
"change_set.set_user_as_depositor" => { user: @user },
|
259
|
+
"work_resource.change_depositor" => { user: @user },
|
260
|
+
'work_resource.save_acl' => { permissions_params: [attrs['visibility'] || 'open'].compact }
|
261
|
+
)
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
def create_collection(attrs)
|
266
|
+
# TODO: Handle Collection Type
|
267
|
+
#
|
268
|
+
# NOTE: We do not add relationships here; that is part of the create
|
269
|
+
# relationships job.
|
270
|
+
perform_transaction_for(object: object, attrs: attrs) do
|
271
|
+
transactions['change_set.create_collection']
|
272
|
+
.with_step_args(
|
273
|
+
'change_set.set_user_as_depositor' => { user: @user },
|
274
|
+
'collection_resource.apply_collection_type_permissions' => { user: @user }
|
275
|
+
)
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
def find_by_id
|
280
|
+
Hyrax.query_service.find_by(id: attributes[:id]) if attributes.key? :id
|
281
|
+
end
|
282
|
+
|
283
|
+
##
|
284
|
+
# @param object [Valkyrie::Resource]
|
285
|
+
# @param attrs [Valkyrie::Resource]
|
286
|
+
# @return [Valkyrie::Resource] when we successfully processed the
|
287
|
+
# transaction (e.g. the transaction's data was valid according to
|
288
|
+
# the derived form)
|
289
|
+
#
|
290
|
+
# @yield the returned value of the yielded block should be a
|
291
|
+
# {Hyrax::Transactions::Transaction}. We yield because we first
|
292
|
+
# want to check if the attributes are valid. And if so, then process
|
293
|
+
# the transaction, which is something that could trigger expensive
|
294
|
+
# operations. Put another way, don't do something expensive if the
|
295
|
+
# data is invalid.
|
296
|
+
#
|
297
|
+
# TODO What do we return when the calculated form fails?
|
298
|
+
# @raise [StandardError] when there was a failure calling the transaction.
|
299
|
+
def perform_transaction_for(object:, attrs:)
|
300
|
+
form = Hyrax::Forms::ResourceForm.for(object).prepopulate!
|
301
|
+
|
302
|
+
# TODO: Handle validations
|
303
|
+
form.validate(attrs)
|
304
|
+
|
305
|
+
transaction = yield
|
306
|
+
|
307
|
+
result = transaction.call(form)
|
308
|
+
|
309
|
+
result.value_or do
|
310
|
+
msg = result.failure[0].to_s
|
311
|
+
msg += " - #{result.failure[1].full_messages.join(',')}" if result.failure[1].respond_to?(:full_messages)
|
312
|
+
raise StandardError, msg, result.trace
|
313
|
+
end
|
314
|
+
end
|
315
|
+
|
316
|
+
##
|
317
|
+
# We accept attributes based on the model schema
|
318
|
+
#
|
319
|
+
# @return [Array<Symbols>]
|
320
|
+
def permitted_attributes
|
321
|
+
@permitted_attributes ||= (
|
322
|
+
base_permitted_attributes + if klass.respond_to?(:schema)
|
323
|
+
Bulkrax::ValkyrieObjectFactory.schema_properties(klass)
|
324
|
+
else
|
325
|
+
klass.properties.keys.map(&:to_sym)
|
326
|
+
end
|
327
|
+
).uniq
|
328
|
+
end
|
329
|
+
|
330
|
+
def update_work(attrs)
|
331
|
+
perform_transaction_for(object: object, attrs: attrs) do
|
332
|
+
transactions["change_set.update_work"]
|
333
|
+
.with_step_args(
|
334
|
+
'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
|
335
|
+
'work_resource.save_acl' => { permissions_params: [attrs.try('visibility') || 'open'].compact }
|
336
|
+
)
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
def update_collection(attrs)
|
341
|
+
# NOTE: We do not add relationships here; that is part of the create
|
342
|
+
# relationships job.
|
343
|
+
perform_transaction_for(object: object, attrs: attrs) do
|
344
|
+
transactions['change_set.update_collection']
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
348
|
+
def update_file_set(attrs)
|
349
|
+
# TODO: Make it work
|
350
|
+
end
|
351
|
+
|
352
|
+
def uploaded_files_from(attrs)
|
353
|
+
uploaded_local_files(uploaded_files: attrs[:uploaded_files]) + uploaded_s3_files(remote_files: attrs[:remote_files])
|
354
|
+
end
|
355
|
+
|
356
|
+
def uploaded_local_files(uploaded_files: [])
|
357
|
+
Array.wrap(uploaded_files).map do |file_id|
|
358
|
+
Hyrax::UploadedFile.find(file_id)
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
def uploaded_s3_files(remote_files: {})
|
363
|
+
return [] if remote_files.blank?
|
364
|
+
|
365
|
+
s3_bucket_name = ENV.fetch("STAGING_AREA_S3_BUCKET", "comet-staging-area-#{Rails.env}")
|
366
|
+
s3_bucket = Rails.application.config.staging_area_s3_connection
|
367
|
+
.directories.get(s3_bucket_name)
|
368
|
+
|
369
|
+
remote_files.map { |r| r["url"] }.map do |key|
|
370
|
+
s3_bucket.files.get(key)
|
371
|
+
end.compact
|
372
|
+
end
|
373
|
+
|
374
|
+
# @Override Destroy existing files with Hyrax::Transactions
|
375
|
+
def destroy_existing_files
|
376
|
+
existing_files = Hyrax.custom_queries.find_child_file_sets(resource: object)
|
377
|
+
|
378
|
+
existing_files.each do |fs|
|
379
|
+
transactions["file_set.destroy"]
|
380
|
+
.with_step_args("file_set.remove_from_work" => { user: @user },
|
381
|
+
"file_set.delete" => { user: @user })
|
382
|
+
.call(fs)
|
383
|
+
.value!
|
384
|
+
end
|
385
|
+
|
386
|
+
@object.member_ids = @object.member_ids.reject { |m| existing_files.detect { |f| f.id == m } }
|
387
|
+
@object.rendering_ids = []
|
388
|
+
@object.representative_id = nil
|
389
|
+
@object.thumbnail_id = nil
|
390
|
+
end
|
391
|
+
|
392
|
+
def transform_attributes(update: false)
|
393
|
+
attrs = super.merge(alternate_ids: [source_identifier_value])
|
394
|
+
.symbolize_keys
|
395
|
+
|
396
|
+
attrs[:title] = [''] if attrs[:title].blank?
|
397
|
+
attrs[:creator] = [''] if attrs[:creator].blank?
|
398
|
+
attrs
|
399
|
+
end
|
400
|
+
end
|
401
|
+
# rubocop:enable Metrics/ClassLength
|
402
|
+
end
|
@@ -6,7 +6,7 @@ module Bulkrax
|
|
6
6
|
def available_admin_sets
|
7
7
|
# Restrict available_admin_sets to only those current user can deposit to.
|
8
8
|
@available_admin_sets ||= Hyrax::Collections::PermissionsService.source_ids_for_deposit(ability: current_ability, source_type: 'admin_set').map do |admin_set_id|
|
9
|
-
[
|
9
|
+
[Bulkrax.object_factory.find_or_nil(admin_set_id)&.title&.first || admin_set_id, admin_set_id]
|
10
10
|
end
|
11
11
|
end
|
12
12
|
end
|
@@ -20,14 +20,14 @@ module Bulkrax
|
|
20
20
|
return unless defined?(::Hyrax)
|
21
21
|
|
22
22
|
if params[:importer][:admin_set_id].blank?
|
23
|
-
params[:importer][:admin_set_id] =
|
23
|
+
params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
|
24
24
|
else
|
25
|
-
|
25
|
+
Bulkrax.object_factory.find(params[:importer][:admin_set_id])
|
26
26
|
end
|
27
27
|
return true
|
28
|
-
rescue ActiveFedora::ObjectNotFoundError
|
28
|
+
rescue ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
|
29
29
|
logger.warn("AdminSet #{params[:importer][:admin_set_id]} not found. Using default admin set.")
|
30
|
-
params[:importer][:admin_set_id] =
|
30
|
+
params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
|
31
31
|
return true
|
32
32
|
end
|
33
33
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
+
##
|
4
5
|
# Responsible for creating parent-child relationships between Works and Collections.
|
5
6
|
#
|
6
7
|
# Handles three kinds of relationships:
|
@@ -42,6 +43,7 @@ module Bulkrax
|
|
42
43
|
|
43
44
|
queue_as Bulkrax.config.ingest_queue_name
|
44
45
|
|
46
|
+
##
|
45
47
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
48
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
47
49
|
#
|
@@ -53,7 +55,7 @@ module Bulkrax
|
|
53
55
|
#
|
54
56
|
# rubocop:disable Metrics/MethodLength
|
55
57
|
def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
|
56
|
-
importer_run = Bulkrax::ImporterRun.find(importer_run_id)
|
58
|
+
@importer_run = Bulkrax::ImporterRun.find(importer_run_id)
|
57
59
|
ability = Ability.new(importer_run.user)
|
58
60
|
|
59
61
|
parent_entry, parent_record = find_record(parent_identifier, importer_run_id)
|
@@ -79,9 +81,9 @@ module Bulkrax
|
|
79
81
|
|
80
82
|
# save record if members were added
|
81
83
|
if @parent_record_members_added
|
82
|
-
|
83
|
-
|
84
|
-
@child_members_added
|
84
|
+
Bulkrax.object_factory.save!(resource: parent_record, user: importer_run.user)
|
85
|
+
Bulkrax.object_factory.publish(event: 'object.membership.updated', object: parent_record)
|
86
|
+
Bulkrax.object_factory.update_index(resources: @child_members_added)
|
85
87
|
end
|
86
88
|
end
|
87
89
|
else
|
@@ -104,7 +106,7 @@ module Bulkrax
|
|
104
106
|
parent_entry&.set_status_info(errors.last, importer_run)
|
105
107
|
|
106
108
|
# TODO: This can create an infinite job cycle, consider a time to live tracker.
|
107
|
-
reschedule(
|
109
|
+
reschedule(parent_identifier: parent_identifier, importer_run_id: importer_run_id)
|
108
110
|
return false # stop current job from continuing to run after rescheduling
|
109
111
|
else
|
110
112
|
# rubocop:disable Rails/SkipsModelValidations
|
@@ -114,6 +116,8 @@ module Bulkrax
|
|
114
116
|
end
|
115
117
|
# rubocop:enable Metrics/MethodLength
|
116
118
|
|
119
|
+
attr_reader :importer_run
|
120
|
+
|
117
121
|
private
|
118
122
|
|
119
123
|
##
|
@@ -151,25 +155,32 @@ module Bulkrax
|
|
151
155
|
# We could do this outside of the loop, but that could lead to odd counter failures.
|
152
156
|
ability.authorize!(:edit, parent_record)
|
153
157
|
|
154
|
-
parent_record.is_a?(
|
158
|
+
if parent_record.is_a?(Bulkrax.collection_model_class)
|
159
|
+
add_to_collection(child_record, parent_record)
|
160
|
+
else
|
161
|
+
add_to_work(child_record, parent_record)
|
162
|
+
end
|
163
|
+
|
164
|
+
Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
|
155
165
|
|
156
|
-
child_record.file_sets.each(&:update_index) if update_child_records_works_file_sets? && child_record.respond_to?(:file_sets)
|
157
166
|
relationship.destroy
|
158
167
|
end
|
159
168
|
|
160
169
|
def add_to_collection(child_record, parent_record)
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
170
|
+
Bulkrax.object_factory.add_resource_to_collection(
|
171
|
+
collection: parent_record,
|
172
|
+
resource: child_record,
|
173
|
+
user: importer_run.user
|
174
|
+
)
|
165
175
|
end
|
166
176
|
|
167
177
|
def add_to_work(child_record, parent_record)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
178
|
+
# NOTE: The .add_child_to_parent_work should not persist changes to the
|
179
|
+
# child nor parent. We'll do that elsewhere in this loop.
|
180
|
+
Bulkrax.object_factory.add_child_to_parent_work(
|
181
|
+
parent: parent_record,
|
182
|
+
child: child_record
|
183
|
+
)
|
173
184
|
end
|
174
185
|
|
175
186
|
def reschedule(parent_identifier:, importer_run_id:)
|
@@ -5,8 +5,9 @@ module Bulkrax
|
|
5
5
|
queue_as Bulkrax.config.ingest_queue_name
|
6
6
|
|
7
7
|
def perform(entry, importer_run)
|
8
|
-
|
9
|
-
|
8
|
+
user = importer_run.importer.user
|
9
|
+
entry.factory.delete(user)
|
10
|
+
|
10
11
|
# rubocop:disable Rails/SkipsModelValidations
|
11
12
|
ImporterRun.increment_counter(:deleted_records, importer_run.id)
|
12
13
|
ImporterRun.decrement_counter(:enqueued_records, importer_run.id)
|
@@ -1,18 +1,31 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
2
|
module Bulkrax
|
4
3
|
class DownloadCloudFileJob < ApplicationJob
|
5
4
|
queue_as Bulkrax.config.ingest_queue_name
|
6
5
|
|
6
|
+
include ActionView::Helpers::NumberHelper
|
7
|
+
|
7
8
|
# Retrieve cloud file and write to the imports directory
|
8
9
|
# Note: if using the file system, the mounted directory in
|
9
10
|
# browse_everything MUST be shared by web and worker servers
|
10
11
|
def perform(file, target_file)
|
11
12
|
retriever = BrowseEverything::Retriever.new
|
13
|
+
last_logged_time = Time.zone.now
|
14
|
+
log_interval = 3.seconds
|
15
|
+
|
12
16
|
retriever.download(file, target_file) do |filename, retrieved, total|
|
13
|
-
|
14
|
-
|
17
|
+
percentage = (retrieved.to_f / total.to_f) * 100
|
18
|
+
current_time = Time.zone.now
|
19
|
+
|
20
|
+
if (current_time - last_logged_time) >= log_interval
|
21
|
+
# Use number_to_human_size for formatting
|
22
|
+
readable_retrieved = number_to_human_size(retrieved)
|
23
|
+
readable_total = number_to_human_size(total)
|
24
|
+
Rails.logger.info "Downloaded #{readable_retrieved} of #{readable_total}, #{filename}: #{percentage.round}% complete"
|
25
|
+
last_logged_time = current_time
|
26
|
+
end
|
15
27
|
end
|
28
|
+
Rails.logger.info "Download complete: #{file['url']} to #{target_file}"
|
16
29
|
end
|
17
30
|
end
|
18
31
|
end
|
@@ -63,8 +63,11 @@ module Bulkrax
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def check_parent_is_a_work!(parent_identifier)
|
66
|
-
|
67
|
-
|
66
|
+
case parent_record
|
67
|
+
when Bulkrax.collection_model_class, Bulkrax.file_model_class
|
68
|
+
error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
|
69
|
+
raise ::StandardError, error_msg
|
70
|
+
end
|
68
71
|
end
|
69
72
|
|
70
73
|
def find_parent_record(parent_identifier)
|
@@ -6,6 +6,7 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
def perform(importer_id, only_updates_since_last_import = false)
|
8
8
|
importer = Importer.find(importer_id)
|
9
|
+
return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
|
9
10
|
|
10
11
|
importer.current_run
|
11
12
|
unzip_imported_file(importer.parser)
|
@@ -16,6 +17,8 @@ module Bulkrax
|
|
16
17
|
importer.set_status_info(e)
|
17
18
|
end
|
18
19
|
|
20
|
+
private
|
21
|
+
|
19
22
|
def import(importer, only_updates_since_last_import)
|
20
23
|
importer.only_updates = only_updates_since_last_import || false
|
21
24
|
return unless importer.valid_import?
|
@@ -36,8 +39,21 @@ module Bulkrax
|
|
36
39
|
importer.current_run.save!
|
37
40
|
end
|
38
41
|
|
39
|
-
def schedule(importer)
|
40
|
-
|
42
|
+
def schedule(importer, wait_until = importer.next_import_at, message = nil)
|
43
|
+
Rails.logger.info message if message
|
44
|
+
ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Checks that the sizes of the downloaded files match the sizes of the original files.
|
48
|
+
def all_files_completed?(importer)
|
49
|
+
cloud_files = importer.parser_fields['cloud_file_paths']
|
50
|
+
original_files = importer.parser_fields['original_file_paths']
|
51
|
+
return true unless cloud_files.present? && original_files.present?
|
52
|
+
|
53
|
+
imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
|
54
|
+
original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
|
55
|
+
|
56
|
+
original_file_sizes == imported_file_sizes
|
41
57
|
end
|
42
58
|
end
|
43
59
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class CsvCollectionEntry < CsvEntry
|
5
|
-
self.default_work_type =
|
5
|
+
self.default_work_type = Bulkrax.collection_model_class.to_s
|
6
6
|
|
7
7
|
# Use identifier set by CsvParser#unique_collection_identifier, which falls back
|
8
8
|
# on the Collection's first title if record[source_identifier] is not present
|