bulkrax 7.0.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +24 -8
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -0,0 +1,402 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
# rubocop:disable Metrics/ClassLength
|
5
|
+
class ValkyrieObjectFactory < ObjectFactoryInterface
|
6
|
+
class FileFactoryInnerWorkings < Bulkrax::FileFactory::InnerWorkings
|
7
|
+
# Replace the file behind :file_set with the configured "removed" placeholder
# image, recorded as a new version, then schedule derivative regeneration.
#
# @param file_set [Object] must respond to #id; used to look up file metadata
# @raise [RuntimeError] when no file metadata exists for the file set
def remove_file_set(file_set:)
  file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
  raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata

  # Block form guarantees the placeholder's file handle is closed once the
  # new version has been created (the bare File.new leaked the descriptor).
  File.open(Bulkrax.removed_image_path) do |placeholder|
    Hyrax::VersioningService.create(file_metadata, user, placeholder)
  end

  ::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Replace an existing :file_set's file with the :uploaded file.
|
18
|
+
#
|
19
|
+
# @param file_set [Hyrax::FileSet, Object]
|
20
|
+
# @param uploaded [Hyrax::UploadedFile]
|
21
|
+
#
|
22
|
+
# @return [NilClass]
|
23
|
+
# Version the file set's file with the uploaded file, unless the stored
# checksum already equals the SHA1 of the upload.
#
# @param file_set [Object] must respond to #id
# @param uploaded [Object] must respond to #file (whose value responds to #path)
# @return [NilClass]
# @raise [RuntimeError] when no file metadata exists for the file set
def update_file_set(file_set:, uploaded:)
  file_metadata = Hyrax.custom_queries.find_files(file_set: file_set).first
  raise "No file metadata records found for #{file_set.class} ID=#{file_set.id}" unless file_metadata

  uploaded_file = uploaded.file

  # Compare string forms on both sides: the stored checksum entry may be an
  # object rather than a String, in which case a bare == would never match
  # and every import would create a needless new version.
  # NOTE(review): assumes the stored checksum is a SHA1 hex digest — confirm.
  stored_checksum = Array(file_metadata.checksum).first.to_s
  return nil if stored_checksum == Digest::SHA1.file(uploaded_file.path).to_s

  Hyrax::VersioningService.create(file_metadata, user, uploaded_file)

  ::ValkyrieCreateDerivativesJob.set(wait: 1.minute).perform_later(file_set.id, file_metadata.id)
  nil
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# TODO: the following module needs revisiting for Valkyrie work.
|
42
|
+
# proposal is to create Bulkrax::ValkyrieFileFactory.
|
43
|
+
include Bulkrax::FileFactory
|
44
|
+
|
45
|
+
self.file_set_factory_inner_workings_class = Bulkrax::ValkyrieObjectFactory::FileFactoryInnerWorkings
|
46
|
+
|
47
|
+
##
|
48
|
+
# When you want a different set of transactions you can change the
|
49
|
+
# container.
|
50
|
+
#
|
51
|
+
# @note Within {Bulkrax::ValkyrieObjectFactory} there are several calls to
|
52
|
+
# transactions; so you'll need your container to register those
|
53
|
+
# transactions.
|
54
|
+
# The transaction container used by this factory: the class-level
# @transactions when set, otherwise Hyrax's default container.
def self.transactions
  return @transactions if @transactions

  Hyrax::Transactions::Container
end
|
57
|
+
|
58
|
+
# Instance-level convenience: delegates to the class-level container.
def transactions
  factory_class = self.class
  factory_class.transactions
end
|
61
|
+
|
62
|
+
##
|
63
|
+
# @!group Class Method Interface
|
64
|
+
|
65
|
+
##
|
66
|
+
# @note This does not save either object. We need to do that in another
|
67
|
+
# loop. Why? Because we might be adding many items to the parent.
|
68
|
+
# Append the child's id to the parent's member_ids (idempotently).
#
# Persisting is deliberately left to the caller: many children may be added
# to the same parent before a single save, so this method only mutates the
# in-memory parent (the previous `parent.save` call contradicted that
# contract).
#
# @param parent [#member_ids]
# @param child [#id]
# @return [true]
def self.add_child_to_parent_work(parent:, child:)
  return true if parent.member_ids.include?(child.id)

  parent.member_ids << child.id
  true
end
|
74
|
+
|
75
|
+
# Record membership of :resource in :collection and persist the resource.
#
# @param collection [#id]
# @param resource [#member_of_collection_ids]
# @param user [Object] passed through to .save!
def self.add_resource_to_collection(collection:, resource:, user:)
  collection_ids = resource.member_of_collection_ids
  # Guard against duplicate ids when the same relationship is processed twice.
  collection_ids << collection.id unless collection_ids.include?(collection.id)
  save!(resource: resource, user: user)
end
|
79
|
+
|
80
|
+
# Does :model treat :field as multi-valued?
#
# @param field [String, Symbol]
# @param model [Class] a schema-backed model, or one handled by
#   Bulkrax::ObjectFactory
# @return [Boolean] false when the field is not supported at all
def self.field_multi_value?(field:, model:)
  return false unless field_supported?(field: field, model: model)

  # Models without a schema are delegated to the ActiveFedora-oriented factory.
  return Bulkrax::ObjectFactory.field_multi_value?(field: field, model: model) unless model.respond_to?(:schema)

  dry_type = model.schema.key(field.to_sym)
  dry_type.respond_to?(:primitive) && dry_type.primitive == Array
end
|
92
|
+
|
93
|
+
# Is :field one of the properties of :model?
#
# @param field [String, Symbol]
# @param model [Class]
# @return [Boolean]
def self.field_supported?(field:, model:)
  # We *might* have a Fedora object, so fall back to that approach when the
  # model exposes no schema.
  return Bulkrax::ObjectFactory.field_supported?(field: field, model: model) unless model.respond_to?(:schema)

  # schema_properties yields Strings; normalise so Symbol field names match.
  schema_properties(model).include?(field.to_s)
end
|
102
|
+
|
103
|
+
# File sets associated with :resource.
#
# @param resource [Object, nil]
# @return [Array] empty for a blank resource; the resource itself (wrapped)
#   when it already is a file set; otherwise the result of the
#   find_child_file_sets custom query
def self.file_sets_for(resource:)
  if resource.blank?
    []
  elsif resource.is_a?(Bulkrax.file_model_class)
    [resource]
  else
    Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
  end
end
|
109
|
+
|
110
|
+
# Look up a resource by id through the Hyrax query service.
#
# @param id [String]
# @raise [ObjectFactoryInterface::ObjectNotFoundError] when nothing matches
def self.find(id)
  Hyrax.query_service.find_by(id: id)
# Because Hyrax is not a hard dependency, translate persistence-layer
# "not found" errors into a common exception so callers can handle one
# generalized error. The Valkyrie error (already referenced by .save!) is
# included so that a miss is translated whichever of the two is raised.
rescue Hyrax::ObjectNotFoundError, Valkyrie::Persistence::ObjectNotFoundError => e
  raise ObjectFactoryInterface::ObjectNotFoundError, e.message
end
|
117
|
+
|
118
|
+
# Delegates to Hyrax's admin set creation service.
def self.find_or_create_default_admin_set
  Hyrax::AdminSetCreateService.find_or_create_default_admin_set
end
|
121
|
+
|
122
|
+
# Solr field name for :field_name, as computed by Hyrax's index field mapper.
#
# @param field_name [String, Symbol]
# @raise [NotImplementedError] when Hyrax is not loaded; it is unclear what
#   the mapping should be without it
def self.solr_name(field_name)
  raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)

  Hyrax.config.index_field_mapper.solr_name(field_name)
end
|
127
|
+
|
128
|
+
# Publish :event with the given payload through Hyrax's publisher.
#
# @param event [String] event name
# @param kwargs [Hash] event payload, forwarded unchanged
def self.publish(event:, **kwargs)
  Hyrax.publisher.publish(event, **kwargs)
end
|
131
|
+
|
132
|
+
# Run a Solr query through Hyrax::SolrService.
#
# Someone could choose ActiveFedora::SolrService, but we assume Valkyrie is
# specifically working for Hyrax. Someone could make another object factory.
#
# @param q [String] the query
# @param kwargs [Hash] forwarded unchanged to the Solr service
# @raise [NotImplementedError] when Hyrax is not loaded
def self.query(q, **kwargs)
  raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)

  Hyrax::SolrService.query(q, **kwargs)
end
|
139
|
+
|
140
|
+
# Persist :resource.
#
# Objects that respond to #save! are saved directly. Anything else is saved
# through Hyrax's persister, indexed, and announced with a metadata-updated
# event.
#
# @param resource [Object]
# @param user [Object] included in the published event payload
# @return [Object] the result of resource.save!, or :resource itself on the
#   persister path
# @raise [Valkyrie::Persistence::ObjectNotFoundError] when the persister
#   returns a falsy result
def self.save!(resource:, user:)
  return resource.save! if resource.respond_to?(:save!)

  result = Hyrax.persister.save(resource: resource)
  raise Valkyrie::Persistence::ObjectNotFoundError unless result

  Hyrax.index_adapter.save(resource: result)
  # .publish only accepts keywords; passing the event name positionally
  # raised ArgumentError.
  if result.collection?
    publish(event: 'collection.metadata.updated', collection: result, user: user)
  else
    publish(event: 'object.metadata.updated', object: result, user: user)
  end
  resource
end
|
155
|
+
|
156
|
+
# Re-index each of the given resources through Hyrax's index adapter.
#
# @param resources [Array, Object, nil] coerced with Kernel#Array
def self.update_index(resources:)
  Array(resources).each { |resource| Hyrax.index_adapter.save(resource: resource) }
end
|
161
|
+
|
162
|
+
# Re-index the child file sets of :resource.
#
# @param resource [Object]
def self.update_index_for_file_sets_of(resource:)
  child_file_sets = Hyrax.query_service.custom_queries.find_child_file_sets(resource: resource)
  update_index(resources: child_file_sets)
end
|
166
|
+
|
167
|
+
##
|
168
|
+
# @param value [String]
|
169
|
+
# @param klass [Class, #where]
|
170
|
+
# @param field [String, Symbol] A convenience parameter where we pass the
|
171
|
+
# same value to search_field and name_field.
|
172
|
+
# @param name_field [String] the ActiveFedora::Base property name
|
173
|
+
# (e.g. "title")
|
174
|
+
# @return [NilClass] when no object is found.
|
175
|
+
# @return [Valkyrie::Resource] when a match is found, an instance of given
|
176
|
+
# :klass
|
177
|
+
# rubocop:disable Metrics/ParameterLists
|
178
|
+
# Find a single object of :klass whose :name_field (or :field) equals :value.
#
# @return [NilClass] when :value is blank; otherwise whatever the
#   find_by_model_and_property_value custom query returns
# @raise [RuntimeError] when neither :name_field nor :field is given
def self.search_by_property(value:, klass:, field: nil, name_field: nil, **)
  name_field ||= field
  raise "Expected name_field or field got nil" if name_field.blank?
  return if value.blank?

  # Custom queries are reached via #custom_queries (plural), as everywhere
  # else in this factory.
  Hyrax.query_service.custom_queries.find_by_model_and_property_value(model: klass, property: name_field, value: value)
end
|
186
|
+
# rubocop:enable Metrics/ParameterLists
|
187
|
+
|
188
|
+
##
|
189
|
+
# Retrieve properties from M3 model
|
190
|
+
# @param klass the model
|
191
|
+
# @return [Array<String>]
|
192
|
+
# Property names declared by the model's schema, memoized per class name.
#
# @param klass [Class] must respond to #name and #schema
# @return [Array<String>]
def self.schema_properties(klass)
  @schema_properties_map ||= {}

  cache_key = klass.name
  return @schema_properties_map[cache_key] if @schema_properties_map.key?(cache_key)

  @schema_properties_map[cache_key] = klass.schema.map { |schema_key| schema_key.name.to_s }
end
|
200
|
+
|
201
|
+
# Child file sets of :object as returned by the find_child_file_sets custom
# query; an empty array when :object is blank.
def self.ordered_file_sets_for(object)
  if object.blank?
    []
  else
    Hyrax.custom_queries.find_child_file_sets(resource: object)
  end
end
|
206
|
+
|
207
|
+
# Delete the object this factory resolves via #find: remove it from the
# persister and the index, then publish 'object.deleted'.
#
# @param user [Object] included in the event payload
# @return [false] when no object is found; otherwise the publish result
def delete(user)
  target = find
  return false unless target

  Hyrax.persister.delete(resource: target)
  Hyrax.index_adapter.delete(resource: target)
  self.class.publish(event: 'object.deleted', object: target, user: user)
end
|
215
|
+
|
216
|
+
# Run the factory, then reload the object and insist that it was persisted.
#
# @return [Object] the reloaded, persisted object
# @raise [ObjectFactoryInterface::RecordInvalid] when the reloaded object is
#   not persisted
def run!
  run
  reloaded = find
  raise(ObjectFactoryInterface::RecordInvalid, reloaded) unless reloaded.persisted?

  reloaded
end
|
224
|
+
|
225
|
+
private
|
226
|
+
|
227
|
+
# Set the depositor to the importing user's email when none is present,
# persist the object, and publish 'object.metadata.updated'.
#
# @return [NilClass] when a depositor is already present
# @return [Object] the value returned by the persister otherwise
def apply_depositor_metadata
  return if object.depositor.present?

  object.depositor = @user.email
  # Use a distinct local name: assigning to `object` here would create a
  # local variable that shadows the accessor, so the right-hand side would
  # read the (nil) local and save nil.
  saved = Hyrax.persister.save(resource: object)
  self.class.publish(event: "object.metadata.updated", object: saved, user: @user)
  saved
end
|
235
|
+
|
236
|
+
# Intentionally a no-op: depositor metadata is handled in the transactions.
#
# NOTE(review): the method was originally spelled
# `conditionall_apply_depositor_metadata`; by analogy with
# `conditionally_set_reindex_extent` the hook is presumably named
# `conditionally_...` — confirm against ObjectFactoryInterface. The old
# spelling is kept as an alias for backward compatibility.
def conditionally_apply_depositor_metadata
  nil
end
alias conditionall_apply_depositor_metadata conditionally_apply_depositor_metadata
|
240
|
+
|
241
|
+
# Intentionally a no-op: Valkyrie does not concern itself with the reindex
# extent; no nesting indexers here!
def conditionally_set_reindex_extent
  nil
end
|
246
|
+
|
247
|
+
# Placeholder: creating file sets is not yet implemented for Valkyrie, so
# this currently does nothing and returns nil.
#
# TODO: Make it work for Valkyrie
def create_file_set(_attrs)
  nil
end
|
250
|
+
|
251
|
+
# Create a work by running the "change_set.create_work" transaction.
#
# NOTE: We do not add relationships here; that is part of the create
# relationships job.
#
# @param attrs [Hash] attributes for the new work
def create_work(attrs)
  # Elsewhere in this factory attrs is read with symbol keys, so a
  # string-only lookup always missed and silently forced 'open'. Accept
  # either key form.
  visibility = attrs[:visibility] || attrs['visibility'] || 'open'

  perform_transaction_for(object: object, attrs: attrs) do
    transactions["change_set.create_work"]
      .with_step_args(
        'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
        "change_set.set_user_as_depositor" => { user: @user },
        "work_resource.change_depositor" => { user: @user },
        'work_resource.save_acl' => { permissions_params: [visibility].compact }
      )
  end
end
|
264
|
+
|
265
|
+
# Create a collection by running the 'change_set.create_collection'
# transaction.
#
# TODO: Handle Collection Type
#
# NOTE: We do not add relationships here; that is part of the create
# relationships job.
#
# @param attrs [Hash] attributes for the new collection
def create_collection(attrs)
  step_args = {
    'change_set.set_user_as_depositor' => { user: @user },
    'collection_resource.apply_collection_type_permissions' => { user: @user }
  }

  perform_transaction_for(object: object, attrs: attrs) do
    transactions['change_set.create_collection'].with_step_args(step_args)
  end
end
|
278
|
+
|
279
|
+
# Look the object up by the :id attribute, when one was supplied.
#
# @return [NilClass] when the attributes carry no :id key
def find_by_id
  return unless attributes.key?(:id)

  Hyrax.query_service.find_by(id: attributes[:id])
end
|
282
|
+
|
283
|
+
##
|
284
|
+
# @param object [Valkyrie::Resource]
|
285
|
+
# @param attrs [Hash] the attributes to validate against the form built for :object
|
286
|
+
# @return [Valkyrie::Resource] when we successfully processed the
|
287
|
+
# transaction (e.g. the transaction's data was valid according to
|
288
|
+
# the derived form)
|
289
|
+
#
|
290
|
+
# @yield the returned value of the yielded block should be a
|
291
|
+
# {Hyrax::Transactions::Transaction}. We yield because we first
|
292
|
+
# want to check if the attributes are valid. And if so, then process
|
293
|
+
# the transaction, which is something that could trigger expensive
|
294
|
+
# operations. Put another way, don't do something expensive if the
|
295
|
+
# data is invalid.
|
296
|
+
#
|
297
|
+
# TODO What do we return when the calculated form fails?
|
298
|
+
# @raise [StandardError] when there was a failure calling the transaction.
|
299
|
+
# Build and validate a form for :object, then run the yielded transaction
# against it.
#
# @return the transaction result's value on success
# @raise [StandardError] on failure, with a message built from the failure
#   and the result's trace as backtrace
def perform_transaction_for(object:, attrs:)
  form = Hyrax::Forms::ResourceForm.for(object).prepopulate!

  # TODO: Handle validations (the outcome of #validate is currently ignored)
  form.validate(attrs)

  transaction = yield
  result = transaction.call(form)

  result.value_or do
    failure = result.failure
    message = failure[0].to_s
    message = "#{message} - #{failure[1].full_messages.join(',')}" if failure[1].respond_to?(:full_messages)
    raise StandardError, message, result.trace
  end
end
|
315
|
+
|
316
|
+
##
|
317
|
+
# We accept attributes based on the model schema
|
318
|
+
#
|
319
|
+
# @return [Array<Symbols>]
|
320
|
+
# Attributes accepted for the model: the base permitted attributes plus the
# model's schema properties (or, for models without a schema, its property
# keys as symbols). Memoized.
def permitted_attributes
  @permitted_attributes ||= begin
    base = base_permitted_attributes
    model_attributes =
      if klass.respond_to?(:schema)
        Bulkrax::ValkyrieObjectFactory.schema_properties(klass)
      else
        klass.properties.keys.map(&:to_sym)
      end
    (base + model_attributes).uniq
  end
end
|
329
|
+
|
330
|
+
# Update a work by running the "change_set.update_work" transaction.
#
# @param attrs [Hash] attributes to apply
def update_work(attrs)
  # `attrs.try('visibility')` invoked a *method* named visibility, which a
  # Hash does not have, so the supplied visibility was always ignored. Read
  # the key instead, accepting either key form.
  visibility = attrs[:visibility] || attrs['visibility'] || 'open'

  perform_transaction_for(object: object, attrs: attrs) do
    transactions["change_set.update_work"]
      .with_step_args(
        'work_resource.add_file_sets' => { uploaded_files: uploaded_files_from(attrs) },
        'work_resource.save_acl' => { permissions_params: [visibility].compact }
      )
  end
end
|
339
|
+
|
340
|
+
# Update a collection by running the 'change_set.update_collection'
# transaction.
#
# NOTE: We do not add relationships here; that is part of the create
# relationships job.
#
# @param attrs [Hash] attributes to apply
def update_collection(attrs)
  perform_transaction_for(object: object, attrs: attrs) { transactions['change_set.update_collection'] }
end
|
347
|
+
|
348
|
+
# Placeholder: updating file sets is not yet implemented here, so this
# currently does nothing and returns nil.
#
# TODO: Make it work
def update_file_set(_attrs)
  nil
end
|
351
|
+
|
352
|
+
# All files referenced by the attributes: local uploads first, then the
# remote (S3) files.
#
# @param attrs [Hash] read with the :uploaded_files and :remote_files keys
# @return [Array]
def uploaded_files_from(attrs)
  local_files = uploaded_local_files(uploaded_files: attrs[:uploaded_files])
  remote_files = uploaded_s3_files(remote_files: attrs[:remote_files])
  local_files + remote_files
end
|
355
|
+
|
356
|
+
# Resolve uploaded-file ids to Hyrax::UploadedFile records.
#
# @param uploaded_files [Array, Object, nil] ids, wrapped with Array.wrap
# @return [Array]
def uploaded_local_files(uploaded_files: [])
  file_ids = Array.wrap(uploaded_files)
  file_ids.map { |file_id| Hyrax::UploadedFile.find(file_id) }
end
|
361
|
+
|
362
|
+
# Fetch the remote files from the staging-area S3 bucket.
#
# The bucket name comes from ENV["STAGING_AREA_S3_BUCKET"], falling back to
# "comet-staging-area-<Rails.env>". Keys for which the bucket returns nil
# are dropped.
#
# @param remote_files [Array<Hash>, nil] each entry is read with the "url" key
# @return [Array]
def uploaded_s3_files(remote_files: {})
  return [] if remote_files.blank?

  bucket_name = ENV.fetch("STAGING_AREA_S3_BUCKET", "comet-staging-area-#{Rails.env}")
  connection = Rails.application.config.staging_area_s3_connection
  bucket = connection.directories.get(bucket_name)

  keys = remote_files.map { |remote_file| remote_file["url"] }
  keys.map { |key| bucket.files.get(key) }.compact
end
|
373
|
+
|
374
|
+
# @Override Destroy existing files with Hyrax::Transactions
|
375
|
+
# Destroy every child file set of the object through the "file_set.destroy"
# transaction, then clear the object's references to them (members,
# renderings, representative and thumbnail).
def destroy_existing_files
  existing_files = Hyrax.custom_queries.find_child_file_sets(resource: object)

  step_args = {
    "file_set.remove_from_work" => { user: @user },
    "file_set.delete" => { user: @user }
  }
  existing_files.each do |file_set|
    # value! raises when the transaction did not succeed
    transactions["file_set.destroy"].with_step_args(step_args).call(file_set).value!
  end

  # Collect the ids once instead of scanning the file sets for every member.
  destroyed_ids = existing_files.map(&:id)
  @object.member_ids = @object.member_ids.reject { |member_id| destroyed_ids.include?(member_id) }
  @object.rendering_ids = []
  @object.representative_id = nil
  @object.thumbnail_id = nil
end
|
391
|
+
|
392
|
+
# Extend the inherited attribute transformation: record the source
# identifier as the alternate id, symbolize the keys, and default blank
# :title / :creator to [''].
#
# @param update [Boolean] forwarded to the parent implementation by the
#   bare `super`
# @return [Hash] symbol-keyed attributes
def transform_attributes(update: false)
  attrs = super.merge(alternate_ids: [source_identifier_value]).symbolize_keys

  attrs[:title] = [''] if attrs[:title].blank?
  attrs[:creator] = [''] if attrs[:creator].blank?
  attrs
end
|
400
|
+
end
|
401
|
+
# rubocop:enable Metrics/ClassLength
|
402
|
+
end
|
@@ -6,7 +6,7 @@ module Bulkrax
|
|
6
6
|
def available_admin_sets
|
7
7
|
# Restrict available_admin_sets to only those current user can deposit to.
|
8
8
|
@available_admin_sets ||= Hyrax::Collections::PermissionsService.source_ids_for_deposit(ability: current_ability, source_type: 'admin_set').map do |admin_set_id|
|
9
|
-
[
|
9
|
+
[Bulkrax.object_factory.find_or_nil(admin_set_id)&.title&.first || admin_set_id, admin_set_id]
|
10
10
|
end
|
11
11
|
end
|
12
12
|
end
|
@@ -20,14 +20,14 @@ module Bulkrax
|
|
20
20
|
return unless defined?(::Hyrax)
|
21
21
|
|
22
22
|
if params[:importer][:admin_set_id].blank?
|
23
|
-
params[:importer][:admin_set_id] =
|
23
|
+
params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
|
24
24
|
else
|
25
|
-
|
25
|
+
Bulkrax.object_factory.find(params[:importer][:admin_set_id])
|
26
26
|
end
|
27
27
|
return true
|
28
|
-
rescue ActiveFedora::ObjectNotFoundError
|
28
|
+
rescue ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
|
29
29
|
logger.warn("AdminSet #{params[:importer][:admin_set_id]} not found. Using default admin set.")
|
30
|
-
params[:importer][:admin_set_id] =
|
30
|
+
params[:importer][:admin_set_id] = Bulkrax.object_factory.default_admin_set_id
|
31
31
|
return true
|
32
32
|
end
|
33
33
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
+
##
|
4
5
|
# Responsible for creating parent-child relationships between Works and Collections.
|
5
6
|
#
|
6
7
|
# Handles three kinds of relationships:
|
@@ -42,6 +43,7 @@ module Bulkrax
|
|
42
43
|
|
43
44
|
queue_as Bulkrax.config.ingest_queue_name
|
44
45
|
|
46
|
+
##
|
45
47
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
48
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
47
49
|
#
|
@@ -53,7 +55,7 @@ module Bulkrax
|
|
53
55
|
#
|
54
56
|
# rubocop:disable Metrics/MethodLength
|
55
57
|
def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
|
56
|
-
importer_run = Bulkrax::ImporterRun.find(importer_run_id)
|
58
|
+
@importer_run = Bulkrax::ImporterRun.find(importer_run_id)
|
57
59
|
ability = Ability.new(importer_run.user)
|
58
60
|
|
59
61
|
parent_entry, parent_record = find_record(parent_identifier, importer_run_id)
|
@@ -79,9 +81,9 @@ module Bulkrax
|
|
79
81
|
|
80
82
|
# save record if members were added
|
81
83
|
if @parent_record_members_added
|
82
|
-
|
83
|
-
|
84
|
-
@child_members_added
|
84
|
+
Bulkrax.object_factory.save!(resource: parent_record, user: importer_run.user)
|
85
|
+
Bulkrax.object_factory.publish(event: 'object.membership.updated', object: parent_record)
|
86
|
+
Bulkrax.object_factory.update_index(resources: @child_members_added)
|
85
87
|
end
|
86
88
|
end
|
87
89
|
else
|
@@ -104,7 +106,7 @@ module Bulkrax
|
|
104
106
|
parent_entry&.set_status_info(errors.last, importer_run)
|
105
107
|
|
106
108
|
# TODO: This can create an infinite job cycle, consider a time to live tracker.
|
107
|
-
reschedule(
|
109
|
+
reschedule(parent_identifier: parent_identifier, importer_run_id: importer_run_id)
|
108
110
|
return false # stop current job from continuing to run after rescheduling
|
109
111
|
else
|
110
112
|
# rubocop:disable Rails/SkipsModelValidations
|
@@ -114,6 +116,8 @@ module Bulkrax
|
|
114
116
|
end
|
115
117
|
# rubocop:enable Metrics/MethodLength
|
116
118
|
|
119
|
+
attr_reader :importer_run
|
120
|
+
|
117
121
|
private
|
118
122
|
|
119
123
|
##
|
@@ -151,25 +155,32 @@ module Bulkrax
|
|
151
155
|
# We could do this outside of the loop, but that could lead to odd counter failures.
|
152
156
|
ability.authorize!(:edit, parent_record)
|
153
157
|
|
154
|
-
parent_record.is_a?(
|
158
|
+
if parent_record.is_a?(Bulkrax.collection_model_class)
|
159
|
+
add_to_collection(child_record, parent_record)
|
160
|
+
else
|
161
|
+
add_to_work(child_record, parent_record)
|
162
|
+
end
|
163
|
+
|
164
|
+
Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
|
155
165
|
|
156
|
-
child_record.file_sets.each(&:update_index) if update_child_records_works_file_sets? && child_record.respond_to?(:file_sets)
|
157
166
|
relationship.destroy
|
158
167
|
end
|
159
168
|
|
160
169
|
def add_to_collection(child_record, parent_record)
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
170
|
+
Bulkrax.object_factory.add_resource_to_collection(
|
171
|
+
collection: parent_record,
|
172
|
+
resource: child_record,
|
173
|
+
user: importer_run.user
|
174
|
+
)
|
165
175
|
end
|
166
176
|
|
167
177
|
def add_to_work(child_record, parent_record)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
178
|
+
# NOTE: The .add_child_to_parent_work should not persist changes to the
|
179
|
+
# child nor parent. We'll do that elsewhere in this loop.
|
180
|
+
Bulkrax.object_factory.add_child_to_parent_work(
|
181
|
+
parent: parent_record,
|
182
|
+
child: child_record
|
183
|
+
)
|
173
184
|
end
|
174
185
|
|
175
186
|
def reschedule(parent_identifier:, importer_run_id:)
|
@@ -5,8 +5,9 @@ module Bulkrax
|
|
5
5
|
queue_as Bulkrax.config.ingest_queue_name
|
6
6
|
|
7
7
|
def perform(entry, importer_run)
|
8
|
-
|
9
|
-
|
8
|
+
user = importer_run.importer.user
|
9
|
+
entry.factory.delete(user)
|
10
|
+
|
10
11
|
# rubocop:disable Rails/SkipsModelValidations
|
11
12
|
ImporterRun.increment_counter(:deleted_records, importer_run.id)
|
12
13
|
ImporterRun.decrement_counter(:enqueued_records, importer_run.id)
|
@@ -1,18 +1,31 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
2
|
module Bulkrax
|
4
3
|
class DownloadCloudFileJob < ApplicationJob
|
5
4
|
queue_as Bulkrax.config.ingest_queue_name
|
6
5
|
|
6
|
+
include ActionView::Helpers::NumberHelper
|
7
|
+
|
7
8
|
# Retrieve cloud file and write to the imports directory
|
8
9
|
# Note: if using the file system, the mounted directory in
|
9
10
|
# browse_everything MUST be shared by web and worker servers
|
10
11
|
def perform(file, target_file)
|
11
12
|
retriever = BrowseEverything::Retriever.new
|
13
|
+
last_logged_time = Time.zone.now
|
14
|
+
log_interval = 3.seconds
|
15
|
+
|
12
16
|
retriever.download(file, target_file) do |filename, retrieved, total|
|
13
|
-
|
14
|
-
|
17
|
+
percentage = (retrieved.to_f / total.to_f) * 100
|
18
|
+
current_time = Time.zone.now
|
19
|
+
|
20
|
+
if (current_time - last_logged_time) >= log_interval
|
21
|
+
# Use number_to_human_size for formatting
|
22
|
+
readable_retrieved = number_to_human_size(retrieved)
|
23
|
+
readable_total = number_to_human_size(total)
|
24
|
+
Rails.logger.info "Downloaded #{readable_retrieved} of #{readable_total}, #{filename}: #{percentage.round}% complete"
|
25
|
+
last_logged_time = current_time
|
26
|
+
end
|
15
27
|
end
|
28
|
+
Rails.logger.info "Download complete: #{file['url']} to #{target_file}"
|
16
29
|
end
|
17
30
|
end
|
18
31
|
end
|
@@ -63,8 +63,11 @@ module Bulkrax
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def check_parent_is_a_work!(parent_identifier)
|
66
|
-
|
67
|
-
|
66
|
+
case parent_record
|
67
|
+
when Bulkrax.collection_model_class, Bulkrax.file_model_class
|
68
|
+
error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
|
69
|
+
raise ::StandardError, error_msg
|
70
|
+
end
|
68
71
|
end
|
69
72
|
|
70
73
|
def find_parent_record(parent_identifier)
|
@@ -6,6 +6,7 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
def perform(importer_id, only_updates_since_last_import = false)
|
8
8
|
importer = Importer.find(importer_id)
|
9
|
+
return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
|
9
10
|
|
10
11
|
importer.current_run
|
11
12
|
unzip_imported_file(importer.parser)
|
@@ -16,6 +17,8 @@ module Bulkrax
|
|
16
17
|
importer.set_status_info(e)
|
17
18
|
end
|
18
19
|
|
20
|
+
private
|
21
|
+
|
19
22
|
def import(importer, only_updates_since_last_import)
|
20
23
|
importer.only_updates = only_updates_since_last_import || false
|
21
24
|
return unless importer.valid_import?
|
@@ -36,8 +39,21 @@ module Bulkrax
|
|
36
39
|
importer.current_run.save!
|
37
40
|
end
|
38
41
|
|
39
|
-
def schedule(importer)
|
40
|
-
|
42
|
+
def schedule(importer, wait_until = importer.next_import_at, message = nil)
|
43
|
+
Rails.logger.info message if message
|
44
|
+
ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
|
45
|
+
end
|
46
|
+
|
47
|
+
# checks the file sizes of the download files to match the original files
|
48
|
+
def all_files_completed?(importer)
|
49
|
+
cloud_files = importer.parser_fields['cloud_file_paths']
|
50
|
+
original_files = importer.parser_fields['original_file_paths']
|
51
|
+
return true unless cloud_files.present? && original_files.present?
|
52
|
+
|
53
|
+
imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
|
54
|
+
original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
|
55
|
+
|
56
|
+
original_file_sizes == imported_file_sizes
|
41
57
|
end
|
42
58
|
end
|
43
59
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class CsvCollectionEntry < CsvEntry
|
5
|
-
self.default_work_type =
|
5
|
+
self.default_work_type = Bulkrax.collection_model_class.to_s
|
6
6
|
|
7
7
|
# Use identifier set by CsvParser#unique_collection_identifier, which falls back
|
8
8
|
# on the Collection's first title if record[source_identifier] is not present
|