bulkrax 9.0.2 → 9.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +12 -0
- data/app/factories/bulkrax/object_factory.rb +1 -1
- data/app/factories/bulkrax/valkyrie_object_factory.rb +66 -25
- data/app/jobs/bulkrax/create_relationships_job.rb +123 -76
- data/app/jobs/bulkrax/importer_job.rb +1 -0
- data/app/matchers/bulkrax/application_matcher.rb +2 -1
- data/app/models/bulkrax/importer.rb +9 -1
- data/app/models/bulkrax/status.rb +1 -1
- data/app/parsers/bulkrax/application_parser.rb +17 -2
- data/app/parsers/bulkrax/csv_parser.rb +21 -2
- data/app/parsers/bulkrax/oai_dc_parser.rb +0 -2
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +6 -11
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +9 -6
- data/app/views/bulkrax/entries/show.html.erb +8 -6
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/_oai_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/_xml_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +4 -4
- data/app/views/bulkrax/shared/_entries_tab.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +5 -3
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/generators/bulkrax/templates/bin/importer +1 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29af1006486f6d0a48b2ff2f9f0417ca1dd3fbf0cea791de156cf5e37ec19e53
|
4
|
+
data.tar.gz: b3f2d406e03c22e5ed6a7b2f1d075e1babbd46bf82c05fa96237e2fc56490fbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43f29db766c379a8c1d5f03ec8584a0492cccdcdcc63b5cf2936fcf97ebd8115adcb8a104944ff082c2fc2e15691f104047a0396d74e00124e6f4eb22f33f71e
|
7
|
+
data.tar.gz: a7cbacf2d07dcd987bd7935485023d1a856dc0372ab2ef98f4cd0dc4c54297640ec8e401180dd2d366d69bbf469635881f589ae3b9a01ce5a4bb626b4e21dadb
|
@@ -3,6 +3,10 @@ Blacklight.onLoad(function() {
|
|
3
3
|
$('#importer-show-table').DataTable( {
|
4
4
|
'processing': true,
|
5
5
|
'serverSide': true,
|
6
|
+
'width': '100%',
|
7
|
+
'autoWidth': false,
|
8
|
+
'scrollX': true,
|
9
|
+
'scrollCollapse': true,
|
6
10
|
"ajax": window.location.href.replace(/(\/(importers|exporters)\/\d+)/, "$1/entry_table.json"),
|
7
11
|
"pageLength": 30,
|
8
12
|
"lengthMenu": [[30, 100, 200], [30, 100, 200]],
|
@@ -15,6 +19,14 @@ Blacklight.onLoad(function() {
|
|
15
19
|
{ "data": "errors", "orderable": false },
|
16
20
|
{ "data": "actions", "orderable": false }
|
17
21
|
],
|
22
|
+
drawCallback: function() {
|
23
|
+
// Remove the inline styles that DataTables adds to the scrollHeadInner and table elements
|
24
|
+
// it's not perfect but better than the style being applied
|
25
|
+
setTimeout(function() {
|
26
|
+
$('.dataTables_scrollHeadInner').removeAttr('style');
|
27
|
+
$('.table.table-striped.dataTable.no-footer').removeAttr('style');
|
28
|
+
}, 100);
|
29
|
+
},
|
18
30
|
initComplete: function () {
|
19
31
|
// Add entry class filter
|
20
32
|
entrySelect.bind(this)()
|
@@ -91,7 +91,7 @@ module Bulkrax
|
|
91
91
|
# @note HEY WE'RE USING THIS FOR A WINGS CUSTOM QUERY. BE CAREFUL WITH
|
92
92
|
# REMOVING IT.
|
93
93
|
#
|
94
|
-
# @see # {Wings::CustomQueries::FindBySourceIdentifier#
|
94
|
+
# @see Wings::CustomQueries::FindBySourceIdentifier#find_by_property_value
|
95
95
|
def self.search_by_property(value:, klass:, field: nil, search_field: nil, name_field: nil, verify_property: false)
|
96
96
|
return nil unless klass.respond_to?(:where)
|
97
97
|
# We're not going to try to match nil nor "".
|
@@ -38,12 +38,54 @@ module Bulkrax
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
+
# Customized create method for Valkyrie so that @object gets set
|
42
|
+
def create
|
43
|
+
attrs = transform_attributes
|
44
|
+
@object = klass.new
|
45
|
+
conditionally_set_reindex_extent
|
46
|
+
run_callbacks :save do
|
47
|
+
run_callbacks :create do
|
48
|
+
@object = if klass == Bulkrax.collection_model_class
|
49
|
+
create_collection(attrs)
|
50
|
+
elsif klass == Bulkrax.file_model_class
|
51
|
+
create_file_set(attrs)
|
52
|
+
else
|
53
|
+
create_work(attrs)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
apply_depositor_metadata
|
59
|
+
log_created(@object)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Customized update method for Valkyrie so that @object gets set
|
63
|
+
def update
|
64
|
+
raise "Object doesn't exist" unless object
|
65
|
+
conditionally_destroy_existing_files
|
66
|
+
|
67
|
+
attrs = transform_attributes(update: true)
|
68
|
+
run_callbacks :save do
|
69
|
+
@object = if klass == Bulkrax.collection_model_class
|
70
|
+
update_collection(attrs)
|
71
|
+
elsif klass == Bulkrax.file_model_class
|
72
|
+
update_file_set(attrs)
|
73
|
+
else
|
74
|
+
update_work(attrs)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
apply_depositor_metadata
|
78
|
+
log_updated(@object)
|
79
|
+
end
|
80
|
+
|
41
81
|
# TODO: the following module needs revisiting for Valkyrie work.
|
42
82
|
# proposal is to create Bulkrax::ValkyrieFileFactory.
|
43
83
|
include Bulkrax::FileFactory
|
44
84
|
|
45
85
|
self.file_set_factory_inner_workings_class = Bulkrax::ValkyrieObjectFactory::FileFactoryInnerWorkings
|
46
86
|
|
87
|
+
delegate :transactions, to: :class
|
88
|
+
|
47
89
|
##
|
48
90
|
# When you want a different set of transactions you can change the
|
49
91
|
# container.
|
@@ -55,24 +97,25 @@ module Bulkrax
|
|
55
97
|
@transactions || Hyrax::Transactions::Container
|
56
98
|
end
|
57
99
|
|
58
|
-
def transactions
|
59
|
-
self.class.transactions
|
60
|
-
end
|
61
|
-
|
62
100
|
##
|
63
101
|
# @!group Class Method Interface
|
64
102
|
|
65
103
|
##
|
66
|
-
#
|
67
|
-
#
|
104
|
+
# When adding a child to a parent work, we save the parent.
|
105
|
+
# Locking appears inconsistent, so we are finding the parent and
|
106
|
+
# saving it with each child, but waiting until the end to reindex.
|
107
|
+
# To do this we are bypassing the save! method defined below
|
68
108
|
def self.add_child_to_parent_work(parent:, child:)
|
109
|
+
parent = self.find(parent.id)
|
69
110
|
return true if parent.member_ids.include?(child.id)
|
70
|
-
|
71
111
|
parent.member_ids << child.id
|
72
|
-
|
112
|
+
Hyrax.persister.save(resource: parent)
|
73
113
|
end
|
74
114
|
|
115
|
+
##
|
116
|
+
# The resource added to a collection can be either a work or another collection.
|
75
117
|
def self.add_resource_to_collection(collection:, resource:, user:)
|
118
|
+
resource = self.find(resource.id)
|
76
119
|
resource.member_of_collection_ids << collection.id
|
77
120
|
save!(resource: resource, user: user)
|
78
121
|
end
|
@@ -127,6 +170,8 @@ module Bulkrax
|
|
127
170
|
end
|
128
171
|
|
129
172
|
def self.publish(event:, **kwargs)
|
173
|
+
# It's a bit unclear what this should be if we can't rely on Hyrax.
|
174
|
+
raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
|
130
175
|
Hyrax.publisher.publish(event, **kwargs)
|
131
176
|
end
|
132
177
|
|
@@ -139,19 +184,19 @@ module Bulkrax
|
|
139
184
|
end
|
140
185
|
|
141
186
|
def self.save!(resource:, user:)
|
142
|
-
if
|
143
|
-
resource.save!
|
144
|
-
else
|
187
|
+
if defined?(Hyrax)
|
145
188
|
result = Hyrax.persister.save(resource: resource)
|
146
189
|
raise Valkyrie::Persistence::ObjectNotFoundError unless result
|
147
190
|
Hyrax.index_adapter.save(resource: result)
|
148
191
|
if result.collection?
|
149
|
-
publish('collection.metadata.updated', collection: result, user: user)
|
192
|
+
self.publish(event: 'collection.metadata.updated', collection: result, user: user)
|
150
193
|
else
|
151
|
-
publish('object.metadata.updated', object: result, user: user)
|
194
|
+
self.publish(event: 'object.metadata.updated', object: result, user: user)
|
152
195
|
end
|
153
|
-
|
196
|
+
else
|
197
|
+
resource.save!
|
154
198
|
end
|
199
|
+
resource
|
155
200
|
end
|
156
201
|
|
157
202
|
def self.update_index(resources:)
|
@@ -176,13 +221,12 @@ module Bulkrax
|
|
176
221
|
# @return [Valkyrie::Resource] when a match is found, an instance of given
|
177
222
|
# :klass
|
178
223
|
# rubocop:disable Metrics/ParameterLists
|
179
|
-
def self.search_by_property(value:,
|
224
|
+
def self.search_by_property(value:, field: nil, name_field: nil, search_field:, **)
|
180
225
|
name_field ||= field
|
181
226
|
raise "Expected named_field or field got nil" if name_field.blank?
|
182
227
|
return if value.blank?
|
183
|
-
|
184
228
|
# Return nil or a single object.
|
185
|
-
Hyrax.query_service.
|
229
|
+
Hyrax.query_service.custom_queries.find_by_property_value(property: name_field, value: value, search_field: search_field)
|
186
230
|
end
|
187
231
|
# rubocop:enable Metrics/ParameterLists
|
188
232
|
|
@@ -212,7 +256,7 @@ module Bulkrax
|
|
212
256
|
|
213
257
|
Hyrax.persister.delete(resource: obj)
|
214
258
|
Hyrax.index_adapter.delete(resource: obj)
|
215
|
-
|
259
|
+
Hyrax.publisher.publish('object.deleted', object: obj, user: user)
|
216
260
|
end
|
217
261
|
|
218
262
|
def run!
|
@@ -231,7 +275,7 @@ module Bulkrax
|
|
231
275
|
|
232
276
|
@object.depositor = @user.email
|
233
277
|
object = Hyrax.persister.save(resource: @object)
|
234
|
-
|
278
|
+
Hyrax.publisher.publish("object.metadata.updated", object: object, user: @user)
|
235
279
|
object
|
236
280
|
end
|
237
281
|
|
@@ -337,7 +381,7 @@ module Bulkrax
|
|
337
381
|
end
|
338
382
|
|
339
383
|
def find_by_id
|
340
|
-
|
384
|
+
find(id: attributes[:id]) if attributes.key? :id
|
341
385
|
end
|
342
386
|
|
343
387
|
##
|
@@ -433,7 +477,6 @@ module Bulkrax
|
|
433
477
|
remote_files.map do |r|
|
434
478
|
file_path = download_file(r["url"])
|
435
479
|
next unless file_path
|
436
|
-
|
437
480
|
create_uploaded_file(file_path, r["file_name"])
|
438
481
|
end.compact
|
439
482
|
end
|
@@ -449,8 +492,7 @@ module Bulkrax
|
|
449
492
|
file.rewind
|
450
493
|
file.path
|
451
494
|
rescue => e
|
452
|
-
|
453
|
-
nil
|
495
|
+
raise "Failed to download file from #{url}: #{e.message}"
|
454
496
|
end
|
455
497
|
end
|
456
498
|
|
@@ -460,8 +502,7 @@ module Bulkrax
|
|
460
502
|
file.close
|
461
503
|
uploaded_file
|
462
504
|
rescue => e
|
463
|
-
|
464
|
-
nil
|
505
|
+
raise "Failed to create Hyrax::UploadedFile for #{file_name}: #{e.message}"
|
465
506
|
end
|
466
507
|
|
467
508
|
# @Override Destroy existing files with Hyrax::Transactions
|
@@ -44,7 +44,7 @@ module Bulkrax
|
|
44
44
|
|
45
45
|
queue_as Bulkrax.config.ingest_queue_name
|
46
46
|
|
47
|
-
attr_accessor :user, :importer_run, :errors
|
47
|
+
attr_accessor :user, :importer_run, :errors, :importer_run_id, :ability, :number_of_successes, :number_of_failures
|
48
48
|
##
|
49
49
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
50
50
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
@@ -57,72 +57,52 @@ module Bulkrax
|
|
57
57
|
#
|
58
58
|
# rubocop:disable Metrics/MethodLength
|
59
59
|
def perform(parent_identifier:, importer_run_id: nil, run_user: nil, failure_count: 0) # rubocop:disable Metrics/AbcSize
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
errors = []
|
60
|
+
@importer_run_id = importer_run_id
|
61
|
+
@importer_run = Bulkrax::ImporterRun.find(@importer_run_id) if @importer_run_id
|
62
|
+
@user = run_user || importer_run&.user
|
63
|
+
@ability = Ability.new(@user)
|
64
|
+
|
65
|
+
@number_of_successes = 0
|
66
|
+
@number_of_failures = 0
|
67
|
+
@errors = []
|
69
68
|
@parent_record_members_added = false
|
70
|
-
@child_members_added = []
|
71
69
|
|
70
|
+
parent_entry, parent_record = find_record(parent_identifier, @importer_run_id)
|
72
71
|
if parent_record
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
number_of_failures += 1
|
82
|
-
rel.set_status_info(e, importer_run)
|
83
|
-
errors << e
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
# save record if members were added
|
88
|
-
if @parent_record_members_added
|
89
|
-
Bulkrax.object_factory.save!(resource: parent_record, user: user)
|
90
|
-
Bulkrax.object_factory.publish(event: 'object.membership.updated', object: parent_record)
|
91
|
-
Bulkrax.object_factory.update_index(resources: @child_members_added)
|
92
|
-
end
|
72
|
+
# Works and collections are different breeds of animals:
|
73
|
+
# - works know both their children (file_sets and child works) in member_ids
|
74
|
+
# - works and collections know their parents (collections) in member_of_collection_ids
|
75
|
+
# We need to handle the two differently by locking the records appropriately to avoid race condition errors.
|
76
|
+
if parent_record.is_a?(Bulkrax.collection_model_class)
|
77
|
+
process_parent_as_collection(parent_record: parent_record, parent_identifier: parent_identifier)
|
78
|
+
else
|
79
|
+
process_parent_as_work(parent_record: parent_record, parent_identifier: parent_identifier)
|
93
80
|
end
|
94
81
|
else
|
95
|
-
|
96
|
-
|
97
|
-
# unavailable.
|
98
|
-
#
|
99
|
-
# We have chosen not to duplicate that "number of errors" as it does not seem like the
|
100
|
-
# correct pattern for reporting a singular error (the previous pattern being one error per
|
101
|
-
# child who's parent is not yet created).
|
102
|
-
number_of_failures = 1
|
103
|
-
errors = ["Parent record not yet available for creating relationships with children records."]
|
82
|
+
@number_of_failures = 1
|
83
|
+
@errors = ["Parent record #{parent_identifier} not yet available for creating relationships with children records."]
|
104
84
|
end
|
105
85
|
|
106
|
-
if errors.present?
|
86
|
+
if @errors.present?
|
107
87
|
# rubocop:disable Rails/SkipsModelValidations
|
108
|
-
ImporterRun.update_counters(importer_run_id, failed_relationships: number_of_failures)
|
88
|
+
ImporterRun.update_counters(@importer_run_id, failed_relationships: @number_of_failures)
|
109
89
|
# rubocop:enable Rails/SkipsModelValidations
|
110
90
|
|
111
|
-
parent_entry&.set_status_info(errors.last, importer_run)
|
91
|
+
parent_entry&.set_status_info(@errors.last, importer_run)
|
112
92
|
failure_count += 1
|
113
93
|
|
114
94
|
if failure_count < max_failure_count
|
115
95
|
reschedule(
|
116
96
|
parent_identifier: parent_identifier,
|
117
|
-
importer_run_id: importer_run_id,
|
118
|
-
run_user:
|
97
|
+
importer_run_id: @importer_run_id,
|
98
|
+
run_user: @user,
|
119
99
|
failure_count: failure_count
|
120
100
|
)
|
121
101
|
end
|
122
|
-
return errors # stop current job from continuing to run after rescheduling
|
102
|
+
return @errors # stop current job from continuing to run after rescheduling
|
123
103
|
else
|
124
104
|
# rubocop:disable Rails/SkipsModelValidations
|
125
|
-
ImporterRun.update_counters(importer_run_id, processed_relationships: number_of_successes)
|
105
|
+
ImporterRun.update_counters(@importer_run_id, processed_relationships: @number_of_successes)
|
126
106
|
# rubocop:enable Rails/SkipsModelValidations
|
127
107
|
end
|
128
108
|
end
|
@@ -132,6 +112,8 @@ module Bulkrax
|
|
132
112
|
|
133
113
|
##
|
134
114
|
# We can use Hyrax's lock manager when we have one available.
|
115
|
+
# However it's not certain that this is actually working, so to be
|
116
|
+
# as safe as possible, we will reload resources before we update.
|
135
117
|
if defined?(::Hyrax)
|
136
118
|
include Hyrax::Lockable
|
137
119
|
|
@@ -151,46 +133,111 @@ module Bulkrax
|
|
151
133
|
alias conditionally_acquire_lock_for acquire_lock_for
|
152
134
|
end
|
153
135
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
136
|
+
# When the parent is a collection, we save the relationship on each child.
|
137
|
+
# The parent does not need to be saved, as the relationship is stored on the child.
|
138
|
+
# We do, however, reindex the parent after all the children are added.
|
139
|
+
def process_parent_as_collection(parent_record:, parent_identifier:)
|
140
|
+
ActiveRecord::Base.uncached do
|
141
|
+
Bulkrax::PendingRelationship.where(parent_id: parent_identifier, importer_run_id: @importer_run_id)
|
142
|
+
.ordered.find_each do |rel|
|
143
|
+
raise "#{rel} needs a child to create relationship" if rel.child_id.nil?
|
144
|
+
raise "#{rel} needs a parent to create relationship" if rel.parent_id.nil?
|
145
|
+
add_to_collection(relationship: rel, parent_record: parent_record, ability: ability)
|
146
|
+
@number_of_successes += 1
|
147
|
+
@parent_record_members_added = true
|
148
|
+
rescue => e
|
149
|
+
rel.update(status_message: e.message)
|
150
|
+
@number_of_failures += 1
|
151
|
+
@errors << e
|
152
|
+
end
|
153
|
+
end
|
162
154
|
|
163
|
-
|
155
|
+
# if collection members were added, we reindex the collection
|
156
|
+
# The collection members have already saved the relationships
|
157
|
+
# To index the parent, we want to make sure we have the latest version of the parent,
|
158
|
+
# because another job may have updated it in the meantime.
|
159
|
+
return unless @parent_record_members_added
|
160
|
+
reloaded_parent = Bulkrax.object_factory.find(parent_record.id)
|
161
|
+
Bulkrax.object_factory.update_index(resources: [reloaded_parent])
|
162
|
+
Bulkrax.object_factory.publish(event: 'object.membership.updated', object: reloaded_parent, user: @user)
|
163
|
+
end
|
164
164
|
|
165
|
-
|
166
|
-
|
165
|
+
# When the parent is a work, we save the relationship on the parent.
|
166
|
+
# We prefer to save all of the member relationships and then save the parent once. Concurrent
|
167
|
+
# jobs may be trying to save the parent at the same time, so we need to lock the parent
|
168
|
+
# record while we are adding the children to it.
|
169
|
+
# However the locking appears to not be working so as a workaround we will save each member as we go,
|
170
|
+
# but only index the parent once at the end.
|
171
|
+
def process_parent_as_work(parent_record:, parent_identifier:)
|
172
|
+
conditionally_acquire_lock_for(parent_record.id.to_s) do
|
173
|
+
ActiveRecord::Base.uncached do
|
174
|
+
Bulkrax::PendingRelationship.where(parent_id: parent_identifier, importer_run_id: @importer_run_id)
|
175
|
+
.ordered.find_each do |rel|
|
176
|
+
raise "#{rel} needs a child to create relationship" if rel.child_id.nil?
|
177
|
+
raise "#{rel} needs a parent to create relationship" if rel.parent_id.nil?
|
178
|
+
add_to_work(relationship: rel, parent_record: parent_record, ability: ability)
|
179
|
+
self.number_of_successes += 1
|
180
|
+
@parent_record_members_added = true
|
181
|
+
rescue => e
|
182
|
+
rel.update(status_message: e.message)
|
183
|
+
@number_of_failures += 1
|
184
|
+
@errors << e
|
185
|
+
end
|
186
|
+
end
|
167
187
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
188
|
+
# save record if members were added
|
189
|
+
if @parent_record_members_added
|
190
|
+
reloaded_parent = Bulkrax.object_factory.find(parent_record.id)
|
191
|
+
Bulkrax.object_factory.update_index(resources: [reloaded_parent])
|
192
|
+
Bulkrax.object_factory.publish(event: 'object.membership.updated', object: reloaded_parent, user: @user)
|
193
|
+
end
|
172
194
|
end
|
173
|
-
|
174
|
-
Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
|
175
|
-
|
176
|
-
relationship.destroy
|
177
195
|
end
|
178
196
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
197
|
+
# NOTE: the child changes are saved in the object factory.
|
198
|
+
def add_to_collection(relationship:, parent_record:, ability:)
|
199
|
+
ActiveRecord::Base.uncached do
|
200
|
+
_child_entry, child_record = find_record(relationship.child_id, @importer_run_id)
|
201
|
+
raise "#{relationship} could not find child record" unless child_record
|
202
|
+
raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work?
|
203
|
+
ability.authorize!(:edit, child_record)
|
204
|
+
# We could do this outside of the loop, but that could lead to odd counter failures.
|
205
|
+
ability.authorize!(:edit, parent_record)
|
206
|
+
# It is important to lock the child records as they are the ones being saved.
|
207
|
+
# However, locking doesn't seem to be working so we will reload the child record before saving.
|
208
|
+
# This is a workaround for the fact that the lock manager doesn't seem to be working.
|
209
|
+
conditionally_acquire_lock_for(child_record.id.to_s) do
|
210
|
+
Bulkrax.object_factory.add_resource_to_collection(
|
211
|
+
collection: parent_record,
|
212
|
+
resource: child_record,
|
213
|
+
user: @user
|
214
|
+
)
|
215
|
+
end
|
216
|
+
relationship.destroy
|
217
|
+
end
|
185
218
|
end
|
186
219
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
220
|
+
# NOTE: we only update the parent's member_ids and prefer to not save the parent until all children are added.
|
221
|
+
# However, the locking appears to not be working, so as a workaround we will save each member as we go.
|
222
|
+
# This is a workaround for the fact that the lock manager doesn't seem to be working.
|
223
|
+
# To avoid having to reload the parent, we return the updated parent to the calling method.
|
224
|
+
def add_to_work(relationship:, parent_record:, ability:)
|
225
|
+
_child_entry, child_record = find_record(relationship.child_id, @importer_run_id)
|
226
|
+
raise "#{relationship} could not find child record" unless child_record
|
227
|
+
raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work?
|
228
|
+
|
229
|
+
ability.authorize!(:edit, child_record)
|
230
|
+
# We could do this outside of the loop, but that could lead to odd counter failures.
|
231
|
+
ability.authorize!(:edit, parent_record)
|
232
|
+
updated_parent = Bulkrax.object_factory.add_child_to_parent_work(
|
191
233
|
parent: parent_record,
|
192
234
|
child: child_record
|
193
235
|
)
|
236
|
+
# default is false for this... do not typically need to index file sets of child records
|
237
|
+
Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
|
238
|
+
relationship.destroy
|
239
|
+
|
240
|
+
updated_parent
|
194
241
|
end
|
195
242
|
|
196
243
|
def reschedule(**kargs)
|
@@ -16,8 +16,9 @@ module Bulkrax
|
|
16
16
|
|
17
17
|
def result(_parser, content)
|
18
18
|
return nil if self.excluded == true || Bulkrax.reserved_properties.include?(self.to)
|
19
|
+
# rubocop:disable Style/RedundantParentheses
|
19
20
|
return nil if self.if && (!self.if.is_a?(Array) && self.if.length != 2)
|
20
|
-
|
21
|
+
# rubocop:enable Style/RedundantParentheses
|
21
22
|
if self.if
|
22
23
|
return unless content.send(self.if[0], Regexp.new(self.if[1]))
|
23
24
|
end
|
@@ -237,8 +237,16 @@ module Bulkrax
|
|
237
237
|
# end
|
238
238
|
|
239
239
|
# If the import data is zipped, unzip it to this path
|
240
|
-
def importer_unzip_path
|
240
|
+
def importer_unzip_path(mkdir: false)
|
241
241
|
@importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
|
242
|
+
return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
|
243
|
+
|
244
|
+
# turns "tmp/imports/tenant/import_1_20250122035229_1" to "tmp/imports/tenant/import_1_20250122035229"
|
245
|
+
base_importer_unzip_path = @importer_unzip_path.split('_')[0...-1].join('_')
|
246
|
+
|
247
|
+
# If we don't have an existing unzip path, we'll try and find it.
|
248
|
+
# Just in case there are multiple paths, we sort by the number at the end of the path and get the last one
|
249
|
+
@importer_unzip_path = Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last
|
242
250
|
end
|
243
251
|
|
244
252
|
def errored_entries_csv_path
|
@@ -23,7 +23,7 @@ module Bulkrax
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def latest?
|
26
|
-
# TODO: remove if
|
26
|
+
# TODO: remove if statement when we stop supporting Hyrax < 4
|
27
27
|
self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0')
|
28
28
|
self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id)
|
29
29
|
else
|
@@ -432,7 +432,7 @@ module Bulkrax
|
|
432
432
|
|
433
433
|
Zip::File.open(file_to_unzip) do |zip_file|
|
434
434
|
zip_file.each do |entry|
|
435
|
-
entry_path = File.join(importer_unzip_path, entry.name)
|
435
|
+
entry_path = File.join(importer_unzip_path(mkdir: true), entry.name)
|
436
436
|
FileUtils.mkdir_p(File.dirname(entry_path))
|
437
437
|
zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
|
438
438
|
end
|
@@ -440,12 +440,27 @@ module Bulkrax
|
|
440
440
|
end
|
441
441
|
|
442
442
|
def untar(file_to_untar)
|
443
|
-
Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
|
443
|
+
Dir.mkdir(importer_unzip_path(mkdir: true)) unless File.directory?(importer_unzip_path(mkdir: true))
|
444
444
|
command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
|
445
445
|
result = system(command)
|
446
446
|
raise "Failed to extract #{file_to_untar}" unless result
|
447
447
|
end
|
448
448
|
|
449
|
+
# File names referenced in CSVs have spaces replaced with underscores
|
450
|
+
# @see Bulkrax::CsvParser#file_paths
|
451
|
+
def remove_spaces_from_filenames
|
452
|
+
files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
|
453
|
+
files_with_spaces = files.select { |f| f.split('/').last.match?(' ') }
|
454
|
+
return if files_with_spaces.blank?
|
455
|
+
|
456
|
+
files_with_spaces.map! { |path| Pathname.new(path) }
|
457
|
+
files_with_spaces.each do |path|
|
458
|
+
filename = path.basename
|
459
|
+
filename_without_spaces = filename.to_s.tr(' ', '_')
|
460
|
+
path.rename(File.join(path.dirname, filename_without_spaces))
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
449
464
|
def zip
|
450
465
|
FileUtils.mkdir_p(exporter_export_zip_path)
|
451
466
|
|
@@ -341,7 +341,16 @@ module Bulkrax
|
|
341
341
|
file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
|
342
342
|
next if r[file_mapping].blank?
|
343
343
|
|
344
|
-
|
344
|
+
split_value = Bulkrax.field_mappings.dig(self.class.to_s, :file, :split)
|
345
|
+
split_pattern = case split_value
|
346
|
+
when Regexp
|
347
|
+
split_value
|
348
|
+
when String
|
349
|
+
Regexp.new(split_value)
|
350
|
+
else
|
351
|
+
Bulkrax.multi_value_element_split_on
|
352
|
+
end
|
353
|
+
r[file_mapping].split(split_pattern).map do |f|
|
345
354
|
file = File.join(path_to_files, f.tr(' ', '_'))
|
346
355
|
if File.exist?(file) # rubocop:disable Style/GuardClause
|
347
356
|
file
|
@@ -360,6 +369,10 @@ module Bulkrax
|
|
360
369
|
@path_to_files = File.join(
|
361
370
|
zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
|
362
371
|
)
|
372
|
+
|
373
|
+
return @path_to_files if File.exist?(@path_to_files)
|
374
|
+
|
375
|
+
File.join(importer_unzip_path, 'files', filename) if file? && zip?
|
363
376
|
end
|
364
377
|
|
365
378
|
private
|
@@ -379,9 +392,15 @@ module Bulkrax
|
|
379
392
|
# We expect a single CSV at the top level of the zip in the CSVParser
|
380
393
|
# but we are willing to go look for it if need be
|
381
394
|
def real_import_file_path
|
382
|
-
return Dir["#{importer_unzip_path}/**/*.csv"].first if file? && zip?
|
395
|
+
return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip?
|
383
396
|
|
384
397
|
parser_fields['import_file_path']
|
385
398
|
end
|
399
|
+
|
400
|
+
# If there are CSVs that are meant to be attachments in the files directory,
|
401
|
+
# we don't want to consider them as the import CSV
|
402
|
+
def in_files_dir?(path)
|
403
|
+
File.dirname(path).ends_with?('files')
|
404
|
+
end
|
386
405
|
end
|
387
406
|
end
|
@@ -61,8 +61,6 @@ module Bulkrax
|
|
61
61
|
['contributor', 'coverage', 'creator', 'date', 'description', 'format', 'identifier', 'language', 'publisher', 'relation', 'rights', 'source', 'subject', 'title', 'type']
|
62
62
|
end
|
63
63
|
|
64
|
-
delegate :list_sets, to: :client
|
65
|
-
|
66
64
|
def create_objects(types = [])
|
67
65
|
types.each do |object_type|
|
68
66
|
send("create_#{object_type.pluralize}")
|
@@ -6,7 +6,7 @@ module Hyrax
|
|
6
6
|
# @see https://github.com/samvera/valkyrie/wiki/Queries#custom-queries
|
7
7
|
class FindBySourceIdentifier
|
8
8
|
def self.queries
|
9
|
-
[:
|
9
|
+
[:find_by_property_value]
|
10
10
|
end
|
11
11
|
|
12
12
|
def initialize(query_service:)
|
@@ -18,30 +18,25 @@ module Hyrax
|
|
18
18
|
delegate :orm_class, to: :resource_factory
|
19
19
|
|
20
20
|
##
|
21
|
-
# @param model [Class, #internal_resource]
|
22
21
|
# @param property [#to_s] the name of the property we're attempting to
|
23
22
|
# query.
|
24
23
|
# @param value [#to_s] the property's value that we're trying to match.
|
25
24
|
#
|
26
25
|
# @return [NilClass] when no record was found
|
27
26
|
# @return [Valkyrie::Resource] when a record was found
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
sql_query = sql_for_find_by_model_and_property_value
|
32
|
-
# NOTE: Do we need to ask the model for it's internal_resource?
|
33
|
-
# TODO: no => undefined method `internal_resource' for Image:Class
|
34
|
-
query_service.run_query(sql_query, model, property, value).first
|
27
|
+
def find_by_property_value(property:, value:, **)
|
28
|
+
sql_query = sql_for_find_by_property_value
|
29
|
+
query_service.run_query(sql_query, property, value.to_s).first
|
35
30
|
end
|
36
31
|
|
37
32
|
private
|
38
33
|
|
39
|
-
def
|
34
|
+
def sql_for_find_by_property_value
|
40
35
|
# NOTE: This is querying the first element of the property, but we might
|
41
36
|
# want to check all of the elements.
|
42
37
|
<<-SQL
|
43
38
|
SELECT * FROM orm_resources
|
44
|
-
WHERE
|
39
|
+
WHERE metadata -> ? ->> 0 = ?
|
45
40
|
LIMIT 1;
|
46
41
|
SQL
|
47
42
|
end
|
@@ -6,7 +6,7 @@ module Wings
|
|
6
6
|
# Custom query override specific to Wings
|
7
7
|
|
8
8
|
def self.queries
|
9
|
-
[:
|
9
|
+
[:find_by_property_value]
|
10
10
|
end
|
11
11
|
|
12
12
|
attr_reader :query_service
|
@@ -16,11 +16,14 @@ module Wings
|
|
16
16
|
@query_service = query_service
|
17
17
|
end
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
#
|
22
|
-
#
|
23
|
-
|
19
|
+
# rubocop:disable Lint/UnusedMethodArgument
|
20
|
+
def find_by_property_value(property:, value:, search_field:, use_valkyrie: Hyrax.config.use_valkyrie?)
|
21
|
+
# rubocop:enable Lint/UnusedMethodArgument
|
22
|
+
# NOTE: This is using the Bulkrax::ObjectFactory (e.g. the one envisioned for ActiveFedora).
|
23
|
+
# In doing this, we avoid the situation where Bulkrax::ValkyrieObjectFactory calls this custom query.
|
24
|
+
|
25
|
+
# This is doing a solr search so we have to use the search_field instead of the property
|
26
|
+
af_object = Bulkrax::ObjectFactory.search_by_property(value: value, klass: ActiveFedora::Base, field: search_field)
|
24
27
|
|
25
28
|
return if af_object.blank?
|
26
29
|
return af_object unless use_valkyrie
|
@@ -33,14 +33,16 @@
|
|
33
33
|
|
34
34
|
<p class='bulkrax-p-align'>
|
35
35
|
<% if @importer.present? %>
|
36
|
-
|
36
|
+
<%# TODO Consider how to account for Bulkrax.collection_model_class %>
|
37
37
|
<% factory_record = @entry.factory.find %>
|
38
|
-
<% if factory_record.present?
|
39
|
-
|
40
|
-
<%
|
41
|
-
|
38
|
+
<% if factory_record.present? %>
|
39
|
+
<% factory_record_class = factory_record.class %>
|
40
|
+
<% factory_record_class_human = factory_record_class.model_name.human %>
|
41
|
+
<strong><%= factory_record_class_human %> Link:</strong>
|
42
|
+
<% if defined?(Hyrax) && factory_record_class_human == 'Collection' %>
|
43
|
+
<%= link_to factory_record_class_human, hyrax.polymorphic_path(factory_record) %>
|
42
44
|
<% else %>
|
43
|
-
<%= link_to
|
45
|
+
<%= link_to factory_record_class_human, main_app.polymorphic_path(factory_record) %>
|
44
46
|
<% end %>
|
45
47
|
<% else %>
|
46
48
|
<strong>Item Link:</strong> Item has not yet been imported successfully
|
@@ -35,7 +35,7 @@
|
|
35
35
|
input_html: { class: 'form-control' } ,
|
36
36
|
required: false
|
37
37
|
%>
|
38
|
-
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights
|
38
|
+
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
|
39
39
|
<% end %>
|
40
40
|
<h4>Bag or Bags to Import:</h4>
|
41
41
|
<p>File upload and Cloud File upload must be a Zip file containing a single BagIt Bag, or a folder containing multiple BagIt Bags.</p>
|
@@ -20,7 +20,7 @@
|
|
20
20
|
input_html: { class: 'form-control' },
|
21
21
|
required: false
|
22
22
|
%>
|
23
|
-
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights
|
23
|
+
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
|
24
24
|
<% end %>
|
25
25
|
<h4>Add CSV or ZIP File to Import:</h4>
|
26
26
|
<%# accept a single file upload; data files and bags will need to be added another way %>
|
@@ -26,7 +26,7 @@
|
|
26
26
|
input_html: { class: 'form-control' },
|
27
27
|
required: false
|
28
28
|
%>
|
29
|
-
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights
|
29
|
+
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use dc:rights from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
|
30
30
|
<% end %>
|
31
31
|
<%= fi.input :thumbnail_url, required: false, as: :string, input_html: { value: importer.parser_fields['thumbnail_url'] } %>
|
32
32
|
<div class="help-block well well-sm">
|
@@ -41,7 +41,7 @@
|
|
41
41
|
item_helper: rights_statements.method(:include_current_value),
|
42
42
|
input_html: { class: 'form-control' },
|
43
43
|
required: false %>
|
44
|
-
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights
|
44
|
+
<%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
|
45
45
|
<% end %>
|
46
46
|
<h4>XML and files to Import:</h4>
|
47
47
|
<p>File upload and Cloud File upload MUST be a either a single XML file (for metadata only import) OR a Zip file containing the XML files and data files, each in a separate folder.</p>
|
@@ -1,10 +1,10 @@
|
|
1
|
-
<div class="col-xs-12 main-header">
|
1
|
+
<div class="col-xs-12 main-header d-flex justify-content-between align-items-center">
|
2
2
|
<h1><span class="fa fa-cloud-upload" aria-hidden="true"></span> Importer: <%= @importer.name %></h1>
|
3
3
|
<div class="pull-right">
|
4
|
-
<%= link_to 'Download Original File', importer_original_file_path(@importer.id), class: 'btn btn-primary', data: { turbolinks: false } if @importer.original_file %>
|
4
|
+
<%= link_to 'Download Original File', importer_original_file_path(@importer.id), class: 'btn btn-primary text-nowrap', data: { turbolinks: false } if @importer.original_file %>
|
5
5
|
<% if @importer.failed_entries? %>
|
6
|
-
<%= link_to 'Export Errored Entries', importer_export_errors_path(@importer.id), class: 'btn btn-primary', data: { turbolinks: false }%>
|
7
|
-
<%= link_to 'Upload Corrected Entries', importer_upload_corrected_entries_path(@importer.id), class: 'btn btn-primary' if @importer.parser.is_a?(Bulkrax::CsvParser) %>
|
6
|
+
<%= link_to 'Export Errored Entries', importer_export_errors_path(@importer.id), class: 'btn btn-primary text-nowrap', data: { turbolinks: false }%>
|
7
|
+
<%= link_to 'Upload Corrected Entries', importer_upload_corrected_entries_path(@importer.id), class: 'btn btn-primary text-nowrap' if @importer.parser.is_a?(Bulkrax::CsvParser) %>
|
8
8
|
<% end %>
|
9
9
|
</div>
|
10
10
|
</div>
|
@@ -12,5 +12,5 @@
|
|
12
12
|
</tr>
|
13
13
|
</thead>
|
14
14
|
</table>
|
15
|
-
<div id='importer-entry-classes' class='hidden'><%= [item.parser.entry_class.to_s, item.parser.collection_entry_class.to_s, item.parser.file_set_entry_class.to_s].compact.join('|') %></div>
|
15
|
+
<div id='importer-entry-classes' class='hidden d-none'><%= [item.parser.entry_class.to_s, item.parser.collection_entry_class.to_s, item.parser.file_set_entry_class.to_s].compact.join('|') %></div>
|
16
16
|
</div>
|
@@ -1,9 +1,9 @@
|
|
1
1
|
en:
|
2
|
-
helpers:
|
3
|
-
action:
|
2
|
+
helpers:
|
3
|
+
action:
|
4
4
|
importer:
|
5
5
|
new: "New"
|
6
|
-
exporter:
|
6
|
+
exporter:
|
7
7
|
new: "New"
|
8
8
|
bulkrax:
|
9
9
|
admin:
|
@@ -75,6 +75,8 @@ en:
|
|
75
75
|
identifier: Identifier
|
76
76
|
entry_id: Entry ID
|
77
77
|
status: Status
|
78
|
+
type: Type
|
79
|
+
updated_at: Updated At
|
78
80
|
errors: Errors
|
79
81
|
status_set_at: Status Set At
|
80
82
|
actions: Actions
|
data/lib/bulkrax/engine.rb
CHANGED
data/lib/bulkrax/version.rb
CHANGED
@@ -16,7 +16,7 @@ def main(opts = {})
|
|
16
16
|
headers['Authorization'] = "Token: #{opts.delete(:auth_token)}"
|
17
17
|
params = build_params(opts)
|
18
18
|
|
19
|
-
logger.info("POST to #{url} - PARAMS #{params}")
|
19
|
+
Rails.logger.info("POST to #{url} - PARAMS #{params}")
|
20
20
|
|
21
21
|
conn = Faraday.new(
|
22
22
|
url: url,
|
@@ -92,10 +92,6 @@ def build_url(importer_id, url, port = nil)
|
|
92
92
|
return url
|
93
93
|
end
|
94
94
|
|
95
|
-
def logger
|
96
|
-
Rails.logger
|
97
|
-
end
|
98
|
-
|
99
95
|
def version
|
100
96
|
puts "Bulkrax #{Bulkrax::VERSION}"
|
101
97
|
puts "Slop #{Slop::VERSION}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 9.0.2
|
4
|
+
version: 9.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|