bulkrax 9.0.2 → 9.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab6b7a8920333f225336a80591a32f401aef2cfbe466d961c6f1f7a731757625
4
- data.tar.gz: 6aa05fc7028e0f1a7f8eb40b2de8b23ba5108a254d0623bb9307a6dff2daf579
3
+ metadata.gz: 29af1006486f6d0a48b2ff2f9f0417ca1dd3fbf0cea791de156cf5e37ec19e53
4
+ data.tar.gz: b3f2d406e03c22e5ed6a7b2f1d075e1babbd46bf82c05fa96237e2fc56490fbc
5
5
  SHA512:
6
- metadata.gz: 33fb1b5b369b6efcc535b80887c74e919e8b8b431b2199dbef35093319f58017d1bd34498d0c1c6df8c86f94eae4f117af2b88c2753eb0065db7630e8f480128
7
- data.tar.gz: 214580d620b0c40edd3351ed315d041c86036a1453233a045b35b21991316aa9f1a397032d07d7d6a56a659aa871515c2e64535f238e02d8b591355c1b8f9760
6
+ metadata.gz: 43f29db766c379a8c1d5f03ec8584a0492cccdcdcc63b5cf2936fcf97ebd8115adcb8a104944ff082c2fc2e15691f104047a0396d74e00124e6f4eb22f33f71e
7
+ data.tar.gz: a7cbacf2d07dcd987bd7935485023d1a856dc0372ab2ef98f4cd0dc4c54297640ec8e401180dd2d366d69bbf469635881f589ae3b9a01ce5a4bb626b4e21dadb
@@ -3,6 +3,10 @@ Blacklight.onLoad(function() {
3
3
  $('#importer-show-table').DataTable( {
4
4
  'processing': true,
5
5
  'serverSide': true,
6
+ 'width': '100%',
7
+ 'autoWidth': false,
8
+ 'scrollX': true,
9
+ 'scrollCollapse': true,
6
10
  "ajax": window.location.href.replace(/(\/(importers|exporters)\/\d+)/, "$1/entry_table.json"),
7
11
  "pageLength": 30,
8
12
  "lengthMenu": [[30, 100, 200], [30, 100, 200]],
@@ -15,6 +19,14 @@ Blacklight.onLoad(function() {
15
19
  { "data": "errors", "orderable": false },
16
20
  { "data": "actions", "orderable": false }
17
21
  ],
22
+ drawCallback: function() {
23
+ // Remove the inline styles that DataTables adds to the scrollHeadInner and table elements
24
+ // it's not perfect but better than the style being applied
25
+ setTimeout(function() {
26
+ $('.dataTables_scrollHeadInner').removeAttr('style');
27
+ $('.table.table-striped.dataTable.no-footer').removeAttr('style');
28
+ }, 100);
29
+ },
18
30
  initComplete: function () {
19
31
  // Add entry class filter
20
32
  entrySelect.bind(this)()
@@ -91,7 +91,7 @@ module Bulkrax
91
91
  # @note HEY WE'RE USING THIS FOR A WINGS CUSTOM QUERY. BE CAREFUL WITH
92
92
  # REMOVING IT.
93
93
  #
94
- # @see # {Wings::CustomQueries::FindBySourceIdentifier#find_by_model_and_property_value}
94
+ # @see Wings::CustomQueries::FindBySourceIdentifier#find_by_property_value
95
95
  def self.search_by_property(value:, klass:, field: nil, search_field: nil, name_field: nil, verify_property: false)
96
96
  return nil unless klass.respond_to?(:where)
97
97
  # We're not going to try to match nil nor "".
@@ -38,12 +38,54 @@ module Bulkrax
38
38
  end
39
39
  end
40
40
 
41
+ # Customized create method for Valkyrie so that @object gets set
42
+ def create
43
+ attrs = transform_attributes
44
+ @object = klass.new
45
+ conditionally_set_reindex_extent
46
+ run_callbacks :save do
47
+ run_callbacks :create do
48
+ @object = if klass == Bulkrax.collection_model_class
49
+ create_collection(attrs)
50
+ elsif klass == Bulkrax.file_model_class
51
+ create_file_set(attrs)
52
+ else
53
+ create_work(attrs)
54
+ end
55
+ end
56
+ end
57
+
58
+ apply_depositor_metadata
59
+ log_created(@object)
60
+ end
61
+
62
+ # Customized update method for Valkyrie so that @object gets set
63
+ def update
64
+ raise "Object doesn't exist" unless object
65
+ conditionally_destroy_existing_files
66
+
67
+ attrs = transform_attributes(update: true)
68
+ run_callbacks :save do
69
+ @object = if klass == Bulkrax.collection_model_class
70
+ update_collection(attrs)
71
+ elsif klass == Bulkrax.file_model_class
72
+ update_file_set(attrs)
73
+ else
74
+ update_work(attrs)
75
+ end
76
+ end
77
+ apply_depositor_metadata
78
+ log_updated(@object)
79
+ end
80
+
41
81
  # TODO: the following module needs revisiting for Valkyrie work.
42
82
  # proposal is to create Bulkrax::ValkyrieFileFactory.
43
83
  include Bulkrax::FileFactory
44
84
 
45
85
  self.file_set_factory_inner_workings_class = Bulkrax::ValkyrieObjectFactory::FileFactoryInnerWorkings
46
86
 
87
+ delegate :transactions, to: :class
88
+
47
89
  ##
48
90
  # When you want a different set of transactions you can change the
49
91
  # container.
@@ -55,24 +97,25 @@ module Bulkrax
55
97
  @transactions || Hyrax::Transactions::Container
56
98
  end
57
99
 
58
- def transactions
59
- self.class.transactions
60
- end
61
-
62
100
  ##
63
101
  # @!group Class Method Interface
64
102
 
65
103
  ##
66
- # @note This does not save either object. We need to do that in another
67
- # loop. Why? Because we might be adding many items to the parent.
104
+ # When adding a child to a parent work, we save the parent.
105
+ # Locking appears inconsistent, so we are finding the parent and
106
+ # saving it with each child, but waiting until the end to reindex.
107
+ # To do this we are bypassing the save! method defined below
68
108
  def self.add_child_to_parent_work(parent:, child:)
109
+ parent = self.find(parent.id)
69
110
  return true if parent.member_ids.include?(child.id)
70
-
71
111
  parent.member_ids << child.id
72
- parent.save
112
+ Hyrax.persister.save(resource: parent)
73
113
  end
74
114
 
115
+ ##
116
+ # The resource added to a collection can be either a work or another collection.
75
117
  def self.add_resource_to_collection(collection:, resource:, user:)
118
+ resource = self.find(resource.id)
76
119
  resource.member_of_collection_ids << collection.id
77
120
  save!(resource: resource, user: user)
78
121
  end
@@ -127,6 +170,8 @@ module Bulkrax
127
170
  end
128
171
 
129
172
  def self.publish(event:, **kwargs)
173
+ # It's a bit unclear what this should be if we can't rely on Hyrax.
174
+ raise NotImplementedError, "#{self}.#{__method__}" unless defined?(Hyrax)
130
175
  Hyrax.publisher.publish(event, **kwargs)
131
176
  end
132
177
 
@@ -139,19 +184,19 @@ module Bulkrax
139
184
  end
140
185
 
141
186
  def self.save!(resource:, user:)
142
- if resource.respond_to?(:save!)
143
- resource.save!
144
- else
187
+ if defined?(Hyrax)
145
188
  result = Hyrax.persister.save(resource: resource)
146
189
  raise Valkyrie::Persistence::ObjectNotFoundError unless result
147
190
  Hyrax.index_adapter.save(resource: result)
148
191
  if result.collection?
149
- publish('collection.metadata.updated', collection: result, user: user)
192
+ self.publish(event: 'collection.metadata.updated', collection: result, user: user)
150
193
  else
151
- publish('object.metadata.updated', object: result, user: user)
194
+ self.publish(event: 'object.metadata.updated', object: result, user: user)
152
195
  end
153
- resource
196
+ else
197
+ resource.save!
154
198
  end
199
+ resource
155
200
  end
156
201
 
157
202
  def self.update_index(resources:)
@@ -176,13 +221,12 @@ module Bulkrax
176
221
  # @return [Valkyrie::Resource] when a match is found, an instance of given
177
222
  # :klass
178
223
  # rubocop:disable Metrics/ParameterLists
179
- def self.search_by_property(value:, klass:, field: nil, name_field: nil, **)
224
+ def self.search_by_property(value:, field: nil, name_field: nil, search_field:, **)
180
225
  name_field ||= field
181
226
  raise "Expected named_field or field got nil" if name_field.blank?
182
227
  return if value.blank?
183
-
184
228
  # Return nil or a single object.
185
- Hyrax.query_service.custom_query.find_by_model_and_property_value(model: klass, property: name_field, value: value)
229
+ Hyrax.query_service.custom_queries.find_by_property_value(property: name_field, value: value, search_field: search_field)
186
230
  end
187
231
  # rubocop:enable Metrics/ParameterLists
188
232
 
@@ -212,7 +256,7 @@ module Bulkrax
212
256
 
213
257
  Hyrax.persister.delete(resource: obj)
214
258
  Hyrax.index_adapter.delete(resource: obj)
215
- self.class.publish(event: 'object.deleted', object: obj, user: user)
259
+ Hyrax.publisher.publish('object.deleted', object: obj, user: user)
216
260
  end
217
261
 
218
262
  def run!
@@ -231,7 +275,7 @@ module Bulkrax
231
275
 
232
276
  @object.depositor = @user.email
233
277
  object = Hyrax.persister.save(resource: @object)
234
- self.class.publish(event: "object.metadata.updated", object: object, user: @user)
278
+ Hyrax.publisher.publish("object.metadata.updated", object: object, user: @user)
235
279
  object
236
280
  end
237
281
 
@@ -337,7 +381,7 @@ module Bulkrax
337
381
  end
338
382
 
339
383
  def find_by_id
340
- Hyrax.query_service.find_by(id: attributes[:id]) if attributes.key? :id
384
+ find(id: attributes[:id]) if attributes.key? :id
341
385
  end
342
386
 
343
387
  ##
@@ -433,7 +477,6 @@ module Bulkrax
433
477
  remote_files.map do |r|
434
478
  file_path = download_file(r["url"])
435
479
  next unless file_path
436
-
437
480
  create_uploaded_file(file_path, r["file_name"])
438
481
  end.compact
439
482
  end
@@ -449,8 +492,7 @@ module Bulkrax
449
492
  file.rewind
450
493
  file.path
451
494
  rescue => e
452
- Rails.logger.debug "Failed to download file from #{url}: #{e.message}"
453
- nil
495
+ raise "Failed to download file from #{url}: #{e.message}"
454
496
  end
455
497
  end
456
498
 
@@ -460,8 +502,7 @@ module Bulkrax
460
502
  file.close
461
503
  uploaded_file
462
504
  rescue => e
463
- Rails.logger.debug "Failed to create Hyrax::UploadedFile for #{file_name}: #{e.message}"
464
- nil
505
+ raise "Failed to create Hyrax::UploadedFile for #{file_name}: #{e.message}"
465
506
  end
466
507
 
467
508
  # @Override Destroy existing files with Hyrax::Transactions
@@ -44,7 +44,7 @@ module Bulkrax
44
44
 
45
45
  queue_as Bulkrax.config.ingest_queue_name
46
46
 
47
- attr_accessor :user, :importer_run, :errors
47
+ attr_accessor :user, :importer_run, :errors, :importer_run_id, :ability, :number_of_successes, :number_of_failures
48
48
  ##
49
49
  # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
50
50
  # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
@@ -57,72 +57,52 @@ module Bulkrax
57
57
  #
58
58
  # rubocop:disable Metrics/MethodLength
59
59
  def perform(parent_identifier:, importer_run_id: nil, run_user: nil, failure_count: 0) # rubocop:disable Metrics/AbcSize
60
- importer_run = Bulkrax::ImporterRun.find(importer_run_id) if importer_run_id
61
- user = run_user || importer_run&.user
62
- ability = Ability.new(user)
63
-
64
- parent_entry, parent_record = find_record(parent_identifier, importer_run_id)
65
-
66
- number_of_successes = 0
67
- number_of_failures = 0
68
- errors = []
60
+ @importer_run_id = importer_run_id
61
+ @importer_run = Bulkrax::ImporterRun.find(@importer_run_id) if @importer_run_id
62
+ @user = run_user || importer_run&.user
63
+ @ability = Ability.new(@user)
64
+
65
+ @number_of_successes = 0
66
+ @number_of_failures = 0
67
+ @errors = []
69
68
  @parent_record_members_added = false
70
- @child_members_added = []
71
69
 
70
+ parent_entry, parent_record = find_record(parent_identifier, @importer_run_id)
72
71
  if parent_record
73
- conditionally_acquire_lock_for(parent_record.id) do
74
- ActiveRecord::Base.uncached do
75
- Bulkrax::PendingRelationship.where(parent_id: parent_identifier)
76
- .ordered.find_each do |rel|
77
- process(relationship: rel, importer_run_id: importer_run_id, parent_record: parent_record, ability: ability)
78
- number_of_successes += 1
79
- @parent_record_members_added = true
80
- rescue => e
81
- number_of_failures += 1
82
- rel.set_status_info(e, importer_run)
83
- errors << e
84
- end
85
- end
86
-
87
- # save record if members were added
88
- if @parent_record_members_added
89
- Bulkrax.object_factory.save!(resource: parent_record, user: user)
90
- Bulkrax.object_factory.publish(event: 'object.membership.updated', object: parent_record)
91
- Bulkrax.object_factory.update_index(resources: @child_members_added)
92
- end
72
+ # Works and collections are different breeds of animals:
73
+ # - works know both their children (file_sets and child works) in member_ids
74
+ # - works and collections know their parents (collections) in member_of_collection_ids
75
+ # We need to handle the two differently by locking the records appropriately to avoid race condition errors.
76
+ if parent_record.is_a?(Bulkrax.collection_model_class)
77
+ process_parent_as_collection(parent_record: parent_record, parent_identifier: parent_identifier)
78
+ else
79
+ process_parent_as_work(parent_record: parent_record, parent_identifier: parent_identifier)
93
80
  end
94
81
  else
95
- # In moving the check of the parent record "up" we've exposed a hidden reporting foible.
96
- # Namely we were reporting one error per child record when the parent record was itself
97
- # unavailable.
98
- #
99
- # We have chosen not to duplicate that "number of errors" as it does not seem like the
100
- # correct pattern for reporting a singular error (the previous pattern being one error per
101
- # child who's parent is not yet created).
102
- number_of_failures = 1
103
- errors = ["Parent record not yet available for creating relationships with children records."]
82
+ @number_of_failures = 1
83
+ @errors = ["Parent record #{parent_identifier} not yet available for creating relationships with children records."]
104
84
  end
105
85
 
106
- if errors.present?
86
+ if @errors.present?
107
87
  # rubocop:disable Rails/SkipsModelValidations
108
- ImporterRun.update_counters(importer_run_id, failed_relationships: number_of_failures)
88
+ ImporterRun.update_counters(@importer_run_id, failed_relationships: @number_of_failures)
109
89
  # rubocop:enable Rails/SkipsModelValidations
110
90
 
111
- parent_entry&.set_status_info(errors.last, importer_run)
91
+ parent_entry&.set_status_info(@errors.last, importer_run)
112
92
  failure_count += 1
113
93
 
114
94
  if failure_count < max_failure_count
115
95
  reschedule(
116
96
  parent_identifier: parent_identifier,
117
- importer_run_id: importer_run_id,
118
- run_user: run_user,
97
+ importer_run_id: @importer_run_id,
98
+ run_user: @user,
119
99
  failure_count: failure_count
120
100
  )
121
101
  end
122
- return errors # stop current job from continuing to run after rescheduling
102
+ return @errors # stop current job from continuing to run after rescheduling
123
103
  else
124
104
  # rubocop:disable Rails/SkipsModelValidations
125
- ImporterRun.update_counters(importer_run_id, processed_relationships: number_of_successes)
105
+ ImporterRun.update_counters(@importer_run_id, processed_relationships: @number_of_successes)
126
106
  # rubocop:enable Rails/SkipsModelValidations
127
107
  end
128
108
  end
@@ -132,6 +112,8 @@ module Bulkrax
132
112
 
133
113
  ##
134
114
  # We can use Hyrax's lock manager when we have one available.
115
+ # However it's not certain that this is actually working, so to be
116
+ # as safe as possible, we will reload resources before we update.
135
117
  if defined?(::Hyrax)
136
118
  include Hyrax::Lockable
137
119
 
@@ -151,46 +133,111 @@ module Bulkrax
151
133
  alias conditionally_acquire_lock_for acquire_lock_for
152
134
  end
153
135
 
154
- def process(relationship:, importer_run_id:, parent_record:, ability:)
155
- raise "#{relationship} needs a child to create relationship" if relationship.child_id.nil?
156
- raise "#{relationship} needs a parent to create relationship" if relationship.parent_id.nil?
157
-
158
- _child_entry, child_record = find_record(relationship.child_id, importer_run_id)
159
- raise "#{relationship} could not find child record" unless child_record
160
-
161
- raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work?
136
+ # When the parent is a collection, we save the relationship on each child.
137
+ # The parent does not need to be saved, as the relationship is stored on the child.
138
+ # but we do reindex the parent after all the children are added.
139
+ def process_parent_as_collection(parent_record:, parent_identifier:)
140
+ ActiveRecord::Base.uncached do
141
+ Bulkrax::PendingRelationship.where(parent_id: parent_identifier, importer_run_id: @importer_run_id)
142
+ .ordered.find_each do |rel|
143
+ raise "#{rel} needs a child to create relationship" if rel.child_id.nil?
144
+ raise "#{rel} needs a parent to create relationship" if rel.parent_id.nil?
145
+ add_to_collection(relationship: rel, parent_record: parent_record, ability: ability)
146
+ @number_of_successes += 1
147
+ @parent_record_members_added = true
148
+ rescue => e
149
+ rel.update(status_message: e.message)
150
+ @number_of_failures += 1
151
+ @errors << e
152
+ end
153
+ end
162
154
 
163
- ability.authorize!(:edit, child_record)
155
+ # if collection members were added, we reindex the collection
156
+ # The collection members have already saved the relationships
157
+ # To index the parent, we want to make sure we have the latest version of the parent,
158
+ # because another job may have updated it in the meantime.
159
+ return unless @parent_record_members_added
160
+ reloaded_parent = Bulkrax.object_factory.find(parent_record.id)
161
+ Bulkrax.object_factory.update_index(resources: [reloaded_parent])
162
+ Bulkrax.object_factory.publish(event: 'object.membership.updated', object: reloaded_parent, user: @user)
163
+ end
164
164
 
165
- # We could do this outside of the loop, but that could lead to odd counter failures.
166
- ability.authorize!(:edit, parent_record)
165
+ # When the parent is a work, we save the relationship on the parent.
166
+ # We prefer to save all of the member relationships and then save the parent once. Concurrent
167
+ # jobs may be trying to save the parent at the same time, so we need to lock the parent
168
+ # record while we are adding the children to it.
169
+ # However the locking appears to not be working so as a workaround we will save each member as we go,
170
+ # but only index the parent once at the end.
171
+ def process_parent_as_work(parent_record:, parent_identifier:)
172
+ conditionally_acquire_lock_for(parent_record.id.to_s) do
173
+ ActiveRecord::Base.uncached do
174
+ Bulkrax::PendingRelationship.where(parent_id: parent_identifier, importer_run_id: @importer_run_id)
175
+ .ordered.find_each do |rel|
176
+ raise "#{rel} needs a child to create relationship" if rel.child_id.nil?
177
+ raise "#{rel} needs a parent to create relationship" if rel.parent_id.nil?
178
+ add_to_work(relationship: rel, parent_record: parent_record, ability: ability)
179
+ self.number_of_successes += 1
180
+ @parent_record_members_added = true
181
+ rescue => e
182
+ rel.update(status_message: e.message)
183
+ @number_of_failures += 1
184
+ @errors << e
185
+ end
186
+ end
167
187
 
168
- if parent_record.is_a?(Bulkrax.collection_model_class)
169
- add_to_collection(child_record, parent_record)
170
- else
171
- add_to_work(child_record, parent_record)
188
+ # save record if members were added
189
+ if @parent_record_members_added
190
+ reloaded_parent = Bulkrax.object_factory.find(parent_record.id)
191
+ Bulkrax.object_factory.update_index(resources: [reloaded_parent])
192
+ Bulkrax.object_factory.publish(event: 'object.membership.updated', object: reloaded_parent, user: @user)
193
+ end
172
194
  end
173
-
174
- Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
175
-
176
- relationship.destroy
177
195
  end
178
196
 
179
- def add_to_collection(child_record, parent_record)
180
- Bulkrax.object_factory.add_resource_to_collection(
181
- collection: parent_record,
182
- resource: child_record,
183
- user: user
184
- )
197
+ # NOTE: the child changes are saved in the object factory.
198
+ def add_to_collection(relationship:, parent_record:, ability:)
199
+ ActiveRecord::Base.uncached do
200
+ _child_entry, child_record = find_record(relationship.child_id, @importer_run_id)
201
+ raise "#{relationship} could not find child record" unless child_record
202
+ raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work?
203
+ ability.authorize!(:edit, child_record)
204
+ # We could do this outside of the loop, but that could lead to odd counter failures.
205
+ ability.authorize!(:edit, parent_record)
206
+ # It is important to lock the child records as they are the ones being saved.
207
+ # However, locking doesn't seem to be working so we will reload the child record before saving.
208
+ # This is a workaround for the fact that the lock manager doesn't seem to be working.
209
+ conditionally_acquire_lock_for(child_record.id.to_s) do
210
+ Bulkrax.object_factory.add_resource_to_collection(
211
+ collection: parent_record,
212
+ resource: child_record,
213
+ user: @user
214
+ )
215
+ end
216
+ relationship.destroy
217
+ end
185
218
  end
186
219
 
187
- def add_to_work(child_record, parent_record)
188
- # NOTE: The .add_child_to_parent_work should not persist changes to the
189
- # child nor parent. We'll do that elsewhere in this loop.
190
- Bulkrax.object_factory.add_child_to_parent_work(
220
+ # NOTE: we only update the parent's member_ids and prefer to not save the parent until all children are added.
221
+ # However, the locking appears to not be working so as a workaround we will save each member as we go.
222
+ # This is a workaround for the fact that the lock manager doesn't seem to be working.
223
+ # To avoid having to reload the parent, we return the updated parent to the calling method.
224
+ def add_to_work(relationship:, parent_record:, ability:)
225
+ _child_entry, child_record = find_record(relationship.child_id, @importer_run_id)
226
+ raise "#{relationship} could not find child record" unless child_record
227
+ raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work?
228
+
229
+ ability.authorize!(:edit, child_record)
230
+ # We could do this outside of the loop, but that could lead to odd counter failures.
231
+ ability.authorize!(:edit, parent_record)
232
+ updated_parent = Bulkrax.object_factory.add_child_to_parent_work(
191
233
  parent: parent_record,
192
234
  child: child_record
193
235
  )
236
+ # default is false for this... do not typically need to index file sets of child records
237
+ Bulkrax.object_factory.update_index_for_file_sets_of(resource: child_record) if update_child_records_works_file_sets?
238
+ relationship.destroy
239
+
240
+ updated_parent
194
241
  end
195
242
 
196
243
  def reschedule(**kargs)
@@ -30,6 +30,7 @@ module Bulkrax
30
30
  return unless parser.file? && parser.zip?
31
31
 
32
32
  parser.unzip(parser.parser_fields['import_file_path'])
33
+ parser.remove_spaces_from_filenames
33
34
  end
34
35
 
35
36
  def update_current_run_counters(importer)
@@ -16,8 +16,9 @@ module Bulkrax
16
16
 
17
17
  def result(_parser, content)
18
18
  return nil if self.excluded == true || Bulkrax.reserved_properties.include?(self.to)
19
+ # rubocop:disable Style/RedundantParentheses
19
20
  return nil if self.if && (!self.if.is_a?(Array) && self.if.length != 2)
20
-
21
+ # rubocop:enable Style/RedundantParentheses
21
22
  if self.if
22
23
  return unless content.send(self.if[0], Regexp.new(self.if[1]))
23
24
  end
@@ -237,8 +237,16 @@ module Bulkrax
237
237
  # end
238
238
 
239
239
  # If the import data is zipped, unzip it to this path
240
- def importer_unzip_path
240
+ def importer_unzip_path(mkdir: false)
241
241
  @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
242
+ return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
243
+
244
+ # turns "tmp/imports/tenant/import_1_20250122035229_1" to "tmp/imports/tenant/import_1_20250122035229"
245
+ base_importer_unzip_path = @importer_unzip_path.split('_')[0...-1].join('_')
246
+
247
+ # If we don't have an existing unzip path, we'll try and find it.
248
+ # Just in case there are multiple paths, we sort by the number at the end of the path and get the last one
249
+ @importer_unzip_path = Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last
242
250
  end
243
251
 
244
252
  def errored_entries_csv_path
@@ -23,7 +23,7 @@ module Bulkrax
23
23
  end
24
24
 
25
25
  def latest?
26
- # TODO: remove if statment when we stop supporting Hyrax < 4
26
+ # TODO: remove if statement when we stop supporting Hyrax < 4
27
27
  self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0')
28
28
  self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id)
29
29
  else
@@ -432,7 +432,7 @@ module Bulkrax
432
432
 
433
433
  Zip::File.open(file_to_unzip) do |zip_file|
434
434
  zip_file.each do |entry|
435
- entry_path = File.join(importer_unzip_path, entry.name)
435
+ entry_path = File.join(importer_unzip_path(mkdir: true), entry.name)
436
436
  FileUtils.mkdir_p(File.dirname(entry_path))
437
437
  zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
438
438
  end
@@ -440,12 +440,27 @@ module Bulkrax
440
440
  end
441
441
 
442
442
  def untar(file_to_untar)
443
- Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
443
+ Dir.mkdir(importer_unzip_path(mkdir: true)) unless File.directory?(importer_unzip_path(mkdir: true))
444
444
  command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
445
445
  result = system(command)
446
446
  raise "Failed to extract #{file_to_untar}" unless result
447
447
  end
448
448
 
449
+ # File names referenced in CSVs have spaces replaced with underscores
450
+ # @see Bulkrax::CsvParser#file_paths
451
+ def remove_spaces_from_filenames
452
+ files = Dir.glob(File.join(importer_unzip_path, 'files', '*'))
453
+ files_with_spaces = files.select { |f| f.split('/').last.match?(' ') }
454
+ return if files_with_spaces.blank?
455
+
456
+ files_with_spaces.map! { |path| Pathname.new(path) }
457
+ files_with_spaces.each do |path|
458
+ filename = path.basename
459
+ filename_without_spaces = filename.to_s.tr(' ', '_')
460
+ path.rename(File.join(path.dirname, filename_without_spaces))
461
+ end
462
+ end
463
+
449
464
  def zip
450
465
  FileUtils.mkdir_p(exporter_export_zip_path)
451
466
 
@@ -341,7 +341,16 @@ module Bulkrax
341
341
  file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
342
342
  next if r[file_mapping].blank?
343
343
 
344
- r[file_mapping].split(Bulkrax.multi_value_element_split_on).map do |f|
344
+ split_value = Bulkrax.field_mappings.dig(self.class.to_s, :file, :split)
345
+ split_pattern = case split_value
346
+ when Regexp
347
+ split_value
348
+ when String
349
+ Regexp.new(split_value)
350
+ else
351
+ Bulkrax.multi_value_element_split_on
352
+ end
353
+ r[file_mapping].split(split_pattern).map do |f|
345
354
  file = File.join(path_to_files, f.tr(' ', '_'))
346
355
  if File.exist?(file) # rubocop:disable Style/GuardClause
347
356
  file
@@ -360,6 +369,10 @@ module Bulkrax
360
369
  @path_to_files = File.join(
361
370
  zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
362
371
  )
372
+
373
+ return @path_to_files if File.exist?(@path_to_files)
374
+
375
+ File.join(importer_unzip_path, 'files', filename) if file? && zip?
363
376
  end
364
377
 
365
378
  private
@@ -379,9 +392,15 @@ module Bulkrax
379
392
  # We expect a single CSV at the top level of the zip in the CSVParser
380
393
  # but we are willing to go look for it if need be
381
394
  def real_import_file_path
382
- return Dir["#{importer_unzip_path}/**/*.csv"].first if file? && zip?
395
+ return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip?
383
396
 
384
397
  parser_fields['import_file_path']
385
398
  end
399
+
400
+ # If there are CSVs that are meant to be attachments in the files directory,
401
+ # we don't want to consider them as the import CSV
402
+ def in_files_dir?(path)
403
+ File.dirname(path).ends_with?('files')
404
+ end
386
405
  end
387
406
  end
@@ -61,8 +61,6 @@ module Bulkrax
61
61
  ['contributor', 'coverage', 'creator', 'date', 'description', 'format', 'identifier', 'language', 'publisher', 'relation', 'rights', 'source', 'subject', 'title', 'type']
62
62
  end
63
63
 
64
- delegate :list_sets, to: :client
65
-
66
64
  def create_objects(types = [])
67
65
  types.each do |object_type|
68
66
  send("create_#{object_type.pluralize}")
@@ -6,7 +6,7 @@ module Hyrax
6
6
  # @see https://github.com/samvera/valkyrie/wiki/Queries#custom-queries
7
7
  class FindBySourceIdentifier
8
8
  def self.queries
9
- [:find_by_model_and_property_value]
9
+ [:find_by_property_value]
10
10
  end
11
11
 
12
12
  def initialize(query_service:)
@@ -18,30 +18,25 @@ module Hyrax
18
18
  delegate :orm_class, to: :resource_factory
19
19
 
20
20
  ##
21
- # @param model [Class, #internal_resource]
22
21
  # @param property [#to_s] the name of the property we're attempting to
23
22
  # query.
24
23
  # @param value [#to_s] the property's value that we're trying to match.
25
24
  #
26
25
  # @return [NilClass] when no record was found
27
26
  # @return [Valkyrie::Resource] when a record was found
28
- #
29
- # @note This is not a real estate transaction nor a Zillow lookup.
30
- def find_by_model_and_property_value(model:, property:, value:)
31
- sql_query = sql_for_find_by_model_and_property_value
32
- # NOTE: Do we need to ask the model for it's internal_resource?
33
- # TODO: no => undefined method `internal_resource' for Image:Class
34
- query_service.run_query(sql_query, model, property, value).first
27
+ def find_by_property_value(property:, value:, **)
28
+ sql_query = sql_for_find_by_property_value
29
+ query_service.run_query(sql_query, property, value.to_s).first
35
30
  end
36
31
 
37
32
  private
38
33
 
39
- def sql_for_find_by_model_and_property_value
34
+ def sql_for_find_by_property_value
40
35
  # NOTE: This is querying the first element of the property, but we might
41
36
  # want to check all of the elements.
42
37
  <<-SQL
43
38
  SELECT * FROM orm_resources
44
- WHERE internal_resource = ? AND metadata -> ? ->> 0 = ?
39
+ WHERE metadata -> ? ->> 0 = ?
45
40
  LIMIT 1;
46
41
  SQL
47
42
  end
@@ -6,7 +6,7 @@ module Wings
6
6
  # Custom query override specific to Wings
7
7
 
8
8
  def self.queries
9
- [:find_by_model_and_property_value]
9
+ [:find_by_property_value]
10
10
  end
11
11
 
12
12
  attr_reader :query_service
@@ -16,11 +16,14 @@ module Wings
16
16
  @query_service = query_service
17
17
  end
18
18
 
19
- def find_by_model_and_property_value(model:, property:, value:, use_valkyrie: Hyrax.config.use_valkyrie?)
20
- # NOTE: This is using the Bulkrax::ObjectFactory (e.g. the one
21
- # envisioned for ActiveFedora). In doing this, we avoid the situation
22
- # where Bulkrax::ValkyrieObjectFactory calls this custom query.
23
- af_object = Bulkrax::ObjectFactory.search_by_property(value: value, klass: model, field: property)
19
+ # rubocop:disable Lint/UnusedMethodArgument
20
+ def find_by_property_value(property:, value:, search_field:, use_valkyrie: Hyrax.config.use_valkyrie?)
21
+ # rubocop:enable Lint/UnusedMethodArgument
22
+ # NOTE: This is using the Bulkrax::ObjectFactory (e.g. the one envisioned for ActiveFedora).
23
+ # In doing this, we avoid the situation where Bulkrax::ValkyrieObjectFactory calls this custom query.
24
+
25
+ # This is doing a solr search so we have to use the search_field instead of the property
26
+ af_object = Bulkrax::ObjectFactory.search_by_property(value: value, klass: ActiveFedora::Base, field: search_field)
24
27
 
25
28
  return if af_object.blank?
26
29
  return af_object unless use_valkyrie
@@ -33,14 +33,16 @@
33
33
 
34
34
  <p class='bulkrax-p-align'>
35
35
  <% if @importer.present? %>
36
- <%# TODO Consider how to account for Bulkrax.collection_model_class %>
36
+ <%# TODO Consider how to account for Bulkrax.collection_model_class %>
37
37
  <% factory_record = @entry.factory.find %>
38
- <% if factory_record.present? && @entry.factory_class %>
39
- <strong><%= @entry.factory_class.model_name.human %> Link:</strong>
40
- <% if defined?(Hyrax) && @entry.factory_class.model_name.human == 'Collection' %>
41
- <%= link_to @entry.factory_class.model_name.human, hyrax.polymorphic_path(factory_record) %>
38
+ <% if factory_record.present? %>
39
+ <% factory_record_class = factory_record.class %>
40
+ <% factory_record_class_human = factory_record_class.model_name.human %>
41
+ <strong><%= factory_record_class_human %> Link:</strong>
42
+ <% if defined?(Hyrax) && factory_record_class_human == 'Collection' %>
43
+ <%= link_to factory_record_class_human, hyrax.polymorphic_path(factory_record) %>
42
44
  <% else %>
43
- <%= link_to @entry.factory_class.model_name.human, main_app.polymorphic_path(factory_record) %>
45
+ <%= link_to factory_record_class_human, main_app.polymorphic_path(factory_record) %>
44
46
  <% end %>
45
47
  <% else %>
46
48
  <strong>Item Link:</strong> Item has not yet been imported successfully
@@ -35,7 +35,7 @@
35
35
  input_html: { class: 'form-control' } ,
36
36
  required: false
37
37
  %>
38
- <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statment. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
38
+ <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
39
39
  <% end %>
40
40
  <h4>Bag or Bags to Import:</h4>
41
41
  <p>File upload and Cloud File upload must be a Zip file containing a single BagIt Bag, or a folder containing multiple BagIt Bags.</p>
@@ -20,7 +20,7 @@
20
20
  input_html: { class: 'form-control' },
21
21
  required: false
22
22
  %>
23
- <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statment. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
23
+ <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
24
24
  <% end %>
25
25
  <h4>Add CSV or ZIP File to Import:</h4>
26
26
  <%# accept a single file upload; data files and bags will need to be added another way %>
@@ -26,7 +26,7 @@
26
26
  input_html: { class: 'form-control' },
27
27
  required: false
28
28
  %>
29
- <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statment. If unchecked, use dc:rights from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
29
+ <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use dc:rights from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
30
30
  <% end %>
31
31
  <%= fi.input :thumbnail_url, required: false, as: :string, input_html: { value: importer.parser_fields['thumbnail_url'] } %>
32
32
  <div class="help-block well well-sm">
@@ -41,7 +41,7 @@
41
41
  item_helper: rights_statements.method(:include_current_value),
42
42
  input_html: { class: 'form-control' },
43
43
  required: false %>
44
- <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statment. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
44
+ <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statement. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %>
45
45
  <% end %>
46
46
  <h4>XML and files to Import:</h4>
47
47
  <p>File upload and Cloud File upload MUST be a either a single XML file (for metadata only import) OR a Zip file containing the XML files and data files, each in a separate folder.</p>
@@ -1,10 +1,10 @@
1
- <div class="col-xs-12 main-header">
1
+ <div class="col-xs-12 main-header d-flex justify-content-between align-items-center">
2
2
  <h1><span class="fa fa-cloud-upload" aria-hidden="true"></span> Importer: <%= @importer.name %></h1>
3
3
  <div class="pull-right">
4
- <%= link_to 'Download Original File', importer_original_file_path(@importer.id), class: 'btn btn-primary', data: { turbolinks: false } if @importer.original_file %>
4
+ <%= link_to 'Download Original File', importer_original_file_path(@importer.id), class: 'btn btn-primary text-nowrap', data: { turbolinks: false } if @importer.original_file %>
5
5
  <% if @importer.failed_entries? %>
6
- <%= link_to 'Export Errored Entries', importer_export_errors_path(@importer.id), class: 'btn btn-primary', data: { turbolinks: false }%>
7
- <%= link_to 'Upload Corrected Entries', importer_upload_corrected_entries_path(@importer.id), class: 'btn btn-primary' if @importer.parser.is_a?(Bulkrax::CsvParser) %>
6
+ <%= link_to 'Export Errored Entries', importer_export_errors_path(@importer.id), class: 'btn btn-primary text-nowrap', data: { turbolinks: false }%>
7
+ <%= link_to 'Upload Corrected Entries', importer_upload_corrected_entries_path(@importer.id), class: 'btn btn-primary text-nowrap' if @importer.parser.is_a?(Bulkrax::CsvParser) %>
8
8
  <% end %>
9
9
  </div>
10
10
  </div>
@@ -12,5 +12,5 @@
12
12
  </tr>
13
13
  </thead>
14
14
  </table>
15
- <div id='importer-entry-classes' class='hidden'><%= [item.parser.entry_class.to_s, item.parser.collection_entry_class.to_s, item.parser.file_set_entry_class.to_s].compact.join('|') %></div>
15
+ <div id='importer-entry-classes' class='hidden d-none'><%= [item.parser.entry_class.to_s, item.parser.collection_entry_class.to_s, item.parser.file_set_entry_class.to_s].compact.join('|') %></div>
16
16
  </div>
@@ -1,9 +1,9 @@
1
1
  en:
2
- helpers:
3
- action:
2
+ helpers:
3
+ action:
4
4
  importer:
5
5
  new: "New"
6
- exporter:
6
+ exporter:
7
7
  new: "New"
8
8
  bulkrax:
9
9
  admin:
@@ -75,6 +75,8 @@ en:
75
75
  identifier: Identifier
76
76
  entry_id: Entry ID
77
77
  status: Status
78
+ type: Type
79
+ updated_at: Updated At
78
80
  errors: Errors
79
81
  status_set_at: Status Set At
80
82
  actions: Actions
@@ -35,7 +35,7 @@ module Bulkrax
35
35
  ActionController::Base.view_paths = paths.uniq
36
36
 
37
37
  custom_query_strategies = {
38
- find_by_model_and_property_value: :find_single_or_nil
38
+ find_by_property_value: :find_single_or_nil
39
39
  }
40
40
 
41
41
  if defined?(::Goddess::CustomQueryContainer)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '9.0.2'
4
+ VERSION = '9.1.0'
5
5
  end
@@ -16,7 +16,7 @@ def main(opts = {})
16
16
  headers['Authorization'] = "Token: #{opts.delete(:auth_token)}"
17
17
  params = build_params(opts)
18
18
 
19
- logger.info("POST to #{url} - PARAMS #{params}")
19
+ Rails.logger.info("POST to #{url} - PARAMS #{params}")
20
20
 
21
21
  conn = Faraday.new(
22
22
  url: url,
@@ -92,10 +92,6 @@ def build_url(importer_id, url, port = nil)
92
92
  return url
93
93
  end
94
94
 
95
- def logger
96
- Rails.logger
97
- end
98
-
99
95
  def version
100
96
  puts "Bulkrax #{Bulkrax::VERSION}"
101
97
  puts "Slop #{Slop::VERSION}"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 9.0.2
4
+ version: 9.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-17 00:00:00.000000000 Z
11
+ date: 2025-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails