bulkrax 2.2.4 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ca4838e8fe86d901fd9196ab8c1a84f7072fae590a1fc411f8ee057756f9db9
4
- data.tar.gz: 8b6dde2983bac873a5dbbd0f70aab512199344f00a2a7cbdcbea5f5b2a9fe4c5
3
+ metadata.gz: 264fedacd4fc13cbd7450068ff6f16317b9ff33e6f28edf48484652fcdf50c0a
4
+ data.tar.gz: c91b6d74984569513ae567cc879558fbbc5374b6379ca272c744d5be5972ef45
5
5
  SHA512:
6
- metadata.gz: 14ee5cdf3bfee220faebd377b6ce25979397c4b72ac9de3f10d92d13f57d60fffecb089e26f7bdc1c5ccb147430fb637635053e0a601a1f0f96ad2082d1d10b9
7
- data.tar.gz: 0236d3b24309e7a0338d1fb38d40e0ab852ff452514311c5e7711fc8efcc39a19a48a044622b2168d43d1fd1517cd6a61a87273af350ab0dd91c6576230911d6
6
+ metadata.gz: 501a1a4bae256c70c35524e3a41d46f25c4d464c180355b10ae47ea13acc1ee20c22a1763c4908ce3821162b7ebd7c1c3a4b975f27ce32a9014a50d04c1c1f18
7
+ data.tar.gz: 63fbe35c7bf2496d46434fc1d1b8775e4d1b3c5c43529ed258d030ec6816d8a89ad7c9e90a9dc73e21e699f7efabee1dc6279d59840defce087eeeb7db023483
@@ -3,7 +3,10 @@
3
3
  require 'csv'
4
4
 
5
5
  module Bulkrax
6
- class CsvEntry < Entry
6
+ # TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
7
+ # We do too much in these entry classes. We need to extract the common logic from the various
8
+ # entry models into a module that can be shared between them.
9
+ class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
7
10
  serialize :raw_metadata, JSON
8
11
 
9
12
  def self.fields_from_data(data)
@@ -229,10 +232,19 @@ module Bulkrax
229
232
  'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
230
233
  ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
231
234
  )
232
- @possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
233
- memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
234
- memo
235
- end || []
235
+ return @possible_collection_ids if @possible_collection_ids.present?
236
+
237
+ collection_field_mapping = self.class.collection_field
238
+ return [] unless collection_field_mapping.present? && record[collection_field_mapping].present?
239
+
240
+ identifiers = []
241
+ split_titles = record[collection_field_mapping].split(/\s*[;|]\s*/)
242
+ split_titles.each do |c_title|
243
+ matching_collection_entries = importerexporter.entries.select { |e| e.raw_metadata['title'] == c_title }
244
+ raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
245
+ identifiers << matching_collection_entries.first&.identifier
246
+ end
247
+ @possible_collection_ids = identifiers.compact.presence || []
236
248
  end
237
249
 
238
250
  def collections_created?
@@ -88,31 +88,30 @@ module Bulkrax
88
88
  collections.each_with_index do |collection, index|
89
89
  next if collection.blank?
90
90
  break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
91
- ActiveSupport::Deprecation.warn(
92
- 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
93
- ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
94
- )
95
91
 
96
92
  ## BEGIN
97
93
  # Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
98
94
  # TODO: Remove once collection_field_mapping is removed
99
- metadata = if collection.delete(:from_collection_field_mapping)
100
- uci = unique_collection_identifier(collection)
101
- {
102
- title: collection[:title],
103
- work_identifier => uci,
104
- source_identifier => uci,
105
- visibility: 'open',
106
- collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
107
- }
108
- end
95
+ metadata = add_required_collection_metadata(collection)
109
96
  collection_hash = metadata.presence || collection
110
97
  ## END
111
98
 
112
99
  new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
113
100
  increment_counters(index, collection: true)
114
101
  # TODO: add support for :delete option
115
- ImportCollectionJob.perform_now(new_entry.id, current_run.id)
102
+ if collection.key?(:from_collection_field_mapping)
103
+ ActiveSupport::Deprecation.warn(
104
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
105
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
106
+ )
107
+ # When importing collections using the deprecated collection_field_mapping, the collection MUST be created
108
+ # before the work, so we use #perform_now to make sure that happens. The downside is, if a collection fails
109
+ # to import, it will stop the rest of the collections from importing successfully.
110
+ # TODO: Remove once collection_field_mapping is removed
111
+ ImportCollectionJob.perform_now(new_entry.id, current_run.id)
112
+ else
113
+ ImportCollectionJob.perform_later(new_entry.id, current_run.id)
114
+ end
116
115
  end
117
116
  importer.record_status
118
117
  rescue StandardError => e
@@ -152,6 +151,25 @@ module Bulkrax
152
151
  status_info(e)
153
152
  end
154
153
 
154
+ # Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
155
+ # TODO: Remove once collection_field_mapping is removed
156
+ def add_required_collection_metadata(raw_collection_data)
157
+ return unless raw_collection_data.key?(:from_collection_field_mapping)
158
+ ActiveSupport::Deprecation.warn(
159
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
160
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
161
+ )
162
+
163
+ uci = unique_collection_identifier(raw_collection_data)
164
+ {
165
+ title: raw_collection_data[:title],
166
+ work_identifier => uci,
167
+ source_identifier => uci,
168
+ visibility: 'open',
169
+ collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
170
+ }
171
+ end
172
+
155
173
  def write_partial_import_file(file)
156
174
  import_filename = import_file_path.split('/').last
157
175
  partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
@@ -188,13 +206,13 @@ module Bulkrax
188
206
 
189
207
  case importerexporter.export_from
190
208
  when 'all'
191
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", rows: 2_147_483_647).map(&:id)
192
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", rows: 2_147_483_647).map(&:id)
193
- @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", rows: 2_147_483_647).map(&:id)
209
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
210
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
211
+ @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
194
212
  when 'collection'
195
- @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
213
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
196
214
  when 'worktype'
197
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
215
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
198
216
  when 'importer'
199
217
  set_ids_for_exporting_from_importer
200
218
  end
@@ -214,7 +232,7 @@ module Bulkrax
214
232
  extra_filters = extra_filters.presence || '*:*'
215
233
 
216
234
  { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
217
- instance_variable_set(instance_var, ActiveFedora::SolrService.get(
235
+ instance_variable_set(instance_var, ActiveFedora::SolrService.post(
218
236
  extra_filters.to_s,
219
237
  fq: [
220
238
  "#{work_identifier}_sim:(#{complete_entry_identifiers.join(' OR ')})",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '2.2.4'
4
+ VERSION = '2.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.4
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-02-25 00:00:00.000000000 Z
11
+ date: 2022-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails