bulkrax 4.0.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb56d86ee90ae9e1cf0628504694e1301ab8f2d6b24ffa8fd323f8953a8ee956
4
- data.tar.gz: 71056b077e300f27eee3bcccd9d7e2bee2fc7bdf2fc6ba9248b69a29f3994f9c
3
+ metadata.gz: 61886547fff51b48446b9dfaf420f14deea3fc86e9c5d8b44c1886087ee8086a
4
+ data.tar.gz: d19557cd24341cbd6001e835fff081dca349eeeab818b6b8bcf4c4cb91e5b692
5
5
  SHA512:
6
- metadata.gz: 05ea49e6f2c5e73cbddacf35dcaf9de499760d7093e3ae8f3ce4ea5ab28e25d065b7877607436fcbe02d21e17c2df940b0224f1ea7d638a602486ce807d99981
7
- data.tar.gz: ecdda29924e09793e62684f16ebcd79cd90ab9e8204d011b89a577ca667a644709ff2df5b525ac6c32355426038c05d9fc3d74c6efb20ee7cfab653d9b89b67a
6
+ metadata.gz: d14c4c440b00f6fe0e689e506b77775b6bbfdf5e6a9c64c5310ccd84947a6f71880c52518575839534bb90be89cc7a198c9fd4d51831f6814f18380391abc8e2
7
+ data.tar.gz: 2c743fa2532fc672437240492faa2b6660194afff19c0ee27e6c6a9a8a402f7ae55ee0ecec84dad2ce474ecf86579218f47df578773e01d8409922f013cd9123
@@ -12,7 +12,8 @@ module Bulkrax
12
12
  def perform(entry_id, importer_run_id)
13
13
  @importer_run_id = importer_run_id
14
14
  entry = Entry.find(entry_id)
15
- parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip
15
+ # e.g. "parents" or "parents_1"
16
+ parent_identifier = (entry.raw_metadata[entry.related_parents_raw_mapping] || entry.raw_metadata["#{entry.related_parents_raw_mapping}_1"])&.strip
16
17
 
17
18
  validate_parent!(parent_identifier)
18
19
 
@@ -57,7 +58,7 @@ module Bulkrax
57
58
  end
58
59
 
59
60
  def check_parent_exists!(parent_identifier)
60
- raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.blank?
61
+ raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.nil?
61
62
  end
62
63
 
63
64
  def check_parent_is_a_work!(parent_identifier)
@@ -66,8 +67,7 @@ module Bulkrax
66
67
  end
67
68
 
68
69
  def find_parent_record(parent_identifier)
69
- @parent_record ||= find_record(parent_identifier, importer_run_id)
70
- @parent_record = parent_record.last if parent_record.is_a? Array
70
+ _, @parent_record = find_record(parent_identifier, importer_run_id)
71
71
  end
72
72
  end
73
73
  end
@@ -14,15 +14,13 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
21
21
  case self.export_from
22
22
  when 'collection'
23
23
  create_from_collection
24
- when 'collections metadata'
25
- create_from_collections_metadata
26
24
  when 'importer'
27
25
  create_from_importer
28
26
  when 'worktype'
@@ -89,7 +87,6 @@ module Bulkrax
89
87
  [
90
88
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
91
89
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
92
- [I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
93
90
  [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
94
91
  [I18n.t('bulkrax.exporter.labels.all'), 'all']
95
92
  ]
@@ -75,7 +75,7 @@ module Bulkrax
75
75
  def get_field_mapping_hash_for(key)
76
76
  return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
77
77
 
78
- mapping = importerexporter.field_mapping == [{}] ? {} : importerexporter.field_mapping
78
+ mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {}
79
79
  instance_variable_set(
80
80
  "@#{key}_hash",
81
81
  mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
@@ -264,6 +264,8 @@ module Bulkrax
264
264
 
265
265
  Dir["#{exporter_export_path}/**"].each do |folder|
266
266
  zip_path = "#{exporter_export_zip_path.split('/').last}_#{folder.split('/').last}.zip"
267
+ FileUtils.rm_rf("#{exporter_export_zip_path}/#{zip_path}")
268
+
267
269
  Zip::File.open(File.join("#{exporter_export_zip_path}/#{zip_path}"), create: true) do |zip_file|
268
270
  Dir["#{folder}/**/**"].each do |file|
269
271
  zip_file.add(file.sub("#{folder}/", ''), file)
@@ -97,28 +97,6 @@ module Bulkrax
97
97
  @total = 0
98
98
  end
99
99
 
100
- def current_record_ids
101
- @work_ids = []
102
- @collection_ids = []
103
- @file_set_ids = []
104
-
105
- case importerexporter.export_from
106
- when 'all'
107
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
108
- @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
109
- when 'collection'
110
- @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
111
- when 'worktype'
112
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
113
- when 'importer'
114
- set_ids_for_exporting_from_importer
115
- end
116
-
117
- find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection' || importerexporter.export_from == 'worktype'
118
-
119
- @work_ids + @collection_ids + @file_set_ids
120
- end
121
-
122
100
  # export methods
123
101
 
124
102
  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
@@ -128,14 +106,23 @@ module Bulkrax
128
106
 
129
107
  folder_count = 1
130
108
  records_in_folder = 0
109
+ work_entries = importerexporter.entries.where(identifier: @work_ids)
110
+ collection_entries = importerexporter.entries.where(identifier: @collection_ids)
111
+ file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
131
112
 
132
- importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |entry|
113
+ work_entries[0..limit || total].each do |entry|
133
114
  record = ActiveFedora::Base.find(entry.identifier)
134
- next unless Hyrax.config.curation_concerns.include?(record.class)
115
+ next unless record
135
116
 
136
117
  bag_entries = [entry]
137
- file_set_entries = Bulkrax::CsvFileSetEntry.where(importerexporter_id: importerexporter.id).where("parsed_metadata LIKE '%#{record.id}%'")
138
- file_set_entries.each { |fse| bag_entries << fse }
118
+
119
+ if record.member_of_collection_ids.present?
120
+ collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
121
+ end
122
+
123
+ if record.file_sets.present?
124
+ file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
125
+ end
139
126
 
140
127
  records_in_folder += bag_entries.count
141
128
  if records_in_folder > records_split_count
@@ -153,7 +140,7 @@ module Bulkrax
153
140
  file.write(io.read)
154
141
  file.close
155
142
  begin
156
- bag.add_file(file_name, file.path)
143
+ bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
157
144
  rescue => e
158
145
  entry.status_info(e)
159
146
  status_info(e)
@@ -184,6 +184,7 @@ module Bulkrax
184
184
  current_record_ids
185
185
  end
186
186
 
187
+ # rubocop:disable Metrics/AbcSize
187
188
  def current_record_ids
188
189
  @work_ids = []
189
190
  @collection_ids = []
@@ -195,18 +196,21 @@ module Bulkrax
195
196
  @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
196
197
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
197
198
  when 'collection'
198
- @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
199
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
200
+ # get the parent collection and child collections
199
201
  @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
200
- when 'collections metadata'
201
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
202
+ @collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
202
203
  when 'worktype'
203
204
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
204
205
  when 'importer'
205
206
  set_ids_for_exporting_from_importer
206
207
  end
207
208
 
209
+ find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection'
210
+
208
211
  @work_ids + @collection_ids + @file_set_ids
209
212
  end
213
+ # rubocop:enable Metrics/AbcSize
210
214
 
211
215
  # find the related file set ids so entries can be made for export
212
216
  def find_child_file_sets(work_ids)
@@ -262,7 +266,6 @@ module Bulkrax
262
266
  end
263
267
  end
264
268
  alias create_from_collection create_new_entries
265
- alias create_from_collections_metadata create_new_entries
266
269
  alias create_from_importer create_new_entries
267
270
  alias create_from_worktype create_new_entries
268
271
  alias create_from_all create_new_entries
@@ -280,6 +283,10 @@ module Bulkrax
280
283
  CsvFileSetEntry
281
284
  end
282
285
 
286
+ def valid_entry_types
287
+ ['Bulkrax::CsvCollectionEntry', 'Bulkrax::CsvFileSetEntry', 'Bulkrax::CsvEntry']
288
+ end
289
+
283
290
  # TODO: figure out why using the version of this method that's in the bagit parser
284
291
  # breaks specs for the "if importer?" line
285
292
  def total
@@ -321,8 +328,10 @@ module Bulkrax
321
328
  def write_files
322
329
  require 'open-uri'
323
330
  folder_count = 0
331
+ sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
332
+ .select { |e| valid_entry_types.include?(e.type) }
324
333
 
325
- importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].in_groups_of(records_split_count, false) do |group|
334
+ sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
326
335
  folder_count += 1
327
336
 
328
337
  CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
@@ -338,6 +347,8 @@ module Bulkrax
338
347
 
339
348
  def store_files(identifier, folder_count)
340
349
  record = ActiveFedora::Base.find(identifier)
350
+ return unless record
351
+
341
352
  file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
342
353
  file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
343
354
  file_sets.each do |fs|
@@ -383,6 +394,20 @@ module Bulkrax
383
394
  @object_names
384
395
  end
385
396
 
397
+ def sort_entries(entries)
398
+ # always export models in the same order: work, collection, file set
399
+ entries.sort_by do |entry|
400
+ case entry.type
401
+ when 'Bulkrax::CsvCollectionEntry'
402
+ '1'
403
+ when 'Bulkrax::CsvFileSetEntry'
404
+ '2'
405
+ else
406
+ '0'
407
+ end
408
+ end
409
+ end
410
+
386
411
  def sort_headers(headers)
387
412
  # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
388
413
  # while keeping objects grouped together
@@ -41,11 +41,6 @@
41
41
  <% when 'collection' %>
42
42
  <% collection = Collection.find(@exporter.export_source) %>
43
43
  <%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
44
- <% when 'collections metadata' %>
45
- <% collections = Collection.all %>
46
- <% collections.each_with_index do |c, i| %>
47
- <%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
48
- <% end %>
49
44
  <% when 'importer' %>
50
45
  <% importer = Bulkrax::Importer.find(@exporter.export_source) %>
51
46
  <%= link_to importer.name, bulkrax.importer_path(importer.id) %>
@@ -8,7 +8,6 @@ en:
8
8
  labels:
9
9
  all: All
10
10
  collection: Collection
11
- collections_metadata: All Collections' Metadata (only)
12
11
  export_format: Export Format
13
12
  export_from: Export From
14
13
  export_source: Export Source
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '4.0.0'
4
+ VERSION = '4.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-15 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails