bulkrax 4.0.0 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb56d86ee90ae9e1cf0628504694e1301ab8f2d6b24ffa8fd323f8953a8ee956
4
- data.tar.gz: 71056b077e300f27eee3bcccd9d7e2bee2fc7bdf2fc6ba9248b69a29f3994f9c
3
+ metadata.gz: 61886547fff51b48446b9dfaf420f14deea3fc86e9c5d8b44c1886087ee8086a
4
+ data.tar.gz: d19557cd24341cbd6001e835fff081dca349eeeab818b6b8bcf4c4cb91e5b692
5
5
  SHA512:
6
- metadata.gz: 05ea49e6f2c5e73cbddacf35dcaf9de499760d7093e3ae8f3ce4ea5ab28e25d065b7877607436fcbe02d21e17c2df940b0224f1ea7d638a602486ce807d99981
7
- data.tar.gz: ecdda29924e09793e62684f16ebcd79cd90ab9e8204d011b89a577ca667a644709ff2df5b525ac6c32355426038c05d9fc3d74c6efb20ee7cfab653d9b89b67a
6
+ metadata.gz: d14c4c440b00f6fe0e689e506b77775b6bbfdf5e6a9c64c5310ccd84947a6f71880c52518575839534bb90be89cc7a198c9fd4d51831f6814f18380391abc8e2
7
+ data.tar.gz: 2c743fa2532fc672437240492faa2b6660194afff19c0ee27e6c6a9a8a402f7ae55ee0ecec84dad2ce474ecf86579218f47df578773e01d8409922f013cd9123
@@ -12,7 +12,8 @@ module Bulkrax
12
12
  def perform(entry_id, importer_run_id)
13
13
  @importer_run_id = importer_run_id
14
14
  entry = Entry.find(entry_id)
15
- parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip
15
+ # e.g. "parents" or "parents_1"
16
+ parent_identifier = (entry.raw_metadata[entry.related_parents_raw_mapping] || entry.raw_metadata["#{entry.related_parents_raw_mapping}_1"])&.strip
16
17
 
17
18
  validate_parent!(parent_identifier)
18
19
 
@@ -57,7 +58,7 @@ module Bulkrax
57
58
  end
58
59
 
59
60
  def check_parent_exists!(parent_identifier)
60
- raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.blank?
61
+ raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.nil?
61
62
  end
62
63
 
63
64
  def check_parent_is_a_work!(parent_identifier)
@@ -66,8 +67,7 @@ module Bulkrax
66
67
  end
67
68
 
68
69
  def find_parent_record(parent_identifier)
69
- @parent_record ||= find_record(parent_identifier, importer_run_id)
70
- @parent_record = parent_record.last if parent_record.is_a? Array
70
+ _, @parent_record = find_record(parent_identifier, importer_run_id)
71
71
  end
72
72
  end
73
73
  end
@@ -14,15 +14,13 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
21
21
  case self.export_from
22
22
  when 'collection'
23
23
  create_from_collection
24
- when 'collections metadata'
25
- create_from_collections_metadata
26
24
  when 'importer'
27
25
  create_from_importer
28
26
  when 'worktype'
@@ -89,7 +87,6 @@ module Bulkrax
89
87
  [
90
88
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
91
89
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
92
- [I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
93
90
  [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
94
91
  [I18n.t('bulkrax.exporter.labels.all'), 'all']
95
92
  ]
@@ -75,7 +75,7 @@ module Bulkrax
75
75
  def get_field_mapping_hash_for(key)
76
76
  return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
77
77
 
78
- mapping = importerexporter.field_mapping == [{}] ? {} : importerexporter.field_mapping
78
+ mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {}
79
79
  instance_variable_set(
80
80
  "@#{key}_hash",
81
81
  mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
@@ -264,6 +264,8 @@ module Bulkrax
264
264
 
265
265
  Dir["#{exporter_export_path}/**"].each do |folder|
266
266
  zip_path = "#{exporter_export_zip_path.split('/').last}_#{folder.split('/').last}.zip"
267
+ FileUtils.rm_rf("#{exporter_export_zip_path}/#{zip_path}")
268
+
267
269
  Zip::File.open(File.join("#{exporter_export_zip_path}/#{zip_path}"), create: true) do |zip_file|
268
270
  Dir["#{folder}/**/**"].each do |file|
269
271
  zip_file.add(file.sub("#{folder}/", ''), file)
@@ -97,28 +97,6 @@ module Bulkrax
97
97
  @total = 0
98
98
  end
99
99
 
100
- def current_record_ids
101
- @work_ids = []
102
- @collection_ids = []
103
- @file_set_ids = []
104
-
105
- case importerexporter.export_from
106
- when 'all'
107
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
108
- @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
109
- when 'collection'
110
- @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
111
- when 'worktype'
112
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
113
- when 'importer'
114
- set_ids_for_exporting_from_importer
115
- end
116
-
117
- find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection' || importerexporter.export_from == 'worktype'
118
-
119
- @work_ids + @collection_ids + @file_set_ids
120
- end
121
-
122
100
  # export methods
123
101
 
124
102
  # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
@@ -128,14 +106,23 @@ module Bulkrax
128
106
 
129
107
  folder_count = 1
130
108
  records_in_folder = 0
109
+ work_entries = importerexporter.entries.where(identifier: @work_ids)
110
+ collection_entries = importerexporter.entries.where(identifier: @collection_ids)
111
+ file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
131
112
 
132
- importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |entry|
113
+ work_entries[0..limit || total].each do |entry|
133
114
  record = ActiveFedora::Base.find(entry.identifier)
134
- next unless Hyrax.config.curation_concerns.include?(record.class)
115
+ next unless record
135
116
 
136
117
  bag_entries = [entry]
137
- file_set_entries = Bulkrax::CsvFileSetEntry.where(importerexporter_id: importerexporter.id).where("parsed_metadata LIKE '%#{record.id}%'")
138
- file_set_entries.each { |fse| bag_entries << fse }
118
+
119
+ if record.member_of_collection_ids.present?
120
+ collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
121
+ end
122
+
123
+ if record.file_sets.present?
124
+ file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
125
+ end
139
126
 
140
127
  records_in_folder += bag_entries.count
141
128
  if records_in_folder > records_split_count
@@ -153,7 +140,7 @@ module Bulkrax
153
140
  file.write(io.read)
154
141
  file.close
155
142
  begin
156
- bag.add_file(file_name, file.path)
143
+ bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
157
144
  rescue => e
158
145
  entry.status_info(e)
159
146
  status_info(e)
@@ -184,6 +184,7 @@ module Bulkrax
184
184
  current_record_ids
185
185
  end
186
186
 
187
+ # rubocop:disable Metrics/AbcSize
187
188
  def current_record_ids
188
189
  @work_ids = []
189
190
  @collection_ids = []
@@ -195,18 +196,21 @@ module Bulkrax
195
196
  @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
196
197
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
197
198
  when 'collection'
198
- @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
199
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
200
+ # get the parent collection and child collections
199
201
  @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
200
- when 'collections metadata'
201
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
202
+ @collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
202
203
  when 'worktype'
203
204
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
204
205
  when 'importer'
205
206
  set_ids_for_exporting_from_importer
206
207
  end
207
208
 
209
+ find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection'
210
+
208
211
  @work_ids + @collection_ids + @file_set_ids
209
212
  end
213
+ # rubocop:enable Metrics/AbcSize
210
214
 
211
215
  # find the related file set ids so entries can be made for export
212
216
  def find_child_file_sets(work_ids)
@@ -262,7 +266,6 @@ module Bulkrax
262
266
  end
263
267
  end
264
268
  alias create_from_collection create_new_entries
265
- alias create_from_collections_metadata create_new_entries
266
269
  alias create_from_importer create_new_entries
267
270
  alias create_from_worktype create_new_entries
268
271
  alias create_from_all create_new_entries
@@ -280,6 +283,10 @@ module Bulkrax
280
283
  CsvFileSetEntry
281
284
  end
282
285
 
286
+ def valid_entry_types
287
+ ['Bulkrax::CsvCollectionEntry', 'Bulkrax::CsvFileSetEntry', 'Bulkrax::CsvEntry']
288
+ end
289
+
283
290
  # TODO: figure out why using the version of this method that's in the bagit parser
284
291
  # breaks specs for the "if importer?" line
285
292
  def total
@@ -321,8 +328,10 @@ module Bulkrax
321
328
  def write_files
322
329
  require 'open-uri'
323
330
  folder_count = 0
331
+ sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
332
+ .select { |e| valid_entry_types.include?(e.type) }
324
333
 
325
- importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].in_groups_of(records_split_count, false) do |group|
334
+ sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
326
335
  folder_count += 1
327
336
 
328
337
  CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
@@ -338,6 +347,8 @@ module Bulkrax
338
347
 
339
348
  def store_files(identifier, folder_count)
340
349
  record = ActiveFedora::Base.find(identifier)
350
+ return unless record
351
+
341
352
  file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
342
353
  file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
343
354
  file_sets.each do |fs|
@@ -383,6 +394,20 @@ module Bulkrax
383
394
  @object_names
384
395
  end
385
396
 
397
+ def sort_entries(entries)
398
+ # always export models in the same order: work, collection, file set
399
+ entries.sort_by do |entry|
400
+ case entry.type
401
+ when 'Bulkrax::CsvCollectionEntry'
402
+ '1'
403
+ when 'Bulkrax::CsvFileSetEntry'
404
+ '2'
405
+ else
406
+ '0'
407
+ end
408
+ end
409
+ end
410
+
386
411
  def sort_headers(headers)
387
412
  # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
388
413
  # while keeping objects grouped together
@@ -41,11 +41,6 @@
41
41
  <% when 'collection' %>
42
42
  <% collection = Collection.find(@exporter.export_source) %>
43
43
  <%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
44
- <% when 'collections metadata' %>
45
- <% collections = Collection.all %>
46
- <% collections.each_with_index do |c, i| %>
47
- <%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
48
- <% end %>
49
44
  <% when 'importer' %>
50
45
  <% importer = Bulkrax::Importer.find(@exporter.export_source) %>
51
46
  <%= link_to importer.name, bulkrax.importer_path(importer.id) %>
@@ -8,7 +8,6 @@ en:
8
8
  labels:
9
9
  all: All
10
10
  collection: Collection
11
- collections_metadata: All Collections' Metadata (only)
12
11
  export_format: Export Format
13
12
  export_from: Export From
14
13
  export_source: Export Source
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '4.0.0'
4
+ VERSION = '4.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-15 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails