bulkrax 4.0.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/import_file_set_job.rb +4 -4
- data/app/models/bulkrax/exporter.rb +1 -4
- data/app/parsers/bulkrax/application_parser.rb +3 -1
- data/app/parsers/bulkrax/bagit_parser.rb +14 -27
- data/app/parsers/bulkrax/csv_parser.rb +30 -5
- data/app/views/bulkrax/exporters/show.html.erb +0 -5
- data/config/locales/bulkrax.en.yml +0 -1
- data/lib/bulkrax/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61886547fff51b48446b9dfaf420f14deea3fc86e9c5d8b44c1886087ee8086a
|
4
|
+
data.tar.gz: d19557cd24341cbd6001e835fff081dca349eeeab818b6b8bcf4c4cb91e5b692
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d14c4c440b00f6fe0e689e506b77775b6bbfdf5e6a9c64c5310ccd84947a6f71880c52518575839534bb90be89cc7a198c9fd4d51831f6814f18380391abc8e2
|
7
|
+
data.tar.gz: 2c743fa2532fc672437240492faa2b6660194afff19c0ee27e6c6a9a8a402f7ae55ee0ecec84dad2ce474ecf86579218f47df578773e01d8409922f013cd9123
|
@@ -12,7 +12,8 @@ module Bulkrax
|
|
12
12
|
def perform(entry_id, importer_run_id)
|
13
13
|
@importer_run_id = importer_run_id
|
14
14
|
entry = Entry.find(entry_id)
|
15
|
-
|
15
|
+
# e.g. "parents" or "parents_1"
|
16
|
+
parent_identifier = (entry.raw_metadata[entry.related_parents_raw_mapping] || entry.raw_metadata["#{entry.related_parents_raw_mapping}_1"])&.strip
|
16
17
|
|
17
18
|
validate_parent!(parent_identifier)
|
18
19
|
|
@@ -57,7 +58,7 @@ module Bulkrax
|
|
57
58
|
end
|
58
59
|
|
59
60
|
def check_parent_exists!(parent_identifier)
|
60
|
-
raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.
|
61
|
+
raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.nil?
|
61
62
|
end
|
62
63
|
|
63
64
|
def check_parent_is_a_work!(parent_identifier)
|
@@ -66,8 +67,7 @@ module Bulkrax
|
|
66
67
|
end
|
67
68
|
|
68
69
|
def find_parent_record(parent_identifier)
|
69
|
-
@parent_record
|
70
|
-
@parent_record = parent_record.last if parent_record.is_a? Array
|
70
|
+
_, @parent_record = find_record(parent_identifier, importer_run_id)
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
@@ -14,15 +14,13 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
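delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser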
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
21
|
case self.export_from
|
22
22
|
when 'collection'
|
23
23
|
create_from_collection
|
24
|
-
when 'collections metadata'
|
25
|
-
create_from_collections_metadata
|
26
24
|
when 'importer'
|
27
25
|
create_from_importer
|
28
26
|
when 'worktype'
|
@@ -89,7 +87,6 @@ module Bulkrax
|
|
89
87
|
[
|
90
88
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
91
89
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
92
|
-
[I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
|
93
90
|
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
94
91
|
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
95
92
|
]
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
def get_field_mapping_hash_for(key)
|
76
76
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
77
77
|
|
78
|
-
mapping = importerexporter.field_mapping
|
78
|
+
mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {}
|
79
79
|
instance_variable_set(
|
80
80
|
"@#{key}_hash",
|
81
81
|
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
@@ -264,6 +264,8 @@ module Bulkrax
|
|
264
264
|
|
265
265
|
Dir["#{exporter_export_path}/**"].each do |folder|
|
266
266
|
zip_path = "#{exporter_export_zip_path.split('/').last}_#{folder.split('/').last}.zip"
|
267
|
+
FileUtils.rm_rf("#{exporter_export_zip_path}/#{zip_path}")
|
268
|
+
|
267
269
|
Zip::File.open(File.join("#{exporter_export_zip_path}/#{zip_path}"), create: true) do |zip_file|
|
268
270
|
Dir["#{folder}/**/**"].each do |file|
|
269
271
|
zip_file.add(file.sub("#{folder}/", ''), file)
|
@@ -97,28 +97,6 @@ module Bulkrax
|
|
97
97
|
@total = 0
|
98
98
|
end
|
99
99
|
|
100
|
-
def current_record_ids
|
101
|
-
@work_ids = []
|
102
|
-
@collection_ids = []
|
103
|
-
@file_set_ids = []
|
104
|
-
|
105
|
-
case importerexporter.export_from
|
106
|
-
when 'all'
|
107
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
108
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
109
|
-
when 'collection'
|
110
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
111
|
-
when 'worktype'
|
112
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
113
|
-
when 'importer'
|
114
|
-
set_ids_for_exporting_from_importer
|
115
|
-
end
|
116
|
-
|
117
|
-
find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection' || importerexporter.export_from == 'worktype'
|
118
|
-
|
119
|
-
@work_ids + @collection_ids + @file_set_ids
|
120
|
-
end
|
121
|
-
|
122
100
|
# export methods
|
123
101
|
|
124
102
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
@@ -128,14 +106,23 @@ module Bulkrax
|
|
128
106
|
|
129
107
|
folder_count = 1
|
130
108
|
records_in_folder = 0
|
109
|
+
work_entries = importerexporter.entries.where(identifier: @work_ids)
|
110
|
+
collection_entries = importerexporter.entries.where(identifier: @collection_ids)
|
111
|
+
file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
|
131
112
|
|
132
|
-
|
113
|
+
work_entries[0..limit || total].each do |entry|
|
133
114
|
record = ActiveFedora::Base.find(entry.identifier)
|
134
|
-
next unless
|
115
|
+
next unless record
|
135
116
|
|
136
117
|
bag_entries = [entry]
|
137
|
-
|
138
|
-
|
118
|
+
|
119
|
+
if record.member_of_collection_ids.present?
|
120
|
+
collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
|
121
|
+
end
|
122
|
+
|
123
|
+
if record.file_sets.present?
|
124
|
+
file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
|
125
|
+
end
|
139
126
|
|
140
127
|
records_in_folder += bag_entries.count
|
141
128
|
if records_in_folder > records_split_count
|
@@ -153,7 +140,7 @@ module Bulkrax
|
|
153
140
|
file.write(io.read)
|
154
141
|
file.close
|
155
142
|
begin
|
156
|
-
bag.add_file(file_name, file.path)
|
143
|
+
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
157
144
|
rescue => e
|
158
145
|
entry.status_info(e)
|
159
146
|
status_info(e)
|
@@ -184,6 +184,7 @@ module Bulkrax
|
|
184
184
|
current_record_ids
|
185
185
|
end
|
186
186
|
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
187
188
|
def current_record_ids
|
188
189
|
@work_ids = []
|
189
190
|
@collection_ids = []
|
@@ -195,18 +196,21 @@ module Bulkrax
|
|
195
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
198
|
when 'collection'
|
198
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
199
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
|
200
|
+
# get the parent collection and child collections
|
199
201
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
200
|
-
|
201
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
203
|
when 'worktype'
|
203
204
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
204
205
|
when 'importer'
|
205
206
|
set_ids_for_exporting_from_importer
|
206
207
|
end
|
207
208
|
|
209
|
+
find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection'
|
210
|
+
|
208
211
|
@work_ids + @collection_ids + @file_set_ids
|
209
212
|
end
|
213
|
+
# rubocop:enable Metrics/AbcSize
|
210
214
|
|
211
215
|
# find the related file set ids so entries can be made for export
|
212
216
|
def find_child_file_sets(work_ids)
|
@@ -262,7 +266,6 @@ module Bulkrax
|
|
262
266
|
end
|
263
267
|
end
|
264
268
|
alias create_from_collection create_new_entries
|
265
|
-
alias create_from_collections_metadata create_new_entries
|
266
269
|
alias create_from_importer create_new_entries
|
267
270
|
alias create_from_worktype create_new_entries
|
268
271
|
alias create_from_all create_new_entries
|
@@ -280,6 +283,10 @@ module Bulkrax
|
|
280
283
|
CsvFileSetEntry
|
281
284
|
end
|
282
285
|
|
286
|
+
def valid_entry_types
|
287
|
+
['Bulkrax::CsvCollectionEntry', 'Bulkrax::CsvFileSetEntry', 'Bulkrax::CsvEntry']
|
288
|
+
end
|
289
|
+
|
283
290
|
# TODO: figure out why using the version of this method that's in the bagit parser
|
284
291
|
# breaks specs for the "if importer?" line
|
285
292
|
def total
|
@@ -321,8 +328,10 @@ module Bulkrax
|
|
321
328
|
def write_files
|
322
329
|
require 'open-uri'
|
323
330
|
folder_count = 0
|
331
|
+
sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
|
332
|
+
.select { |e| valid_entry_types.include?(e.type) }
|
324
333
|
|
325
|
-
|
334
|
+
sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
|
326
335
|
folder_count += 1
|
327
336
|
|
328
337
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
@@ -338,6 +347,8 @@ module Bulkrax
|
|
338
347
|
|
339
348
|
def store_files(identifier, folder_count)
|
340
349
|
record = ActiveFedora::Base.find(identifier)
|
350
|
+
return unless record
|
351
|
+
|
341
352
|
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
342
353
|
file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
|
343
354
|
file_sets.each do |fs|
|
@@ -383,6 +394,20 @@ module Bulkrax
|
|
383
394
|
@object_names
|
384
395
|
end
|
385
396
|
|
397
|
+
def sort_entries(entries)
|
398
|
+
# always export models in the same order: work, collection, file set
|
399
|
+
entries.sort_by do |entry|
|
400
|
+
case entry.type
|
401
|
+
when 'Bulkrax::CsvCollectionEntry'
|
402
|
+
'1'
|
403
|
+
when 'Bulkrax::CsvFileSetEntry'
|
404
|
+
'2'
|
405
|
+
else
|
406
|
+
'0'
|
407
|
+
end
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
386
411
|
def sort_headers(headers)
|
387
412
|
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
388
413
|
# while keeping objects grouped together
|
@@ -41,11 +41,6 @@
|
|
41
41
|
<% when 'collection' %>
|
42
42
|
<% collection = Collection.find(@exporter.export_source) %>
|
43
43
|
<%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
|
44
|
-
<% when 'collections metadata' %>
|
45
|
-
<% collections = Collection.all %>
|
46
|
-
<% collections.each_with_index do |c, i| %>
|
47
|
-
<%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
|
48
|
-
<% end %>
|
49
44
|
<% when 'importer' %>
|
50
45
|
<% importer = Bulkrax::Importer.find(@exporter.export_source) %>
|
51
46
|
<%= link_to importer.name, bulkrax.importer_path(importer.id) %>
|
data/lib/bulkrax/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.0
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|