bulkrax 4.0.0 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/import_file_set_job.rb +4 -4
- data/app/models/bulkrax/exporter.rb +1 -4
- data/app/parsers/bulkrax/application_parser.rb +3 -1
- data/app/parsers/bulkrax/bagit_parser.rb +14 -27
- data/app/parsers/bulkrax/csv_parser.rb +30 -5
- data/app/views/bulkrax/exporters/show.html.erb +0 -5
- data/config/locales/bulkrax.en.yml +0 -1
- data/lib/bulkrax/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61886547fff51b48446b9dfaf420f14deea3fc86e9c5d8b44c1886087ee8086a
|
4
|
+
data.tar.gz: d19557cd24341cbd6001e835fff081dca349eeeab818b6b8bcf4c4cb91e5b692
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d14c4c440b00f6fe0e689e506b77775b6bbfdf5e6a9c64c5310ccd84947a6f71880c52518575839534bb90be89cc7a198c9fd4d51831f6814f18380391abc8e2
|
7
|
+
data.tar.gz: 2c743fa2532fc672437240492faa2b6660194afff19c0ee27e6c6a9a8a402f7ae55ee0ecec84dad2ce474ecf86579218f47df578773e01d8409922f013cd9123
|
@@ -12,7 +12,8 @@ module Bulkrax
|
|
12
12
|
def perform(entry_id, importer_run_id)
|
13
13
|
@importer_run_id = importer_run_id
|
14
14
|
entry = Entry.find(entry_id)
|
15
|
-
|
15
|
+
# e.g. "parents" or "parents_1"
|
16
|
+
parent_identifier = (entry.raw_metadata[entry.related_parents_raw_mapping] || entry.raw_metadata["#{entry.related_parents_raw_mapping}_1"])&.strip
|
16
17
|
|
17
18
|
validate_parent!(parent_identifier)
|
18
19
|
|
@@ -57,7 +58,7 @@ module Bulkrax
|
|
57
58
|
end
|
58
59
|
|
59
60
|
def check_parent_exists!(parent_identifier)
|
60
|
-
raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.
|
61
|
+
raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.nil?
|
61
62
|
end
|
62
63
|
|
63
64
|
def check_parent_is_a_work!(parent_identifier)
|
@@ -66,8 +67,7 @@ module Bulkrax
|
|
66
67
|
end
|
67
68
|
|
68
69
|
def find_parent_record(parent_identifier)
|
69
|
-
@parent_record
|
70
|
-
@parent_record = parent_record.last if parent_record.is_a? Array
|
70
|
+
_, @parent_record = find_record(parent_identifier, importer_run_id)
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
@@ -14,15 +14,13 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
21
|
case self.export_from
|
22
22
|
when 'collection'
|
23
23
|
create_from_collection
|
24
|
-
when 'collections metadata'
|
25
|
-
create_from_collections_metadata
|
26
24
|
when 'importer'
|
27
25
|
create_from_importer
|
28
26
|
when 'worktype'
|
@@ -89,7 +87,6 @@ module Bulkrax
|
|
89
87
|
[
|
90
88
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
91
89
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
92
|
-
[I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
|
93
90
|
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
94
91
|
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
95
92
|
]
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
def get_field_mapping_hash_for(key)
|
76
76
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
77
77
|
|
78
|
-
mapping = importerexporter.field_mapping
|
78
|
+
mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {}
|
79
79
|
instance_variable_set(
|
80
80
|
"@#{key}_hash",
|
81
81
|
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
@@ -264,6 +264,8 @@ module Bulkrax
|
|
264
264
|
|
265
265
|
Dir["#{exporter_export_path}/**"].each do |folder|
|
266
266
|
zip_path = "#{exporter_export_zip_path.split('/').last}_#{folder.split('/').last}.zip"
|
267
|
+
FileUtils.rm_rf("#{exporter_export_zip_path}/#{zip_path}")
|
268
|
+
|
267
269
|
Zip::File.open(File.join("#{exporter_export_zip_path}/#{zip_path}"), create: true) do |zip_file|
|
268
270
|
Dir["#{folder}/**/**"].each do |file|
|
269
271
|
zip_file.add(file.sub("#{folder}/", ''), file)
|
@@ -97,28 +97,6 @@ module Bulkrax
|
|
97
97
|
@total = 0
|
98
98
|
end
|
99
99
|
|
100
|
-
def current_record_ids
|
101
|
-
@work_ids = []
|
102
|
-
@collection_ids = []
|
103
|
-
@file_set_ids = []
|
104
|
-
|
105
|
-
case importerexporter.export_from
|
106
|
-
when 'all'
|
107
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
108
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
109
|
-
when 'collection'
|
110
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
111
|
-
when 'worktype'
|
112
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
113
|
-
when 'importer'
|
114
|
-
set_ids_for_exporting_from_importer
|
115
|
-
end
|
116
|
-
|
117
|
-
find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection' || importerexporter.export_from == 'worktype'
|
118
|
-
|
119
|
-
@work_ids + @collection_ids + @file_set_ids
|
120
|
-
end
|
121
|
-
|
122
100
|
# export methods
|
123
101
|
|
124
102
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
@@ -128,14 +106,23 @@ module Bulkrax
|
|
128
106
|
|
129
107
|
folder_count = 1
|
130
108
|
records_in_folder = 0
|
109
|
+
work_entries = importerexporter.entries.where(identifier: @work_ids)
|
110
|
+
collection_entries = importerexporter.entries.where(identifier: @collection_ids)
|
111
|
+
file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
|
131
112
|
|
132
|
-
|
113
|
+
work_entries[0..limit || total].each do |entry|
|
133
114
|
record = ActiveFedora::Base.find(entry.identifier)
|
134
|
-
next unless
|
115
|
+
next unless record
|
135
116
|
|
136
117
|
bag_entries = [entry]
|
137
|
-
|
138
|
-
|
118
|
+
|
119
|
+
if record.member_of_collection_ids.present?
|
120
|
+
collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
|
121
|
+
end
|
122
|
+
|
123
|
+
if record.file_sets.present?
|
124
|
+
file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
|
125
|
+
end
|
139
126
|
|
140
127
|
records_in_folder += bag_entries.count
|
141
128
|
if records_in_folder > records_split_count
|
@@ -153,7 +140,7 @@ module Bulkrax
|
|
153
140
|
file.write(io.read)
|
154
141
|
file.close
|
155
142
|
begin
|
156
|
-
bag.add_file(file_name, file.path)
|
143
|
+
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
157
144
|
rescue => e
|
158
145
|
entry.status_info(e)
|
159
146
|
status_info(e)
|
@@ -184,6 +184,7 @@ module Bulkrax
|
|
184
184
|
current_record_ids
|
185
185
|
end
|
186
186
|
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
187
188
|
def current_record_ids
|
188
189
|
@work_ids = []
|
189
190
|
@collection_ids = []
|
@@ -195,18 +196,21 @@ module Bulkrax
|
|
195
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
198
|
when 'collection'
|
198
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
199
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
|
200
|
+
# get the parent collection and child collections
|
199
201
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
200
|
-
|
201
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
203
|
when 'worktype'
|
203
204
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
204
205
|
when 'importer'
|
205
206
|
set_ids_for_exporting_from_importer
|
206
207
|
end
|
207
208
|
|
209
|
+
find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection'
|
210
|
+
|
208
211
|
@work_ids + @collection_ids + @file_set_ids
|
209
212
|
end
|
213
|
+
# rubocop:enable Metrics/AbcSize
|
210
214
|
|
211
215
|
# find the related file set ids so entries can be made for export
|
212
216
|
def find_child_file_sets(work_ids)
|
@@ -262,7 +266,6 @@ module Bulkrax
|
|
262
266
|
end
|
263
267
|
end
|
264
268
|
alias create_from_collection create_new_entries
|
265
|
-
alias create_from_collections_metadata create_new_entries
|
266
269
|
alias create_from_importer create_new_entries
|
267
270
|
alias create_from_worktype create_new_entries
|
268
271
|
alias create_from_all create_new_entries
|
@@ -280,6 +283,10 @@ module Bulkrax
|
|
280
283
|
CsvFileSetEntry
|
281
284
|
end
|
282
285
|
|
286
|
+
def valid_entry_types
|
287
|
+
['Bulkrax::CsvCollectionEntry', 'Bulkrax::CsvFileSetEntry', 'Bulkrax::CsvEntry']
|
288
|
+
end
|
289
|
+
|
283
290
|
# TODO: figure out why using the version of this method that's in the bagit parser
|
284
291
|
# breaks specs for the "if importer?" line
|
285
292
|
def total
|
@@ -321,8 +328,10 @@ module Bulkrax
|
|
321
328
|
def write_files
|
322
329
|
require 'open-uri'
|
323
330
|
folder_count = 0
|
331
|
+
sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
|
332
|
+
.select { |e| valid_entry_types.include?(e.type) }
|
324
333
|
|
325
|
-
|
334
|
+
sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
|
326
335
|
folder_count += 1
|
327
336
|
|
328
337
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
@@ -338,6 +347,8 @@ module Bulkrax
|
|
338
347
|
|
339
348
|
def store_files(identifier, folder_count)
|
340
349
|
record = ActiveFedora::Base.find(identifier)
|
350
|
+
return unless record
|
351
|
+
|
341
352
|
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
342
353
|
file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
|
343
354
|
file_sets.each do |fs|
|
@@ -383,6 +394,20 @@ module Bulkrax
|
|
383
394
|
@object_names
|
384
395
|
end
|
385
396
|
|
397
|
+
def sort_entries(entries)
|
398
|
+
# always export models in the same order: work, collection, file set
|
399
|
+
entries.sort_by do |entry|
|
400
|
+
case entry.type
|
401
|
+
when 'Bulkrax::CsvCollectionEntry'
|
402
|
+
'1'
|
403
|
+
when 'Bulkrax::CsvFileSetEntry'
|
404
|
+
'2'
|
405
|
+
else
|
406
|
+
'0'
|
407
|
+
end
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
386
411
|
def sort_headers(headers)
|
387
412
|
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
388
413
|
# while keeping objects grouped together
|
@@ -41,11 +41,6 @@
|
|
41
41
|
<% when 'collection' %>
|
42
42
|
<% collection = Collection.find(@exporter.export_source) %>
|
43
43
|
<%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
|
44
|
-
<% when 'collections metadata' %>
|
45
|
-
<% collections = Collection.all %>
|
46
|
-
<% collections.each_with_index do |c, i| %>
|
47
|
-
<%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
|
48
|
-
<% end %>
|
49
44
|
<% when 'importer' %>
|
50
45
|
<% importer = Bulkrax::Importer.find(@exporter.export_source) %>
|
51
46
|
<%= link_to importer.name, bulkrax.importer_path(importer.id) %>
|
data/lib/bulkrax/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|