bulkrax 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/exporters_controller.rb +4 -4
- data/app/{models → controllers}/concerns/bulkrax/download_behavior.rb +1 -5
- data/app/models/bulkrax/csv_entry.rb +27 -1
- data/app/models/bulkrax/entry.rb +12 -0
- data/app/models/bulkrax/exporter.rb +8 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +4 -1
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -0
- data/app/parsers/bulkrax/application_parser.rb +39 -3
- data/app/parsers/bulkrax/bagit_parser.rb +188 -20
- data/app/views/bulkrax/exporters/_form.html.erb +10 -0
- data/app/views/bulkrax/exporters/show.html.erb +12 -0
- data/config/locales/bulkrax.en.yml +5 -0
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +1 -0
- metadata +9 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4c6719caedaf4adb707986e0b6771f1025ce1e08d0bf46afa78a85d99faded2b
|
|
4
|
+
data.tar.gz: 7af41a63f79c6d9792066cf545f35bb723c7667970f0ea1e3a87c04dceda28d0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7ed10c67e81885b4ff3b40d2731b93b790495e19cad51fb9dfb0dcc5bebcd4c60fecc86fe0932f058d100a7937cedab43c1df050c98efd2107c086fdb2fd93a2
|
|
7
|
+
data.tar.gz: 5a81d4cd7d0289b5d27a13876f20c29b0b40f0989ecbac7caaf26599a1cbdea5c27a2473f759783690562345e8b5d7c515a4f4a0f92d5241803897887e2e9214
|
|
@@ -101,12 +101,12 @@ module Bulkrax
|
|
|
101
101
|
def exporter_params
|
|
102
102
|
params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
|
|
103
103
|
if params[:exporter][:date_filter] == "1"
|
|
104
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
|
105
|
-
:parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
|
104
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
|
105
|
+
:include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
|
106
106
|
:workflow_status, field_mapping: {})
|
|
107
107
|
else
|
|
108
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
|
109
|
-
:parser_klass, :limit, :work_visibility, :workflow_status,
|
|
108
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
|
109
|
+
:include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
|
|
110
110
|
field_mapping: {}).merge(start_date: nil, finish_date: nil)
|
|
111
111
|
end
|
|
112
112
|
end
|
|
@@ -42,7 +42,7 @@ module Bulkrax
|
|
|
42
42
|
def send_file_contents
|
|
43
43
|
self.status = 200
|
|
44
44
|
prepare_file_headers
|
|
45
|
-
|
|
45
|
+
send_file file
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def prepare_file_headers
|
|
@@ -53,9 +53,5 @@ module Bulkrax
|
|
|
53
53
|
response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
|
|
54
54
|
self.content_type = download_content_type
|
|
55
55
|
end
|
|
56
|
-
|
|
57
|
-
def stream_body(iostream)
|
|
58
|
-
self.response_body = iostream
|
|
59
|
-
end
|
|
60
56
|
end
|
|
61
57
|
end
|
|
@@ -99,6 +99,7 @@ module Bulkrax
|
|
|
99
99
|
build_files_metadata unless hyrax_record.is_a?(Collection)
|
|
100
100
|
build_relationship_metadata
|
|
101
101
|
build_mapping_metadata
|
|
102
|
+
self.save!
|
|
102
103
|
|
|
103
104
|
self.parsed_metadata
|
|
104
105
|
end
|
|
@@ -140,6 +141,7 @@ module Bulkrax
|
|
|
140
141
|
end
|
|
141
142
|
|
|
142
143
|
def build_mapping_metadata
|
|
144
|
+
mapping = fetch_field_mapping
|
|
143
145
|
mapping.each do |key, value|
|
|
144
146
|
# these keys are handled by other methods
|
|
145
147
|
next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
|
@@ -217,6 +219,25 @@ module Bulkrax
|
|
|
217
219
|
end
|
|
218
220
|
end
|
|
219
221
|
|
|
222
|
+
def build_files
|
|
223
|
+
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
|
224
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
|
225
|
+
|
|
226
|
+
filenames = map_file_sets(file_sets)
|
|
227
|
+
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
|
228
|
+
build_thumbnail_files if hyrax_record.work?
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
def build_thumbnail_files
|
|
232
|
+
return unless importerexporter.include_thumbnails
|
|
233
|
+
|
|
234
|
+
thumbnail_mapping = 'thumbnail_file'
|
|
235
|
+
file_sets = Array.wrap(hyrax_record.thumbnail)
|
|
236
|
+
|
|
237
|
+
filenames = map_file_sets(file_sets)
|
|
238
|
+
handle_join_on_export(thumbnail_mapping, filenames, false)
|
|
239
|
+
end
|
|
240
|
+
|
|
220
241
|
def handle_join_on_export(key, values, join)
|
|
221
242
|
if join
|
|
222
243
|
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
|
@@ -252,7 +273,6 @@ module Bulkrax
|
|
|
252
273
|
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
|
|
253
274
|
identifiers << matching_collection_entries.first&.identifier
|
|
254
275
|
end
|
|
255
|
-
|
|
256
276
|
@collection_identifiers = identifiers.compact.presence || []
|
|
257
277
|
end
|
|
258
278
|
|
|
@@ -283,5 +303,11 @@ module Bulkrax
|
|
|
283
303
|
return f if File.exist?(f)
|
|
284
304
|
raise "File #{f} does not exist"
|
|
285
305
|
end
|
|
306
|
+
|
|
307
|
+
private
|
|
308
|
+
|
|
309
|
+
def map_file_sets(file_sets)
|
|
310
|
+
file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
|
311
|
+
end
|
|
286
312
|
end
|
|
287
313
|
end
|
data/app/models/bulkrax/entry.rb
CHANGED
|
@@ -4,6 +4,8 @@ module Bulkrax
|
|
|
4
4
|
# Custom error class for collections_created?
|
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
|
6
6
|
class OAIError < RuntimeError; end
|
|
7
|
+
# TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
|
|
8
|
+
class BagitZipError < RuntimeError; end
|
|
7
9
|
class Entry < ApplicationRecord
|
|
8
10
|
include Bulkrax::HasMatchers
|
|
9
11
|
include Bulkrax::ImportBehavior
|
|
@@ -34,6 +36,7 @@ module Bulkrax
|
|
|
34
36
|
delegate :client,
|
|
35
37
|
:collection_name,
|
|
36
38
|
:user,
|
|
39
|
+
:generated_metadata_mapping,
|
|
37
40
|
:related_parents_raw_mapping,
|
|
38
41
|
:related_parents_parsed_mapping,
|
|
39
42
|
:related_children_raw_mapping,
|
|
@@ -70,6 +73,15 @@ module Bulkrax
|
|
|
70
73
|
parser&.work_identifier&.to_s || 'source'
|
|
71
74
|
end
|
|
72
75
|
|
|
76
|
+
# Returns field_mapping hash based on whether or not generated metadata should be included
|
|
77
|
+
def fetch_field_mapping
|
|
78
|
+
return self.mapping if importerexporter.generated_metadata
|
|
79
|
+
|
|
80
|
+
self.mapping.each do |key, value|
|
|
81
|
+
self.mapping.delete(key) if value[generated_metadata_mapping]
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
73
85
|
def self.parent_field(parser)
|
|
74
86
|
parser.related_parents_parsed_mapping
|
|
75
87
|
end
|
|
@@ -51,6 +51,14 @@ module Bulkrax
|
|
|
51
51
|
self.start_date.present? || self.finish_date.present?
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
+
def include_thumbnails?
|
|
55
|
+
self.include_thumbnails
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def generated_metadata?
|
|
59
|
+
self.generated_metadata
|
|
60
|
+
end
|
|
61
|
+
|
|
54
62
|
def work_visibility_list
|
|
55
63
|
[
|
|
56
64
|
['Any', ''],
|
|
@@ -7,6 +7,8 @@ module Bulkrax
|
|
|
7
7
|
|
|
8
8
|
def build_for_exporter
|
|
9
9
|
build_export_metadata
|
|
10
|
+
# TODO(alishaevn): determine if the line below is still necessary
|
|
11
|
+
# the csv and bagit parsers also have write_files methods
|
|
10
12
|
write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
|
|
11
13
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
|
12
14
|
raise e
|
|
@@ -28,6 +30,7 @@ module Bulkrax
|
|
|
28
30
|
return if hyrax_record.is_a?(Collection)
|
|
29
31
|
|
|
30
32
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
|
33
|
+
file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
|
|
31
34
|
file_sets.each do |fs|
|
|
32
35
|
path = File.join(exporter_export_path, 'files')
|
|
33
36
|
FileUtils.mkdir_p(path)
|
|
@@ -48,7 +51,7 @@ module Bulkrax
|
|
|
48
51
|
fn = file_set.original_file.file_name.first
|
|
49
52
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
|
50
53
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
|
51
|
-
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
|
54
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
|
|
52
55
|
filename = "#{fn}.#{mime.to_sym}"
|
|
53
56
|
filename = fn if mime.to_s == ext_mime.to_s
|
|
54
57
|
else
|
|
@@ -28,5 +28,13 @@ module Bulkrax
|
|
|
28
28
|
|
|
29
29
|
raise StandardError, 'File set must be related to at least one work'
|
|
30
30
|
end
|
|
31
|
+
|
|
32
|
+
def parent_jobs
|
|
33
|
+
false # FileSet relationships are handled in ObjectFactory#create_file_set
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def child_jobs
|
|
37
|
+
raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
|
|
38
|
+
end
|
|
31
39
|
end
|
|
32
40
|
end
|
|
@@ -51,6 +51,10 @@ module Bulkrax
|
|
|
51
51
|
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
+
def generated_metadata_mapping
|
|
55
|
+
@generated_metadata_mapping ||= 'generated'
|
|
56
|
+
end
|
|
57
|
+
|
|
54
58
|
def related_parents_raw_mapping
|
|
55
59
|
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
|
56
60
|
end
|
|
@@ -242,15 +246,48 @@ module Bulkrax
|
|
|
242
246
|
def write
|
|
243
247
|
write_files
|
|
244
248
|
zip
|
|
249
|
+
# uncomment next line to debug for faulty zipping during bagit export
|
|
250
|
+
bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
|
|
245
251
|
end
|
|
246
252
|
|
|
247
253
|
def unzip(file_to_unzip)
|
|
248
|
-
|
|
254
|
+
Zip::File.open(file_to_unzip) do |zip_file|
|
|
255
|
+
zip_file.each do |entry|
|
|
256
|
+
entry_path = File.join(importer_unzip_path, entry.name)
|
|
257
|
+
FileUtils.mkdir_p(File.dirname(entry_path))
|
|
258
|
+
zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
|
|
259
|
+
end
|
|
260
|
+
end
|
|
249
261
|
end
|
|
250
262
|
|
|
251
263
|
def zip
|
|
252
264
|
FileUtils.rm_rf(exporter_export_zip_path)
|
|
253
|
-
|
|
265
|
+
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
|
266
|
+
Dir["#{exporter_export_path}/**/**"].each do |file|
|
|
267
|
+
zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
# TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
|
|
273
|
+
def bagit_zip_file_size_check
|
|
274
|
+
Zip::File.open(exporter_export_zip_path) do |zip_file|
|
|
275
|
+
zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
|
|
276
|
+
Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
|
|
277
|
+
begin
|
|
278
|
+
raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
|
|
279
|
+
rescue BagitZipError => e
|
|
280
|
+
matched_entry_ids = importerexporter.entry_ids.select do |id|
|
|
281
|
+
Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
|
|
282
|
+
end
|
|
283
|
+
matched_entry_ids.each do |entry_id|
|
|
284
|
+
Bulkrax::Entry.find(entry_id).status_info(e)
|
|
285
|
+
status_info('Complete (with failures)')
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
end
|
|
254
291
|
end
|
|
255
292
|
|
|
256
293
|
# Is this a file?
|
|
@@ -272,7 +309,6 @@ module Bulkrax
|
|
|
272
309
|
|
|
273
310
|
def real_import_file_path
|
|
274
311
|
return importer_unzip_path if file? && zip?
|
|
275
|
-
|
|
276
312
|
parser_fields['import_file_path']
|
|
277
313
|
end
|
|
278
314
|
end
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Bulkrax
|
|
4
|
-
class BagitParser < ApplicationParser
|
|
4
|
+
class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
|
5
|
+
include ExportBehavior
|
|
6
|
+
|
|
5
7
|
def self.export_supported?
|
|
6
|
-
|
|
8
|
+
true
|
|
7
9
|
end
|
|
8
10
|
|
|
9
11
|
def valid_import?
|
|
@@ -14,19 +16,11 @@ module Bulkrax
|
|
|
14
16
|
end
|
|
15
17
|
|
|
16
18
|
def entry_class
|
|
17
|
-
parser_fields['metadata_format'
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def collection_entry_class
|
|
21
|
-
parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
|
|
22
|
-
rescue
|
|
23
|
-
Entry
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def file_set_entry_class
|
|
27
|
-
csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
|
|
28
|
-
csv_format ? CsvFileSetEntry : RdfFileSetEntry
|
|
19
|
+
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
|
20
|
+
rdf_format ? RdfEntry : CsvEntry
|
|
29
21
|
end
|
|
22
|
+
alias collection_entry_class entry_class
|
|
23
|
+
alias file_set_entry_class entry_class
|
|
30
24
|
|
|
31
25
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
|
32
26
|
def import_fields
|
|
@@ -101,7 +95,185 @@ module Bulkrax
|
|
|
101
95
|
end
|
|
102
96
|
|
|
103
97
|
def total
|
|
104
|
-
|
|
98
|
+
importerexporter.entries.count
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def extra_filters
|
|
102
|
+
output = ""
|
|
103
|
+
if importerexporter.start_date.present?
|
|
104
|
+
start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
|
|
105
|
+
finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
|
|
106
|
+
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
|
107
|
+
end
|
|
108
|
+
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
|
109
|
+
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
|
110
|
+
output
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def current_record_ids
|
|
114
|
+
@work_ids = []
|
|
115
|
+
@collection_ids = []
|
|
116
|
+
@file_set_ids = []
|
|
117
|
+
|
|
118
|
+
case importerexporter.export_from
|
|
119
|
+
when 'all'
|
|
120
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
|
121
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
|
122
|
+
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
|
123
|
+
when 'collection'
|
|
124
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
|
125
|
+
when 'worktype'
|
|
126
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
|
127
|
+
when 'importer'
|
|
128
|
+
set_ids_for_exporting_from_importer
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
@work_ids + @collection_ids + @file_set_ids
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
|
135
|
+
# @see #current_record_ids
|
|
136
|
+
def set_ids_for_exporting_from_importer
|
|
137
|
+
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
|
138
|
+
complete_statuses = Status.latest_by_statusable
|
|
139
|
+
.includes(:statusable)
|
|
140
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
|
141
|
+
|
|
142
|
+
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
|
143
|
+
extra_filters = extra_filters.presence || '*:*'
|
|
144
|
+
|
|
145
|
+
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
|
146
|
+
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
|
147
|
+
extra_filters.to_s,
|
|
148
|
+
fq: [
|
|
149
|
+
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
|
150
|
+
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
|
151
|
+
],
|
|
152
|
+
fl: 'id',
|
|
153
|
+
rows: 2_000_000_000
|
|
154
|
+
)['response']['docs'].map { |obj| obj['id'] })
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def create_new_entries
|
|
159
|
+
current_record_ids.each_with_index do |id, index|
|
|
160
|
+
break if limit_reached?(limit, index)
|
|
161
|
+
|
|
162
|
+
this_entry_class = if @collection_ids.include?(id)
|
|
163
|
+
collection_entry_class
|
|
164
|
+
elsif @file_set_ids.include?(id)
|
|
165
|
+
file_set_entry_class
|
|
166
|
+
else
|
|
167
|
+
entry_class
|
|
168
|
+
end
|
|
169
|
+
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
|
170
|
+
|
|
171
|
+
begin
|
|
172
|
+
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
|
173
|
+
rescue => e
|
|
174
|
+
Rails.logger.info("#{e.message} was detected during export")
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
self.headers |= entry.parsed_metadata.keys if entry
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
alias create_from_collection create_new_entries
|
|
181
|
+
alias create_from_importer create_new_entries
|
|
182
|
+
alias create_from_worktype create_new_entries
|
|
183
|
+
alias create_from_all create_new_entries
|
|
184
|
+
|
|
185
|
+
# export methods
|
|
186
|
+
|
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
|
188
|
+
def write_files
|
|
189
|
+
require 'open-uri'
|
|
190
|
+
require 'socket'
|
|
191
|
+
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
|
|
192
|
+
bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
|
|
193
|
+
w = ActiveFedora::Base.find(e.identifier)
|
|
194
|
+
next unless Hyrax.config.curation_concerns.include?(w.class)
|
|
195
|
+
|
|
196
|
+
w.file_sets.each do |fs|
|
|
197
|
+
file_name = filename(fs)
|
|
198
|
+
next if file_name.blank?
|
|
199
|
+
io = open(fs.original_file.uri)
|
|
200
|
+
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
|
201
|
+
file.write(io.read)
|
|
202
|
+
file.close
|
|
203
|
+
bag.add_file(file_name, file.path)
|
|
204
|
+
end
|
|
205
|
+
CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
|
206
|
+
csv << e.parsed_metadata
|
|
207
|
+
end
|
|
208
|
+
write_triples(e)
|
|
209
|
+
bag.manifest!(algo: 'sha256')
|
|
210
|
+
end
|
|
211
|
+
end
|
|
212
|
+
# rubocop:enable Metrics/AbcSize
|
|
213
|
+
|
|
214
|
+
def setup_csv_metadata_export_file(id)
|
|
215
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def key_allowed(key)
|
|
219
|
+
!Bulkrax.reserved_properties.include?(key) &&
|
|
220
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
|
221
|
+
key != source_identifier.to_s
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
# All possible column names
|
|
225
|
+
def export_headers
|
|
226
|
+
headers = sort_headers(self.headers)
|
|
227
|
+
|
|
228
|
+
# we don't want access_control_id exported and we want file at the end
|
|
229
|
+
headers.delete('access_control_id') if headers.include?('access_control_id')
|
|
230
|
+
|
|
231
|
+
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
|
232
|
+
headers.prepend('model')
|
|
233
|
+
headers.prepend(source_identifier.to_s)
|
|
234
|
+
headers.prepend('id')
|
|
235
|
+
|
|
236
|
+
headers.uniq
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
def object_names
|
|
240
|
+
return @object_names if @object_names
|
|
241
|
+
|
|
242
|
+
@object_names = mapping.values.map { |value| value['object'] }
|
|
243
|
+
@object_names.uniq!.delete(nil)
|
|
244
|
+
|
|
245
|
+
@object_names
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def sort_headers(headers)
|
|
249
|
+
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
|
250
|
+
# while keeping objects grouped together
|
|
251
|
+
headers.sort_by do |item|
|
|
252
|
+
number = item.match(/\d+/)&.[](0) || 0.to_s
|
|
253
|
+
sort_number = number.rjust(4, "0")
|
|
254
|
+
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
|
255
|
+
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
|
256
|
+
"#{object_prefix}_#{sort_number}_#{remainder}"
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def setup_triple_metadata_export_file(id)
|
|
261
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
def setup_bagit_folder(id)
|
|
265
|
+
File.join(importerexporter.exporter_export_path, id)
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
def write_triples(e)
|
|
269
|
+
sd = SolrDocument.find(e.identifier)
|
|
270
|
+
return if sd.nil?
|
|
271
|
+
|
|
272
|
+
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
|
273
|
+
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
|
274
|
+
File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
|
|
275
|
+
triples.write(rdf)
|
|
276
|
+
end
|
|
105
277
|
end
|
|
106
278
|
|
|
107
279
|
def required_elements?(keys)
|
|
@@ -126,11 +298,7 @@ module Bulkrax
|
|
|
126
298
|
def bags
|
|
127
299
|
return @bags if @bags.present?
|
|
128
300
|
new_bag = bag(import_file_path)
|
|
129
|
-
@bags =
|
|
130
|
-
[new_bag]
|
|
131
|
-
else
|
|
132
|
-
Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
|
133
|
-
end
|
|
301
|
+
@bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
|
134
302
|
@bags.delete(nil)
|
|
135
303
|
raise StandardError, 'No valid bags found' if @bags.blank?
|
|
136
304
|
return @bags
|
|
@@ -60,6 +60,16 @@
|
|
|
60
60
|
hint: 'leave blank or 0 for all records',
|
|
61
61
|
label: t('bulkrax.exporter.labels.limit') %>
|
|
62
62
|
|
|
63
|
+
<%= form.input :generated_metadata?,
|
|
64
|
+
as: :boolean,
|
|
65
|
+
label: t('bulkrax.exporter.labels.generated_metadata'),
|
|
66
|
+
hint: t('bulkrax.exporter.hints.generated_metadata') %>
|
|
67
|
+
|
|
68
|
+
<%= form.input :include_thumbnails?,
|
|
69
|
+
as: :boolean,
|
|
70
|
+
label: t('bulkrax.exporter.labels.include_thumbnails'),
|
|
71
|
+
hint: t('bulkrax.exporter.hints.include_thumbnails') %>
|
|
72
|
+
|
|
63
73
|
<%= form.input :date_filter,
|
|
64
74
|
as: :boolean,
|
|
65
75
|
label: t('bulkrax.exporter.labels.filter_by_date') %>
|
|
@@ -57,6 +57,18 @@
|
|
|
57
57
|
<strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
|
|
58
58
|
<%= @exporter.limit %>
|
|
59
59
|
</p>
|
|
60
|
+
|
|
61
|
+
<p class='bulkrax-p-align'>
|
|
62
|
+
<strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
|
|
63
|
+
<%= @exporter.generated_metadata %>
|
|
64
|
+
</p>
|
|
65
|
+
|
|
66
|
+
<p class='bulkrax-p-align'>
|
|
67
|
+
<strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
|
|
68
|
+
<%= @exporter.include_thumbnails %>
|
|
69
|
+
</p>
|
|
70
|
+
|
|
71
|
+
|
|
60
72
|
<%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
|
|
61
73
|
|
|
62
74
|
<%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
|
|
@@ -16,6 +16,8 @@ en:
|
|
|
16
16
|
filter_by_date: Filter By Date
|
|
17
17
|
finish_date: End Date
|
|
18
18
|
full: Metadata and Files
|
|
19
|
+
include_thumbnails: Include Thumbnails?
|
|
20
|
+
generated_metadata: Include Generated Metadata?
|
|
19
21
|
importer: Importer
|
|
20
22
|
limit: Limit
|
|
21
23
|
metadata: Metadata Only
|
|
@@ -35,3 +37,6 @@ en:
|
|
|
35
37
|
ingested: "Ingested"
|
|
36
38
|
unapproved: "Unapproved"
|
|
37
39
|
needs_repair: "Needs Repair"
|
|
40
|
+
hints:
|
|
41
|
+
include_thumbnails: "These exported fields currently cannot be imported."
|
|
42
|
+
generated_metadata: "These exported fields currently cannot be imported."
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bulkrax
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.
|
|
4
|
+
version: 3.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Rob Kaufman
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-
|
|
11
|
+
date: 2022-06-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rails
|
|
@@ -253,6 +253,7 @@ files:
|
|
|
253
253
|
- app/controllers/bulkrax/exporters_controller.rb
|
|
254
254
|
- app/controllers/bulkrax/importers_controller.rb
|
|
255
255
|
- app/controllers/concerns/bulkrax/api.rb
|
|
256
|
+
- app/controllers/concerns/bulkrax/download_behavior.rb
|
|
256
257
|
- app/factories/bulkrax/object_factory.rb
|
|
257
258
|
- app/helpers/bulkrax/application_helper.rb
|
|
258
259
|
- app/helpers/bulkrax/exporters_helper.rb
|
|
@@ -297,7 +298,6 @@ files:
|
|
|
297
298
|
- app/models/bulkrax/rdf_file_set_entry.rb
|
|
298
299
|
- app/models/bulkrax/status.rb
|
|
299
300
|
- app/models/bulkrax/xml_entry.rb
|
|
300
|
-
- app/models/concerns/bulkrax/download_behavior.rb
|
|
301
301
|
- app/models/concerns/bulkrax/dynamic_record_lookup.rb
|
|
302
302
|
- app/models/concerns/bulkrax/errored_entries.rb
|
|
303
303
|
- app/models/concerns/bulkrax/export_behavior.rb
|
|
@@ -372,6 +372,8 @@ files:
|
|
|
372
372
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
|
373
373
|
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
|
374
374
|
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
|
375
|
+
- db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
|
|
376
|
+
- db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
|
|
375
377
|
- lib/bulkrax.rb
|
|
376
378
|
- lib/bulkrax/engine.rb
|
|
377
379
|
- lib/bulkrax/version.rb
|
|
@@ -387,7 +389,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
|
387
389
|
licenses:
|
|
388
390
|
- Apache-2.0
|
|
389
391
|
metadata: {}
|
|
390
|
-
post_install_message:
|
|
392
|
+
post_install_message:
|
|
391
393
|
rdoc_options: []
|
|
392
394
|
require_paths:
|
|
393
395
|
- lib
|
|
@@ -402,8 +404,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
402
404
|
- !ruby/object:Gem::Version
|
|
403
405
|
version: '0'
|
|
404
406
|
requirements: []
|
|
405
|
-
rubygems_version: 3.
|
|
406
|
-
signing_key:
|
|
407
|
+
rubygems_version: 3.0.3
|
|
408
|
+
signing_key:
|
|
407
409
|
specification_version: 4
|
|
408
410
|
summary: Import and export tool for Hyrax and Hyku
|
|
409
411
|
test_files: []
|