bulkrax 3.1.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/exporters_controller.rb +4 -4
- data/app/{models → controllers}/concerns/bulkrax/download_behavior.rb +1 -5
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -3
- data/app/jobs/bulkrax/delete_collection_job.rb +5 -0
- data/app/jobs/bulkrax/delete_file_set_job.rb +5 -0
- data/app/jobs/bulkrax/delete_job.rb +20 -0
- data/app/jobs/bulkrax/delete_work_job.rb +1 -16
- data/app/models/bulkrax/csv_entry.rb +44 -23
- data/app/models/bulkrax/entry.rb +12 -0
- data/app/models/bulkrax/exporter.rb +8 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +12 -6
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -0
- data/app/parsers/bulkrax/application_parser.rb +39 -3
- data/app/parsers/bulkrax/bagit_parser.rb +188 -20
- data/app/parsers/bulkrax/csv_parser.rb +4 -2
- data/app/views/bulkrax/exporters/_form.html.erb +10 -0
- data/app/views/bulkrax/exporters/show.html.erb +12 -0
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +5 -0
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +1 -0
- metadata +12 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c6719caedaf4adb707986e0b6771f1025ce1e08d0bf46afa78a85d99faded2b
|
4
|
+
data.tar.gz: 7af41a63f79c6d9792066cf545f35bb723c7667970f0ea1e3a87c04dceda28d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ed10c67e81885b4ff3b40d2731b93b790495e19cad51fb9dfb0dcc5bebcd4c60fecc86fe0932f058d100a7937cedab43c1df050c98efd2107c086fdb2fd93a2
|
7
|
+
data.tar.gz: 5a81d4cd7d0289b5d27a13876f20c29b0b40f0989ecbac7caaf26599a1cbdea5c27a2473f759783690562345e8b5d7c515a4f4a0f92d5241803897887e2e9214
|
@@ -101,12 +101,12 @@ module Bulkrax
|
|
101
101
|
def exporter_params
|
102
102
|
params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
|
103
103
|
if params[:exporter][:date_filter] == "1"
|
104
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
105
|
-
:parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
104
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
105
|
+
:include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
106
106
|
:workflow_status, field_mapping: {})
|
107
107
|
else
|
108
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
109
|
-
:parser_klass, :limit, :work_visibility, :workflow_status,
|
108
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
109
|
+
:include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
|
110
110
|
field_mapping: {}).merge(start_date: nil, finish_date: nil)
|
111
111
|
end
|
112
112
|
end
|
@@ -42,7 +42,7 @@ module Bulkrax
|
|
42
42
|
def send_file_contents
|
43
43
|
self.status = 200
|
44
44
|
prepare_file_headers
|
45
|
-
|
45
|
+
send_file file
|
46
46
|
end
|
47
47
|
|
48
48
|
def prepare_file_headers
|
@@ -53,9 +53,5 @@ module Bulkrax
|
|
53
53
|
response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
|
54
54
|
self.content_type = download_content_type
|
55
55
|
end
|
56
|
-
|
57
|
-
def stream_body(iostream)
|
58
|
-
self.response_body = iostream
|
59
|
-
end
|
60
56
|
end
|
61
57
|
end
|
@@ -49,13 +49,11 @@ module Bulkrax
|
|
49
49
|
reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
|
50
50
|
return false # stop current job from continuing to run after rescheduling
|
51
51
|
end
|
52
|
-
importer_id = ImporterRun.find(importer_run_id).importer_id
|
53
52
|
@parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
|
54
|
-
importerexporter_id: importer_id,
|
53
|
+
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
|
55
54
|
importerexporter_type: "Bulkrax::Importer").first
|
56
55
|
create_relationships
|
57
56
|
pending_relationships.each(&:destroy)
|
58
|
-
Bulkrax::Importer.find(importer_id).record_status
|
59
57
|
rescue ::StandardError => e
|
60
58
|
parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
|
61
59
|
Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class DeleteJob < ApplicationJob
|
5
|
+
queue_as :import
|
6
|
+
|
7
|
+
# rubocop:disable Rails/SkipsModelValidations
|
8
|
+
def perform(entry, importer_run)
|
9
|
+
obj = entry.factory.find
|
10
|
+
obj&.delete
|
11
|
+
ImporterRun.find(importer_run.id).increment!(:deleted_records)
|
12
|
+
ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
|
13
|
+
entry.save!
|
14
|
+
entry.importer.current_run = ImporterRun.find(importer_run.id)
|
15
|
+
entry.importer.record_status
|
16
|
+
entry.status_info("Deleted", ImporterRun.find(importer_run.id))
|
17
|
+
end
|
18
|
+
# rubocop:enable Rails/SkipsModelValidations
|
19
|
+
end
|
20
|
+
end
|
@@ -1,20 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class DeleteWorkJob <
|
5
|
-
queue_as :import
|
6
|
-
|
7
|
-
# rubocop:disable Rails/SkipsModelValidations
|
8
|
-
def perform(entry, importer_run)
|
9
|
-
work = entry.factory.find
|
10
|
-
work&.delete
|
11
|
-
ImporterRun.find(importer_run.id).increment!(:deleted_records)
|
12
|
-
ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
|
13
|
-
entry.save!
|
14
|
-
entry.importer.current_run = ImporterRun.find(importer_run.id)
|
15
|
-
entry.importer.record_status
|
16
|
-
entry.status_info("Deleted", ImporterRun.find(importer_run.id))
|
17
|
-
end
|
18
|
-
# rubocop:enable Rails/SkipsModelValidations
|
19
|
-
end
|
4
|
+
class DeleteWorkJob < DeleteJob; end
|
20
5
|
end
|
@@ -93,17 +93,32 @@ module Bulkrax
|
|
93
93
|
end
|
94
94
|
|
95
95
|
def build_export_metadata
|
96
|
-
# make_round_trippable
|
97
96
|
self.parsed_metadata = {}
|
98
|
-
|
99
|
-
|
100
|
-
|
97
|
+
|
98
|
+
build_system_metadata
|
99
|
+
build_files_metadata unless hyrax_record.is_a?(Collection)
|
101
100
|
build_relationship_metadata
|
102
101
|
build_mapping_metadata
|
103
|
-
|
102
|
+
self.save!
|
103
|
+
|
104
104
|
self.parsed_metadata
|
105
105
|
end
|
106
106
|
|
107
|
+
# Metadata required by Bulkrax for round-tripping
|
108
|
+
def build_system_metadata
|
109
|
+
self.parsed_metadata['id'] = hyrax_record.id
|
110
|
+
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
111
|
+
self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
|
112
|
+
end
|
113
|
+
|
114
|
+
def build_files_metadata
|
115
|
+
file_mapping = key_for_export('file')
|
116
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
117
|
+
filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
118
|
+
|
119
|
+
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
120
|
+
end
|
121
|
+
|
107
122
|
def build_relationship_metadata
|
108
123
|
# Includes all relationship methods for all exportable record types (works, Collections, FileSets)
|
109
124
|
relationship_methods = {
|
@@ -126,13 +141,12 @@ module Bulkrax
|
|
126
141
|
end
|
127
142
|
|
128
143
|
def build_mapping_metadata
|
144
|
+
mapping = fetch_field_mapping
|
129
145
|
mapping.each do |key, value|
|
130
|
-
|
131
|
-
next if
|
132
|
-
# relationships handled by #build_relationship_metadata
|
133
|
-
next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
134
|
-
next if key == 'file' # handled by #build_files
|
146
|
+
# these keys are handled by other methods
|
147
|
+
next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
135
148
|
next if value['excluded']
|
149
|
+
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
136
150
|
|
137
151
|
object_key = key if value.key?('object')
|
138
152
|
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
|
@@ -209,8 +223,19 @@ module Bulkrax
|
|
209
223
|
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
210
224
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
211
225
|
|
212
|
-
filenames = file_sets
|
226
|
+
filenames = map_file_sets(file_sets)
|
213
227
|
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
228
|
+
build_thumbnail_files if hyrax_record.work?
|
229
|
+
end
|
230
|
+
|
231
|
+
def build_thumbnail_files
|
232
|
+
return unless importerexporter.include_thumbnails
|
233
|
+
|
234
|
+
thumbnail_mapping = 'thumbnail_file'
|
235
|
+
file_sets = Array.wrap(hyrax_record.thumbnail)
|
236
|
+
|
237
|
+
filenames = map_file_sets(file_sets)
|
238
|
+
handle_join_on_export(thumbnail_mapping, filenames, false)
|
214
239
|
end
|
215
240
|
|
216
241
|
def handle_join_on_export(key, values, join)
|
@@ -224,16 +249,6 @@ module Bulkrax
|
|
224
249
|
end
|
225
250
|
end
|
226
251
|
|
227
|
-
# In order for the existing exported hyrax_record, to be updated by a re-import
|
228
|
-
# we need a unique value in system_identifier
|
229
|
-
# add the existing hyrax_record id to system_identifier
|
230
|
-
def make_round_trippable
|
231
|
-
values = hyrax_record.send(work_identifier.to_s).to_a
|
232
|
-
values << hyrax_record.id
|
233
|
-
hyrax_record.send("#{work_identifier}=", values)
|
234
|
-
hyrax_record.save
|
235
|
-
end
|
236
|
-
|
237
252
|
def record
|
238
253
|
@record ||= raw_metadata
|
239
254
|
end
|
@@ -258,12 +273,12 @@ module Bulkrax
|
|
258
273
|
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
|
259
274
|
identifiers << matching_collection_entries.first&.identifier
|
260
275
|
end
|
261
|
-
|
262
276
|
@collection_identifiers = identifiers.compact.presence || []
|
263
277
|
end
|
264
278
|
|
265
279
|
def collections_created?
|
266
|
-
|
280
|
+
# TODO: look into if this method is still needed after new relationships code
|
281
|
+
true
|
267
282
|
end
|
268
283
|
|
269
284
|
def find_collection_ids
|
@@ -288,5 +303,11 @@ module Bulkrax
|
|
288
303
|
return f if File.exist?(f)
|
289
304
|
raise "File #{f} does not exist"
|
290
305
|
end
|
306
|
+
|
307
|
+
private
|
308
|
+
|
309
|
+
def map_file_sets(file_sets)
|
310
|
+
file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
311
|
+
end
|
291
312
|
end
|
292
313
|
end
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,8 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
+
# TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
|
8
|
+
class BagitZipError < RuntimeError; end
|
7
9
|
class Entry < ApplicationRecord
|
8
10
|
include Bulkrax::HasMatchers
|
9
11
|
include Bulkrax::ImportBehavior
|
@@ -34,6 +36,7 @@ module Bulkrax
|
|
34
36
|
delegate :client,
|
35
37
|
:collection_name,
|
36
38
|
:user,
|
39
|
+
:generated_metadata_mapping,
|
37
40
|
:related_parents_raw_mapping,
|
38
41
|
:related_parents_parsed_mapping,
|
39
42
|
:related_children_raw_mapping,
|
@@ -70,6 +73,15 @@ module Bulkrax
|
|
70
73
|
parser&.work_identifier&.to_s || 'source'
|
71
74
|
end
|
72
75
|
|
76
|
+
# Returns field_mapping hash based on whether or not generated metadata should be included
|
77
|
+
def fetch_field_mapping
|
78
|
+
return self.mapping if importerexporter.generated_metadata
|
79
|
+
|
80
|
+
self.mapping.each do |key, value|
|
81
|
+
self.mapping.delete(key) if value[generated_metadata_mapping]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
73
85
|
def self.parent_field(parser)
|
74
86
|
parser.related_parents_parsed_mapping
|
75
87
|
end
|
@@ -51,6 +51,14 @@ module Bulkrax
|
|
51
51
|
self.start_date.present? || self.finish_date.present?
|
52
52
|
end
|
53
53
|
|
54
|
+
def include_thumbnails?
|
55
|
+
self.include_thumbnails
|
56
|
+
end
|
57
|
+
|
58
|
+
def generated_metadata?
|
59
|
+
self.generated_metadata
|
60
|
+
end
|
61
|
+
|
54
62
|
def work_visibility_list
|
55
63
|
[
|
56
64
|
['Any', ''],
|
@@ -12,7 +12,7 @@ module Bulkrax
|
|
12
12
|
# check for our entry in our current importer first
|
13
13
|
importer_id = ImporterRun.find(importer_run_id).importer_id
|
14
14
|
default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
|
15
|
-
record = Entry.find_by(default_scope
|
15
|
+
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
16
16
|
|
17
17
|
# TODO(alishaevn): discuss whether we are only looking for Collection models here
|
18
18
|
# use ActiveFedora::Base.find(identifier) instead?
|
@@ -7,6 +7,8 @@ module Bulkrax
|
|
7
7
|
|
8
8
|
def build_for_exporter
|
9
9
|
build_export_metadata
|
10
|
+
# TODO(alishaevn): determine if the line below is still necessary
|
11
|
+
# the csv and bagit parsers also have write_files methods
|
10
12
|
write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
|
11
13
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
12
14
|
raise e
|
@@ -28,6 +30,7 @@ module Bulkrax
|
|
28
30
|
return if hyrax_record.is_a?(Collection)
|
29
31
|
|
30
32
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
33
|
+
file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
|
31
34
|
file_sets.each do |fs|
|
32
35
|
path = File.join(exporter_export_path, 'files')
|
33
36
|
FileUtils.mkdir_p(path)
|
@@ -42,19 +45,22 @@ module Bulkrax
|
|
42
45
|
end
|
43
46
|
end
|
44
47
|
|
45
|
-
# Prepend the file_set id to ensure a unique filename
|
48
|
+
# Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
|
46
49
|
def filename(file_set)
|
47
50
|
return if file_set.original_file.blank?
|
48
51
|
fn = file_set.original_file.file_name.first
|
49
52
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
50
53
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
51
|
-
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
52
|
-
|
53
|
-
|
54
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
|
55
|
+
filename = "#{fn}.#{mime.to_sym}"
|
56
|
+
filename = fn if mime.to_s == ext_mime.to_s
|
54
57
|
else
|
55
|
-
|
56
|
-
|
58
|
+
filename = "#{file_set.id}_#{fn}.#{mime.to_sym}"
|
59
|
+
filename = "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
|
57
60
|
end
|
61
|
+
# Remove extention truncate and reattach
|
62
|
+
ext = File.extname(filename)
|
63
|
+
"#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
|
58
64
|
end
|
59
65
|
end
|
60
66
|
end
|
@@ -28,5 +28,13 @@ module Bulkrax
|
|
28
28
|
|
29
29
|
raise StandardError, 'File set must be related to at least one work'
|
30
30
|
end
|
31
|
+
|
32
|
+
def parent_jobs
|
33
|
+
false # FileSet relationships are handled in ObjectFactory#create_file_set
|
34
|
+
end
|
35
|
+
|
36
|
+
def child_jobs
|
37
|
+
raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
|
38
|
+
end
|
31
39
|
end
|
32
40
|
end
|
@@ -51,6 +51,10 @@ module Bulkrax
|
|
51
51
|
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
52
52
|
end
|
53
53
|
|
54
|
+
def generated_metadata_mapping
|
55
|
+
@generated_metadata_mapping ||= 'generated'
|
56
|
+
end
|
57
|
+
|
54
58
|
def related_parents_raw_mapping
|
55
59
|
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
56
60
|
end
|
@@ -242,15 +246,48 @@ module Bulkrax
|
|
242
246
|
def write
|
243
247
|
write_files
|
244
248
|
zip
|
249
|
+
# uncomment next line to debug for faulty zipping during bagit export
|
250
|
+
bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
|
245
251
|
end
|
246
252
|
|
247
253
|
def unzip(file_to_unzip)
|
248
|
-
|
254
|
+
Zip::File.open(file_to_unzip) do |zip_file|
|
255
|
+
zip_file.each do |entry|
|
256
|
+
entry_path = File.join(importer_unzip_path, entry.name)
|
257
|
+
FileUtils.mkdir_p(File.dirname(entry_path))
|
258
|
+
zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
|
259
|
+
end
|
260
|
+
end
|
249
261
|
end
|
250
262
|
|
251
263
|
def zip
|
252
264
|
FileUtils.rm_rf(exporter_export_zip_path)
|
253
|
-
|
265
|
+
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
266
|
+
Dir["#{exporter_export_path}/**/**"].each do |file|
|
267
|
+
zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
# TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
|
273
|
+
def bagit_zip_file_size_check
|
274
|
+
Zip::File.open(exporter_export_zip_path) do |zip_file|
|
275
|
+
zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
|
276
|
+
Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
|
277
|
+
begin
|
278
|
+
raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
|
279
|
+
rescue BagitZipError => e
|
280
|
+
matched_entry_ids = importerexporter.entry_ids.select do |id|
|
281
|
+
Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
|
282
|
+
end
|
283
|
+
matched_entry_ids.each do |entry_id|
|
284
|
+
Bulkrax::Entry.find(entry_id).status_info(e)
|
285
|
+
status_info('Complete (with failures)')
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
254
291
|
end
|
255
292
|
|
256
293
|
# Is this a file?
|
@@ -272,7 +309,6 @@ module Bulkrax
|
|
272
309
|
|
273
310
|
def real_import_file_path
|
274
311
|
return importer_unzip_path if file? && zip?
|
275
|
-
|
276
312
|
parser_fields['import_file_path']
|
277
313
|
end
|
278
314
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class BagitParser < ApplicationParser
|
4
|
+
class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
5
|
+
include ExportBehavior
|
6
|
+
|
5
7
|
def self.export_supported?
|
6
|
-
|
8
|
+
true
|
7
9
|
end
|
8
10
|
|
9
11
|
def valid_import?
|
@@ -14,19 +16,11 @@ module Bulkrax
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def entry_class
|
17
|
-
parser_fields['metadata_format'
|
18
|
-
|
19
|
-
|
20
|
-
def collection_entry_class
|
21
|
-
parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
|
22
|
-
rescue
|
23
|
-
Entry
|
24
|
-
end
|
25
|
-
|
26
|
-
def file_set_entry_class
|
27
|
-
csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
|
28
|
-
csv_format ? CsvFileSetEntry : RdfFileSetEntry
|
19
|
+
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
20
|
+
rdf_format ? RdfEntry : CsvEntry
|
29
21
|
end
|
22
|
+
alias collection_entry_class entry_class
|
23
|
+
alias file_set_entry_class entry_class
|
30
24
|
|
31
25
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
32
26
|
def import_fields
|
@@ -101,7 +95,185 @@ module Bulkrax
|
|
101
95
|
end
|
102
96
|
|
103
97
|
def total
|
104
|
-
|
98
|
+
importerexporter.entries.count
|
99
|
+
end
|
100
|
+
|
101
|
+
def extra_filters
|
102
|
+
output = ""
|
103
|
+
if importerexporter.start_date.present?
|
104
|
+
start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
|
105
|
+
finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
|
106
|
+
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
107
|
+
end
|
108
|
+
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
109
|
+
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
110
|
+
output
|
111
|
+
end
|
112
|
+
|
113
|
+
def current_record_ids
|
114
|
+
@work_ids = []
|
115
|
+
@collection_ids = []
|
116
|
+
@file_set_ids = []
|
117
|
+
|
118
|
+
case importerexporter.export_from
|
119
|
+
when 'all'
|
120
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
121
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
122
|
+
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
123
|
+
when 'collection'
|
124
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
125
|
+
when 'worktype'
|
126
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
127
|
+
when 'importer'
|
128
|
+
set_ids_for_exporting_from_importer
|
129
|
+
end
|
130
|
+
|
131
|
+
@work_ids + @collection_ids + @file_set_ids
|
132
|
+
end
|
133
|
+
|
134
|
+
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
135
|
+
# @see #current_record_ids
|
136
|
+
def set_ids_for_exporting_from_importer
|
137
|
+
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
138
|
+
complete_statuses = Status.latest_by_statusable
|
139
|
+
.includes(:statusable)
|
140
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
141
|
+
|
142
|
+
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
143
|
+
extra_filters = extra_filters.presence || '*:*'
|
144
|
+
|
145
|
+
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
146
|
+
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
147
|
+
extra_filters.to_s,
|
148
|
+
fq: [
|
149
|
+
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
150
|
+
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
151
|
+
],
|
152
|
+
fl: 'id',
|
153
|
+
rows: 2_000_000_000
|
154
|
+
)['response']['docs'].map { |obj| obj['id'] })
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def create_new_entries
|
159
|
+
current_record_ids.each_with_index do |id, index|
|
160
|
+
break if limit_reached?(limit, index)
|
161
|
+
|
162
|
+
this_entry_class = if @collection_ids.include?(id)
|
163
|
+
collection_entry_class
|
164
|
+
elsif @file_set_ids.include?(id)
|
165
|
+
file_set_entry_class
|
166
|
+
else
|
167
|
+
entry_class
|
168
|
+
end
|
169
|
+
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
170
|
+
|
171
|
+
begin
|
172
|
+
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
173
|
+
rescue => e
|
174
|
+
Rails.logger.info("#{e.message} was detected during export")
|
175
|
+
end
|
176
|
+
|
177
|
+
self.headers |= entry.parsed_metadata.keys if entry
|
178
|
+
end
|
179
|
+
end
|
180
|
+
alias create_from_collection create_new_entries
|
181
|
+
alias create_from_importer create_new_entries
|
182
|
+
alias create_from_worktype create_new_entries
|
183
|
+
alias create_from_all create_new_entries
|
184
|
+
|
185
|
+
# export methods
|
186
|
+
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
188
|
+
def write_files
|
189
|
+
require 'open-uri'
|
190
|
+
require 'socket'
|
191
|
+
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
|
192
|
+
bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
|
193
|
+
w = ActiveFedora::Base.find(e.identifier)
|
194
|
+
next unless Hyrax.config.curation_concerns.include?(w.class)
|
195
|
+
|
196
|
+
w.file_sets.each do |fs|
|
197
|
+
file_name = filename(fs)
|
198
|
+
next if file_name.blank?
|
199
|
+
io = open(fs.original_file.uri)
|
200
|
+
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
201
|
+
file.write(io.read)
|
202
|
+
file.close
|
203
|
+
bag.add_file(file_name, file.path)
|
204
|
+
end
|
205
|
+
CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
206
|
+
csv << e.parsed_metadata
|
207
|
+
end
|
208
|
+
write_triples(e)
|
209
|
+
bag.manifest!(algo: 'sha256')
|
210
|
+
end
|
211
|
+
end
|
212
|
+
# rubocop:enable Metrics/AbcSize
|
213
|
+
|
214
|
+
def setup_csv_metadata_export_file(id)
|
215
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
|
216
|
+
end
|
217
|
+
|
218
|
+
def key_allowed(key)
|
219
|
+
!Bulkrax.reserved_properties.include?(key) &&
|
220
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
221
|
+
key != source_identifier.to_s
|
222
|
+
end
|
223
|
+
|
224
|
+
# All possible column names
|
225
|
+
def export_headers
|
226
|
+
headers = sort_headers(self.headers)
|
227
|
+
|
228
|
+
# we don't want access_control_id exported and we want file at the end
|
229
|
+
headers.delete('access_control_id') if headers.include?('access_control_id')
|
230
|
+
|
231
|
+
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
232
|
+
headers.prepend('model')
|
233
|
+
headers.prepend(source_identifier.to_s)
|
234
|
+
headers.prepend('id')
|
235
|
+
|
236
|
+
headers.uniq
|
237
|
+
end
|
238
|
+
|
239
|
+
def object_names
|
240
|
+
return @object_names if @object_names
|
241
|
+
|
242
|
+
@object_names = mapping.values.map { |value| value['object'] }
|
243
|
+
@object_names.uniq!.delete(nil)
|
244
|
+
|
245
|
+
@object_names
|
246
|
+
end
|
247
|
+
|
248
|
+
def sort_headers(headers)
|
249
|
+
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
250
|
+
# while keeping objects grouped together
|
251
|
+
headers.sort_by do |item|
|
252
|
+
number = item.match(/\d+/)&.[](0) || 0.to_s
|
253
|
+
sort_number = number.rjust(4, "0")
|
254
|
+
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
255
|
+
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
256
|
+
"#{object_prefix}_#{sort_number}_#{remainder}"
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
def setup_triple_metadata_export_file(id)
|
261
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
|
262
|
+
end
|
263
|
+
|
264
|
+
def setup_bagit_folder(id)
|
265
|
+
File.join(importerexporter.exporter_export_path, id)
|
266
|
+
end
|
267
|
+
|
268
|
+
def write_triples(e)
|
269
|
+
sd = SolrDocument.find(e.identifier)
|
270
|
+
return if sd.nil?
|
271
|
+
|
272
|
+
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
273
|
+
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
274
|
+
File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
|
275
|
+
triples.write(rdf)
|
276
|
+
end
|
105
277
|
end
|
106
278
|
|
107
279
|
def required_elements?(keys)
|
@@ -126,11 +298,7 @@ module Bulkrax
|
|
126
298
|
def bags
|
127
299
|
return @bags if @bags.present?
|
128
300
|
new_bag = bag(import_file_path)
|
129
|
-
@bags =
|
130
|
-
[new_bag]
|
131
|
-
else
|
132
|
-
Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
133
|
-
end
|
301
|
+
@bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
134
302
|
@bags.delete(nil)
|
135
303
|
raise StandardError, 'No valid bags found' if @bags.blank?
|
136
304
|
return @bags
|
@@ -11,12 +11,15 @@ module Bulkrax
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def records(_opts = {})
|
14
|
+
return @records if @records.present?
|
15
|
+
|
14
16
|
file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
|
15
17
|
# data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
|
16
18
|
csv_data = entry_class.read_data(file_for_import)
|
17
19
|
importer.parser_fields['total'] = csv_data.count
|
18
20
|
importer.save
|
19
|
-
|
21
|
+
|
22
|
+
@records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
20
23
|
end
|
21
24
|
|
22
25
|
def build_records
|
@@ -145,7 +148,6 @@ module Bulkrax
|
|
145
148
|
'Bulkrax::Importer',
|
146
149
|
current_record.to_h)
|
147
150
|
if current_record[:delete].present?
|
148
|
-
# TODO: create a "Delete" job for file_sets and collections
|
149
151
|
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
150
152
|
else
|
151
153
|
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
|
@@ -60,6 +60,16 @@
|
|
60
60
|
hint: 'leave blank or 0 for all records',
|
61
61
|
label: t('bulkrax.exporter.labels.limit') %>
|
62
62
|
|
63
|
+
<%= form.input :generated_metadata?,
|
64
|
+
as: :boolean,
|
65
|
+
label: t('bulkrax.exporter.labels.generated_metadata'),
|
66
|
+
hint: t('bulkrax.exporter.hints.generated_metadata') %>
|
67
|
+
|
68
|
+
<%= form.input :include_thumbnails?,
|
69
|
+
as: :boolean,
|
70
|
+
label: t('bulkrax.exporter.labels.include_thumbnails'),
|
71
|
+
hint: t('bulkrax.exporter.hints.include_thumbnails') %>
|
72
|
+
|
63
73
|
<%= form.input :date_filter,
|
64
74
|
as: :boolean,
|
65
75
|
label: t('bulkrax.exporter.labels.filter_by_date') %>
|
@@ -57,6 +57,18 @@
|
|
57
57
|
<strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
|
58
58
|
<%= @exporter.limit %>
|
59
59
|
</p>
|
60
|
+
|
61
|
+
<p class='bulkrax-p-align'>
|
62
|
+
<strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
|
63
|
+
<%= @exporter.generated_metadata %>
|
64
|
+
</p>
|
65
|
+
|
66
|
+
<p class='bulkrax-p-align'>
|
67
|
+
<strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
|
68
|
+
<%= @exporter.include_thumbnails %>
|
69
|
+
</p>
|
70
|
+
|
71
|
+
|
60
72
|
<%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
|
61
73
|
|
62
74
|
<%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
|
@@ -178,7 +178,7 @@
|
|
178
178
|
<% elsif e.status == "Pending" %>
|
179
179
|
<td><span class="glyphicon glyphicon-option-horizontal" style="color: blue;"></span> <%= e.status %></td>
|
180
180
|
<% else %>
|
181
|
-
<td><span class="glyphicon glyphicon-remove" style="color: red
|
181
|
+
<td><span class="glyphicon glyphicon-remove" style="color: <%= e.status == 'Deleted' ? 'green' : 'red' %>;"></span> <%= e.status %></td>
|
182
182
|
<% end %>
|
183
183
|
<% if e.last_error.present? %>
|
184
184
|
<td><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
|
@@ -16,6 +16,8 @@ en:
|
|
16
16
|
filter_by_date: Filter By Date
|
17
17
|
finish_date: End Date
|
18
18
|
full: Metadata and Files
|
19
|
+
include_thumbnails: Include Thumbnails?
|
20
|
+
generated_metadata: Include Generated Metadata?
|
19
21
|
importer: Importer
|
20
22
|
limit: Limit
|
21
23
|
metadata: Metadata Only
|
@@ -35,3 +37,6 @@ en:
|
|
35
37
|
ingested: "Ingested"
|
36
38
|
unapproved: "Unapproved"
|
37
39
|
needs_repair: "Needs Repair"
|
40
|
+
hints:
|
41
|
+
include_thumbnails: "These exported fields currently cannot be imported."
|
42
|
+
generated_metadata: "These exported fields currently cannot be imported."
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -253,6 +253,7 @@ files:
|
|
253
253
|
- app/controllers/bulkrax/exporters_controller.rb
|
254
254
|
- app/controllers/bulkrax/importers_controller.rb
|
255
255
|
- app/controllers/concerns/bulkrax/api.rb
|
256
|
+
- app/controllers/concerns/bulkrax/download_behavior.rb
|
256
257
|
- app/factories/bulkrax/object_factory.rb
|
257
258
|
- app/helpers/bulkrax/application_helper.rb
|
258
259
|
- app/helpers/bulkrax/exporters_helper.rb
|
@@ -260,6 +261,9 @@ files:
|
|
260
261
|
- app/helpers/bulkrax/validation_helper.rb
|
261
262
|
- app/jobs/bulkrax/application_job.rb
|
262
263
|
- app/jobs/bulkrax/create_relationships_job.rb
|
264
|
+
- app/jobs/bulkrax/delete_collection_job.rb
|
265
|
+
- app/jobs/bulkrax/delete_file_set_job.rb
|
266
|
+
- app/jobs/bulkrax/delete_job.rb
|
263
267
|
- app/jobs/bulkrax/delete_work_job.rb
|
264
268
|
- app/jobs/bulkrax/download_cloud_file_job.rb
|
265
269
|
- app/jobs/bulkrax/export_work_job.rb
|
@@ -294,7 +298,6 @@ files:
|
|
294
298
|
- app/models/bulkrax/rdf_file_set_entry.rb
|
295
299
|
- app/models/bulkrax/status.rb
|
296
300
|
- app/models/bulkrax/xml_entry.rb
|
297
|
-
- app/models/concerns/bulkrax/download_behavior.rb
|
298
301
|
- app/models/concerns/bulkrax/dynamic_record_lookup.rb
|
299
302
|
- app/models/concerns/bulkrax/errored_entries.rb
|
300
303
|
- app/models/concerns/bulkrax/export_behavior.rb
|
@@ -369,6 +372,8 @@ files:
|
|
369
372
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
370
373
|
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
371
374
|
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
375
|
+
- db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
|
376
|
+
- db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
|
372
377
|
- lib/bulkrax.rb
|
373
378
|
- lib/bulkrax/engine.rb
|
374
379
|
- lib/bulkrax/version.rb
|
@@ -384,7 +389,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
384
389
|
licenses:
|
385
390
|
- Apache-2.0
|
386
391
|
metadata: {}
|
387
|
-
post_install_message:
|
392
|
+
post_install_message:
|
388
393
|
rdoc_options: []
|
389
394
|
require_paths:
|
390
395
|
- lib
|
@@ -399,8 +404,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
399
404
|
- !ruby/object:Gem::Version
|
400
405
|
version: '0'
|
401
406
|
requirements: []
|
402
|
-
rubygems_version: 3.
|
403
|
-
signing_key:
|
407
|
+
rubygems_version: 3.0.3
|
408
|
+
signing_key:
|
404
409
|
specification_version: 4
|
405
410
|
summary: Import and export tool for Hyrax and Hyku
|
406
411
|
test_files: []
|