bulkrax 3.1.1 → 3.3.0
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/exporters_controller.rb +4 -4
- data/app/{models → controllers}/concerns/bulkrax/download_behavior.rb +1 -5
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -3
- data/app/jobs/bulkrax/delete_collection_job.rb +5 -0
- data/app/jobs/bulkrax/delete_file_set_job.rb +5 -0
- data/app/jobs/bulkrax/delete_job.rb +20 -0
- data/app/jobs/bulkrax/delete_work_job.rb +1 -16
- data/app/models/bulkrax/csv_entry.rb +44 -23
- data/app/models/bulkrax/entry.rb +12 -0
- data/app/models/bulkrax/exporter.rb +8 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +12 -6
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -0
- data/app/parsers/bulkrax/application_parser.rb +39 -3
- data/app/parsers/bulkrax/bagit_parser.rb +188 -20
- data/app/parsers/bulkrax/csv_parser.rb +4 -2
- data/app/views/bulkrax/exporters/_form.html.erb +10 -0
- data/app/views/bulkrax/exporters/show.html.erb +12 -0
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +5 -0
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +1 -0
- metadata +12 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4c6719caedaf4adb707986e0b6771f1025ce1e08d0bf46afa78a85d99faded2b
+  data.tar.gz: 7af41a63f79c6d9792066cf545f35bb723c7667970f0ea1e3a87c04dceda28d0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ed10c67e81885b4ff3b40d2731b93b790495e19cad51fb9dfb0dcc5bebcd4c60fecc86fe0932f058d100a7937cedab43c1df050c98efd2107c086fdb2fd93a2
+  data.tar.gz: 5a81d4cd7d0289b5d27a13876f20c29b0b40f0989ecbac7caaf26599a1cbdea5c27a2473f759783690562345e8b5d7c515a4f4a0f92d5241803897887e2e9214
data/app/controllers/bulkrax/exporters_controller.rb
CHANGED
@@ -101,12 +101,12 @@ module Bulkrax
     def exporter_params
       params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
       if params[:exporter][:date_filter] == "1"
-        params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
-                                       :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
+        params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
+                                       :include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
                                        :workflow_status, field_mapping: {})
       else
-        params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
-                                       :parser_klass, :limit, :work_visibility, :workflow_status,
+        params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
+                                       :include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
                                        field_mapping: {}).merge(start_date: nil, finish_date: nil)
       end
     end
data/app/{models → controllers}/concerns/bulkrax/download_behavior.rb
CHANGED
@@ -42,7 +42,7 @@ module Bulkrax
     def send_file_contents
       self.status = 200
       prepare_file_headers
-
+      send_file file
     end

     def prepare_file_headers
@@ -53,9 +53,5 @@ module Bulkrax
       response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
       self.content_type = download_content_type
     end
-
-    def stream_body(iostream)
-      self.response_body = iostream
-    end
   end
 end
data/app/jobs/bulkrax/create_relationships_job.rb
CHANGED
@@ -49,13 +49,11 @@ module Bulkrax
         reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
         return false # stop current job from continuing to run after rescheduling
       end
-      importer_id = ImporterRun.find(importer_run_id).importer_id
       @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
-                                             importerexporter_id: importer_id,
+                                             importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
                                              importerexporter_type: "Bulkrax::Importer").first
       create_relationships
       pending_relationships.each(&:destroy)
-      Bulkrax::Importer.find(importer_id).record_status
     rescue ::StandardError => e
       parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
       Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
data/app/jobs/bulkrax/delete_job.rb
ADDED
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module Bulkrax
+  class DeleteJob < ApplicationJob
+    queue_as :import
+
+    # rubocop:disable Rails/SkipsModelValidations
+    def perform(entry, importer_run)
+      obj = entry.factory.find
+      obj&.delete
+      ImporterRun.find(importer_run.id).increment!(:deleted_records)
+      ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
+      entry.save!
+      entry.importer.current_run = ImporterRun.find(importer_run.id)
+      entry.importer.record_status
+      entry.status_info("Deleted", ImporterRun.find(importer_run.id))
+    end
+    # rubocop:enable Rails/SkipsModelValidations
+  end
+end
data/app/jobs/bulkrax/delete_work_job.rb
CHANGED
@@ -1,20 +1,5 @@
 # frozen_string_literal: true

 module Bulkrax
-  class DeleteWorkJob < ApplicationJob
-    queue_as :import
-
-    # rubocop:disable Rails/SkipsModelValidations
-    def perform(entry, importer_run)
-      work = entry.factory.find
-      work&.delete
-      ImporterRun.find(importer_run.id).increment!(:deleted_records)
-      ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
-      entry.save!
-      entry.importer.current_run = ImporterRun.find(importer_run.id)
-      entry.importer.record_status
-      entry.status_info("Deleted", ImporterRun.find(importer_run.id))
-    end
-    # rubocop:enable Rails/SkipsModelValidations
-  end
+  class DeleteWorkJob < DeleteJob; end
 end
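The per-type delete jobs now share this logic. The new delete_collection_job.rb and delete_file_set_job.rb files (listed in the summary above but not shown in this diff) are each only five lines; judging from DeleteWorkJob above they presumably follow the same pattern. A hedged sketch, not the actual file contents:

# Hypothetical sketch — the bodies of the new collection/file-set delete jobs are
# not included in this diff; they most likely just inherit the shared DeleteJob.
module Bulkrax
  class DeleteCollectionJob < DeleteJob; end

  class DeleteFileSetJob < DeleteJob; end
end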
data/app/models/bulkrax/csv_entry.rb
CHANGED
@@ -93,17 +93,32 @@ module Bulkrax
     end

     def build_export_metadata
-      # make_round_trippable
       self.parsed_metadata = {}
-
-
-
+
+      build_system_metadata
+      build_files_metadata unless hyrax_record.is_a?(Collection)
       build_relationship_metadata
       build_mapping_metadata
-
+      self.save!
+
       self.parsed_metadata
     end

+    # Metadata required by Bulkrax for round-tripping
+    def build_system_metadata
+      self.parsed_metadata['id'] = hyrax_record.id
+      self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
+      self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
+    end
+
+    def build_files_metadata
+      file_mapping = key_for_export('file')
+      file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
+      filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
+
+      handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
+    end
+
     def build_relationship_metadata
       # Includes all relationship methods for all exportable record types (works, Collections, FileSets)
       relationship_methods = {
@@ -126,13 +141,12 @@ module Bulkrax
     end

     def build_mapping_metadata
+      mapping = fetch_field_mapping
       mapping.each do |key, value|
-
-        next if
-        # relationships handled by #build_relationship_metadata
-        next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
-        next if key == 'file' # handled by #build_files
+        # these keys are handled by other methods
+        next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
         next if value['excluded']
+        next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)

         object_key = key if value.key?('object')
         next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
@@ -209,8 +223,19 @@ module Bulkrax
       file_mapping = mapping['file']&.[]('from')&.first || 'file'
       file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets

-      filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
+      filenames = map_file_sets(file_sets)
       handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
+      build_thumbnail_files if hyrax_record.work?
+    end
+
+    def build_thumbnail_files
+      return unless importerexporter.include_thumbnails
+
+      thumbnail_mapping = 'thumbnail_file'
+      file_sets = Array.wrap(hyrax_record.thumbnail)
+
+      filenames = map_file_sets(file_sets)
+      handle_join_on_export(thumbnail_mapping, filenames, false)
     end

     def handle_join_on_export(key, values, join)
@@ -224,16 +249,6 @@ module Bulkrax
       end
     end

-    # In order for the existing exported hyrax_record, to be updated by a re-import
-    # we need a unique value in system_identifier
-    # add the existing hyrax_record id to system_identifier
-    def make_round_trippable
-      values = hyrax_record.send(work_identifier.to_s).to_a
-      values << hyrax_record.id
-      hyrax_record.send("#{work_identifier}=", values)
-      hyrax_record.save
-    end
-
     def record
       @record ||= raw_metadata
     end
@@ -258,12 +273,12 @@ module Bulkrax
         raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
         identifiers << matching_collection_entries.first&.identifier
       end
-
       @collection_identifiers = identifiers.compact.presence || []
     end

     def collections_created?
-
+      # TODO: look into if this method is still needed after new relationships code
+      true
     end

     def find_collection_ids
@@ -288,5 +303,11 @@ module Bulkrax
       return f if File.exist?(f)
       raise "File #{f} does not exist"
     end
+
+    private
+
+    def map_file_sets(file_sets)
+      file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
+    end
   end
 end
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,8 @@ module Bulkrax
   # Custom error class for collections_created?
   class CollectionsCreatedError < RuntimeError; end
   class OAIError < RuntimeError; end
+  # TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
+  class BagitZipError < RuntimeError; end
   class Entry < ApplicationRecord
     include Bulkrax::HasMatchers
     include Bulkrax::ImportBehavior
@@ -34,6 +36,7 @@ module Bulkrax
     delegate :client,
              :collection_name,
              :user,
+             :generated_metadata_mapping,
              :related_parents_raw_mapping,
              :related_parents_parsed_mapping,
              :related_children_raw_mapping,
@@ -70,6 +73,15 @@ module Bulkrax
       parser&.work_identifier&.to_s || 'source'
     end

+    # Returns field_mapping hash based on whether or not generated metadata should be included
+    def fetch_field_mapping
+      return self.mapping if importerexporter.generated_metadata
+
+      self.mapping.each do |key, value|
+        self.mapping.delete(key) if value[generated_metadata_mapping]
+      end
+    end
+
     def self.parent_field(parser)
       parser.related_parents_parsed_mapping
     end
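For orientation, a minimal sketch (the mapping shape and field names are assumed, not taken from the gem) of how a 'generated' flag in a field mapping interacts with the new fetch_field_mapping when the exporter's generated-metadata option is left unchecked:

# Hypothetical field mapping; only the 'generated' flag matters for this example.
mapping = {
  'title'         => { 'from' => ['title'] },
  'date_modified' => { 'from' => ['date_modified'], 'generated' => true }
}

# Mirrors what fetch_field_mapping does via generated_metadata_mapping ('generated'):
# flagged fields are dropped when generated metadata is excluded from the export.
mapping.reject! { |_key, value| value['generated'] }
puts mapping.keys.inspect # => ["title"]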
data/app/models/bulkrax/exporter.rb
CHANGED
@@ -51,6 +51,14 @@ module Bulkrax
       self.start_date.present? || self.finish_date.present?
     end

+    def include_thumbnails?
+      self.include_thumbnails
+    end
+
+    def generated_metadata?
+      self.generated_metadata
+    end
+
     def work_visibility_list
       [
         ['Any', ''],
data/app/models/concerns/bulkrax/dynamic_record_lookup.rb
CHANGED
@@ -12,7 +12,7 @@ module Bulkrax
       # check for our entry in our current importer first
       importer_id = ImporterRun.find(importer_run_id).importer_id
       default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
-      record = Entry.find_by(default_scope
+      record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)

       # TODO(alishaevn): discuss whether we are only looking for Collection models here
       # use ActiveFedora::Base.find(identifier) instead?
data/app/models/concerns/bulkrax/export_behavior.rb
CHANGED
@@ -7,6 +7,8 @@ module Bulkrax

     def build_for_exporter
       build_export_metadata
+      # TODO(alishaevn): determine if the line below is still necessary
+      # the csv and bagit parsers also have write_files methods
       write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
     rescue RSolr::Error::Http, CollectionsCreatedError => e
       raise e
@@ -28,6 +30,7 @@ module Bulkrax
       return if hyrax_record.is_a?(Collection)

       file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
+      file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
       file_sets.each do |fs|
         path = File.join(exporter_export_path, 'files')
         FileUtils.mkdir_p(path)
@@ -42,19 +45,22 @@ module Bulkrax
       end
     end

-    # Prepend the file_set id to ensure a unique filename
+    # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
     def filename(file_set)
       return if file_set.original_file.blank?
       fn = file_set.original_file.file_name.first
       mime = Mime::Type.lookup(file_set.original_file.mime_type)
       ext_mime = MIME::Types.of(file_set.original_file.file_name).first
-      if fn.include?(file_set.id) || importerexporter.metadata_only?
-
-
+      if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
+        filename = "#{fn}.#{mime.to_sym}"
+        filename = fn if mime.to_s == ext_mime.to_s
       else
-
-
+        filename = "#{file_set.id}_#{fn}.#{mime.to_sym}"
+        filename = "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
       end
+      # Remove extention truncate and reattach
+      ext = File.extname(filename)
+      "#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
     end
   end
 end
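A quick illustrative sketch (not part of the gem) of the truncation arithmetic the revised #filename uses to keep exported filenames safely below a 255-character filesystem limit:

# Hypothetical overlong name: trim the basename so basename + extension fits in
# 220 characters, then reattach the extension.
filename = "#{'a' * 300}.tiff"
ext = File.extname(filename)                                      # ".tiff"
safe = "#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
puts safe.length # => 220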
data/app/models/concerns/bulkrax/file_set_entry_behavior.rb
CHANGED
@@ -28,5 +28,13 @@ module Bulkrax

       raise StandardError, 'File set must be related to at least one work'
     end
+
+    def parent_jobs
+      false # FileSet relationships are handled in ObjectFactory#create_file_set
+    end
+
+    def child_jobs
+      raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
+    end
   end
 end
data/app/parsers/bulkrax/application_parser.rb
CHANGED
@@ -51,6 +51,10 @@ module Bulkrax
       @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
     end

+    def generated_metadata_mapping
+      @generated_metadata_mapping ||= 'generated'
+    end
+
     def related_parents_raw_mapping
       @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
     end
@@ -242,15 +246,48 @@ module Bulkrax
     def write
       write_files
       zip
+      # uncomment next line to debug for faulty zipping during bagit export
+      bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
     end

     def unzip(file_to_unzip)
-
+      Zip::File.open(file_to_unzip) do |zip_file|
+        zip_file.each do |entry|
+          entry_path = File.join(importer_unzip_path, entry.name)
+          FileUtils.mkdir_p(File.dirname(entry_path))
+          zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
+        end
+      end
     end

     def zip
       FileUtils.rm_rf(exporter_export_zip_path)
-
+      Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
+        Dir["#{exporter_export_path}/**/**"].each do |file|
+          zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
+        end
+      end
+    end
+
+    # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
+    def bagit_zip_file_size_check
+      Zip::File.open(exporter_export_zip_path) do |zip_file|
+        zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
+          Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
+            begin
+              raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
+            rescue BagitZipError => e
+              matched_entry_ids = importerexporter.entry_ids.select do |id|
+                Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
+              end
+              matched_entry_ids.each do |entry_id|
+                Bulkrax::Entry.find(entry_id).status_info(e)
+                status_info('Complete (with failures)')
+              end
+            end
+          end
+        end
+      end
     end

     # Is this a file?
@@ -272,7 +309,6 @@ module Bulkrax

     def real_import_file_path
       return importer_unzip_path if file? && zip?
-
       parser_fields['import_file_path']
     end
   end
data/app/parsers/bulkrax/bagit_parser.rb
CHANGED
@@ -1,9 +1,11 @@
 # frozen_string_literal: true

 module Bulkrax
-  class BagitParser < ApplicationParser
+  class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
+    include ExportBehavior
+
     def self.export_supported?
-
+      true
     end

     def valid_import?
@@ -14,19 +16,11 @@ module Bulkrax
     end

     def entry_class
-      parser_fields['metadata_format'
-
-
-    def collection_entry_class
-      parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
-    rescue
-      Entry
-    end
-
-    def file_set_entry_class
-      csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
-      csv_format ? CsvFileSetEntry : RdfFileSetEntry
+      rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
+      rdf_format ? RdfEntry : CsvEntry
     end
+    alias collection_entry_class entry_class
+    alias file_set_entry_class entry_class

     # Take a random sample of 10 metadata_paths and work out the import fields from that
     def import_fields
@@ -101,7 +95,185 @@ module Bulkrax
     end

     def total
-
+      importerexporter.entries.count
+    end
+
+    def extra_filters
+      output = ""
+      if importerexporter.start_date.present?
+        start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
+        finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
+        output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
+      end
+      output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
+      output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
+      output
+    end
+
+    def current_record_ids
+      @work_ids = []
+      @collection_ids = []
+      @file_set_ids = []
+
+      case importerexporter.export_from
+      when 'all'
+        @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
+        @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
+        @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
+      when 'collection'
+        @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
+      when 'worktype'
+        @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
+      when 'importer'
+        set_ids_for_exporting_from_importer
+      end
+
+      @work_ids + @collection_ids + @file_set_ids
+    end
+
+    # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
+    # @see #current_record_ids
+    def set_ids_for_exporting_from_importer
+      entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
+      complete_statuses = Status.latest_by_statusable
+                                .includes(:statusable)
+                                .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
+
+      complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
+      extra_filters = extra_filters.presence || '*:*'
+
+      { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
+        instance_variable_set(instance_var, ActiveFedora::SolrService.post(
+          extra_filters.to_s,
+          fq: [
+            %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
+            "has_model_ssim:(#{models_to_search.join(' OR ')})"
+          ],
+          fl: 'id',
+          rows: 2_000_000_000
+        )['response']['docs'].map { |obj| obj['id'] })
+      end
+    end
+
+    def create_new_entries
+      current_record_ids.each_with_index do |id, index|
+        break if limit_reached?(limit, index)
+
+        this_entry_class = if @collection_ids.include?(id)
+                             collection_entry_class
+                           elsif @file_set_ids.include?(id)
+                             file_set_entry_class
+                           else
+                             entry_class
+                           end
+        new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
+
+        begin
+          entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
+        rescue => e
+          Rails.logger.info("#{e.message} was detected during export")
+        end
+
+        self.headers |= entry.parsed_metadata.keys if entry
+      end
+    end
+    alias create_from_collection create_new_entries
+    alias create_from_importer create_new_entries
+    alias create_from_worktype create_new_entries
+    alias create_from_all create_new_entries
+
+    # export methods
+
+    # rubocop:disable Metrics/AbcSize
+    def write_files
+      require 'open-uri'
+      require 'socket'
+      importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
+        bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
+        w = ActiveFedora::Base.find(e.identifier)
+        next unless Hyrax.config.curation_concerns.include?(w.class)
+
+        w.file_sets.each do |fs|
+          file_name = filename(fs)
+          next if file_name.blank?
+          io = open(fs.original_file.uri)
+          file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
+          file.write(io.read)
+          file.close
+          bag.add_file(file_name, file.path)
+        end
+        CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
+          csv << e.parsed_metadata
+        end
+        write_triples(e)
+        bag.manifest!(algo: 'sha256')
+      end
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    def setup_csv_metadata_export_file(id)
+      File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
+    end
+
+    def key_allowed(key)
+      !Bulkrax.reserved_properties.include?(key) &&
+        new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
+        key != source_identifier.to_s
+    end
+
+    # All possible column names
+    def export_headers
+      headers = sort_headers(self.headers)
+
+      # we don't want access_control_id exported and we want file at the end
+      headers.delete('access_control_id') if headers.include?('access_control_id')
+
+      # add the headers below at the beginning or end to maintain the preexisting export behavior
+      headers.prepend('model')
+      headers.prepend(source_identifier.to_s)
+      headers.prepend('id')
+
+      headers.uniq
+    end
+
+    def object_names
+      return @object_names if @object_names
+
+      @object_names = mapping.values.map { |value| value['object'] }
+      @object_names.uniq!.delete(nil)
+
+      @object_names
+    end
+
+    def sort_headers(headers)
+      # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
+      # while keeping objects grouped together
+      headers.sort_by do |item|
+        number = item.match(/\d+/)&.[](0) || 0.to_s
+        sort_number = number.rjust(4, "0")
+        object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
+        remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
+        "#{object_prefix}_#{sort_number}_#{remainder}"
+      end
+    end
+
+    def setup_triple_metadata_export_file(id)
+      File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
+    end
+
+    def setup_bagit_folder(id)
+      File.join(importerexporter.exporter_export_path, id)
+    end
+
+    def write_triples(e)
+      sd = SolrDocument.find(e.identifier)
+      return if sd.nil?
+
+      req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
+      rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
+      File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
+        triples.write(rdf)
+      end
     end

     def required_elements?(keys)
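As a rough illustration (the example values are hypothetical; the Solr field names come from the parser code above), the kind of filter fragment #extra_filters builds for a date-bounded, open-visibility, deposited-workflow export:

# Hypothetical exporter settings rendered into the Solr filter string.
start_dt  = "2022-01-01T00:00:00Z"
finish_dt = "2022-03-31T23:59:59Z"
filters  = " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
filters += " AND visibility_ssi:open"
filters += " AND workflow_state_name_ssim:deposited"
puts filters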
@@ -126,11 +298,7 @@ module Bulkrax
     def bags
       return @bags if @bags.present?
       new_bag = bag(import_file_path)
-      @bags =
-        [new_bag]
-      else
-        Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
-      end
+      @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
       @bags.delete(nil)
       raise StandardError, 'No valid bags found' if @bags.blank?
       return @bags
data/app/parsers/bulkrax/csv_parser.rb
CHANGED
@@ -11,12 +11,15 @@ module Bulkrax
     end

     def records(_opts = {})
+      return @records if @records.present?
+
       file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
       # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
       csv_data = entry_class.read_data(file_for_import)
       importer.parser_fields['total'] = csv_data.count
       importer.save
-
+
+      @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
     end

     def build_records
@@ -145,7 +148,6 @@ module Bulkrax
                                        'Bulkrax::Importer',
                                        current_record.to_h)
         if current_record[:delete].present?
-          # TODO: create a "Delete" job for file_sets and collections
           "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
         else
           "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
data/app/views/bulkrax/exporters/_form.html.erb
CHANGED
@@ -60,6 +60,16 @@
                    hint: 'leave blank or 0 for all records',
                    label: t('bulkrax.exporter.labels.limit') %>

+    <%= form.input :generated_metadata?,
+                   as: :boolean,
+                   label: t('bulkrax.exporter.labels.generated_metadata'),
+                   hint: t('bulkrax.exporter.hints.generated_metadata') %>
+
+    <%= form.input :include_thumbnails?,
+                   as: :boolean,
+                   label: t('bulkrax.exporter.labels.include_thumbnails'),
+                   hint: t('bulkrax.exporter.hints.include_thumbnails') %>
+
     <%= form.input :date_filter,
                    as: :boolean,
                    label: t('bulkrax.exporter.labels.filter_by_date') %>
data/app/views/bulkrax/exporters/show.html.erb
CHANGED
@@ -57,6 +57,18 @@
       <strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
       <%= @exporter.limit %>
     </p>
+
+    <p class='bulkrax-p-align'>
+      <strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
+      <%= @exporter.generated_metadata %>
+    </p>
+
+    <p class='bulkrax-p-align'>
+      <strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
+      <%= @exporter.include_thumbnails %>
+    </p>
+
+
     <%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>

     <%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
data/app/views/bulkrax/importers/show.html.erb
CHANGED
@@ -178,7 +178,7 @@
             <% elsif e.status == "Pending" %>
               <td><span class="glyphicon glyphicon-option-horizontal" style="color: blue;"></span> <%= e.status %></td>
             <% else %>
-              <td><span class="glyphicon glyphicon-remove" style="color: red;"></span> <%= e.status %></td>
+              <td><span class="glyphicon glyphicon-remove" style="color: <%= e.status == 'Deleted' ? 'green' : 'red' %>;"></span> <%= e.status %></td>
             <% end %>
             <% if e.last_error.present? %>
               <td><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
data/config/locales/bulkrax.en.yml
CHANGED
@@ -16,6 +16,8 @@ en:
       filter_by_date: Filter By Date
       finish_date: End Date
       full: Metadata and Files
+      include_thumbnails: Include Thumbnails?
+      generated_metadata: Include Generated Metadata?
       importer: Importer
       limit: Limit
       metadata: Metadata Only
@@ -35,3 +37,6 @@ en:
       ingested: "Ingested"
       unapproved: "Unapproved"
       needs_repair: "Needs Repair"
+    hints:
+      include_thumbnails: "These exported fields currently cannot be imported."
+      generated_metadata: "These exported fields currently cannot be imported."
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bulkrax
 version: !ruby/object:Gem::Version
-  version: 3.1.1
+  version: 3.3.0
 platform: ruby
 authors:
 - Rob Kaufman
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-
+date: 2022-06-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -253,6 +253,7 @@ files:
 - app/controllers/bulkrax/exporters_controller.rb
 - app/controllers/bulkrax/importers_controller.rb
 - app/controllers/concerns/bulkrax/api.rb
+- app/controllers/concerns/bulkrax/download_behavior.rb
 - app/factories/bulkrax/object_factory.rb
 - app/helpers/bulkrax/application_helper.rb
 - app/helpers/bulkrax/exporters_helper.rb
@@ -260,6 +261,9 @@ files:
 - app/helpers/bulkrax/validation_helper.rb
 - app/jobs/bulkrax/application_job.rb
 - app/jobs/bulkrax/create_relationships_job.rb
+- app/jobs/bulkrax/delete_collection_job.rb
+- app/jobs/bulkrax/delete_file_set_job.rb
+- app/jobs/bulkrax/delete_job.rb
 - app/jobs/bulkrax/delete_work_job.rb
 - app/jobs/bulkrax/download_cloud_file_job.rb
 - app/jobs/bulkrax/export_work_job.rb
@@ -294,7 +298,6 @@ files:
 - app/models/bulkrax/rdf_file_set_entry.rb
 - app/models/bulkrax/status.rb
 - app/models/bulkrax/xml_entry.rb
-- app/models/concerns/bulkrax/download_behavior.rb
 - app/models/concerns/bulkrax/dynamic_record_lookup.rb
 - app/models/concerns/bulkrax/errored_entries.rb
 - app/models/concerns/bulkrax/export_behavior.rb
@@ -369,6 +372,8 @@ files:
 - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
 - db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
 - db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
+- db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
+- db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
 - lib/bulkrax.rb
 - lib/bulkrax/engine.rb
 - lib/bulkrax/version.rb
@@ -384,7 +389,7 @@ homepage: https://github.com/samvera-labs/bulkrax
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -399,8 +404,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.0.3
+signing_key:
 specification_version: 4
 summary: Import and export tool for Hyrax and Hyku
 test_files: []