bulkrax 3.2.0 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/exporters_controller.rb +4 -4
- data/app/{models → controllers}/concerns/bulkrax/download_behavior.rb +1 -5
- data/app/factories/bulkrax/object_factory.rb +2 -1
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +2 -0
- data/app/models/bulkrax/csv_entry.rb +20 -2
- data/app/models/bulkrax/entry.rb +12 -0
- data/app/models/bulkrax/exporter.rb +8 -0
- data/app/models/bulkrax/importer.rb +13 -10
- data/app/models/bulkrax/importer_run.rb +2 -1
- data/app/models/bulkrax/pending_relationship.rb +1 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +4 -1
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/status_info.rb +1 -1
- data/app/parsers/bulkrax/application_parser.rb +40 -3
- data/app/parsers/bulkrax/bagit_parser.rb +188 -20
- data/app/views/bulkrax/exporters/_form.html.erb +10 -0
- data/app/views/bulkrax/exporters/show.html.erb +12 -0
- data/config/locales/bulkrax.en.yml +5 -0
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +7 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +1 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b136a8742a7f9b953b4c3ef86d700540b931b6fbf22798d719e9aa693ea61fa9
|
4
|
+
data.tar.gz: e1cc32eda55a606285cf6e080340db608a8867f0bd4de8ab1361ca4a3d21adf9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87e35f340faa583a9ae6ac156f95fa4958fcaf9d3ed09a9963a8dda39649a66307a8c494375a989e2103c2d1edef62343f5b69878eae2249d63cc4b8f65240a8
|
7
|
+
data.tar.gz: 21ddafc671eda822144b6b73abfb89892ffac954f0692952985299f40f7d0e1aba472077aac72a1f681da315ba35de795f43d95df323c2178576716d02eafc75
|
@@ -101,12 +101,12 @@ module Bulkrax
|
|
101
101
|
def exporter_params
|
102
102
|
params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
|
103
103
|
if params[:exporter][:date_filter] == "1"
|
104
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
105
|
-
:parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
104
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
105
|
+
:include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
106
106
|
:workflow_status, field_mapping: {})
|
107
107
|
else
|
108
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
109
|
-
:parser_klass, :limit, :work_visibility, :workflow_status,
|
108
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
109
|
+
:include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
|
110
110
|
field_mapping: {}).merge(start_date: nil, finish_date: nil)
|
111
111
|
end
|
112
112
|
end
|
@@ -42,7 +42,7 @@ module Bulkrax
|
|
42
42
|
def send_file_contents
|
43
43
|
self.status = 200
|
44
44
|
prepare_file_headers
|
45
|
-
|
45
|
+
send_file file
|
46
46
|
end
|
47
47
|
|
48
48
|
def prepare_file_headers
|
@@ -53,9 +53,5 @@ module Bulkrax
|
|
53
53
|
response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
|
54
54
|
self.content_type = download_content_type
|
55
55
|
end
|
56
|
-
|
57
|
-
def stream_body(iostream)
|
58
|
-
self.response_body = iostream
|
59
|
-
end
|
60
56
|
end
|
61
57
|
end
|
@@ -61,6 +61,7 @@ module Bulkrax
|
|
61
61
|
work_actor.update(environment(attrs))
|
62
62
|
end
|
63
63
|
end
|
64
|
+
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
64
65
|
log_updated(object)
|
65
66
|
end
|
66
67
|
|
@@ -107,6 +108,7 @@ module Bulkrax
|
|
107
108
|
end
|
108
109
|
end
|
109
110
|
end
|
111
|
+
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
110
112
|
log_created(object)
|
111
113
|
end
|
112
114
|
|
@@ -141,7 +143,6 @@ module Bulkrax
|
|
141
143
|
attrs = clean_attrs(attrs)
|
142
144
|
attrs = collection_type(attrs)
|
143
145
|
object.attributes = attrs
|
144
|
-
object.apply_depositor_metadata(@user)
|
145
146
|
object.save!
|
146
147
|
end
|
147
148
|
|
@@ -33,7 +33,7 @@ module Bulkrax
|
|
33
33
|
# is the child in the relationship, and vice versa if a child_identifier is passed.
|
34
34
|
def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
|
35
35
|
pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
|
36
|
-
rel.
|
36
|
+
rel.importer_run_id == importer_run_id && rel.parent_id == parent_identifier
|
37
37
|
end.sort_by(&:order)
|
38
38
|
|
39
39
|
@importer_run_id = importer_run_id
|
@@ -12,6 +12,8 @@ module Bulkrax
|
|
12
12
|
import(importer, only_updates_since_last_import)
|
13
13
|
update_current_run_counters(importer)
|
14
14
|
schedule(importer) if importer.schedulable?
|
15
|
+
rescue CSV::MalformedCSVError => e
|
16
|
+
importer.status_info(e)
|
15
17
|
end
|
16
18
|
|
17
19
|
def import(importer, only_updates_since_last_import)
|
@@ -99,6 +99,7 @@ module Bulkrax
|
|
99
99
|
build_files_metadata unless hyrax_record.is_a?(Collection)
|
100
100
|
build_relationship_metadata
|
101
101
|
build_mapping_metadata
|
102
|
+
self.save!
|
102
103
|
|
103
104
|
self.parsed_metadata
|
104
105
|
end
|
@@ -113,9 +114,10 @@ module Bulkrax
|
|
113
114
|
def build_files_metadata
|
114
115
|
file_mapping = key_for_export('file')
|
115
116
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
116
|
-
filenames = file_sets
|
117
|
+
filenames = map_file_sets(file_sets)
|
117
118
|
|
118
119
|
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
120
|
+
build_thumbnail_files if hyrax_record.work?
|
119
121
|
end
|
120
122
|
|
121
123
|
def build_relationship_metadata
|
@@ -140,6 +142,7 @@ module Bulkrax
|
|
140
142
|
end
|
141
143
|
|
142
144
|
def build_mapping_metadata
|
145
|
+
mapping = fetch_field_mapping
|
143
146
|
mapping.each do |key, value|
|
144
147
|
# these keys are handled by other methods
|
145
148
|
next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
@@ -217,6 +220,16 @@ module Bulkrax
|
|
217
220
|
end
|
218
221
|
end
|
219
222
|
|
223
|
+
def build_thumbnail_files
|
224
|
+
return unless importerexporter.include_thumbnails
|
225
|
+
|
226
|
+
thumbnail_mapping = 'thumbnail_file'
|
227
|
+
file_sets = Array.wrap(hyrax_record.thumbnail)
|
228
|
+
|
229
|
+
filenames = map_file_sets(file_sets)
|
230
|
+
handle_join_on_export(thumbnail_mapping, filenames, false)
|
231
|
+
end
|
232
|
+
|
220
233
|
def handle_join_on_export(key, values, join)
|
221
234
|
if join
|
222
235
|
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
@@ -252,7 +265,6 @@ module Bulkrax
|
|
252
265
|
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
|
253
266
|
identifiers << matching_collection_entries.first&.identifier
|
254
267
|
end
|
255
|
-
|
256
268
|
@collection_identifiers = identifiers.compact.presence || []
|
257
269
|
end
|
258
270
|
|
@@ -283,5 +295,11 @@ module Bulkrax
|
|
283
295
|
return f if File.exist?(f)
|
284
296
|
raise "File #{f} does not exist"
|
285
297
|
end
|
298
|
+
|
299
|
+
private
|
300
|
+
|
301
|
+
def map_file_sets(file_sets)
|
302
|
+
file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
303
|
+
end
|
286
304
|
end
|
287
305
|
end
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,8 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
+
# TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
|
8
|
+
class BagitZipError < RuntimeError; end
|
7
9
|
class Entry < ApplicationRecord
|
8
10
|
include Bulkrax::HasMatchers
|
9
11
|
include Bulkrax::ImportBehavior
|
@@ -34,6 +36,7 @@ module Bulkrax
|
|
34
36
|
delegate :client,
|
35
37
|
:collection_name,
|
36
38
|
:user,
|
39
|
+
:generated_metadata_mapping,
|
37
40
|
:related_parents_raw_mapping,
|
38
41
|
:related_parents_parsed_mapping,
|
39
42
|
:related_children_raw_mapping,
|
@@ -70,6 +73,15 @@ module Bulkrax
|
|
70
73
|
parser&.work_identifier&.to_s || 'source'
|
71
74
|
end
|
72
75
|
|
76
|
+
# Returns field_mapping hash based on whether or not generated metadata should be included
|
77
|
+
def fetch_field_mapping
|
78
|
+
return self.mapping if importerexporter.generated_metadata
|
79
|
+
|
80
|
+
self.mapping.each do |key, value|
|
81
|
+
self.mapping.delete(key) if value[generated_metadata_mapping]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
73
85
|
def self.parent_field(parser)
|
74
86
|
parser.related_parents_parsed_mapping
|
75
87
|
end
|
@@ -51,6 +51,14 @@ module Bulkrax
|
|
51
51
|
self.start_date.present? || self.finish_date.present?
|
52
52
|
end
|
53
53
|
|
54
|
+
def include_thumbnails?
|
55
|
+
self.include_thumbnails
|
56
|
+
end
|
57
|
+
|
58
|
+
def generated_metadata?
|
59
|
+
self.generated_metadata
|
60
|
+
end
|
61
|
+
|
54
62
|
def work_visibility_list
|
55
63
|
[
|
56
64
|
['Any', ''],
|
@@ -96,16 +96,19 @@ module Bulkrax
|
|
96
96
|
end
|
97
97
|
|
98
98
|
def current_run
|
99
|
-
@current_run
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
99
|
+
return @current_run if @current_run.present?
|
100
|
+
|
101
|
+
@current_run = self.importer_runs.create!
|
102
|
+
return @current_run if file? && zip?
|
103
|
+
|
104
|
+
entry_counts = {
|
105
|
+
total_work_entries: self.limit || parser.works_total,
|
106
|
+
total_collection_entries: parser.collections_total,
|
107
|
+
total_file_set_entries: parser.file_sets_total
|
108
|
+
}
|
109
|
+
@current_run.update!(entry_counts)
|
110
|
+
|
111
|
+
@current_run
|
109
112
|
end
|
110
113
|
|
111
114
|
def last_run
|
@@ -4,9 +4,10 @@ module Bulkrax
|
|
4
4
|
class ImporterRun < ApplicationRecord
|
5
5
|
belongs_to :importer
|
6
6
|
has_many :statuses, as: :runnable, dependent: :destroy
|
7
|
+
has_many :pending_relationships, dependent: :destroy
|
7
8
|
|
8
9
|
def parents
|
9
|
-
|
10
|
+
pending_relationships.pluck(:parent_id).uniq
|
10
11
|
end
|
11
12
|
end
|
12
13
|
end
|
@@ -7,6 +7,8 @@ module Bulkrax
|
|
7
7
|
|
8
8
|
def build_for_exporter
|
9
9
|
build_export_metadata
|
10
|
+
# TODO(alishaevn): determine if the line below is still necessary
|
11
|
+
# the csv and bagit parsers also have write_files methods
|
10
12
|
write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
|
11
13
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
12
14
|
raise e
|
@@ -28,6 +30,7 @@ module Bulkrax
|
|
28
30
|
return if hyrax_record.is_a?(Collection)
|
29
31
|
|
30
32
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
33
|
+
file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
|
31
34
|
file_sets.each do |fs|
|
32
35
|
path = File.join(exporter_export_path, 'files')
|
33
36
|
FileUtils.mkdir_p(path)
|
@@ -48,7 +51,7 @@ module Bulkrax
|
|
48
51
|
fn = file_set.original_file.file_name.first
|
49
52
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
50
53
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
51
|
-
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
54
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
|
52
55
|
filename = "#{fn}.#{mime.to_sym}"
|
53
56
|
filename = fn if mime.to_s == ext_mime.to_s
|
54
57
|
else
|
@@ -28,5 +28,13 @@ module Bulkrax
|
|
28
28
|
|
29
29
|
raise StandardError, 'File set must be related to at least one work'
|
30
30
|
end
|
31
|
+
|
32
|
+
def parent_jobs
|
33
|
+
false # FileSet relationships are handled in ObjectFactory#create_file_set
|
34
|
+
end
|
35
|
+
|
36
|
+
def child_jobs
|
37
|
+
raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
|
38
|
+
end
|
31
39
|
end
|
32
40
|
end
|
@@ -50,7 +50,7 @@ module Bulkrax
|
|
50
50
|
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
51
51
|
next if parent_identifier.blank?
|
52
52
|
|
53
|
-
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier,
|
53
|
+
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
59
59
|
next if child_identifier.blank?
|
60
60
|
|
61
|
-
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier,
|
61
|
+
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
@@ -51,6 +51,10 @@ module Bulkrax
|
|
51
51
|
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
52
52
|
end
|
53
53
|
|
54
|
+
def generated_metadata_mapping
|
55
|
+
@generated_metadata_mapping ||= 'generated'
|
56
|
+
end
|
57
|
+
|
54
58
|
def related_parents_raw_mapping
|
55
59
|
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
56
60
|
end
|
@@ -242,15 +246,49 @@ module Bulkrax
|
|
242
246
|
def write
|
243
247
|
write_files
|
244
248
|
zip
|
249
|
+
# uncomment next line to debug for faulty zipping during bagit export
|
250
|
+
bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
|
245
251
|
end
|
246
252
|
|
247
253
|
def unzip(file_to_unzip)
|
248
|
-
|
254
|
+
Zip::File.open(file_to_unzip) do |zip_file|
|
255
|
+
zip_file.each do |entry|
|
256
|
+
entry_path = File.join(importer_unzip_path, entry.name)
|
257
|
+
FileUtils.mkdir_p(File.dirname(entry_path))
|
258
|
+
zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
|
259
|
+
end
|
260
|
+
end
|
249
261
|
end
|
250
262
|
|
251
263
|
def zip
|
264
|
+
require 'zip'
|
252
265
|
FileUtils.rm_rf(exporter_export_zip_path)
|
253
|
-
|
266
|
+
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
267
|
+
Dir["#{exporter_export_path}/**/**"].each do |file|
|
268
|
+
zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
# TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
|
274
|
+
def bagit_zip_file_size_check
|
275
|
+
Zip::File.open(exporter_export_zip_path) do |zip_file|
|
276
|
+
zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
|
277
|
+
Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
|
278
|
+
begin
|
279
|
+
raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
|
280
|
+
rescue BagitZipError => e
|
281
|
+
matched_entry_ids = importerexporter.entry_ids.select do |id|
|
282
|
+
Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
|
283
|
+
end
|
284
|
+
matched_entry_ids.each do |entry_id|
|
285
|
+
Bulkrax::Entry.find(entry_id).status_info(e)
|
286
|
+
status_info('Complete (with failures)')
|
287
|
+
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
291
|
+
end
|
254
292
|
end
|
255
293
|
|
256
294
|
# Is this a file?
|
@@ -272,7 +310,6 @@ module Bulkrax
|
|
272
310
|
|
273
311
|
def real_import_file_path
|
274
312
|
return importer_unzip_path if file? && zip?
|
275
|
-
|
276
313
|
parser_fields['import_file_path']
|
277
314
|
end
|
278
315
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class BagitParser < ApplicationParser
|
4
|
+
class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
5
|
+
include ExportBehavior
|
6
|
+
|
5
7
|
def self.export_supported?
|
6
|
-
|
8
|
+
true
|
7
9
|
end
|
8
10
|
|
9
11
|
def valid_import?
|
@@ -14,19 +16,11 @@ module Bulkrax
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def entry_class
|
17
|
-
parser_fields['metadata_format'
|
18
|
-
|
19
|
-
|
20
|
-
def collection_entry_class
|
21
|
-
parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
|
22
|
-
rescue
|
23
|
-
Entry
|
24
|
-
end
|
25
|
-
|
26
|
-
def file_set_entry_class
|
27
|
-
csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
|
28
|
-
csv_format ? CsvFileSetEntry : RdfFileSetEntry
|
19
|
+
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
20
|
+
rdf_format ? RdfEntry : CsvEntry
|
29
21
|
end
|
22
|
+
alias collection_entry_class entry_class
|
23
|
+
alias file_set_entry_class entry_class
|
30
24
|
|
31
25
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
32
26
|
def import_fields
|
@@ -101,7 +95,185 @@ module Bulkrax
|
|
101
95
|
end
|
102
96
|
|
103
97
|
def total
|
104
|
-
|
98
|
+
importerexporter.entries.count
|
99
|
+
end
|
100
|
+
|
101
|
+
def extra_filters
|
102
|
+
output = ""
|
103
|
+
if importerexporter.start_date.present?
|
104
|
+
start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
|
105
|
+
finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
|
106
|
+
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
107
|
+
end
|
108
|
+
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
109
|
+
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
110
|
+
output
|
111
|
+
end
|
112
|
+
|
113
|
+
def current_record_ids
|
114
|
+
@work_ids = []
|
115
|
+
@collection_ids = []
|
116
|
+
@file_set_ids = []
|
117
|
+
|
118
|
+
case importerexporter.export_from
|
119
|
+
when 'all'
|
120
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
121
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
122
|
+
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
123
|
+
when 'collection'
|
124
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
125
|
+
when 'worktype'
|
126
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
127
|
+
when 'importer'
|
128
|
+
set_ids_for_exporting_from_importer
|
129
|
+
end
|
130
|
+
|
131
|
+
@work_ids + @collection_ids + @file_set_ids
|
132
|
+
end
|
133
|
+
|
134
|
+
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
135
|
+
# @see #current_record_ids
|
136
|
+
def set_ids_for_exporting_from_importer
|
137
|
+
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
138
|
+
complete_statuses = Status.latest_by_statusable
|
139
|
+
.includes(:statusable)
|
140
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
141
|
+
|
142
|
+
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
143
|
+
extra_filters = extra_filters.presence || '*:*'
|
144
|
+
|
145
|
+
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
146
|
+
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
147
|
+
extra_filters.to_s,
|
148
|
+
fq: [
|
149
|
+
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
150
|
+
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
151
|
+
],
|
152
|
+
fl: 'id',
|
153
|
+
rows: 2_000_000_000
|
154
|
+
)['response']['docs'].map { |obj| obj['id'] })
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def create_new_entries
|
159
|
+
current_record_ids.each_with_index do |id, index|
|
160
|
+
break if limit_reached?(limit, index)
|
161
|
+
|
162
|
+
this_entry_class = if @collection_ids.include?(id)
|
163
|
+
collection_entry_class
|
164
|
+
elsif @file_set_ids.include?(id)
|
165
|
+
file_set_entry_class
|
166
|
+
else
|
167
|
+
entry_class
|
168
|
+
end
|
169
|
+
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
170
|
+
|
171
|
+
begin
|
172
|
+
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
173
|
+
rescue => e
|
174
|
+
Rails.logger.info("#{e.message} was detected during export")
|
175
|
+
end
|
176
|
+
|
177
|
+
self.headers |= entry.parsed_metadata.keys if entry
|
178
|
+
end
|
179
|
+
end
|
180
|
+
alias create_from_collection create_new_entries
|
181
|
+
alias create_from_importer create_new_entries
|
182
|
+
alias create_from_worktype create_new_entries
|
183
|
+
alias create_from_all create_new_entries
|
184
|
+
|
185
|
+
# export methods
|
186
|
+
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
188
|
+
def write_files
|
189
|
+
require 'open-uri'
|
190
|
+
require 'socket'
|
191
|
+
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
|
192
|
+
bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
|
193
|
+
w = ActiveFedora::Base.find(e.identifier)
|
194
|
+
next unless Hyrax.config.curation_concerns.include?(w.class)
|
195
|
+
|
196
|
+
w.file_sets.each do |fs|
|
197
|
+
file_name = filename(fs)
|
198
|
+
next if file_name.blank?
|
199
|
+
io = open(fs.original_file.uri)
|
200
|
+
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
201
|
+
file.write(io.read)
|
202
|
+
file.close
|
203
|
+
bag.add_file(file_name, file.path)
|
204
|
+
end
|
205
|
+
CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
206
|
+
csv << e.parsed_metadata
|
207
|
+
end
|
208
|
+
write_triples(e)
|
209
|
+
bag.manifest!(algo: 'sha256')
|
210
|
+
end
|
211
|
+
end
|
212
|
+
# rubocop:enable Metrics/AbcSize
|
213
|
+
|
214
|
+
def setup_csv_metadata_export_file(id)
|
215
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
|
216
|
+
end
|
217
|
+
|
218
|
+
def key_allowed(key)
|
219
|
+
!Bulkrax.reserved_properties.include?(key) &&
|
220
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
221
|
+
key != source_identifier.to_s
|
222
|
+
end
|
223
|
+
|
224
|
+
# All possible column names
|
225
|
+
def export_headers
|
226
|
+
headers = sort_headers(self.headers)
|
227
|
+
|
228
|
+
# we don't want access_control_id exported and we want file at the end
|
229
|
+
headers.delete('access_control_id') if headers.include?('access_control_id')
|
230
|
+
|
231
|
+
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
232
|
+
headers.prepend('model')
|
233
|
+
headers.prepend(source_identifier.to_s)
|
234
|
+
headers.prepend('id')
|
235
|
+
|
236
|
+
headers.uniq
|
237
|
+
end
|
238
|
+
|
239
|
+
def object_names
|
240
|
+
return @object_names if @object_names
|
241
|
+
|
242
|
+
@object_names = mapping.values.map { |value| value['object'] }
|
243
|
+
@object_names.uniq!.delete(nil)
|
244
|
+
|
245
|
+
@object_names
|
246
|
+
end
|
247
|
+
|
248
|
+
def sort_headers(headers)
|
249
|
+
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
250
|
+
# while keeping objects grouped together
|
251
|
+
headers.sort_by do |item|
|
252
|
+
number = item.match(/\d+/)&.[](0) || 0.to_s
|
253
|
+
sort_number = number.rjust(4, "0")
|
254
|
+
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
255
|
+
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
256
|
+
"#{object_prefix}_#{sort_number}_#{remainder}"
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
def setup_triple_metadata_export_file(id)
|
261
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
|
262
|
+
end
|
263
|
+
|
264
|
+
def setup_bagit_folder(id)
|
265
|
+
File.join(importerexporter.exporter_export_path, id)
|
266
|
+
end
|
267
|
+
|
268
|
+
def write_triples(e)
|
269
|
+
sd = SolrDocument.find(e.identifier)
|
270
|
+
return if sd.nil?
|
271
|
+
|
272
|
+
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
273
|
+
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
274
|
+
File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
|
275
|
+
triples.write(rdf)
|
276
|
+
end
|
105
277
|
end
|
106
278
|
|
107
279
|
def required_elements?(keys)
|
@@ -126,11 +298,7 @@ module Bulkrax
|
|
126
298
|
def bags
|
127
299
|
return @bags if @bags.present?
|
128
300
|
new_bag = bag(import_file_path)
|
129
|
-
@bags =
|
130
|
-
[new_bag]
|
131
|
-
else
|
132
|
-
Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
133
|
-
end
|
301
|
+
@bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
134
302
|
@bags.delete(nil)
|
135
303
|
raise StandardError, 'No valid bags found' if @bags.blank?
|
136
304
|
return @bags
|
@@ -60,6 +60,16 @@
|
|
60
60
|
hint: 'leave blank or 0 for all records',
|
61
61
|
label: t('bulkrax.exporter.labels.limit') %>
|
62
62
|
|
63
|
+
<%= form.input :generated_metadata?,
|
64
|
+
as: :boolean,
|
65
|
+
label: t('bulkrax.exporter.labels.generated_metadata'),
|
66
|
+
hint: t('bulkrax.exporter.hints.generated_metadata') %>
|
67
|
+
|
68
|
+
<%= form.input :include_thumbnails?,
|
69
|
+
as: :boolean,
|
70
|
+
label: t('bulkrax.exporter.labels.include_thumbnails'),
|
71
|
+
hint: t('bulkrax.exporter.hints.include_thumbnails') %>
|
72
|
+
|
63
73
|
<%= form.input :date_filter,
|
64
74
|
as: :boolean,
|
65
75
|
label: t('bulkrax.exporter.labels.filter_by_date') %>
|
@@ -57,6 +57,18 @@
|
|
57
57
|
<strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
|
58
58
|
<%= @exporter.limit %>
|
59
59
|
</p>
|
60
|
+
|
61
|
+
<p class='bulkrax-p-align'>
|
62
|
+
<strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
|
63
|
+
<%= @exporter.generated_metadata %>
|
64
|
+
</p>
|
65
|
+
|
66
|
+
<p class='bulkrax-p-align'>
|
67
|
+
<strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
|
68
|
+
<%= @exporter.include_thumbnails %>
|
69
|
+
</p>
|
70
|
+
|
71
|
+
|
60
72
|
<%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
|
61
73
|
|
62
74
|
<%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
|
@@ -16,6 +16,8 @@ en:
|
|
16
16
|
filter_by_date: Filter By Date
|
17
17
|
finish_date: End Date
|
18
18
|
full: Metadata and Files
|
19
|
+
include_thumbnails: Include Thumbnails?
|
20
|
+
generated_metadata: Include Generated Metadata?
|
19
21
|
importer: Importer
|
20
22
|
limit: Limit
|
21
23
|
metadata: Metadata Only
|
@@ -35,3 +37,6 @@ en:
|
|
35
37
|
ingested: "Ingested"
|
36
38
|
unapproved: "Unapproved"
|
37
39
|
needs_repair: "Needs Repair"
|
40
|
+
hints:
|
41
|
+
include_thumbnails: "These exported fields currently cannot be imported."
|
42
|
+
generated_metadata: "These exported fields currently cannot be imported."
|
@@ -0,0 +1,7 @@
|
|
1
|
+
class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
|
4
|
+
rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
|
5
|
+
end
|
6
|
+
end
|
7
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.0
|
4
|
+
version: 3.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -253,6 +253,7 @@ files:
|
|
253
253
|
- app/controllers/bulkrax/exporters_controller.rb
|
254
254
|
- app/controllers/bulkrax/importers_controller.rb
|
255
255
|
- app/controllers/concerns/bulkrax/api.rb
|
256
|
+
- app/controllers/concerns/bulkrax/download_behavior.rb
|
256
257
|
- app/factories/bulkrax/object_factory.rb
|
257
258
|
- app/helpers/bulkrax/application_helper.rb
|
258
259
|
- app/helpers/bulkrax/exporters_helper.rb
|
@@ -297,7 +298,6 @@ files:
|
|
297
298
|
- app/models/bulkrax/rdf_file_set_entry.rb
|
298
299
|
- app/models/bulkrax/status.rb
|
299
300
|
- app/models/bulkrax/xml_entry.rb
|
300
|
-
- app/models/concerns/bulkrax/download_behavior.rb
|
301
301
|
- app/models/concerns/bulkrax/dynamic_record_lookup.rb
|
302
302
|
- app/models/concerns/bulkrax/errored_entries.rb
|
303
303
|
- app/models/concerns/bulkrax/export_behavior.rb
|
@@ -372,6 +372,9 @@ files:
|
|
372
372
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
373
373
|
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
374
374
|
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
375
|
+
- db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
|
376
|
+
- db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
|
377
|
+
- db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb
|
375
378
|
- lib/bulkrax.rb
|
376
379
|
- lib/bulkrax/engine.rb
|
377
380
|
- lib/bulkrax/version.rb
|