bulkrax 3.2.0 → 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/exporters_controller.rb +4 -4
- data/app/{models → controllers}/concerns/bulkrax/download_behavior.rb +1 -5
- data/app/factories/bulkrax/object_factory.rb +2 -1
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +2 -0
- data/app/models/bulkrax/csv_entry.rb +20 -2
- data/app/models/bulkrax/entry.rb +12 -0
- data/app/models/bulkrax/exporter.rb +8 -0
- data/app/models/bulkrax/importer.rb +13 -10
- data/app/models/bulkrax/importer_run.rb +2 -1
- data/app/models/bulkrax/pending_relationship.rb +1 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +4 -1
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/status_info.rb +1 -1
- data/app/parsers/bulkrax/application_parser.rb +40 -3
- data/app/parsers/bulkrax/bagit_parser.rb +188 -20
- data/app/views/bulkrax/exporters/_form.html.erb +10 -0
- data/app/views/bulkrax/exporters/show.html.erb +12 -0
- data/config/locales/bulkrax.en.yml +5 -0
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +5 -0
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +7 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +1 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b136a8742a7f9b953b4c3ef86d700540b931b6fbf22798d719e9aa693ea61fa9
|
4
|
+
data.tar.gz: e1cc32eda55a606285cf6e080340db608a8867f0bd4de8ab1361ca4a3d21adf9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87e35f340faa583a9ae6ac156f95fa4958fcaf9d3ed09a9963a8dda39649a66307a8c494375a989e2103c2d1edef62343f5b69878eae2249d63cc4b8f65240a8
|
7
|
+
data.tar.gz: 21ddafc671eda822144b6b73abfb89892ffac954f0692952985299f40f7d0e1aba472077aac72a1f681da315ba35de795f43d95df323c2178576716d02eafc75
|
@@ -101,12 +101,12 @@ module Bulkrax
|
|
101
101
|
def exporter_params
|
102
102
|
params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
|
103
103
|
if params[:exporter][:date_filter] == "1"
|
104
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
105
|
-
:parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
104
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
105
|
+
:include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
|
106
106
|
:workflow_status, field_mapping: {})
|
107
107
|
else
|
108
|
-
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
|
109
|
-
:parser_klass, :limit, :work_visibility, :workflow_status,
|
108
|
+
params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
|
109
|
+
:include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
|
110
110
|
field_mapping: {}).merge(start_date: nil, finish_date: nil)
|
111
111
|
end
|
112
112
|
end
|
@@ -42,7 +42,7 @@ module Bulkrax
|
|
42
42
|
def send_file_contents
|
43
43
|
self.status = 200
|
44
44
|
prepare_file_headers
|
45
|
-
|
45
|
+
send_file file
|
46
46
|
end
|
47
47
|
|
48
48
|
def prepare_file_headers
|
@@ -53,9 +53,5 @@ module Bulkrax
|
|
53
53
|
response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
|
54
54
|
self.content_type = download_content_type
|
55
55
|
end
|
56
|
-
|
57
|
-
def stream_body(iostream)
|
58
|
-
self.response_body = iostream
|
59
|
-
end
|
60
56
|
end
|
61
57
|
end
|
@@ -61,6 +61,7 @@ module Bulkrax
|
|
61
61
|
work_actor.update(environment(attrs))
|
62
62
|
end
|
63
63
|
end
|
64
|
+
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
64
65
|
log_updated(object)
|
65
66
|
end
|
66
67
|
|
@@ -107,6 +108,7 @@ module Bulkrax
|
|
107
108
|
end
|
108
109
|
end
|
109
110
|
end
|
111
|
+
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
110
112
|
log_created(object)
|
111
113
|
end
|
112
114
|
|
@@ -141,7 +143,6 @@ module Bulkrax
|
|
141
143
|
attrs = clean_attrs(attrs)
|
142
144
|
attrs = collection_type(attrs)
|
143
145
|
object.attributes = attrs
|
144
|
-
object.apply_depositor_metadata(@user)
|
145
146
|
object.save!
|
146
147
|
end
|
147
148
|
|
@@ -33,7 +33,7 @@ module Bulkrax
|
|
33
33
|
# is the child in the relationship, and vice versa if a child_identifier is passed.
|
34
34
|
def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
|
35
35
|
pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
|
36
|
-
rel.
|
36
|
+
rel.importer_run_id == importer_run_id && rel.parent_id == parent_identifier
|
37
37
|
end.sort_by(&:order)
|
38
38
|
|
39
39
|
@importer_run_id = importer_run_id
|
@@ -12,6 +12,8 @@ module Bulkrax
|
|
12
12
|
import(importer, only_updates_since_last_import)
|
13
13
|
update_current_run_counters(importer)
|
14
14
|
schedule(importer) if importer.schedulable?
|
15
|
+
rescue CSV::MalformedCSVError => e
|
16
|
+
importer.status_info(e)
|
15
17
|
end
|
16
18
|
|
17
19
|
def import(importer, only_updates_since_last_import)
|
@@ -99,6 +99,7 @@ module Bulkrax
|
|
99
99
|
build_files_metadata unless hyrax_record.is_a?(Collection)
|
100
100
|
build_relationship_metadata
|
101
101
|
build_mapping_metadata
|
102
|
+
self.save!
|
102
103
|
|
103
104
|
self.parsed_metadata
|
104
105
|
end
|
@@ -113,9 +114,10 @@ module Bulkrax
|
|
113
114
|
def build_files_metadata
|
114
115
|
file_mapping = key_for_export('file')
|
115
116
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
116
|
-
filenames = file_sets
|
117
|
+
filenames = map_file_sets(file_sets)
|
117
118
|
|
118
119
|
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
120
|
+
build_thumbnail_files if hyrax_record.work?
|
119
121
|
end
|
120
122
|
|
121
123
|
def build_relationship_metadata
|
@@ -140,6 +142,7 @@ module Bulkrax
|
|
140
142
|
end
|
141
143
|
|
142
144
|
def build_mapping_metadata
|
145
|
+
mapping = fetch_field_mapping
|
143
146
|
mapping.each do |key, value|
|
144
147
|
# these keys are handled by other methods
|
145
148
|
next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
@@ -217,6 +220,16 @@ module Bulkrax
|
|
217
220
|
end
|
218
221
|
end
|
219
222
|
|
223
|
+
def build_thumbnail_files
|
224
|
+
return unless importerexporter.include_thumbnails
|
225
|
+
|
226
|
+
thumbnail_mapping = 'thumbnail_file'
|
227
|
+
file_sets = Array.wrap(hyrax_record.thumbnail)
|
228
|
+
|
229
|
+
filenames = map_file_sets(file_sets)
|
230
|
+
handle_join_on_export(thumbnail_mapping, filenames, false)
|
231
|
+
end
|
232
|
+
|
220
233
|
def handle_join_on_export(key, values, join)
|
221
234
|
if join
|
222
235
|
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
@@ -252,7 +265,6 @@ module Bulkrax
|
|
252
265
|
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
|
253
266
|
identifiers << matching_collection_entries.first&.identifier
|
254
267
|
end
|
255
|
-
|
256
268
|
@collection_identifiers = identifiers.compact.presence || []
|
257
269
|
end
|
258
270
|
|
@@ -283,5 +295,11 @@ module Bulkrax
|
|
283
295
|
return f if File.exist?(f)
|
284
296
|
raise "File #{f} does not exist"
|
285
297
|
end
|
298
|
+
|
299
|
+
private
|
300
|
+
|
301
|
+
def map_file_sets(file_sets)
|
302
|
+
file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
303
|
+
end
|
286
304
|
end
|
287
305
|
end
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,8 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
+
# TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
|
8
|
+
class BagitZipError < RuntimeError; end
|
7
9
|
class Entry < ApplicationRecord
|
8
10
|
include Bulkrax::HasMatchers
|
9
11
|
include Bulkrax::ImportBehavior
|
@@ -34,6 +36,7 @@ module Bulkrax
|
|
34
36
|
delegate :client,
|
35
37
|
:collection_name,
|
36
38
|
:user,
|
39
|
+
:generated_metadata_mapping,
|
37
40
|
:related_parents_raw_mapping,
|
38
41
|
:related_parents_parsed_mapping,
|
39
42
|
:related_children_raw_mapping,
|
@@ -70,6 +73,15 @@ module Bulkrax
|
|
70
73
|
parser&.work_identifier&.to_s || 'source'
|
71
74
|
end
|
72
75
|
|
76
|
+
# Returns field_mapping hash based on whether or not generated metadata should be included
|
77
|
+
def fetch_field_mapping
|
78
|
+
return self.mapping if importerexporter.generated_metadata
|
79
|
+
|
80
|
+
self.mapping.each do |key, value|
|
81
|
+
self.mapping.delete(key) if value[generated_metadata_mapping]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
73
85
|
def self.parent_field(parser)
|
74
86
|
parser.related_parents_parsed_mapping
|
75
87
|
end
|
@@ -51,6 +51,14 @@ module Bulkrax
|
|
51
51
|
self.start_date.present? || self.finish_date.present?
|
52
52
|
end
|
53
53
|
|
54
|
+
def include_thumbnails?
|
55
|
+
self.include_thumbnails
|
56
|
+
end
|
57
|
+
|
58
|
+
def generated_metadata?
|
59
|
+
self.generated_metadata
|
60
|
+
end
|
61
|
+
|
54
62
|
def work_visibility_list
|
55
63
|
[
|
56
64
|
['Any', ''],
|
@@ -96,16 +96,19 @@ module Bulkrax
|
|
96
96
|
end
|
97
97
|
|
98
98
|
def current_run
|
99
|
-
@current_run
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
99
|
+
return @current_run if @current_run.present?
|
100
|
+
|
101
|
+
@current_run = self.importer_runs.create!
|
102
|
+
return @current_run if file? && zip?
|
103
|
+
|
104
|
+
entry_counts = {
|
105
|
+
total_work_entries: self.limit || parser.works_total,
|
106
|
+
total_collection_entries: parser.collections_total,
|
107
|
+
total_file_set_entries: parser.file_sets_total
|
108
|
+
}
|
109
|
+
@current_run.update!(entry_counts)
|
110
|
+
|
111
|
+
@current_run
|
109
112
|
end
|
110
113
|
|
111
114
|
def last_run
|
@@ -4,9 +4,10 @@ module Bulkrax
|
|
4
4
|
class ImporterRun < ApplicationRecord
|
5
5
|
belongs_to :importer
|
6
6
|
has_many :statuses, as: :runnable, dependent: :destroy
|
7
|
+
has_many :pending_relationships, dependent: :destroy
|
7
8
|
|
8
9
|
def parents
|
9
|
-
|
10
|
+
pending_relationships.pluck(:parent_id).uniq
|
10
11
|
end
|
11
12
|
end
|
12
13
|
end
|
@@ -7,6 +7,8 @@ module Bulkrax
|
|
7
7
|
|
8
8
|
def build_for_exporter
|
9
9
|
build_export_metadata
|
10
|
+
# TODO(alishaevn): determine if the line below is still necessary
|
11
|
+
# the csv and bagit parsers also have write_files methods
|
10
12
|
write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
|
11
13
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
12
14
|
raise e
|
@@ -28,6 +30,7 @@ module Bulkrax
|
|
28
30
|
return if hyrax_record.is_a?(Collection)
|
29
31
|
|
30
32
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
33
|
+
file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
|
31
34
|
file_sets.each do |fs|
|
32
35
|
path = File.join(exporter_export_path, 'files')
|
33
36
|
FileUtils.mkdir_p(path)
|
@@ -48,7 +51,7 @@ module Bulkrax
|
|
48
51
|
fn = file_set.original_file.file_name.first
|
49
52
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
50
53
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
51
|
-
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
54
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
|
52
55
|
filename = "#{fn}.#{mime.to_sym}"
|
53
56
|
filename = fn if mime.to_s == ext_mime.to_s
|
54
57
|
else
|
@@ -28,5 +28,13 @@ module Bulkrax
|
|
28
28
|
|
29
29
|
raise StandardError, 'File set must be related to at least one work'
|
30
30
|
end
|
31
|
+
|
32
|
+
def parent_jobs
|
33
|
+
false # FileSet relationships are handled in ObjectFactory#create_file_set
|
34
|
+
end
|
35
|
+
|
36
|
+
def child_jobs
|
37
|
+
raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
|
38
|
+
end
|
31
39
|
end
|
32
40
|
end
|
@@ -50,7 +50,7 @@ module Bulkrax
|
|
50
50
|
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
51
51
|
next if parent_identifier.blank?
|
52
52
|
|
53
|
-
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier,
|
53
|
+
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
59
59
|
next if child_identifier.blank?
|
60
60
|
|
61
|
-
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier,
|
61
|
+
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
@@ -51,6 +51,10 @@ module Bulkrax
|
|
51
51
|
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
52
52
|
end
|
53
53
|
|
54
|
+
def generated_metadata_mapping
|
55
|
+
@generated_metadata_mapping ||= 'generated'
|
56
|
+
end
|
57
|
+
|
54
58
|
def related_parents_raw_mapping
|
55
59
|
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
56
60
|
end
|
@@ -242,15 +246,49 @@ module Bulkrax
|
|
242
246
|
def write
|
243
247
|
write_files
|
244
248
|
zip
|
249
|
+
# uncomment next line to debug for faulty zipping during bagit export
|
250
|
+
bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
|
245
251
|
end
|
246
252
|
|
247
253
|
def unzip(file_to_unzip)
|
248
|
-
|
254
|
+
Zip::File.open(file_to_unzip) do |zip_file|
|
255
|
+
zip_file.each do |entry|
|
256
|
+
entry_path = File.join(importer_unzip_path, entry.name)
|
257
|
+
FileUtils.mkdir_p(File.dirname(entry_path))
|
258
|
+
zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
|
259
|
+
end
|
260
|
+
end
|
249
261
|
end
|
250
262
|
|
251
263
|
def zip
|
264
|
+
require 'zip'
|
252
265
|
FileUtils.rm_rf(exporter_export_zip_path)
|
253
|
-
|
266
|
+
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
267
|
+
Dir["#{exporter_export_path}/**/**"].each do |file|
|
268
|
+
zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
# TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
|
274
|
+
def bagit_zip_file_size_check
|
275
|
+
Zip::File.open(exporter_export_zip_path) do |zip_file|
|
276
|
+
zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
|
277
|
+
Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
|
278
|
+
begin
|
279
|
+
raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
|
280
|
+
rescue BagitZipError => e
|
281
|
+
matched_entry_ids = importerexporter.entry_ids.select do |id|
|
282
|
+
Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
|
283
|
+
end
|
284
|
+
matched_entry_ids.each do |entry_id|
|
285
|
+
Bulkrax::Entry.find(entry_id).status_info(e)
|
286
|
+
status_info('Complete (with failures)')
|
287
|
+
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
291
|
+
end
|
254
292
|
end
|
255
293
|
|
256
294
|
# Is this a file?
|
@@ -272,7 +310,6 @@ module Bulkrax
|
|
272
310
|
|
273
311
|
def real_import_file_path
|
274
312
|
return importer_unzip_path if file? && zip?
|
275
|
-
|
276
313
|
parser_fields['import_file_path']
|
277
314
|
end
|
278
315
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class BagitParser < ApplicationParser
|
4
|
+
class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
5
|
+
include ExportBehavior
|
6
|
+
|
5
7
|
def self.export_supported?
|
6
|
-
|
8
|
+
true
|
7
9
|
end
|
8
10
|
|
9
11
|
def valid_import?
|
@@ -14,19 +16,11 @@ module Bulkrax
|
|
14
16
|
end
|
15
17
|
|
16
18
|
def entry_class
|
17
|
-
parser_fields['metadata_format'
|
18
|
-
|
19
|
-
|
20
|
-
def collection_entry_class
|
21
|
-
parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
|
22
|
-
rescue
|
23
|
-
Entry
|
24
|
-
end
|
25
|
-
|
26
|
-
def file_set_entry_class
|
27
|
-
csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
|
28
|
-
csv_format ? CsvFileSetEntry : RdfFileSetEntry
|
19
|
+
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
20
|
+
rdf_format ? RdfEntry : CsvEntry
|
29
21
|
end
|
22
|
+
alias collection_entry_class entry_class
|
23
|
+
alias file_set_entry_class entry_class
|
30
24
|
|
31
25
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
32
26
|
def import_fields
|
@@ -101,7 +95,185 @@ module Bulkrax
|
|
101
95
|
end
|
102
96
|
|
103
97
|
def total
|
104
|
-
|
98
|
+
importerexporter.entries.count
|
99
|
+
end
|
100
|
+
|
101
|
+
def extra_filters
|
102
|
+
output = ""
|
103
|
+
if importerexporter.start_date.present?
|
104
|
+
start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
|
105
|
+
finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
|
106
|
+
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
107
|
+
end
|
108
|
+
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
109
|
+
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
110
|
+
output
|
111
|
+
end
|
112
|
+
|
113
|
+
def current_record_ids
|
114
|
+
@work_ids = []
|
115
|
+
@collection_ids = []
|
116
|
+
@file_set_ids = []
|
117
|
+
|
118
|
+
case importerexporter.export_from
|
119
|
+
when 'all'
|
120
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
121
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
122
|
+
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
123
|
+
when 'collection'
|
124
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
125
|
+
when 'worktype'
|
126
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
127
|
+
when 'importer'
|
128
|
+
set_ids_for_exporting_from_importer
|
129
|
+
end
|
130
|
+
|
131
|
+
@work_ids + @collection_ids + @file_set_ids
|
132
|
+
end
|
133
|
+
|
134
|
+
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
135
|
+
# @see #current_record_ids
|
136
|
+
def set_ids_for_exporting_from_importer
|
137
|
+
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
138
|
+
complete_statuses = Status.latest_by_statusable
|
139
|
+
.includes(:statusable)
|
140
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
141
|
+
|
142
|
+
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
143
|
+
extra_filters = extra_filters.presence || '*:*'
|
144
|
+
|
145
|
+
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
146
|
+
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
147
|
+
extra_filters.to_s,
|
148
|
+
fq: [
|
149
|
+
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
150
|
+
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
151
|
+
],
|
152
|
+
fl: 'id',
|
153
|
+
rows: 2_000_000_000
|
154
|
+
)['response']['docs'].map { |obj| obj['id'] })
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def create_new_entries
|
159
|
+
current_record_ids.each_with_index do |id, index|
|
160
|
+
break if limit_reached?(limit, index)
|
161
|
+
|
162
|
+
this_entry_class = if @collection_ids.include?(id)
|
163
|
+
collection_entry_class
|
164
|
+
elsif @file_set_ids.include?(id)
|
165
|
+
file_set_entry_class
|
166
|
+
else
|
167
|
+
entry_class
|
168
|
+
end
|
169
|
+
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
170
|
+
|
171
|
+
begin
|
172
|
+
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
173
|
+
rescue => e
|
174
|
+
Rails.logger.info("#{e.message} was detected during export")
|
175
|
+
end
|
176
|
+
|
177
|
+
self.headers |= entry.parsed_metadata.keys if entry
|
178
|
+
end
|
179
|
+
end
|
180
|
+
alias create_from_collection create_new_entries
|
181
|
+
alias create_from_importer create_new_entries
|
182
|
+
alias create_from_worktype create_new_entries
|
183
|
+
alias create_from_all create_new_entries
|
184
|
+
|
185
|
+
# export methods
|
186
|
+
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
188
|
+
def write_files
|
189
|
+
require 'open-uri'
|
190
|
+
require 'socket'
|
191
|
+
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
|
192
|
+
bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
|
193
|
+
w = ActiveFedora::Base.find(e.identifier)
|
194
|
+
next unless Hyrax.config.curation_concerns.include?(w.class)
|
195
|
+
|
196
|
+
w.file_sets.each do |fs|
|
197
|
+
file_name = filename(fs)
|
198
|
+
next if file_name.blank?
|
199
|
+
io = open(fs.original_file.uri)
|
200
|
+
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
201
|
+
file.write(io.read)
|
202
|
+
file.close
|
203
|
+
bag.add_file(file_name, file.path)
|
204
|
+
end
|
205
|
+
CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
206
|
+
csv << e.parsed_metadata
|
207
|
+
end
|
208
|
+
write_triples(e)
|
209
|
+
bag.manifest!(algo: 'sha256')
|
210
|
+
end
|
211
|
+
end
|
212
|
+
# rubocop:enable Metrics/AbcSize
|
213
|
+
|
214
|
+
def setup_csv_metadata_export_file(id)
|
215
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
|
216
|
+
end
|
217
|
+
|
218
|
+
def key_allowed(key)
|
219
|
+
!Bulkrax.reserved_properties.include?(key) &&
|
220
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
221
|
+
key != source_identifier.to_s
|
222
|
+
end
|
223
|
+
|
224
|
+
# All possible column names
|
225
|
+
def export_headers
|
226
|
+
headers = sort_headers(self.headers)
|
227
|
+
|
228
|
+
# we don't want access_control_id exported and we want file at the end
|
229
|
+
headers.delete('access_control_id') if headers.include?('access_control_id')
|
230
|
+
|
231
|
+
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
232
|
+
headers.prepend('model')
|
233
|
+
headers.prepend(source_identifier.to_s)
|
234
|
+
headers.prepend('id')
|
235
|
+
|
236
|
+
headers.uniq
|
237
|
+
end
|
238
|
+
|
239
|
+
def object_names
|
240
|
+
return @object_names if @object_names
|
241
|
+
|
242
|
+
@object_names = mapping.values.map { |value| value['object'] }
|
243
|
+
@object_names.uniq!.delete(nil)
|
244
|
+
|
245
|
+
@object_names
|
246
|
+
end
|
247
|
+
|
248
|
+
def sort_headers(headers)
|
249
|
+
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
250
|
+
# while keeping objects grouped together
|
251
|
+
headers.sort_by do |item|
|
252
|
+
number = item.match(/\d+/)&.[](0) || 0.to_s
|
253
|
+
sort_number = number.rjust(4, "0")
|
254
|
+
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
255
|
+
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
256
|
+
"#{object_prefix}_#{sort_number}_#{remainder}"
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
def setup_triple_metadata_export_file(id)
|
261
|
+
File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
|
262
|
+
end
|
263
|
+
|
264
|
+
def setup_bagit_folder(id)
|
265
|
+
File.join(importerexporter.exporter_export_path, id)
|
266
|
+
end
|
267
|
+
|
268
|
+
def write_triples(e)
|
269
|
+
sd = SolrDocument.find(e.identifier)
|
270
|
+
return if sd.nil?
|
271
|
+
|
272
|
+
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
273
|
+
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
274
|
+
File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
|
275
|
+
triples.write(rdf)
|
276
|
+
end
|
105
277
|
end
|
106
278
|
|
107
279
|
def required_elements?(keys)
|
@@ -126,11 +298,7 @@ module Bulkrax
|
|
126
298
|
def bags
|
127
299
|
return @bags if @bags.present?
|
128
300
|
new_bag = bag(import_file_path)
|
129
|
-
@bags =
|
130
|
-
[new_bag]
|
131
|
-
else
|
132
|
-
Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
133
|
-
end
|
301
|
+
@bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
134
302
|
@bags.delete(nil)
|
135
303
|
raise StandardError, 'No valid bags found' if @bags.blank?
|
136
304
|
return @bags
|
@@ -60,6 +60,16 @@
|
|
60
60
|
hint: 'leave blank or 0 for all records',
|
61
61
|
label: t('bulkrax.exporter.labels.limit') %>
|
62
62
|
|
63
|
+
<%= form.input :generated_metadata?,
|
64
|
+
as: :boolean,
|
65
|
+
label: t('bulkrax.exporter.labels.generated_metadata'),
|
66
|
+
hint: t('bulkrax.exporter.hints.generated_metadata') %>
|
67
|
+
|
68
|
+
<%= form.input :include_thumbnails?,
|
69
|
+
as: :boolean,
|
70
|
+
label: t('bulkrax.exporter.labels.include_thumbnails'),
|
71
|
+
hint: t('bulkrax.exporter.hints.include_thumbnails') %>
|
72
|
+
|
63
73
|
<%= form.input :date_filter,
|
64
74
|
as: :boolean,
|
65
75
|
label: t('bulkrax.exporter.labels.filter_by_date') %>
|
@@ -57,6 +57,18 @@
|
|
57
57
|
<strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
|
58
58
|
<%= @exporter.limit %>
|
59
59
|
</p>
|
60
|
+
|
61
|
+
<p class='bulkrax-p-align'>
|
62
|
+
<strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
|
63
|
+
<%= @exporter.generated_metadata %>
|
64
|
+
</p>
|
65
|
+
|
66
|
+
<p class='bulkrax-p-align'>
|
67
|
+
<strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
|
68
|
+
<%= @exporter.include_thumbnails %>
|
69
|
+
</p>
|
70
|
+
|
71
|
+
|
60
72
|
<%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
|
61
73
|
|
62
74
|
<%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
|
@@ -16,6 +16,8 @@ en:
|
|
16
16
|
filter_by_date: Filter By Date
|
17
17
|
finish_date: End Date
|
18
18
|
full: Metadata and Files
|
19
|
+
include_thumbnails: Include Thumbnails?
|
20
|
+
generated_metadata: Include Generated Metadata?
|
19
21
|
importer: Importer
|
20
22
|
limit: Limit
|
21
23
|
metadata: Metadata Only
|
@@ -35,3 +37,6 @@ en:
|
|
35
37
|
ingested: "Ingested"
|
36
38
|
unapproved: "Unapproved"
|
37
39
|
needs_repair: "Needs Repair"
|
40
|
+
hints:
|
41
|
+
include_thumbnails: "These exported fields currently cannot be imported."
|
42
|
+
generated_metadata: "These exported fields currently cannot be imported."
|
@@ -0,0 +1,7 @@
|
|
1
|
+
class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
|
4
|
+
rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
|
5
|
+
end
|
6
|
+
end
|
7
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2
|
4
|
+
version: 3.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -253,6 +253,7 @@ files:
|
|
253
253
|
- app/controllers/bulkrax/exporters_controller.rb
|
254
254
|
- app/controllers/bulkrax/importers_controller.rb
|
255
255
|
- app/controllers/concerns/bulkrax/api.rb
|
256
|
+
- app/controllers/concerns/bulkrax/download_behavior.rb
|
256
257
|
- app/factories/bulkrax/object_factory.rb
|
257
258
|
- app/helpers/bulkrax/application_helper.rb
|
258
259
|
- app/helpers/bulkrax/exporters_helper.rb
|
@@ -297,7 +298,6 @@ files:
|
|
297
298
|
- app/models/bulkrax/rdf_file_set_entry.rb
|
298
299
|
- app/models/bulkrax/status.rb
|
299
300
|
- app/models/bulkrax/xml_entry.rb
|
300
|
-
- app/models/concerns/bulkrax/download_behavior.rb
|
301
301
|
- app/models/concerns/bulkrax/dynamic_record_lookup.rb
|
302
302
|
- app/models/concerns/bulkrax/errored_entries.rb
|
303
303
|
- app/models/concerns/bulkrax/export_behavior.rb
|
@@ -372,6 +372,9 @@ files:
|
|
372
372
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
373
373
|
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
374
374
|
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
375
|
+
- db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
|
376
|
+
- db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
|
377
|
+
- db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb
|
375
378
|
- lib/bulkrax.rb
|
376
379
|
- lib/bulkrax/engine.rb
|
377
380
|
- lib/bulkrax/version.rb
|