bulkrax 3.3.1 → 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +4 -2
- data/app/jobs/bulkrax/export_work_job.rb +7 -7
- data/app/jobs/bulkrax/importer_job.rb +2 -0
- data/app/models/bulkrax/csv_entry.rb +2 -10
- data/app/models/bulkrax/exporter.rb +4 -1
- data/app/models/bulkrax/importer.rb +13 -10
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +7 -8
- data/app/models/concerns/bulkrax/export_behavior.rb +1 -1
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -1
- data/app/models/concerns/bulkrax/import_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/status_info.rb +1 -1
- data/app/parsers/bulkrax/application_parser.rb +1 -1
- data/app/parsers/bulkrax/bagit_parser.rb +64 -153
- data/app/parsers/bulkrax/csv_parser.rb +10 -5
- data/app/views/bulkrax/exporters/_form.html.erb +13 -10
- data/app/views/bulkrax/exporters/show.html.erb +5 -0
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +11 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/generators/bulkrax/install_generator.rb +1 -1
- metadata +68 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a80674a9f582c3b8e83f442318908edb6ca9f0b615c970d09b17d941cc8027d
|
4
|
+
data.tar.gz: a2a53116ef49e03dde1aa1df14d8259a2b4abf06a82cff63a9d4ba622ba6600a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af3d75fb03105e37f7374f3a7f863c545d9cc9c95ab2f18bbbf7b4692024e09811f103a372327b4724c836568bad176ed0ad0b7be929ed556259aa9b0793fce6
|
7
|
+
data.tar.gz: 1117a185fbab2bae0746187f464bebea855759a5ecccf0d34f098ac55ad7a2952e663268372262ba8f97820c8c1f02bd29c74a388cfd8ea9cfed84a46dad94cf
|
@@ -42,10 +42,12 @@ module Bulkrax
|
|
42
42
|
pending_relationships.each do |rel|
|
43
43
|
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
|
44
44
|
@child_entry, child_record = find_record(rel.child_id, importer_run_id)
|
45
|
-
|
45
|
+
if child_record
|
46
|
+
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
|
47
|
+
end
|
46
48
|
end
|
47
49
|
|
48
|
-
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.
|
50
|
+
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.nil?
|
49
51
|
reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
|
50
52
|
return false # stop current job from continuing to run after rescheduling
|
51
53
|
end
|
@@ -6,26 +6,26 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
def perform(*args)
|
8
8
|
entry = Entry.find(args[0])
|
9
|
+
exporter_run = ExporterRun.find(args[1])
|
9
10
|
begin
|
10
11
|
entry.build
|
11
12
|
entry.save
|
12
13
|
rescue StandardError
|
13
14
|
# rubocop:disable Rails/SkipsModelValidations
|
14
|
-
|
15
|
-
|
15
|
+
exporter_run.increment!(:failed_records)
|
16
|
+
exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
|
16
17
|
raise
|
17
18
|
else
|
18
19
|
if entry.failed?
|
19
|
-
|
20
|
-
|
20
|
+
exporter_run.increment!(:failed_records)
|
21
|
+
exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
|
21
22
|
raise entry.reload.current_status.error_class.constantize
|
22
23
|
else
|
23
|
-
|
24
|
-
|
24
|
+
exporter_run.increment!(:processed_records)
|
25
|
+
exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
|
25
26
|
end
|
26
27
|
# rubocop:enable Rails/SkipsModelValidations
|
27
28
|
end
|
28
|
-
exporter_run = ExporterRun.find(args[1])
|
29
29
|
return entry if exporter_run.enqueued_records.positive?
|
30
30
|
|
31
31
|
if exporter_run.failed_records.positive?
|
@@ -12,6 +12,8 @@ module Bulkrax
|
|
12
12
|
import(importer, only_updates_since_last_import)
|
13
13
|
update_current_run_counters(importer)
|
14
14
|
schedule(importer) if importer.schedulable?
|
15
|
+
rescue CSV::MalformedCSVError => e
|
16
|
+
importer.status_info(e)
|
15
17
|
end
|
16
18
|
|
17
19
|
def import(importer, only_updates_since_last_import)
|
@@ -114,9 +114,10 @@ module Bulkrax
|
|
114
114
|
def build_files_metadata
|
115
115
|
file_mapping = key_for_export('file')
|
116
116
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
117
|
-
filenames = file_sets
|
117
|
+
filenames = map_file_sets(file_sets)
|
118
118
|
|
119
119
|
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
120
|
+
build_thumbnail_files if hyrax_record.work?
|
120
121
|
end
|
121
122
|
|
122
123
|
def build_relationship_metadata
|
@@ -219,15 +220,6 @@ module Bulkrax
|
|
219
220
|
end
|
220
221
|
end
|
221
222
|
|
222
|
-
def build_files
|
223
|
-
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
224
|
-
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
225
|
-
|
226
|
-
filenames = map_file_sets(file_sets)
|
227
|
-
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
228
|
-
build_thumbnail_files if hyrax_record.work?
|
229
|
-
end
|
230
|
-
|
231
223
|
def build_thumbnail_files
|
232
224
|
return unless importerexporter.include_thumbnails
|
233
225
|
|
@@ -14,13 +14,15 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
17
|
+
delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
21
|
case self.export_from
|
22
22
|
when 'collection'
|
23
23
|
create_from_collection
|
24
|
+
when 'collections metadata'
|
25
|
+
create_from_collections_metadata
|
24
26
|
when 'importer'
|
25
27
|
create_from_importer
|
26
28
|
when 'worktype'
|
@@ -87,6 +89,7 @@ module Bulkrax
|
|
87
89
|
[
|
88
90
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
89
91
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
92
|
+
[I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
|
90
93
|
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
91
94
|
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
92
95
|
]
|
@@ -96,16 +96,19 @@ module Bulkrax
|
|
96
96
|
end
|
97
97
|
|
98
98
|
def current_run
|
99
|
-
@current_run
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
99
|
+
return @current_run if @current_run.present?
|
100
|
+
|
101
|
+
@current_run = self.importer_runs.create!
|
102
|
+
return @current_run if file? && zip?
|
103
|
+
|
104
|
+
entry_counts = {
|
105
|
+
total_work_entries: self.limit || parser.works_total,
|
106
|
+
total_collection_entries: parser.collections_total,
|
107
|
+
total_file_set_entries: parser.file_sets_total
|
108
|
+
}
|
109
|
+
@current_run.update!(entry_counts)
|
110
|
+
|
111
|
+
@current_run
|
109
112
|
end
|
110
113
|
|
111
114
|
def last_run
|
@@ -12,15 +12,14 @@ module Bulkrax
|
|
12
12
|
# check for our entry in our current importer first
|
13
13
|
importer_id = ImporterRun.find(importer_run_id).importer_id
|
14
14
|
default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
|
15
|
-
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
begin
|
17
|
+
# the identifier parameter can be a :source_identifier or the id of an object
|
18
|
+
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
19
|
+
record ||= ActiveFedora::Base.find(identifier)
|
20
|
+
# NameError for if ActiveFedora isn't installed
|
21
|
+
rescue NameError, ActiveFedora::ObjectNotFoundError
|
22
|
+
record = nil
|
24
23
|
end
|
25
24
|
|
26
25
|
# return the found entry here instead of searching for it again in the CreateRelationshipsJob
|
@@ -51,7 +51,7 @@ module Bulkrax
|
|
51
51
|
fn = file_set.original_file.file_name.first
|
52
52
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
53
53
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
54
|
-
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
54
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
55
55
|
filename = "#{fn}.#{mime.to_sym}"
|
56
56
|
filename = fn if mime.to_s == ext_mime.to_s
|
57
57
|
else
|
@@ -8,10 +8,14 @@ module Bulkrax
|
|
8
8
|
|
9
9
|
def add_path_to_file
|
10
10
|
parsed_metadata['file'].each_with_index do |filename, i|
|
11
|
-
|
11
|
+
next if filename.blank?
|
12
|
+
|
13
|
+
path_to_file = parser.path_to_files(filename: filename)
|
12
14
|
|
13
15
|
parsed_metadata['file'][i] = path_to_file
|
14
16
|
end
|
17
|
+
parsed_metadata['file'].delete('')
|
18
|
+
|
15
19
|
raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
|
16
20
|
|
17
21
|
parsed_metadata['file']
|
@@ -12,8 +12,8 @@ module Bulkrax
|
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
14
|
add_user_to_permission_templates! if self.class.to_s.include?("Collection")
|
15
|
-
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
|
16
|
-
child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
|
15
|
+
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
|
16
|
+
child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
|
17
17
|
end
|
18
18
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
19
19
|
raise e
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'zip'
|
2
3
|
|
3
4
|
module Bulkrax
|
4
5
|
class ApplicationParser # rubocop:disable Metrics/ClassLength
|
@@ -261,7 +262,6 @@ module Bulkrax
|
|
261
262
|
end
|
262
263
|
|
263
264
|
def zip
|
264
|
-
require 'zip'
|
265
265
|
FileUtils.rm_rf(exporter_export_zip_path)
|
266
266
|
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
267
267
|
Dir["#{exporter_export_path}/**/**"].each do |file|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class BagitParser <
|
4
|
+
class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
|
5
5
|
include ExportBehavior
|
6
6
|
|
7
7
|
def self.export_supported?
|
@@ -19,8 +19,10 @@ module Bulkrax
|
|
19
19
|
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
20
20
|
rdf_format ? RdfEntry : CsvEntry
|
21
21
|
end
|
22
|
-
|
23
|
-
|
22
|
+
|
23
|
+
def path_to_files(filename:)
|
24
|
+
@path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
|
25
|
+
end
|
24
26
|
|
25
27
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
26
28
|
def import_fields
|
@@ -30,39 +32,41 @@ module Bulkrax
|
|
30
32
|
end.flatten.compact.uniq
|
31
33
|
end
|
32
34
|
|
33
|
-
#
|
34
|
-
# Create an Array of all metadata records, one per file
|
35
|
+
# Create an Array of all metadata records
|
35
36
|
def records(_opts = {})
|
36
37
|
raise StandardError, 'No BagIt records were found' if bags.blank?
|
37
38
|
@records ||= bags.map do |bag|
|
38
39
|
path = metadata_path(bag)
|
39
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
40
41
|
data = entry_class.read_data(path)
|
41
|
-
|
42
|
-
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
43
|
-
data
|
42
|
+
get_data(bag, data)
|
44
43
|
end
|
44
|
+
|
45
|
+
@records = @records.flatten
|
45
46
|
end
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
|
60
|
-
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
61
|
-
increment_counters(index, collection: true)
|
48
|
+
def get_data(bag, data)
|
49
|
+
if entry_class == CsvEntry
|
50
|
+
data = data.map do |data_row|
|
51
|
+
record_data = entry_class.data_for_entry(data_row, source_identifier, self)
|
52
|
+
next record_data if importerexporter.metadata_only?
|
53
|
+
|
54
|
+
record_data[:file] = bag.bag_files.join('|') if ::Hyrax.config.curation_concerns.include? record_data[:model]&.constantize
|
55
|
+
record_data
|
56
|
+
end
|
57
|
+
else
|
58
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
59
|
+
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
62
60
|
end
|
61
|
+
|
62
|
+
data
|
63
63
|
end
|
64
64
|
|
65
65
|
def create_works
|
66
|
+
entry_class == CsvEntry ? super : create_rdf_works
|
67
|
+
end
|
68
|
+
|
69
|
+
def create_rdf_works
|
66
70
|
records.each_with_index do |record, index|
|
67
71
|
next unless record_has_source_identifier(record, index)
|
68
72
|
break if limit_reached?(limit, index)
|
@@ -81,33 +85,16 @@ module Bulkrax
|
|
81
85
|
status_info(e)
|
82
86
|
end
|
83
87
|
|
84
|
-
def collections
|
85
|
-
records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
|
86
|
-
end
|
87
|
-
|
88
|
-
def collections_total
|
89
|
-
collections.size
|
90
|
-
end
|
91
|
-
|
92
|
-
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
93
|
-
def works_total
|
94
|
-
total
|
95
|
-
end
|
96
|
-
|
97
88
|
def total
|
98
|
-
|
99
|
-
end
|
89
|
+
@total = importer.parser_fields['total'] || 0 if importer?
|
100
90
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
109
|
-
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
110
|
-
output
|
91
|
+
@total = if exporter?
|
92
|
+
limit.nil? || limit.zero? ? current_record_ids.count : limit
|
93
|
+
end
|
94
|
+
|
95
|
+
return @total || 0
|
96
|
+
rescue StandardError
|
97
|
+
@total = 0
|
111
98
|
end
|
112
99
|
|
113
100
|
def current_record_ids
|
@@ -118,7 +105,6 @@ module Bulkrax
|
|
118
105
|
case importerexporter.export_from
|
119
106
|
when 'all'
|
120
107
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
121
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
122
108
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
123
109
|
when 'collection'
|
124
110
|
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
@@ -127,89 +113,49 @@ module Bulkrax
|
|
127
113
|
when 'importer'
|
128
114
|
set_ids_for_exporting_from_importer
|
129
115
|
end
|
130
|
-
|
131
116
|
@work_ids + @collection_ids + @file_set_ids
|
132
117
|
end
|
133
118
|
|
134
|
-
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
135
|
-
# @see #current_record_ids
|
136
|
-
def set_ids_for_exporting_from_importer
|
137
|
-
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
138
|
-
complete_statuses = Status.latest_by_statusable
|
139
|
-
.includes(:statusable)
|
140
|
-
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
141
|
-
|
142
|
-
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
143
|
-
extra_filters = extra_filters.presence || '*:*'
|
144
|
-
|
145
|
-
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
146
|
-
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
147
|
-
extra_filters.to_s,
|
148
|
-
fq: [
|
149
|
-
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
150
|
-
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
151
|
-
],
|
152
|
-
fl: 'id',
|
153
|
-
rows: 2_000_000_000
|
154
|
-
)['response']['docs'].map { |obj| obj['id'] })
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
def create_new_entries
|
159
|
-
current_record_ids.each_with_index do |id, index|
|
160
|
-
break if limit_reached?(limit, index)
|
161
|
-
|
162
|
-
this_entry_class = if @collection_ids.include?(id)
|
163
|
-
collection_entry_class
|
164
|
-
elsif @file_set_ids.include?(id)
|
165
|
-
file_set_entry_class
|
166
|
-
else
|
167
|
-
entry_class
|
168
|
-
end
|
169
|
-
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
170
|
-
|
171
|
-
begin
|
172
|
-
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
173
|
-
rescue => e
|
174
|
-
Rails.logger.info("#{e.message} was detected during export")
|
175
|
-
end
|
176
|
-
|
177
|
-
self.headers |= entry.parsed_metadata.keys if entry
|
178
|
-
end
|
179
|
-
end
|
180
|
-
alias create_from_collection create_new_entries
|
181
|
-
alias create_from_importer create_new_entries
|
182
|
-
alias create_from_worktype create_new_entries
|
183
|
-
alias create_from_all create_new_entries
|
184
|
-
|
185
119
|
# export methods
|
186
120
|
|
187
|
-
# rubocop:disable Metrics/AbcSize
|
121
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
188
122
|
def write_files
|
189
123
|
require 'open-uri'
|
190
124
|
require 'socket'
|
191
|
-
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |
|
192
|
-
|
193
|
-
|
194
|
-
|
125
|
+
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |entry|
|
126
|
+
record = ActiveFedora::Base.find(entry.identifier)
|
127
|
+
next unless Hyrax.config.curation_concerns.include?(record.class)
|
128
|
+
bag = BagIt::Bag.new setup_bagit_folder(entry.identifier)
|
129
|
+
bag_entries = [entry]
|
130
|
+
|
131
|
+
record.file_sets.each do |fs|
|
132
|
+
if @file_set_ids.present?
|
133
|
+
file_set_entry = Bulkrax::CsvFileSetEntry.where("parsed_metadata LIKE '%#{fs.id}%'").first
|
134
|
+
bag_entries << file_set_entry unless file_set_entry.nil?
|
135
|
+
end
|
195
136
|
|
196
|
-
w.file_sets.each do |fs|
|
197
137
|
file_name = filename(fs)
|
198
138
|
next if file_name.blank?
|
199
139
|
io = open(fs.original_file.uri)
|
200
140
|
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
201
141
|
file.write(io.read)
|
202
142
|
file.close
|
203
|
-
|
143
|
+
begin
|
144
|
+
bag.add_file(file_name, file.path)
|
145
|
+
rescue => e
|
146
|
+
entry.status_info(e)
|
147
|
+
status_info(e)
|
148
|
+
end
|
204
149
|
end
|
205
|
-
|
206
|
-
|
150
|
+
|
151
|
+
CSV.open(setup_csv_metadata_export_file(entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
152
|
+
bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
|
207
153
|
end
|
208
|
-
write_triples(
|
154
|
+
write_triples(entry)
|
209
155
|
bag.manifest!(algo: 'sha256')
|
210
156
|
end
|
211
157
|
end
|
212
|
-
# rubocop:enable Metrics/AbcSize
|
158
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
213
159
|
|
214
160
|
def setup_csv_metadata_export_file(id)
|
215
161
|
File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
|
@@ -221,42 +167,6 @@ module Bulkrax
|
|
221
167
|
key != source_identifier.to_s
|
222
168
|
end
|
223
169
|
|
224
|
-
# All possible column names
|
225
|
-
def export_headers
|
226
|
-
headers = sort_headers(self.headers)
|
227
|
-
|
228
|
-
# we don't want access_control_id exported and we want file at the end
|
229
|
-
headers.delete('access_control_id') if headers.include?('access_control_id')
|
230
|
-
|
231
|
-
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
232
|
-
headers.prepend('model')
|
233
|
-
headers.prepend(source_identifier.to_s)
|
234
|
-
headers.prepend('id')
|
235
|
-
|
236
|
-
headers.uniq
|
237
|
-
end
|
238
|
-
|
239
|
-
def object_names
|
240
|
-
return @object_names if @object_names
|
241
|
-
|
242
|
-
@object_names = mapping.values.map { |value| value['object'] }
|
243
|
-
@object_names.uniq!.delete(nil)
|
244
|
-
|
245
|
-
@object_names
|
246
|
-
end
|
247
|
-
|
248
|
-
def sort_headers(headers)
|
249
|
-
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
250
|
-
# while keeping objects grouped together
|
251
|
-
headers.sort_by do |item|
|
252
|
-
number = item.match(/\d+/)&.[](0) || 0.to_s
|
253
|
-
sort_number = number.rjust(4, "0")
|
254
|
-
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
255
|
-
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
256
|
-
"#{object_prefix}_#{sort_number}_#{remainder}"
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
170
|
def setup_triple_metadata_export_file(id)
|
261
171
|
File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
|
262
172
|
end
|
@@ -276,11 +186,6 @@ module Bulkrax
|
|
276
186
|
end
|
277
187
|
end
|
278
188
|
|
279
|
-
def required_elements?(keys)
|
280
|
-
return if keys.blank?
|
281
|
-
!required_elements.map { |el| keys.map(&:to_s).include?(el) }.include?(false)
|
282
|
-
end
|
283
|
-
|
284
189
|
# @todo - investigate getting directory structure
|
285
190
|
# @todo - investigate using perform_later, and having the importer check for
|
286
191
|
# DownloadCloudFileJob before it starts
|
@@ -331,5 +236,11 @@ module Bulkrax
|
|
331
236
|
return nil unless bag.valid?
|
332
237
|
bag
|
333
238
|
end
|
239
|
+
|
240
|
+
# use the version of this method from the application parser instead
|
241
|
+
def real_import_file_path
|
242
|
+
return importer_unzip_path if file? && zip?
|
243
|
+
parser_fields['import_file_path']
|
244
|
+
end
|
334
245
|
end
|
335
246
|
end
|
@@ -195,6 +195,9 @@ module Bulkrax
|
|
195
195
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
196
|
when 'collection'
|
197
197
|
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
198
|
+
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
199
|
+
when 'collections metadata'
|
200
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
201
|
when 'worktype'
|
199
202
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
200
203
|
when 'importer'
|
@@ -251,6 +254,7 @@ module Bulkrax
|
|
251
254
|
end
|
252
255
|
end
|
253
256
|
alias create_from_collection create_new_entries
|
257
|
+
alias create_from_collections_metadata create_new_entries
|
254
258
|
alias create_from_importer create_new_entries
|
255
259
|
alias create_from_worktype create_new_entries
|
256
260
|
alias create_from_all create_new_entries
|
@@ -268,8 +272,8 @@ module Bulkrax
|
|
268
272
|
CsvFileSetEntry
|
269
273
|
end
|
270
274
|
|
271
|
-
#
|
272
|
-
#
|
275
|
+
# TODO: figure out why using the version of this method that's in the bagit parser
|
276
|
+
# breaks specs for the "if importer?" line
|
273
277
|
def total
|
274
278
|
@total = importer.parser_fields['total'] || 0 if importer?
|
275
279
|
@total = limit || current_record_ids.count if exporter?
|
@@ -378,10 +382,11 @@ module Bulkrax
|
|
378
382
|
end
|
379
383
|
|
380
384
|
# Retrieve the path where we expect to find the files
|
381
|
-
def path_to_files
|
385
|
+
def path_to_files(**args)
|
386
|
+
filename = args.fetch(:filename, '')
|
387
|
+
|
382
388
|
@path_to_files ||= File.join(
|
383
|
-
zip? ? importer_unzip_path : File.dirname(import_file_path),
|
384
|
-
'files'
|
389
|
+
zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
|
385
390
|
)
|
386
391
|
end
|
387
392
|
|
@@ -15,20 +15,21 @@
|
|
15
15
|
|
16
16
|
<%= form.hidden_field :user_id, value: current_user.id %>
|
17
17
|
|
18
|
-
<%= form.input :export_type,
|
19
|
-
collection: form.object.export_type_list,
|
20
|
-
label: t('bulkrax.exporter.labels.export_type'),
|
18
|
+
<%= form.input :export_type,
|
19
|
+
collection: form.object.export_type_list,
|
20
|
+
label: t('bulkrax.exporter.labels.export_type'),
|
21
21
|
required: true,
|
22
22
|
prompt: 'Please select an export type' %>
|
23
23
|
|
24
|
-
<%= form.input :export_from,
|
25
|
-
collection: form.object.export_from_list,
|
26
|
-
label: t('bulkrax.exporter.labels.export_from'),
|
24
|
+
<%= form.input :export_from,
|
25
|
+
collection: form.object.export_from_list,
|
26
|
+
label: t('bulkrax.exporter.labels.export_from'),
|
27
27
|
required: true,
|
28
28
|
prompt: 'Please select an export source' %>
|
29
29
|
|
30
30
|
<%= form.input :export_source_importer,
|
31
31
|
label: t('bulkrax.exporter.labels.importer'),
|
32
|
+
required: true,
|
32
33
|
prompt: 'Select from the list',
|
33
34
|
label_html: { class: 'importer export-source-option hidden' },
|
34
35
|
input_html: { class: 'importer export-source-option hidden' },
|
@@ -37,6 +38,7 @@
|
|
37
38
|
<%= form.input :export_source_collection,
|
38
39
|
prompt: 'Start typing ...',
|
39
40
|
label: t('bulkrax.exporter.labels.collection'),
|
41
|
+
required: true,
|
40
42
|
placeholder: @collection&.title&.first,
|
41
43
|
label_html: { class: 'collection export-source-option hidden' },
|
42
44
|
input_html: {
|
@@ -50,13 +52,14 @@
|
|
50
52
|
|
51
53
|
<%= form.input :export_source_worktype,
|
52
54
|
label: t('bulkrax.exporter.labels.worktype'),
|
55
|
+
required: true,
|
53
56
|
prompt: 'Select from the list',
|
54
57
|
label_html: { class: 'worktype export-source-option hidden' },
|
55
58
|
input_html: { class: 'worktype export-source-option hidden' },
|
56
59
|
collection: Hyrax.config.curation_concerns.map {|cc| [cc.to_s, cc.to_s] } %>
|
57
60
|
|
58
|
-
<%= form.input :limit,
|
59
|
-
as: :integer,
|
61
|
+
<%= form.input :limit,
|
62
|
+
as: :integer,
|
60
63
|
hint: 'leave blank or 0 for all records',
|
61
64
|
label: t('bulkrax.exporter.labels.limit') %>
|
62
65
|
|
@@ -90,8 +93,8 @@
|
|
90
93
|
collection: form.object.workflow_status_list,
|
91
94
|
label: t('bulkrax.exporter.labels.status') %>
|
92
95
|
|
93
|
-
<%= form.input :parser_klass,
|
94
|
-
collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
|
96
|
+
<%= form.input :parser_klass,
|
97
|
+
collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
|
95
98
|
label: t('bulkrax.exporter.labels.export_format') %>
|
96
99
|
</div>
|
97
100
|
|
@@ -40,6 +40,11 @@
|
|
40
40
|
<% when 'collection' %>
|
41
41
|
<% collection = Collection.find(@exporter.export_source) %>
|
42
42
|
<%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
|
43
|
+
<% when 'collections metadata' %>
|
44
|
+
<% collections = Collection.all %>
|
45
|
+
<% collections.each_with_index do |c, i| %>
|
46
|
+
<%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
|
47
|
+
<% end %>
|
43
48
|
<% when 'importer' %>
|
44
49
|
<% importer = Bulkrax::Importer.find(@exporter.export_source) %>
|
45
50
|
<%= link_to importer.name, bulkrax.importer_path(importer.id) %>
|
@@ -1,7 +1,17 @@
|
|
1
1
|
class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
|
2
|
-
def
|
2
|
+
def up
|
3
3
|
if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
|
4
|
+
remove_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs
|
5
|
+
remove_index :bulkrax_pending_relationships, column: :bulkrax_importer_run_id
|
6
|
+
|
4
7
|
rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
|
8
|
+
|
9
|
+
add_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs, column: :importer_run_id
|
10
|
+
add_index :bulkrax_pending_relationships, :importer_run_id, name: 'index_bulkrax_pending_relationships_on_importer_run_id'
|
5
11
|
end
|
6
12
|
end
|
13
|
+
|
14
|
+
def down
|
15
|
+
rename_column :bulkrax_pending_relationships, :importer_run_id, :bulkrax_importer_run_id
|
16
|
+
end
|
7
17
|
end
|
data/lib/bulkrax/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -25,81 +25,81 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 5.1.6
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: bagit
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '0.4'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '0.4'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: coderay
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: iso8601
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.9.0
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 0.9.0
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: kaminari
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0
|
75
|
+
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: language_list
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
-
- - "
|
89
|
+
version: '1.2'
|
90
|
+
- - ">="
|
91
91
|
- !ruby/object:Gem::Version
|
92
|
-
version: 2.
|
92
|
+
version: 1.2.1
|
93
93
|
type: :runtime
|
94
94
|
prerelease: false
|
95
95
|
version_requirements: !ruby/object:Gem::Requirement
|
96
96
|
requirements:
|
97
|
-
- - "
|
97
|
+
- - "~>"
|
98
98
|
- !ruby/object:Gem::Version
|
99
|
-
version: '
|
100
|
-
- - "
|
99
|
+
version: '1.2'
|
100
|
+
- - ">="
|
101
101
|
- !ruby/object:Gem::Version
|
102
|
-
version: 2.
|
102
|
+
version: 1.2.1
|
103
103
|
- !ruby/object:Gem::Dependency
|
104
104
|
name: libxml-ruby
|
105
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -115,61 +115,75 @@ dependencies:
|
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 3.1.0
|
117
117
|
- !ruby/object:Gem::Dependency
|
118
|
-
name:
|
118
|
+
name: loofah
|
119
119
|
requirement: !ruby/object:Gem::Requirement
|
120
120
|
requirements:
|
121
|
-
- - "~>"
|
122
|
-
- !ruby/object:Gem::Version
|
123
|
-
version: '1.2'
|
124
121
|
- - ">="
|
125
122
|
- !ruby/object:Gem::Version
|
126
|
-
version:
|
123
|
+
version: 2.2.3
|
127
124
|
type: :runtime
|
128
125
|
prerelease: false
|
129
126
|
version_requirements: !ruby/object:Gem::Requirement
|
130
127
|
requirements:
|
131
|
-
- - "~>"
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
version: '1.2'
|
134
128
|
- - ">="
|
135
129
|
- !ruby/object:Gem::Version
|
136
|
-
version:
|
130
|
+
version: 2.2.3
|
137
131
|
- !ruby/object:Gem::Dependency
|
138
|
-
name:
|
132
|
+
name: oai
|
139
133
|
requirement: !ruby/object:Gem::Requirement
|
140
134
|
requirements:
|
141
135
|
- - ">="
|
142
136
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
137
|
+
version: '0.4'
|
144
138
|
- - "<"
|
145
139
|
- !ruby/object:Gem::Version
|
146
|
-
version:
|
140
|
+
version: 2.x
|
147
141
|
type: :runtime
|
148
142
|
prerelease: false
|
149
143
|
version_requirements: !ruby/object:Gem::Requirement
|
150
144
|
requirements:
|
151
145
|
- - ">="
|
152
146
|
- !ruby/object:Gem::Version
|
153
|
-
version:
|
147
|
+
version: '0.4'
|
154
148
|
- - "<"
|
155
149
|
- !ruby/object:Gem::Version
|
156
|
-
version:
|
150
|
+
version: 2.x
|
157
151
|
- !ruby/object:Gem::Dependency
|
158
|
-
name:
|
152
|
+
name: rack
|
159
153
|
requirement: !ruby/object:Gem::Requirement
|
160
154
|
requirements:
|
161
|
-
- - "
|
155
|
+
- - ">="
|
162
156
|
- !ruby/object:Gem::Version
|
163
|
-
version:
|
157
|
+
version: 2.0.6
|
164
158
|
type: :runtime
|
165
159
|
prerelease: false
|
166
160
|
version_requirements: !ruby/object:Gem::Requirement
|
167
161
|
requirements:
|
168
|
-
- - "
|
162
|
+
- - ">="
|
169
163
|
- !ruby/object:Gem::Version
|
170
|
-
version:
|
164
|
+
version: 2.0.6
|
171
165
|
- !ruby/object:Gem::Dependency
|
172
|
-
name:
|
166
|
+
name: rdf
|
167
|
+
requirement: !ruby/object:Gem::Requirement
|
168
|
+
requirements:
|
169
|
+
- - ">="
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: 2.0.2
|
172
|
+
- - "<"
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: '4.0'
|
175
|
+
type: :runtime
|
176
|
+
prerelease: false
|
177
|
+
version_requirements: !ruby/object:Gem::Requirement
|
178
|
+
requirements:
|
179
|
+
- - ">="
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: 2.0.2
|
182
|
+
- - "<"
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '4.0'
|
185
|
+
- !ruby/object:Gem::Dependency
|
186
|
+
name: rubyzip
|
173
187
|
requirement: !ruby/object:Gem::Requirement
|
174
188
|
requirements:
|
175
189
|
- - ">="
|
@@ -183,7 +197,7 @@ dependencies:
|
|
183
197
|
- !ruby/object:Gem::Version
|
184
198
|
version: '0'
|
185
199
|
- !ruby/object:Gem::Dependency
|
186
|
-
name:
|
200
|
+
name: simple_form
|
187
201
|
requirement: !ruby/object:Gem::Requirement
|
188
202
|
requirements:
|
189
203
|
- - ">="
|
@@ -390,7 +404,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
390
404
|
licenses:
|
391
405
|
- Apache-2.0
|
392
406
|
metadata: {}
|
393
|
-
post_install_message:
|
407
|
+
post_install_message:
|
394
408
|
rdoc_options: []
|
395
409
|
require_paths:
|
396
410
|
- lib
|
@@ -405,8 +419,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
405
419
|
- !ruby/object:Gem::Version
|
406
420
|
version: '0'
|
407
421
|
requirements: []
|
408
|
-
rubygems_version: 3.
|
409
|
-
signing_key:
|
422
|
+
rubygems_version: 3.0.3
|
423
|
+
signing_key:
|
410
424
|
specification_version: 4
|
411
425
|
summary: Import and export tool for Hyrax and Hyku
|
412
426
|
test_files: []
|