bulkrax 3.3.1 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +4 -2
- data/app/jobs/bulkrax/export_work_job.rb +7 -7
- data/app/jobs/bulkrax/importer_job.rb +2 -0
- data/app/models/bulkrax/csv_entry.rb +2 -10
- data/app/models/bulkrax/exporter.rb +4 -1
- data/app/models/bulkrax/importer.rb +13 -10
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +7 -8
- data/app/models/concerns/bulkrax/export_behavior.rb +1 -1
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -1
- data/app/models/concerns/bulkrax/import_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/status_info.rb +1 -1
- data/app/parsers/bulkrax/application_parser.rb +1 -1
- data/app/parsers/bulkrax/bagit_parser.rb +64 -153
- data/app/parsers/bulkrax/csv_parser.rb +10 -5
- data/app/views/bulkrax/exporters/_form.html.erb +13 -10
- data/app/views/bulkrax/exporters/show.html.erb +5 -0
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +11 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/generators/bulkrax/install_generator.rb +1 -1
- metadata +68 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a80674a9f582c3b8e83f442318908edb6ca9f0b615c970d09b17d941cc8027d
|
4
|
+
data.tar.gz: a2a53116ef49e03dde1aa1df14d8259a2b4abf06a82cff63a9d4ba622ba6600a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af3d75fb03105e37f7374f3a7f863c545d9cc9c95ab2f18bbbf7b4692024e09811f103a372327b4724c836568bad176ed0ad0b7be929ed556259aa9b0793fce6
|
7
|
+
data.tar.gz: 1117a185fbab2bae0746187f464bebea855759a5ecccf0d34f098ac55ad7a2952e663268372262ba8f97820c8c1f02bd29c74a388cfd8ea9cfed84a46dad94cf
|
@@ -42,10 +42,12 @@ module Bulkrax
|
|
42
42
|
pending_relationships.each do |rel|
|
43
43
|
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
|
44
44
|
@child_entry, child_record = find_record(rel.child_id, importer_run_id)
|
45
|
-
|
45
|
+
if child_record
|
46
|
+
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
|
47
|
+
end
|
46
48
|
end
|
47
49
|
|
48
|
-
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.
|
50
|
+
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.nil?
|
49
51
|
reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
|
50
52
|
return false # stop current job from continuing to run after rescheduling
|
51
53
|
end
|
@@ -6,26 +6,26 @@ module Bulkrax
|
|
6
6
|
|
7
7
|
def perform(*args)
|
8
8
|
entry = Entry.find(args[0])
|
9
|
+
exporter_run = ExporterRun.find(args[1])
|
9
10
|
begin
|
10
11
|
entry.build
|
11
12
|
entry.save
|
12
13
|
rescue StandardError
|
13
14
|
# rubocop:disable Rails/SkipsModelValidations
|
14
|
-
|
15
|
-
|
15
|
+
exporter_run.increment!(:failed_records)
|
16
|
+
exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
|
16
17
|
raise
|
17
18
|
else
|
18
19
|
if entry.failed?
|
19
|
-
|
20
|
-
|
20
|
+
exporter_run.increment!(:failed_records)
|
21
|
+
exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
|
21
22
|
raise entry.reload.current_status.error_class.constantize
|
22
23
|
else
|
23
|
-
|
24
|
-
|
24
|
+
exporter_run.increment!(:processed_records)
|
25
|
+
exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
|
25
26
|
end
|
26
27
|
# rubocop:enable Rails/SkipsModelValidations
|
27
28
|
end
|
28
|
-
exporter_run = ExporterRun.find(args[1])
|
29
29
|
return entry if exporter_run.enqueued_records.positive?
|
30
30
|
|
31
31
|
if exporter_run.failed_records.positive?
|
@@ -12,6 +12,8 @@ module Bulkrax
|
|
12
12
|
import(importer, only_updates_since_last_import)
|
13
13
|
update_current_run_counters(importer)
|
14
14
|
schedule(importer) if importer.schedulable?
|
15
|
+
rescue CSV::MalformedCSVError => e
|
16
|
+
importer.status_info(e)
|
15
17
|
end
|
16
18
|
|
17
19
|
def import(importer, only_updates_since_last_import)
|
@@ -114,9 +114,10 @@ module Bulkrax
|
|
114
114
|
def build_files_metadata
|
115
115
|
file_mapping = key_for_export('file')
|
116
116
|
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
117
|
-
filenames = file_sets
|
117
|
+
filenames = map_file_sets(file_sets)
|
118
118
|
|
119
119
|
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
120
|
+
build_thumbnail_files if hyrax_record.work?
|
120
121
|
end
|
121
122
|
|
122
123
|
def build_relationship_metadata
|
@@ -219,15 +220,6 @@ module Bulkrax
|
|
219
220
|
end
|
220
221
|
end
|
221
222
|
|
222
|
-
def build_files
|
223
|
-
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
224
|
-
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
225
|
-
|
226
|
-
filenames = map_file_sets(file_sets)
|
227
|
-
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
228
|
-
build_thumbnail_files if hyrax_record.work?
|
229
|
-
end
|
230
|
-
|
231
223
|
def build_thumbnail_files
|
232
224
|
return unless importerexporter.include_thumbnails
|
233
225
|
|
@@ -14,13 +14,15 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
17
|
+
delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
21
|
case self.export_from
|
22
22
|
when 'collection'
|
23
23
|
create_from_collection
|
24
|
+
when 'collections metadata'
|
25
|
+
create_from_collections_metadata
|
24
26
|
when 'importer'
|
25
27
|
create_from_importer
|
26
28
|
when 'worktype'
|
@@ -87,6 +89,7 @@ module Bulkrax
|
|
87
89
|
[
|
88
90
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
89
91
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
92
|
+
[I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
|
90
93
|
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
91
94
|
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
92
95
|
]
|
@@ -96,16 +96,19 @@ module Bulkrax
|
|
96
96
|
end
|
97
97
|
|
98
98
|
def current_run
|
99
|
-
@current_run
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
99
|
+
return @current_run if @current_run.present?
|
100
|
+
|
101
|
+
@current_run = self.importer_runs.create!
|
102
|
+
return @current_run if file? && zip?
|
103
|
+
|
104
|
+
entry_counts = {
|
105
|
+
total_work_entries: self.limit || parser.works_total,
|
106
|
+
total_collection_entries: parser.collections_total,
|
107
|
+
total_file_set_entries: parser.file_sets_total
|
108
|
+
}
|
109
|
+
@current_run.update!(entry_counts)
|
110
|
+
|
111
|
+
@current_run
|
109
112
|
end
|
110
113
|
|
111
114
|
def last_run
|
@@ -12,15 +12,14 @@ module Bulkrax
|
|
12
12
|
# check for our entry in our current importer first
|
13
13
|
importer_id = ImporterRun.find(importer_run_id).importer_id
|
14
14
|
default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
|
15
|
-
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
begin
|
17
|
+
# the identifier parameter can be a :source_identifier or the id of an object
|
18
|
+
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
19
|
+
record ||= ActiveFedora::Base.find(identifier)
|
20
|
+
# NameError for if ActiveFedora isn't installed
|
21
|
+
rescue NameError, ActiveFedora::ObjectNotFoundError
|
22
|
+
record = nil
|
24
23
|
end
|
25
24
|
|
26
25
|
# return the found entry here instead of searching for it again in the CreateRelationshipsJob
|
@@ -51,7 +51,7 @@ module Bulkrax
|
|
51
51
|
fn = file_set.original_file.file_name.first
|
52
52
|
mime = Mime::Type.lookup(file_set.original_file.mime_type)
|
53
53
|
ext_mime = MIME::Types.of(file_set.original_file.file_name).first
|
54
|
-
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
54
|
+
if fn.include?(file_set.id) || importerexporter.metadata_only?
|
55
55
|
filename = "#{fn}.#{mime.to_sym}"
|
56
56
|
filename = fn if mime.to_s == ext_mime.to_s
|
57
57
|
else
|
@@ -8,10 +8,14 @@ module Bulkrax
|
|
8
8
|
|
9
9
|
def add_path_to_file
|
10
10
|
parsed_metadata['file'].each_with_index do |filename, i|
|
11
|
-
|
11
|
+
next if filename.blank?
|
12
|
+
|
13
|
+
path_to_file = parser.path_to_files(filename: filename)
|
12
14
|
|
13
15
|
parsed_metadata['file'][i] = path_to_file
|
14
16
|
end
|
17
|
+
parsed_metadata['file'].delete('')
|
18
|
+
|
15
19
|
raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
|
16
20
|
|
17
21
|
parsed_metadata['file']
|
@@ -12,8 +12,8 @@ module Bulkrax
|
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
14
|
add_user_to_permission_templates! if self.class.to_s.include?("Collection")
|
15
|
-
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
|
16
|
-
child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
|
15
|
+
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
|
16
|
+
child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
|
17
17
|
end
|
18
18
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
19
19
|
raise e
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'zip'
|
2
3
|
|
3
4
|
module Bulkrax
|
4
5
|
class ApplicationParser # rubocop:disable Metrics/ClassLength
|
@@ -261,7 +262,6 @@ module Bulkrax
|
|
261
262
|
end
|
262
263
|
|
263
264
|
def zip
|
264
|
-
require 'zip'
|
265
265
|
FileUtils.rm_rf(exporter_export_zip_path)
|
266
266
|
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
267
267
|
Dir["#{exporter_export_path}/**/**"].each do |file|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
-
class BagitParser <
|
4
|
+
class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
|
5
5
|
include ExportBehavior
|
6
6
|
|
7
7
|
def self.export_supported?
|
@@ -19,8 +19,10 @@ module Bulkrax
|
|
19
19
|
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
20
20
|
rdf_format ? RdfEntry : CsvEntry
|
21
21
|
end
|
22
|
-
|
23
|
-
|
22
|
+
|
23
|
+
def path_to_files(filename:)
|
24
|
+
@path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
|
25
|
+
end
|
24
26
|
|
25
27
|
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
26
28
|
def import_fields
|
@@ -30,39 +32,41 @@ module Bulkrax
|
|
30
32
|
end.flatten.compact.uniq
|
31
33
|
end
|
32
34
|
|
33
|
-
#
|
34
|
-
# Create an Array of all metadata records, one per file
|
35
|
+
# Create an Array of all metadata records
|
35
36
|
def records(_opts = {})
|
36
37
|
raise StandardError, 'No BagIt records were found' if bags.blank?
|
37
38
|
@records ||= bags.map do |bag|
|
38
39
|
path = metadata_path(bag)
|
39
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
40
41
|
data = entry_class.read_data(path)
|
41
|
-
|
42
|
-
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
43
|
-
data
|
42
|
+
get_data(bag, data)
|
44
43
|
end
|
44
|
+
|
45
|
+
@records = @records.flatten
|
45
46
|
end
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
|
60
|
-
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
61
|
-
increment_counters(index, collection: true)
|
48
|
+
def get_data(bag, data)
|
49
|
+
if entry_class == CsvEntry
|
50
|
+
data = data.map do |data_row|
|
51
|
+
record_data = entry_class.data_for_entry(data_row, source_identifier, self)
|
52
|
+
next record_data if importerexporter.metadata_only?
|
53
|
+
|
54
|
+
record_data[:file] = bag.bag_files.join('|') if ::Hyrax.config.curation_concerns.include? record_data[:model]&.constantize
|
55
|
+
record_data
|
56
|
+
end
|
57
|
+
else
|
58
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
59
|
+
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
62
60
|
end
|
61
|
+
|
62
|
+
data
|
63
63
|
end
|
64
64
|
|
65
65
|
def create_works
|
66
|
+
entry_class == CsvEntry ? super : create_rdf_works
|
67
|
+
end
|
68
|
+
|
69
|
+
def create_rdf_works
|
66
70
|
records.each_with_index do |record, index|
|
67
71
|
next unless record_has_source_identifier(record, index)
|
68
72
|
break if limit_reached?(limit, index)
|
@@ -81,33 +85,16 @@ module Bulkrax
|
|
81
85
|
status_info(e)
|
82
86
|
end
|
83
87
|
|
84
|
-
def collections
|
85
|
-
records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
|
86
|
-
end
|
87
|
-
|
88
|
-
def collections_total
|
89
|
-
collections.size
|
90
|
-
end
|
91
|
-
|
92
|
-
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
93
|
-
def works_total
|
94
|
-
total
|
95
|
-
end
|
96
|
-
|
97
88
|
def total
|
98
|
-
|
99
|
-
end
|
89
|
+
@total = importer.parser_fields['total'] || 0 if importer?
|
100
90
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
109
|
-
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
110
|
-
output
|
91
|
+
@total = if exporter?
|
92
|
+
limit.nil? || limit.zero? ? current_record_ids.count : limit
|
93
|
+
end
|
94
|
+
|
95
|
+
return @total || 0
|
96
|
+
rescue StandardError
|
97
|
+
@total = 0
|
111
98
|
end
|
112
99
|
|
113
100
|
def current_record_ids
|
@@ -118,7 +105,6 @@ module Bulkrax
|
|
118
105
|
case importerexporter.export_from
|
119
106
|
when 'all'
|
120
107
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
121
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
122
108
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
123
109
|
when 'collection'
|
124
110
|
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
@@ -127,89 +113,49 @@ module Bulkrax
|
|
127
113
|
when 'importer'
|
128
114
|
set_ids_for_exporting_from_importer
|
129
115
|
end
|
130
|
-
|
131
116
|
@work_ids + @collection_ids + @file_set_ids
|
132
117
|
end
|
133
118
|
|
134
|
-
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
135
|
-
# @see #current_record_ids
|
136
|
-
def set_ids_for_exporting_from_importer
|
137
|
-
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
138
|
-
complete_statuses = Status.latest_by_statusable
|
139
|
-
.includes(:statusable)
|
140
|
-
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
141
|
-
|
142
|
-
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
143
|
-
extra_filters = extra_filters.presence || '*:*'
|
144
|
-
|
145
|
-
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
146
|
-
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
147
|
-
extra_filters.to_s,
|
148
|
-
fq: [
|
149
|
-
%(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
150
|
-
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
151
|
-
],
|
152
|
-
fl: 'id',
|
153
|
-
rows: 2_000_000_000
|
154
|
-
)['response']['docs'].map { |obj| obj['id'] })
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
def create_new_entries
|
159
|
-
current_record_ids.each_with_index do |id, index|
|
160
|
-
break if limit_reached?(limit, index)
|
161
|
-
|
162
|
-
this_entry_class = if @collection_ids.include?(id)
|
163
|
-
collection_entry_class
|
164
|
-
elsif @file_set_ids.include?(id)
|
165
|
-
file_set_entry_class
|
166
|
-
else
|
167
|
-
entry_class
|
168
|
-
end
|
169
|
-
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
170
|
-
|
171
|
-
begin
|
172
|
-
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
173
|
-
rescue => e
|
174
|
-
Rails.logger.info("#{e.message} was detected during export")
|
175
|
-
end
|
176
|
-
|
177
|
-
self.headers |= entry.parsed_metadata.keys if entry
|
178
|
-
end
|
179
|
-
end
|
180
|
-
alias create_from_collection create_new_entries
|
181
|
-
alias create_from_importer create_new_entries
|
182
|
-
alias create_from_worktype create_new_entries
|
183
|
-
alias create_from_all create_new_entries
|
184
|
-
|
185
119
|
# export methods
|
186
120
|
|
187
|
-
# rubocop:disable Metrics/AbcSize
|
121
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
188
122
|
def write_files
|
189
123
|
require 'open-uri'
|
190
124
|
require 'socket'
|
191
|
-
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |
|
192
|
-
|
193
|
-
|
194
|
-
|
125
|
+
importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |entry|
|
126
|
+
record = ActiveFedora::Base.find(entry.identifier)
|
127
|
+
next unless Hyrax.config.curation_concerns.include?(record.class)
|
128
|
+
bag = BagIt::Bag.new setup_bagit_folder(entry.identifier)
|
129
|
+
bag_entries = [entry]
|
130
|
+
|
131
|
+
record.file_sets.each do |fs|
|
132
|
+
if @file_set_ids.present?
|
133
|
+
file_set_entry = Bulkrax::CsvFileSetEntry.where("parsed_metadata LIKE '%#{fs.id}%'").first
|
134
|
+
bag_entries << file_set_entry unless file_set_entry.nil?
|
135
|
+
end
|
195
136
|
|
196
|
-
w.file_sets.each do |fs|
|
197
137
|
file_name = filename(fs)
|
198
138
|
next if file_name.blank?
|
199
139
|
io = open(fs.original_file.uri)
|
200
140
|
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
201
141
|
file.write(io.read)
|
202
142
|
file.close
|
203
|
-
|
143
|
+
begin
|
144
|
+
bag.add_file(file_name, file.path)
|
145
|
+
rescue => e
|
146
|
+
entry.status_info(e)
|
147
|
+
status_info(e)
|
148
|
+
end
|
204
149
|
end
|
205
|
-
|
206
|
-
|
150
|
+
|
151
|
+
CSV.open(setup_csv_metadata_export_file(entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
152
|
+
bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
|
207
153
|
end
|
208
|
-
write_triples(
|
154
|
+
write_triples(entry)
|
209
155
|
bag.manifest!(algo: 'sha256')
|
210
156
|
end
|
211
157
|
end
|
212
|
-
# rubocop:enable Metrics/AbcSize
|
158
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
213
159
|
|
214
160
|
def setup_csv_metadata_export_file(id)
|
215
161
|
File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
|
@@ -221,42 +167,6 @@ module Bulkrax
|
|
221
167
|
key != source_identifier.to_s
|
222
168
|
end
|
223
169
|
|
224
|
-
# All possible column names
|
225
|
-
def export_headers
|
226
|
-
headers = sort_headers(self.headers)
|
227
|
-
|
228
|
-
# we don't want access_control_id exported and we want file at the end
|
229
|
-
headers.delete('access_control_id') if headers.include?('access_control_id')
|
230
|
-
|
231
|
-
# add the headers below at the beginning or end to maintain the preexisting export behavior
|
232
|
-
headers.prepend('model')
|
233
|
-
headers.prepend(source_identifier.to_s)
|
234
|
-
headers.prepend('id')
|
235
|
-
|
236
|
-
headers.uniq
|
237
|
-
end
|
238
|
-
|
239
|
-
def object_names
|
240
|
-
return @object_names if @object_names
|
241
|
-
|
242
|
-
@object_names = mapping.values.map { |value| value['object'] }
|
243
|
-
@object_names.uniq!.delete(nil)
|
244
|
-
|
245
|
-
@object_names
|
246
|
-
end
|
247
|
-
|
248
|
-
def sort_headers(headers)
|
249
|
-
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
250
|
-
# while keeping objects grouped together
|
251
|
-
headers.sort_by do |item|
|
252
|
-
number = item.match(/\d+/)&.[](0) || 0.to_s
|
253
|
-
sort_number = number.rjust(4, "0")
|
254
|
-
object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
|
255
|
-
remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
|
256
|
-
"#{object_prefix}_#{sort_number}_#{remainder}"
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
170
|
def setup_triple_metadata_export_file(id)
|
261
171
|
File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
|
262
172
|
end
|
@@ -276,11 +186,6 @@ module Bulkrax
|
|
276
186
|
end
|
277
187
|
end
|
278
188
|
|
279
|
-
def required_elements?(keys)
|
280
|
-
return if keys.blank?
|
281
|
-
!required_elements.map { |el| keys.map(&:to_s).include?(el) }.include?(false)
|
282
|
-
end
|
283
|
-
|
284
189
|
# @todo - investigate getting directory structure
|
285
190
|
# @todo - investigate using perform_later, and having the importer check for
|
286
191
|
# DownloadCloudFileJob before it starts
|
@@ -331,5 +236,11 @@ module Bulkrax
|
|
331
236
|
return nil unless bag.valid?
|
332
237
|
bag
|
333
238
|
end
|
239
|
+
|
240
|
+
# use the version of this method from the application parser instead
|
241
|
+
def real_import_file_path
|
242
|
+
return importer_unzip_path if file? && zip?
|
243
|
+
parser_fields['import_file_path']
|
244
|
+
end
|
334
245
|
end
|
335
246
|
end
|
@@ -195,6 +195,9 @@ module Bulkrax
|
|
195
195
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
196
|
when 'collection'
|
197
197
|
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
198
|
+
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
199
|
+
when 'collections metadata'
|
200
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
201
|
when 'worktype'
|
199
202
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
200
203
|
when 'importer'
|
@@ -251,6 +254,7 @@ module Bulkrax
|
|
251
254
|
end
|
252
255
|
end
|
253
256
|
alias create_from_collection create_new_entries
|
257
|
+
alias create_from_collections_metadata create_new_entries
|
254
258
|
alias create_from_importer create_new_entries
|
255
259
|
alias create_from_worktype create_new_entries
|
256
260
|
alias create_from_all create_new_entries
|
@@ -268,8 +272,8 @@ module Bulkrax
|
|
268
272
|
CsvFileSetEntry
|
269
273
|
end
|
270
274
|
|
271
|
-
#
|
272
|
-
#
|
275
|
+
# TODO: figure out why using the version of this method that's in the bagit parser
|
276
|
+
# breaks specs for the "if importer?" line
|
273
277
|
def total
|
274
278
|
@total = importer.parser_fields['total'] || 0 if importer?
|
275
279
|
@total = limit || current_record_ids.count if exporter?
|
@@ -378,10 +382,11 @@ module Bulkrax
|
|
378
382
|
end
|
379
383
|
|
380
384
|
# Retrieve the path where we expect to find the files
|
381
|
-
def path_to_files
|
385
|
+
def path_to_files(**args)
|
386
|
+
filename = args.fetch(:filename, '')
|
387
|
+
|
382
388
|
@path_to_files ||= File.join(
|
383
|
-
zip? ? importer_unzip_path : File.dirname(import_file_path),
|
384
|
-
'files'
|
389
|
+
zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
|
385
390
|
)
|
386
391
|
end
|
387
392
|
|
@@ -15,20 +15,21 @@
|
|
15
15
|
|
16
16
|
<%= form.hidden_field :user_id, value: current_user.id %>
|
17
17
|
|
18
|
-
<%= form.input :export_type,
|
19
|
-
collection: form.object.export_type_list,
|
20
|
-
label: t('bulkrax.exporter.labels.export_type'),
|
18
|
+
<%= form.input :export_type,
|
19
|
+
collection: form.object.export_type_list,
|
20
|
+
label: t('bulkrax.exporter.labels.export_type'),
|
21
21
|
required: true,
|
22
22
|
prompt: 'Please select an export type' %>
|
23
23
|
|
24
|
-
<%= form.input :export_from,
|
25
|
-
collection: form.object.export_from_list,
|
26
|
-
label: t('bulkrax.exporter.labels.export_from'),
|
24
|
+
<%= form.input :export_from,
|
25
|
+
collection: form.object.export_from_list,
|
26
|
+
label: t('bulkrax.exporter.labels.export_from'),
|
27
27
|
required: true,
|
28
28
|
prompt: 'Please select an export source' %>
|
29
29
|
|
30
30
|
<%= form.input :export_source_importer,
|
31
31
|
label: t('bulkrax.exporter.labels.importer'),
|
32
|
+
required: true,
|
32
33
|
prompt: 'Select from the list',
|
33
34
|
label_html: { class: 'importer export-source-option hidden' },
|
34
35
|
input_html: { class: 'importer export-source-option hidden' },
|
@@ -37,6 +38,7 @@
|
|
37
38
|
<%= form.input :export_source_collection,
|
38
39
|
prompt: 'Start typing ...',
|
39
40
|
label: t('bulkrax.exporter.labels.collection'),
|
41
|
+
required: true,
|
40
42
|
placeholder: @collection&.title&.first,
|
41
43
|
label_html: { class: 'collection export-source-option hidden' },
|
42
44
|
input_html: {
|
@@ -50,13 +52,14 @@
|
|
50
52
|
|
51
53
|
<%= form.input :export_source_worktype,
|
52
54
|
label: t('bulkrax.exporter.labels.worktype'),
|
55
|
+
required: true,
|
53
56
|
prompt: 'Select from the list',
|
54
57
|
label_html: { class: 'worktype export-source-option hidden' },
|
55
58
|
input_html: { class: 'worktype export-source-option hidden' },
|
56
59
|
collection: Hyrax.config.curation_concerns.map {|cc| [cc.to_s, cc.to_s] } %>
|
57
60
|
|
58
|
-
<%= form.input :limit,
|
59
|
-
as: :integer,
|
61
|
+
<%= form.input :limit,
|
62
|
+
as: :integer,
|
60
63
|
hint: 'leave blank or 0 for all records',
|
61
64
|
label: t('bulkrax.exporter.labels.limit') %>
|
62
65
|
|
@@ -90,8 +93,8 @@
|
|
90
93
|
collection: form.object.workflow_status_list,
|
91
94
|
label: t('bulkrax.exporter.labels.status') %>
|
92
95
|
|
93
|
-
<%= form.input :parser_klass,
|
94
|
-
collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
|
96
|
+
<%= form.input :parser_klass,
|
97
|
+
collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
|
95
98
|
label: t('bulkrax.exporter.labels.export_format') %>
|
96
99
|
</div>
|
97
100
|
|
@@ -40,6 +40,11 @@
|
|
40
40
|
<% when 'collection' %>
|
41
41
|
<% collection = Collection.find(@exporter.export_source) %>
|
42
42
|
<%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
|
43
|
+
<% when 'collections metadata' %>
|
44
|
+
<% collections = Collection.all %>
|
45
|
+
<% collections.each_with_index do |c, i| %>
|
46
|
+
<%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
|
47
|
+
<% end %>
|
43
48
|
<% when 'importer' %>
|
44
49
|
<% importer = Bulkrax::Importer.find(@exporter.export_source) %>
|
45
50
|
<%= link_to importer.name, bulkrax.importer_path(importer.id) %>
|
@@ -1,7 +1,17 @@
|
|
1
1
|
class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
|
2
|
-
def
|
2
|
+
def up
|
3
3
|
if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
|
4
|
+
remove_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs
|
5
|
+
remove_index :bulkrax_pending_relationships, column: :bulkrax_importer_run_id
|
6
|
+
|
4
7
|
rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
|
8
|
+
|
9
|
+
add_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs, column: :importer_run_id
|
10
|
+
add_index :bulkrax_pending_relationships, :importer_run_id, name: 'index_bulkrax_pending_relationships_on_importer_run_id'
|
5
11
|
end
|
6
12
|
end
|
13
|
+
|
14
|
+
def down
|
15
|
+
rename_column :bulkrax_pending_relationships, :importer_run_id, :bulkrax_importer_run_id
|
16
|
+
end
|
7
17
|
end
|
data/lib/bulkrax/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -25,81 +25,81 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 5.1.6
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: bagit
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '0.4'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '0.4'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: coderay
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: iso8601
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.9.0
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 0.9.0
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: kaminari
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0
|
75
|
+
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: language_list
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
-
- - "
|
89
|
+
version: '1.2'
|
90
|
+
- - ">="
|
91
91
|
- !ruby/object:Gem::Version
|
92
|
-
version: 2.
|
92
|
+
version: 1.2.1
|
93
93
|
type: :runtime
|
94
94
|
prerelease: false
|
95
95
|
version_requirements: !ruby/object:Gem::Requirement
|
96
96
|
requirements:
|
97
|
-
- - "
|
97
|
+
- - "~>"
|
98
98
|
- !ruby/object:Gem::Version
|
99
|
-
version: '
|
100
|
-
- - "
|
99
|
+
version: '1.2'
|
100
|
+
- - ">="
|
101
101
|
- !ruby/object:Gem::Version
|
102
|
-
version: 2.
|
102
|
+
version: 1.2.1
|
103
103
|
- !ruby/object:Gem::Dependency
|
104
104
|
name: libxml-ruby
|
105
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -115,61 +115,75 @@ dependencies:
|
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 3.1.0
|
117
117
|
- !ruby/object:Gem::Dependency
|
118
|
-
name:
|
118
|
+
name: loofah
|
119
119
|
requirement: !ruby/object:Gem::Requirement
|
120
120
|
requirements:
|
121
|
-
- - "~>"
|
122
|
-
- !ruby/object:Gem::Version
|
123
|
-
version: '1.2'
|
124
121
|
- - ">="
|
125
122
|
- !ruby/object:Gem::Version
|
126
|
-
version:
|
123
|
+
version: 2.2.3
|
127
124
|
type: :runtime
|
128
125
|
prerelease: false
|
129
126
|
version_requirements: !ruby/object:Gem::Requirement
|
130
127
|
requirements:
|
131
|
-
- - "~>"
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
version: '1.2'
|
134
128
|
- - ">="
|
135
129
|
- !ruby/object:Gem::Version
|
136
|
-
version:
|
130
|
+
version: 2.2.3
|
137
131
|
- !ruby/object:Gem::Dependency
|
138
|
-
name:
|
132
|
+
name: oai
|
139
133
|
requirement: !ruby/object:Gem::Requirement
|
140
134
|
requirements:
|
141
135
|
- - ">="
|
142
136
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
137
|
+
version: '0.4'
|
144
138
|
- - "<"
|
145
139
|
- !ruby/object:Gem::Version
|
146
|
-
version:
|
140
|
+
version: 2.x
|
147
141
|
type: :runtime
|
148
142
|
prerelease: false
|
149
143
|
version_requirements: !ruby/object:Gem::Requirement
|
150
144
|
requirements:
|
151
145
|
- - ">="
|
152
146
|
- !ruby/object:Gem::Version
|
153
|
-
version:
|
147
|
+
version: '0.4'
|
154
148
|
- - "<"
|
155
149
|
- !ruby/object:Gem::Version
|
156
|
-
version:
|
150
|
+
version: 2.x
|
157
151
|
- !ruby/object:Gem::Dependency
|
158
|
-
name:
|
152
|
+
name: rack
|
159
153
|
requirement: !ruby/object:Gem::Requirement
|
160
154
|
requirements:
|
161
|
-
- - "
|
155
|
+
- - ">="
|
162
156
|
- !ruby/object:Gem::Version
|
163
|
-
version:
|
157
|
+
version: 2.0.6
|
164
158
|
type: :runtime
|
165
159
|
prerelease: false
|
166
160
|
version_requirements: !ruby/object:Gem::Requirement
|
167
161
|
requirements:
|
168
|
-
- - "
|
162
|
+
- - ">="
|
169
163
|
- !ruby/object:Gem::Version
|
170
|
-
version:
|
164
|
+
version: 2.0.6
|
171
165
|
- !ruby/object:Gem::Dependency
|
172
|
-
name:
|
166
|
+
name: rdf
|
167
|
+
requirement: !ruby/object:Gem::Requirement
|
168
|
+
requirements:
|
169
|
+
- - ">="
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: 2.0.2
|
172
|
+
- - "<"
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: '4.0'
|
175
|
+
type: :runtime
|
176
|
+
prerelease: false
|
177
|
+
version_requirements: !ruby/object:Gem::Requirement
|
178
|
+
requirements:
|
179
|
+
- - ">="
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: 2.0.2
|
182
|
+
- - "<"
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '4.0'
|
185
|
+
- !ruby/object:Gem::Dependency
|
186
|
+
name: rubyzip
|
173
187
|
requirement: !ruby/object:Gem::Requirement
|
174
188
|
requirements:
|
175
189
|
- - ">="
|
@@ -183,7 +197,7 @@ dependencies:
|
|
183
197
|
- !ruby/object:Gem::Version
|
184
198
|
version: '0'
|
185
199
|
- !ruby/object:Gem::Dependency
|
186
|
-
name:
|
200
|
+
name: simple_form
|
187
201
|
requirement: !ruby/object:Gem::Requirement
|
188
202
|
requirements:
|
189
203
|
- - ">="
|
@@ -390,7 +404,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
390
404
|
licenses:
|
391
405
|
- Apache-2.0
|
392
406
|
metadata: {}
|
393
|
-
post_install_message:
|
407
|
+
post_install_message:
|
394
408
|
rdoc_options: []
|
395
409
|
require_paths:
|
396
410
|
- lib
|
@@ -405,8 +419,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
405
419
|
- !ruby/object:Gem::Version
|
406
420
|
version: '0'
|
407
421
|
requirements: []
|
408
|
-
rubygems_version: 3.
|
409
|
-
signing_key:
|
422
|
+
rubygems_version: 3.0.3
|
423
|
+
signing_key:
|
410
424
|
specification_version: 4
|
411
425
|
summary: Import and export tool for Hyrax and Hyku
|
412
426
|
test_files: []
|