bulkrax 3.5.1 → 4.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -5
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/models/bulkrax/entry.rb +0 -2
- data/app/models/bulkrax/exporter.rb +16 -6
- data/app/models/concerns/bulkrax/export_behavior.rb +0 -22
- data/app/parsers/bulkrax/application_parser.rb +7 -26
- data/app/parsers/bulkrax/bagit_parser.rb +47 -38
- data/app/parsers/bulkrax/csv_parser.rb +70 -9
- data/app/views/bulkrax/exporters/_downloads.html.erb +8 -0
- data/app/views/bulkrax/exporters/index.html.erb +5 -2
- data/app/views/bulkrax/exporters/show.html.erb +4 -12
- data/config/locales/bulkrax.en.yml +0 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/tasks/bulkrax_tasks.rake +28 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e466ce0a1d1fe4b2c3baa9a6a5356b0138b5e7c1f2e1b88a7d17b852257f6b46
|
4
|
+
data.tar.gz: 52b585cd22870e2b4d586b3848cd48f8106f39a54341bc2aeba91a05079b9f0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbb4169d6956f98f3b27ac143f0f16f6ac1adb86fa66a7819fad73f28d45ba5a8578b306de8d4319112a2bf1e4b4edb4aeb3b149abfa0bd840b4eb19a1aa4172
|
7
|
+
data.tar.gz: b06a77c964f82d2fa17b8cde3cff5413ceb0e9ab21eb9b7e0c299d6b42a3217308fc497474151f6b51022b7d2a48b2b2af4262a7d97941038aa6505b32e053d4
|
data/README.md
CHANGED
@@ -70,7 +70,7 @@ Bulkrax.setup do |config|
|
|
70
70
|
end
|
71
71
|
```
|
72
72
|
|
73
|
-
The [configuration guide](https://github.com/samvera-labs/bulkrax/wiki/
|
73
|
+
The [configuration guide](https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax) provides detailed instructions on the various available configurations.
|
74
74
|
|
75
75
|
Example:
|
76
76
|
|
@@ -120,7 +120,7 @@ It's unlikely that the incoming import data has fields that exactly match those
|
|
120
120
|
|
121
121
|
By default, a mapping for the OAI parser has been added to map standard oai_dc fields to Hyrax basic_metadata. The other parsers have no default mapping, and will map any incoming fields to Hyrax properties with the same name. Configurations can be added in `config/initializers/bulkrax.rb`
|
122
122
|
|
123
|
-
Configuring field mappings is documented in the [Bulkrax Configuration Guide](https://github.com/samvera-labs/bulkrax/wiki/
|
123
|
+
Configuring field mappings is documented in the [Bulkrax Configuration Guide](https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax).
|
124
124
|
|
125
125
|
## Importing Files
|
126
126
|
|
@@ -151,7 +151,7 @@ end
|
|
151
151
|
|
152
152
|
## Customizing Bulkrax
|
153
153
|
|
154
|
-
For further information on how to extend and customize Bulkrax, please see the [Bulkrax Customization Guide](https://github.com/samvera-labs/bulkrax/wiki/Customizing).
|
154
|
+
For further information on how to extend and customize Bulkrax, please see the [Bulkrax Customization Guide](https://github.com/samvera-labs/bulkrax/wiki/Customizing-Bulkrax).
|
155
155
|
|
156
156
|
## How it Works
|
157
157
|
Once you have Bulkrax installed, you will have access to an easy-to-use interface with which you are able to create, edit, delete, run, and re-run imports and exports.
|
@@ -191,8 +191,6 @@ We encourage everyone to help improve this project. Bug reports and pull reques
|
|
191
191
|
|
192
192
|
This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](https://contributor-covenant.org) code of conduct.
|
193
193
|
|
194
|
-
All Contributors should have signed the Samvera Contributor License Agreement (CLA)
|
195
|
-
|
196
194
|
## Questions
|
197
195
|
Questions can be sent to support@notch8.com. Please make sure to include "Bulkrax" in the subject line of your email.
|
198
196
|
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,8 +4,6 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
-
# TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
|
8
|
-
class BagitZipError < RuntimeError; end
|
9
7
|
class Entry < ApplicationRecord
|
10
8
|
include Bulkrax::HasMatchers
|
11
9
|
include Bulkrax::ImportBehavior
|
@@ -14,15 +14,13 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
21
|
case self.export_from
|
22
22
|
when 'collection'
|
23
23
|
create_from_collection
|
24
|
-
when 'collections metadata'
|
25
|
-
create_from_collections_metadata
|
26
24
|
when 'importer'
|
27
25
|
create_from_importer
|
28
26
|
when 'worktype'
|
@@ -89,7 +87,6 @@ module Bulkrax
|
|
89
87
|
[
|
90
88
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
91
89
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
92
|
-
[I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
|
93
90
|
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
94
91
|
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
95
92
|
]
|
@@ -124,9 +121,13 @@ module Bulkrax
|
|
124
121
|
end
|
125
122
|
|
126
123
|
def exporter_export_zip_path
|
127
|
-
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}
|
124
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}")
|
128
125
|
rescue
|
129
|
-
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0
|
126
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0")
|
127
|
+
end
|
128
|
+
|
129
|
+
def exporter_export_zip_files
|
130
|
+
@exporter_export_zip_files ||= Dir["#{exporter_export_zip_path}/**"].map { |zip| Array(zip.split('/').last) }
|
130
131
|
end
|
131
132
|
|
132
133
|
def export_properties
|
@@ -137,5 +138,14 @@ module Bulkrax
|
|
137
138
|
def metadata_only?
|
138
139
|
export_type == 'metadata'
|
139
140
|
end
|
141
|
+
|
142
|
+
def sort_zip_files(zip_files)
|
143
|
+
zip_files.sort_by do |item|
|
144
|
+
number = item.split('_').last.match(/\d+/)&.[](0) || 0.to_s
|
145
|
+
sort_number = number.rjust(4, "0")
|
146
|
+
|
147
|
+
sort_number
|
148
|
+
end
|
149
|
+
end
|
140
150
|
end
|
141
151
|
end
|
@@ -7,9 +7,6 @@ module Bulkrax
|
|
7
7
|
|
8
8
|
def build_for_exporter
|
9
9
|
build_export_metadata
|
10
|
-
# TODO(alishaevn): determine if the line below is still necessary
|
11
|
-
# the csv and bagit parsers also have write_files methods
|
12
|
-
write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
|
13
10
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
14
11
|
raise e
|
15
12
|
rescue StandardError => e
|
@@ -26,25 +23,6 @@ module Bulkrax
|
|
26
23
|
@hyrax_record ||= ActiveFedora::Base.find(self.identifier)
|
27
24
|
end
|
28
25
|
|
29
|
-
def write_files
|
30
|
-
return if hyrax_record.is_a?(Collection)
|
31
|
-
|
32
|
-
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
33
|
-
file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
|
34
|
-
file_sets.each do |fs|
|
35
|
-
path = File.join(exporter_export_path, 'files')
|
36
|
-
FileUtils.mkdir_p(path)
|
37
|
-
file = filename(fs)
|
38
|
-
require 'open-uri'
|
39
|
-
io = open(fs.original_file.uri)
|
40
|
-
next if file.blank?
|
41
|
-
File.open(File.join(path, file), 'wb') do |f|
|
42
|
-
f.write(io.read)
|
43
|
-
f.close
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
26
|
# Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
|
49
27
|
def filename(file_set)
|
50
28
|
return if file_set.original_file.blank?
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
def get_field_mapping_hash_for(key)
|
76
76
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
77
77
|
|
78
|
-
mapping = importerexporter.field_mapping
|
78
|
+
mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {}
|
79
79
|
instance_variable_set(
|
80
80
|
"@#{key}_hash",
|
81
81
|
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
@@ -247,8 +247,6 @@ module Bulkrax
|
|
247
247
|
def write
|
248
248
|
write_files
|
249
249
|
zip
|
250
|
-
# uncomment next line to debug for faulty zipping during bagit export
|
251
|
-
bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
|
252
250
|
end
|
253
251
|
|
254
252
|
def unzip(file_to_unzip)
|
@@ -262,30 +260,13 @@ module Bulkrax
|
|
262
260
|
end
|
263
261
|
|
264
262
|
def zip
|
265
|
-
FileUtils.
|
266
|
-
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
267
|
-
Dir["#{exporter_export_path}/**/**"].each do |file|
|
268
|
-
zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
|
269
|
-
end
|
270
|
-
end
|
271
|
-
end
|
263
|
+
FileUtils.mkdir_p(exporter_export_zip_path)
|
272
264
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
begin
|
279
|
-
raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
|
280
|
-
rescue BagitZipError => e
|
281
|
-
matched_entry_ids = importerexporter.entry_ids.select do |id|
|
282
|
-
Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
|
283
|
-
end
|
284
|
-
matched_entry_ids.each do |entry_id|
|
285
|
-
Bulkrax::Entry.find(entry_id).status_info(e)
|
286
|
-
status_info('Complete (with failures)')
|
287
|
-
end
|
288
|
-
end
|
265
|
+
Dir["#{exporter_export_path}/**"].each do |folder|
|
266
|
+
zip_path = "#{exporter_export_zip_path.split('/').last}_#{folder.split('/').last}.zip"
|
267
|
+
Zip::File.open(File.join("#{exporter_export_zip_path}/#{zip_path}"), create: true) do |zip_file|
|
268
|
+
Dir["#{folder}/**/**"].each do |file|
|
269
|
+
zip_file.add(file.sub("#{folder}/", ''), file)
|
289
270
|
end
|
290
271
|
end
|
291
272
|
end
|
@@ -97,43 +97,42 @@ module Bulkrax
|
|
97
97
|
@total = 0
|
98
98
|
end
|
99
99
|
|
100
|
-
def current_record_ids
|
101
|
-
@work_ids = []
|
102
|
-
@collection_ids = []
|
103
|
-
@file_set_ids = []
|
104
|
-
|
105
|
-
case importerexporter.export_from
|
106
|
-
when 'all'
|
107
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
108
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
109
|
-
when 'collection'
|
110
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
111
|
-
when 'worktype'
|
112
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
113
|
-
when 'importer'
|
114
|
-
set_ids_for_exporting_from_importer
|
115
|
-
end
|
116
|
-
@work_ids + @collection_ids + @file_set_ids
|
117
|
-
end
|
118
|
-
|
119
100
|
# export methods
|
120
101
|
|
121
102
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
122
103
|
def write_files
|
123
104
|
require 'open-uri'
|
124
105
|
require 'socket'
|
125
|
-
|
106
|
+
|
107
|
+
folder_count = 1
|
108
|
+
records_in_folder = 0
|
109
|
+
work_entries = importerexporter.entries.where(identifier: @work_ids)
|
110
|
+
collection_entries = importerexporter.entries.where(identifier: @collection_ids)
|
111
|
+
file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
|
112
|
+
|
113
|
+
work_entries[0..limit || total].each do |entry|
|
126
114
|
record = ActiveFedora::Base.find(entry.identifier)
|
127
|
-
next unless
|
128
|
-
|
115
|
+
next unless record
|
116
|
+
|
129
117
|
bag_entries = [entry]
|
130
118
|
|
131
|
-
record.
|
132
|
-
if
|
133
|
-
|
134
|
-
bag_entries << file_set_entry unless file_set_entry.nil?
|
135
|
-
end
|
119
|
+
if record.member_of_collection_ids.present?
|
120
|
+
collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
|
121
|
+
end
|
136
122
|
|
123
|
+
if record.file_sets.present?
|
124
|
+
file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
|
125
|
+
end
|
126
|
+
|
127
|
+
records_in_folder += bag_entries.count
|
128
|
+
if records_in_folder > records_split_count
|
129
|
+
folder_count += 1
|
130
|
+
records_in_folder = bag_entries.count
|
131
|
+
end
|
132
|
+
|
133
|
+
bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
|
134
|
+
|
135
|
+
record.file_sets.each do |fs|
|
137
136
|
file_name = filename(fs)
|
138
137
|
next if file_name.blank?
|
139
138
|
io = open(fs.original_file.uri)
|
@@ -141,24 +140,28 @@ module Bulkrax
|
|
141
140
|
file.write(io.read)
|
142
141
|
file.close
|
143
142
|
begin
|
144
|
-
bag.add_file(file_name, file.path)
|
143
|
+
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
145
144
|
rescue => e
|
146
145
|
entry.status_info(e)
|
147
146
|
status_info(e)
|
148
147
|
end
|
149
148
|
end
|
150
149
|
|
151
|
-
CSV.open(setup_csv_metadata_export_file(entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
150
|
+
CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
152
151
|
bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
|
153
152
|
end
|
154
|
-
|
153
|
+
|
154
|
+
write_triples(folder_count, entry)
|
155
155
|
bag.manifest!(algo: 'sha256')
|
156
156
|
end
|
157
157
|
end
|
158
158
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
159
159
|
|
160
|
-
def setup_csv_metadata_export_file(id)
|
161
|
-
File.join(importerexporter.exporter_export_path,
|
160
|
+
def setup_csv_metadata_export_file(folder_count, id)
|
161
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
162
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
163
|
+
|
164
|
+
File.join(path, id, 'metadata.csv')
|
162
165
|
end
|
163
166
|
|
164
167
|
def key_allowed(key)
|
@@ -167,21 +170,27 @@ module Bulkrax
|
|
167
170
|
key != source_identifier.to_s
|
168
171
|
end
|
169
172
|
|
170
|
-
def setup_triple_metadata_export_file(id)
|
171
|
-
File.join(importerexporter.exporter_export_path,
|
173
|
+
def setup_triple_metadata_export_file(folder_count, id)
|
174
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
175
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
176
|
+
|
177
|
+
File.join(path, id, 'metadata.nt')
|
172
178
|
end
|
173
179
|
|
174
|
-
def setup_bagit_folder(id)
|
175
|
-
File.join(importerexporter.exporter_export_path,
|
180
|
+
def setup_bagit_folder(folder_count, id)
|
181
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
182
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
183
|
+
|
184
|
+
File.join(path, id)
|
176
185
|
end
|
177
186
|
|
178
|
-
def write_triples(e)
|
187
|
+
def write_triples(folder_count, e)
|
179
188
|
sd = SolrDocument.find(e.identifier)
|
180
189
|
return if sd.nil?
|
181
190
|
|
182
191
|
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
183
192
|
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
184
|
-
File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
|
193
|
+
File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
|
185
194
|
triples.write(rdf)
|
186
195
|
end
|
187
196
|
end
|
@@ -4,6 +4,7 @@ require 'csv'
|
|
4
4
|
module Bulkrax
|
5
5
|
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
6
|
include ErroredEntries
|
7
|
+
include ExportBehavior
|
7
8
|
attr_writer :collections, :file_sets, :works
|
8
9
|
|
9
10
|
def self.export_supported?
|
@@ -183,6 +184,7 @@ module Bulkrax
|
|
183
184
|
current_record_ids
|
184
185
|
end
|
185
186
|
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
186
188
|
def current_record_ids
|
187
189
|
@work_ids = []
|
188
190
|
@collection_ids = []
|
@@ -194,18 +196,28 @@ module Bulkrax
|
|
194
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
195
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
198
|
when 'collection'
|
197
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
199
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
|
200
|
+
# get the parent collection and child collections
|
198
201
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
199
|
-
|
200
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
|
201
203
|
when 'worktype'
|
202
204
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
203
205
|
when 'importer'
|
204
206
|
set_ids_for_exporting_from_importer
|
205
207
|
end
|
206
208
|
|
209
|
+
find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection'
|
210
|
+
|
207
211
|
@work_ids + @collection_ids + @file_set_ids
|
208
212
|
end
|
213
|
+
# rubocop:enable Metrics/AbcSize
|
214
|
+
|
215
|
+
# find the related file set ids so entries can be made for export
|
216
|
+
def find_child_file_sets(work_ids)
|
217
|
+
work_ids.each do |id|
|
218
|
+
ActiveFedora::Base.find(id).file_set_ids.each { |fs_id| @file_set_ids << fs_id }
|
219
|
+
end
|
220
|
+
end
|
209
221
|
|
210
222
|
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
211
223
|
# @see #current_record_ids
|
@@ -254,7 +266,6 @@ module Bulkrax
|
|
254
266
|
end
|
255
267
|
end
|
256
268
|
alias create_from_collection create_new_entries
|
257
|
-
alias create_from_collections_metadata create_new_entries
|
258
269
|
alias create_from_importer create_new_entries
|
259
270
|
alias create_from_worktype create_new_entries
|
260
271
|
alias create_from_all create_new_entries
|
@@ -283,6 +294,10 @@ module Bulkrax
|
|
283
294
|
@total = 0
|
284
295
|
end
|
285
296
|
|
297
|
+
def records_split_count
|
298
|
+
1000
|
299
|
+
end
|
300
|
+
|
286
301
|
# @todo - investigate getting directory structure
|
287
302
|
# @todo - investigate using perform_later, and having the importer check for
|
288
303
|
# DownloadCloudFileJob before it starts
|
@@ -307,9 +322,38 @@ module Bulkrax
|
|
307
322
|
# export methods
|
308
323
|
|
309
324
|
def write_files
|
310
|
-
|
311
|
-
|
312
|
-
|
325
|
+
require 'open-uri'
|
326
|
+
folder_count = 0
|
327
|
+
sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
|
328
|
+
|
329
|
+
sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
|
330
|
+
folder_count += 1
|
331
|
+
|
332
|
+
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
333
|
+
group.each do |entry|
|
334
|
+
csv << entry.parsed_metadata
|
335
|
+
next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
|
336
|
+
|
337
|
+
store_files(entry.identifier, folder_count.to_s)
|
338
|
+
end
|
339
|
+
end
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
def store_files(identifier, folder_count)
|
344
|
+
record = ActiveFedora::Base.find(identifier)
|
345
|
+
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
346
|
+
file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
|
347
|
+
file_sets.each do |fs|
|
348
|
+
path = File.join(exporter_export_path, folder_count, 'files')
|
349
|
+
FileUtils.mkdir_p(path) unless File.exist? path
|
350
|
+
file = filename(fs)
|
351
|
+
io = open(fs.original_file.uri)
|
352
|
+
next if file.blank?
|
353
|
+
|
354
|
+
File.open(File.join(path, file), 'wb') do |f|
|
355
|
+
f.write(io.read)
|
356
|
+
f.close
|
313
357
|
end
|
314
358
|
end
|
315
359
|
end
|
@@ -343,6 +387,20 @@ module Bulkrax
|
|
343
387
|
@object_names
|
344
388
|
end
|
345
389
|
|
390
|
+
def sort_entries(entries)
|
391
|
+
# always export models in the same order: work, collection, file set
|
392
|
+
entries.sort_by do |entry|
|
393
|
+
case entry.type
|
394
|
+
when 'Bulkrax::CsvEntry'
|
395
|
+
'0'
|
396
|
+
when 'Bulkrax::CsvCollectionEntry'
|
397
|
+
'1'
|
398
|
+
when 'Bulkrax::CsvFileSetEntry'
|
399
|
+
'2'
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
346
404
|
def sort_headers(headers)
|
347
405
|
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
348
406
|
# while keeping objects grouped together
|
@@ -356,8 +414,11 @@ module Bulkrax
|
|
356
414
|
end
|
357
415
|
|
358
416
|
# in the parser as it is specific to the format
|
359
|
-
def setup_export_file
|
360
|
-
File.join(importerexporter.exporter_export_path,
|
417
|
+
def setup_export_file(folder_count)
|
418
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
419
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
420
|
+
|
421
|
+
File.join(path, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}_#{folder_count}.csv")
|
361
422
|
end
|
362
423
|
|
363
424
|
# Retrieve file paths for [:file] mapping in records
|
@@ -21,7 +21,7 @@
|
|
21
21
|
<th scope="col">Name</th>
|
22
22
|
<th scope="col">Status</th>
|
23
23
|
<th scope="col">Date Exported</th>
|
24
|
-
<th scope="col"
|
24
|
+
<th scope="col">Downloadable Files</th>
|
25
25
|
<th scope="col"></th>
|
26
26
|
<th scope="col"></th>
|
27
27
|
<th scope="col"></th>
|
@@ -35,7 +35,10 @@
|
|
35
35
|
<td><%= exporter.created_at %></td>
|
36
36
|
<td>
|
37
37
|
<% if File.exist?(exporter.exporter_export_zip_path) %>
|
38
|
-
<%=
|
38
|
+
<%= simple_form_for(exporter, method: :get, url: exporter_download_path(exporter)) do |form| %>
|
39
|
+
<%= render 'downloads', exporter: exporter, form: form %>
|
40
|
+
<%= form.button :submit, value: 'Download', data: { disable_with: false } %>
|
41
|
+
<% end %>
|
39
42
|
<% end%>
|
40
43
|
</td>
|
41
44
|
<td><%= link_to raw('<span class="glyphicon glyphicon-info-sign"></span>'), exporter_path(exporter) %></td>
|
@@ -8,10 +8,11 @@
|
|
8
8
|
<div class='panel-body'>
|
9
9
|
|
10
10
|
<% if File.exist?(@exporter.exporter_export_zip_path) %>
|
11
|
-
|
11
|
+
<%= simple_form_for @exporter, method: :get, url: exporter_download_path(@exporter), html: { class: 'form-inline bulkrax-p-align' } do |form| %>
|
12
12
|
<strong>Download:</strong>
|
13
|
-
<%=
|
14
|
-
|
13
|
+
<%= render 'downloads', exporter: @exporter, form: form %>
|
14
|
+
<%= form.button :submit, value: 'Download', data: { disable_with: false } %>
|
15
|
+
<% end %>
|
15
16
|
<% end %>
|
16
17
|
|
17
18
|
<p class='bulkrax-p-align'>
|
@@ -40,11 +41,6 @@
|
|
40
41
|
<% when 'collection' %>
|
41
42
|
<% collection = Collection.find(@exporter.export_source) %>
|
42
43
|
<%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
|
43
|
-
<% when 'collections metadata' %>
|
44
|
-
<% collections = Collection.all %>
|
45
|
-
<% collections.each_with_index do |c, i| %>
|
46
|
-
<%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
|
47
|
-
<% end %>
|
48
44
|
<% when 'importer' %>
|
49
45
|
<% importer = Bulkrax::Importer.find(@exporter.export_source) %>
|
50
46
|
<%= link_to importer.name, bulkrax.importer_path(importer.id) %>
|
@@ -135,10 +131,6 @@
|
|
135
131
|
<%= page_entries_info(@work_entries) %><br>
|
136
132
|
<%= paginate(@work_entries, param_name: :work_entries_page) %>
|
137
133
|
<br>
|
138
|
-
<% if File.exist?(@exporter.exporter_export_zip_path) %>
|
139
|
-
<%= link_to 'Download', exporter_download_path(@exporter) %>
|
140
|
-
|
|
141
|
-
<% end %>
|
142
134
|
<%= link_to 'Edit', edit_exporter_path(@exporter) %>
|
143
135
|
|
|
144
136
|
<%= link_to 'Back', exporters_path %>
|
data/lib/bulkrax/version.rb
CHANGED
@@ -1,6 +1,30 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
namespace :bulkrax do
|
4
|
+
desc "Remove old exported zips and create new ones with the new file structure"
|
5
|
+
task rerun_all_exporters: :environment do
|
6
|
+
if defined?(::Hyku)
|
7
|
+
Account.find_each do |account|
|
8
|
+
puts "=============== updating #{account.name} ============"
|
9
|
+
next if account.name == "search"
|
10
|
+
switch!(account)
|
11
|
+
|
12
|
+
rerun_exporters_and_delete_zips
|
13
|
+
|
14
|
+
puts "=============== finished updating #{account.name} ============"
|
15
|
+
end
|
16
|
+
else
|
17
|
+
rerun_exporters_and_delete_zips
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def rerun_exporters_and_delete_zips
|
22
|
+
begin
|
23
|
+
Bulkrax::Exporter.all.each { |e| Bulkrax::ExporterJob.perform_later(e.id) }
|
24
|
+
rescue => e
|
25
|
+
puts "(#{e.message})"
|
26
|
+
end
|
27
|
+
|
28
|
+
Dir["tmp/exports/**.zip"].each { |zip_path| FileUtils.rm_rf(zip_path) }
|
29
|
+
end
|
30
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -331,6 +331,7 @@ files:
|
|
331
331
|
- app/views/bulkrax/entries/_parsed_metadata.html.erb
|
332
332
|
- app/views/bulkrax/entries/_raw_metadata.html.erb
|
333
333
|
- app/views/bulkrax/entries/show.html.erb
|
334
|
+
- app/views/bulkrax/exporters/_downloads.html.erb
|
334
335
|
- app/views/bulkrax/exporters/_form.html.erb
|
335
336
|
- app/views/bulkrax/exporters/edit.html.erb
|
336
337
|
- app/views/bulkrax/exporters/index.html.erb
|