bulkrax 3.5.1 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -5
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/models/bulkrax/entry.rb +0 -2
- data/app/models/bulkrax/exporter.rb +16 -6
- data/app/models/concerns/bulkrax/export_behavior.rb +0 -22
- data/app/parsers/bulkrax/application_parser.rb +7 -26
- data/app/parsers/bulkrax/bagit_parser.rb +47 -38
- data/app/parsers/bulkrax/csv_parser.rb +70 -9
- data/app/views/bulkrax/exporters/_downloads.html.erb +8 -0
- data/app/views/bulkrax/exporters/index.html.erb +5 -2
- data/app/views/bulkrax/exporters/show.html.erb +4 -12
- data/config/locales/bulkrax.en.yml +0 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/tasks/bulkrax_tasks.rake +28 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e466ce0a1d1fe4b2c3baa9a6a5356b0138b5e7c1f2e1b88a7d17b852257f6b46
|
4
|
+
data.tar.gz: 52b585cd22870e2b4d586b3848cd48f8106f39a54341bc2aeba91a05079b9f0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbb4169d6956f98f3b27ac143f0f16f6ac1adb86fa66a7819fad73f28d45ba5a8578b306de8d4319112a2bf1e4b4edb4aeb3b149abfa0bd840b4eb19a1aa4172
|
7
|
+
data.tar.gz: b06a77c964f82d2fa17b8cde3cff5413ceb0e9ab21eb9b7e0c299d6b42a3217308fc497474151f6b51022b7d2a48b2b2af4262a7d97941038aa6505b32e053d4
|
data/README.md
CHANGED
@@ -70,7 +70,7 @@ Bulkrax.setup do |config|
|
|
70
70
|
end
|
71
71
|
```
|
72
72
|
|
73
|
-
The [configuration guide](https://github.com/samvera-labs/bulkrax/wiki/
|
73
|
+
The [configuration guide](https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax) provides detailed instructions on the various available configurations.
|
74
74
|
|
75
75
|
Example:
|
76
76
|
|
@@ -120,7 +120,7 @@ It's unlikely that the incoming import data has fields that exactly match those
|
|
120
120
|
|
121
121
|
By default, a mapping for the OAI parser has been added to map standard oai_dc fields to Hyrax basic_metadata. The other parsers have no default mapping, and will map any incoming fields to Hyrax properties with the same name. Configurations can be added in `config/intializers/bulkrax.rb`
|
122
122
|
|
123
|
-
Configuring field mappings is documented in the [Bulkrax Configuration Guide](https://github.com/samvera-labs/bulkrax/wiki/
|
123
|
+
Configuring field mappings is documented in the [Bulkrax Configuration Guide](https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax).
|
124
124
|
|
125
125
|
## Importing Files
|
126
126
|
|
@@ -151,7 +151,7 @@ end
|
|
151
151
|
|
152
152
|
## Customizing Bulkrax
|
153
153
|
|
154
|
-
For further information on how to extend and customize Bulkrax, please see the [Bulkrax Customization Guide](https://github.com/samvera-labs/bulkrax/wiki/Customizing).
|
154
|
+
For further information on how to extend and customize Bulkrax, please see the [Bulkrax Customization Guide](https://github.com/samvera-labs/bulkrax/wiki/Customizing-Bulkrax).
|
155
155
|
|
156
156
|
## How it Works
|
157
157
|
Once you have Bulkrax installed, you will have access to an easy to use interface with which you are able to create, edit, delete, run, and re-run imports and exports.
|
@@ -191,8 +191,6 @@ We encourage everyone to help improve this project. Bug reports and pull reques
|
|
191
191
|
|
192
192
|
This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](https://contributor-covenant.org) code of conduct.
|
193
193
|
|
194
|
-
All Contributors should have signed the Samvera Contributor License Agreement (CLA)
|
195
|
-
|
196
194
|
## Questions
|
197
195
|
Questions can be sent to support@notch8.com. Please make sure to include "Bulkrax" in the subject line of your email.
|
198
196
|
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,8 +4,6 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
-
# TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
|
8
|
-
class BagitZipError < RuntimeError; end
|
9
7
|
class Entry < ApplicationRecord
|
10
8
|
include Bulkrax::HasMatchers
|
11
9
|
include Bulkrax::ImportBehavior
|
@@ -14,15 +14,13 @@ module Bulkrax
|
|
14
14
|
validates :name, presence: true
|
15
15
|
validates :parser_klass, presence: true
|
16
16
|
|
17
|
-
delegate :write, :create_from_collection, :
|
17
|
+
delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
|
18
18
|
|
19
19
|
def export
|
20
20
|
current_run && setup_export_path
|
21
21
|
case self.export_from
|
22
22
|
when 'collection'
|
23
23
|
create_from_collection
|
24
|
-
when 'collections metadata'
|
25
|
-
create_from_collections_metadata
|
26
24
|
when 'importer'
|
27
25
|
create_from_importer
|
28
26
|
when 'worktype'
|
@@ -89,7 +87,6 @@ module Bulkrax
|
|
89
87
|
[
|
90
88
|
[I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
|
91
89
|
[I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
|
92
|
-
[I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
|
93
90
|
[I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
|
94
91
|
[I18n.t('bulkrax.exporter.labels.all'), 'all']
|
95
92
|
]
|
@@ -124,9 +121,13 @@ module Bulkrax
|
|
124
121
|
end
|
125
122
|
|
126
123
|
def exporter_export_zip_path
|
127
|
-
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}
|
124
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}")
|
128
125
|
rescue
|
129
|
-
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0
|
126
|
+
@exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0")
|
127
|
+
end
|
128
|
+
|
129
|
+
def exporter_export_zip_files
|
130
|
+
@exporter_export_zip_files ||= Dir["#{exporter_export_zip_path}/**"].map { |zip| Array(zip.split('/').last) }
|
130
131
|
end
|
131
132
|
|
132
133
|
def export_properties
|
@@ -137,5 +138,14 @@ module Bulkrax
|
|
137
138
|
def metadata_only?
|
138
139
|
export_type == 'metadata'
|
139
140
|
end
|
141
|
+
|
142
|
+
def sort_zip_files(zip_files)
|
143
|
+
zip_files.sort_by do |item|
|
144
|
+
number = item.split('_').last.match(/\d+/)&.[](0) || 0.to_s
|
145
|
+
sort_number = number.rjust(4, "0")
|
146
|
+
|
147
|
+
sort_number
|
148
|
+
end
|
149
|
+
end
|
140
150
|
end
|
141
151
|
end
|
@@ -7,9 +7,6 @@ module Bulkrax
|
|
7
7
|
|
8
8
|
def build_for_exporter
|
9
9
|
build_export_metadata
|
10
|
-
# TODO(alishaevn): determine if the line below is still necessary
|
11
|
-
# the csv and bagit parsers also have write_files methods
|
12
|
-
write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
|
13
10
|
rescue RSolr::Error::Http, CollectionsCreatedError => e
|
14
11
|
raise e
|
15
12
|
rescue StandardError => e
|
@@ -26,25 +23,6 @@ module Bulkrax
|
|
26
23
|
@hyrax_record ||= ActiveFedora::Base.find(self.identifier)
|
27
24
|
end
|
28
25
|
|
29
|
-
def write_files
|
30
|
-
return if hyrax_record.is_a?(Collection)
|
31
|
-
|
32
|
-
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
33
|
-
file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
|
34
|
-
file_sets.each do |fs|
|
35
|
-
path = File.join(exporter_export_path, 'files')
|
36
|
-
FileUtils.mkdir_p(path)
|
37
|
-
file = filename(fs)
|
38
|
-
require 'open-uri'
|
39
|
-
io = open(fs.original_file.uri)
|
40
|
-
next if file.blank?
|
41
|
-
File.open(File.join(path, file), 'wb') do |f|
|
42
|
-
f.write(io.read)
|
43
|
-
f.close
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
26
|
# Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
|
49
27
|
def filename(file_set)
|
50
28
|
return if file_set.original_file.blank?
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
def get_field_mapping_hash_for(key)
|
76
76
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
77
77
|
|
78
|
-
mapping = importerexporter.field_mapping
|
78
|
+
mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {}
|
79
79
|
instance_variable_set(
|
80
80
|
"@#{key}_hash",
|
81
81
|
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
@@ -247,8 +247,6 @@ module Bulkrax
|
|
247
247
|
def write
|
248
248
|
write_files
|
249
249
|
zip
|
250
|
-
# uncomment next line to debug for faulty zipping during bagit export
|
251
|
-
bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
|
252
250
|
end
|
253
251
|
|
254
252
|
def unzip(file_to_unzip)
|
@@ -262,30 +260,13 @@ module Bulkrax
|
|
262
260
|
end
|
263
261
|
|
264
262
|
def zip
|
265
|
-
FileUtils.
|
266
|
-
Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
|
267
|
-
Dir["#{exporter_export_path}/**/**"].each do |file|
|
268
|
-
zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
|
269
|
-
end
|
270
|
-
end
|
271
|
-
end
|
263
|
+
FileUtils.mkdir_p(exporter_export_zip_path)
|
272
264
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
begin
|
279
|
-
raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
|
280
|
-
rescue BagitZipError => e
|
281
|
-
matched_entry_ids = importerexporter.entry_ids.select do |id|
|
282
|
-
Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
|
283
|
-
end
|
284
|
-
matched_entry_ids.each do |entry_id|
|
285
|
-
Bulkrax::Entry.find(entry_id).status_info(e)
|
286
|
-
status_info('Complete (with failures)')
|
287
|
-
end
|
288
|
-
end
|
265
|
+
Dir["#{exporter_export_path}/**"].each do |folder|
|
266
|
+
zip_path = "#{exporter_export_zip_path.split('/').last}_#{folder.split('/').last}.zip"
|
267
|
+
Zip::File.open(File.join("#{exporter_export_zip_path}/#{zip_path}"), create: true) do |zip_file|
|
268
|
+
Dir["#{folder}/**/**"].each do |file|
|
269
|
+
zip_file.add(file.sub("#{folder}/", ''), file)
|
289
270
|
end
|
290
271
|
end
|
291
272
|
end
|
@@ -97,43 +97,42 @@ module Bulkrax
|
|
97
97
|
@total = 0
|
98
98
|
end
|
99
99
|
|
100
|
-
def current_record_ids
|
101
|
-
@work_ids = []
|
102
|
-
@collection_ids = []
|
103
|
-
@file_set_ids = []
|
104
|
-
|
105
|
-
case importerexporter.export_from
|
106
|
-
when 'all'
|
107
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
108
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
109
|
-
when 'collection'
|
110
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
111
|
-
when 'worktype'
|
112
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
113
|
-
when 'importer'
|
114
|
-
set_ids_for_exporting_from_importer
|
115
|
-
end
|
116
|
-
@work_ids + @collection_ids + @file_set_ids
|
117
|
-
end
|
118
|
-
|
119
100
|
# export methods
|
120
101
|
|
121
102
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
122
103
|
def write_files
|
123
104
|
require 'open-uri'
|
124
105
|
require 'socket'
|
125
|
-
|
106
|
+
|
107
|
+
folder_count = 1
|
108
|
+
records_in_folder = 0
|
109
|
+
work_entries = importerexporter.entries.where(identifier: @work_ids)
|
110
|
+
collection_entries = importerexporter.entries.where(identifier: @collection_ids)
|
111
|
+
file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
|
112
|
+
|
113
|
+
work_entries[0..limit || total].each do |entry|
|
126
114
|
record = ActiveFedora::Base.find(entry.identifier)
|
127
|
-
next unless
|
128
|
-
|
115
|
+
next unless record
|
116
|
+
|
129
117
|
bag_entries = [entry]
|
130
118
|
|
131
|
-
record.
|
132
|
-
if
|
133
|
-
|
134
|
-
bag_entries << file_set_entry unless file_set_entry.nil?
|
135
|
-
end
|
119
|
+
if record.member_of_collection_ids.present?
|
120
|
+
collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
|
121
|
+
end
|
136
122
|
|
123
|
+
if record.file_sets.present?
|
124
|
+
file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
|
125
|
+
end
|
126
|
+
|
127
|
+
records_in_folder += bag_entries.count
|
128
|
+
if records_in_folder > records_split_count
|
129
|
+
folder_count += 1
|
130
|
+
records_in_folder = bag_entries.count
|
131
|
+
end
|
132
|
+
|
133
|
+
bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
|
134
|
+
|
135
|
+
record.file_sets.each do |fs|
|
137
136
|
file_name = filename(fs)
|
138
137
|
next if file_name.blank?
|
139
138
|
io = open(fs.original_file.uri)
|
@@ -141,24 +140,28 @@ module Bulkrax
|
|
141
140
|
file.write(io.read)
|
142
141
|
file.close
|
143
142
|
begin
|
144
|
-
bag.add_file(file_name, file.path)
|
143
|
+
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
145
144
|
rescue => e
|
146
145
|
entry.status_info(e)
|
147
146
|
status_info(e)
|
148
147
|
end
|
149
148
|
end
|
150
149
|
|
151
|
-
CSV.open(setup_csv_metadata_export_file(entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
150
|
+
CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
152
151
|
bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
|
153
152
|
end
|
154
|
-
|
153
|
+
|
154
|
+
write_triples(folder_count, entry)
|
155
155
|
bag.manifest!(algo: 'sha256')
|
156
156
|
end
|
157
157
|
end
|
158
158
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
159
159
|
|
160
|
-
def setup_csv_metadata_export_file(id)
|
161
|
-
File.join(importerexporter.exporter_export_path,
|
160
|
+
def setup_csv_metadata_export_file(folder_count, id)
|
161
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
162
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
163
|
+
|
164
|
+
File.join(path, id, 'metadata.csv')
|
162
165
|
end
|
163
166
|
|
164
167
|
def key_allowed(key)
|
@@ -167,21 +170,27 @@ module Bulkrax
|
|
167
170
|
key != source_identifier.to_s
|
168
171
|
end
|
169
172
|
|
170
|
-
def setup_triple_metadata_export_file(id)
|
171
|
-
File.join(importerexporter.exporter_export_path,
|
173
|
+
def setup_triple_metadata_export_file(folder_count, id)
|
174
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
175
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
176
|
+
|
177
|
+
File.join(path, id, 'metadata.nt')
|
172
178
|
end
|
173
179
|
|
174
|
-
def setup_bagit_folder(id)
|
175
|
-
File.join(importerexporter.exporter_export_path,
|
180
|
+
def setup_bagit_folder(folder_count, id)
|
181
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
182
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
183
|
+
|
184
|
+
File.join(path, id)
|
176
185
|
end
|
177
186
|
|
178
|
-
def write_triples(e)
|
187
|
+
def write_triples(folder_count, e)
|
179
188
|
sd = SolrDocument.find(e.identifier)
|
180
189
|
return if sd.nil?
|
181
190
|
|
182
191
|
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
183
192
|
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
184
|
-
File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
|
193
|
+
File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
|
185
194
|
triples.write(rdf)
|
186
195
|
end
|
187
196
|
end
|
@@ -4,6 +4,7 @@ require 'csv'
|
|
4
4
|
module Bulkrax
|
5
5
|
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
6
|
include ErroredEntries
|
7
|
+
include ExportBehavior
|
7
8
|
attr_writer :collections, :file_sets, :works
|
8
9
|
|
9
10
|
def self.export_supported?
|
@@ -183,6 +184,7 @@ module Bulkrax
|
|
183
184
|
current_record_ids
|
184
185
|
end
|
185
186
|
|
187
|
+
# rubocop:disable Metrics/AbcSize
|
186
188
|
def current_record_ids
|
187
189
|
@work_ids = []
|
188
190
|
@collection_ids = []
|
@@ -194,18 +196,28 @@ module Bulkrax
|
|
194
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
195
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
198
|
when 'collection'
|
197
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
199
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
|
200
|
+
# get the parent collection and child collections
|
198
201
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
199
|
-
|
200
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
|
201
203
|
when 'worktype'
|
202
204
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
203
205
|
when 'importer'
|
204
206
|
set_ids_for_exporting_from_importer
|
205
207
|
end
|
206
208
|
|
209
|
+
find_child_file_sets(@work_ids) if importerexporter.export_from == 'collection'
|
210
|
+
|
207
211
|
@work_ids + @collection_ids + @file_set_ids
|
208
212
|
end
|
213
|
+
# rubocop:enable Metrics/AbcSize
|
214
|
+
|
215
|
+
# find the related file set ids so entries can be made for export
|
216
|
+
def find_child_file_sets(work_ids)
|
217
|
+
work_ids.each do |id|
|
218
|
+
ActiveFedora::Base.find(id).file_set_ids.each { |fs_id| @file_set_ids << fs_id }
|
219
|
+
end
|
220
|
+
end
|
209
221
|
|
210
222
|
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
211
223
|
# @see #current_record_ids
|
@@ -254,7 +266,6 @@ module Bulkrax
|
|
254
266
|
end
|
255
267
|
end
|
256
268
|
alias create_from_collection create_new_entries
|
257
|
-
alias create_from_collections_metadata create_new_entries
|
258
269
|
alias create_from_importer create_new_entries
|
259
270
|
alias create_from_worktype create_new_entries
|
260
271
|
alias create_from_all create_new_entries
|
@@ -283,6 +294,10 @@ module Bulkrax
|
|
283
294
|
@total = 0
|
284
295
|
end
|
285
296
|
|
297
|
+
def records_split_count
|
298
|
+
1000
|
299
|
+
end
|
300
|
+
|
286
301
|
# @todo - investigate getting directory structure
|
287
302
|
# @todo - investigate using perform_later, and having the importer check for
|
288
303
|
# DownloadCloudFileJob before it starts
|
@@ -307,9 +322,38 @@ module Bulkrax
|
|
307
322
|
# export methods
|
308
323
|
|
309
324
|
def write_files
|
310
|
-
|
311
|
-
|
312
|
-
|
325
|
+
require 'open-uri'
|
326
|
+
folder_count = 0
|
327
|
+
sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
|
328
|
+
|
329
|
+
sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
|
330
|
+
folder_count += 1
|
331
|
+
|
332
|
+
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
333
|
+
group.each do |entry|
|
334
|
+
csv << entry.parsed_metadata
|
335
|
+
next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
|
336
|
+
|
337
|
+
store_files(entry.identifier, folder_count.to_s)
|
338
|
+
end
|
339
|
+
end
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
def store_files(identifier, folder_count)
|
344
|
+
record = ActiveFedora::Base.find(identifier)
|
345
|
+
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
346
|
+
file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
|
347
|
+
file_sets.each do |fs|
|
348
|
+
path = File.join(exporter_export_path, folder_count, 'files')
|
349
|
+
FileUtils.mkdir_p(path) unless File.exist? path
|
350
|
+
file = filename(fs)
|
351
|
+
io = open(fs.original_file.uri)
|
352
|
+
next if file.blank?
|
353
|
+
|
354
|
+
File.open(File.join(path, file), 'wb') do |f|
|
355
|
+
f.write(io.read)
|
356
|
+
f.close
|
313
357
|
end
|
314
358
|
end
|
315
359
|
end
|
@@ -343,6 +387,20 @@ module Bulkrax
|
|
343
387
|
@object_names
|
344
388
|
end
|
345
389
|
|
390
|
+
def sort_entries(entries)
|
391
|
+
# always export models in the same order: work, collection, file set
|
392
|
+
entries.sort_by do |entry|
|
393
|
+
case entry.type
|
394
|
+
when 'Bulkrax::CsvEntry'
|
395
|
+
'0'
|
396
|
+
when 'Bulkrax::CsvCollectionEntry'
|
397
|
+
'1'
|
398
|
+
when 'Bulkrax::CsvFileSetEntry'
|
399
|
+
'2'
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
346
404
|
def sort_headers(headers)
|
347
405
|
# converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
|
348
406
|
# while keeping objects grouped together
|
@@ -356,8 +414,11 @@ module Bulkrax
|
|
356
414
|
end
|
357
415
|
|
358
416
|
# in the parser as it is specific to the format
|
359
|
-
def setup_export_file
|
360
|
-
File.join(importerexporter.exporter_export_path,
|
417
|
+
def setup_export_file(folder_count)
|
418
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
419
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
420
|
+
|
421
|
+
File.join(path, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}_#{folder_count}.csv")
|
361
422
|
end
|
362
423
|
|
363
424
|
# Retrieve file paths for [:file] mapping in records
|
@@ -21,7 +21,7 @@
|
|
21
21
|
<th scope="col">Name</th>
|
22
22
|
<th scope="col">Status</th>
|
23
23
|
<th scope="col">Date Exported</th>
|
24
|
-
<th scope="col"
|
24
|
+
<th scope="col">Downloadable Files</th>
|
25
25
|
<th scope="col"></th>
|
26
26
|
<th scope="col"></th>
|
27
27
|
<th scope="col"></th>
|
@@ -35,7 +35,10 @@
|
|
35
35
|
<td><%= exporter.created_at %></td>
|
36
36
|
<td>
|
37
37
|
<% if File.exist?(exporter.exporter_export_zip_path) %>
|
38
|
-
<%=
|
38
|
+
<%= simple_form_for(exporter, method: :get, url: exporter_download_path(exporter)) do |form| %>
|
39
|
+
<%= render 'downloads', exporter: exporter, form: form %>
|
40
|
+
<%= form.button :submit, value: 'Download', data: { disable_with: false } %>
|
41
|
+
<% end %>
|
39
42
|
<% end%>
|
40
43
|
</td>
|
41
44
|
<td><%= link_to raw('<span class="glyphicon glyphicon-info-sign"></span>'), exporter_path(exporter) %></td>
|
@@ -8,10 +8,11 @@
|
|
8
8
|
<div class='panel-body'>
|
9
9
|
|
10
10
|
<% if File.exist?(@exporter.exporter_export_zip_path) %>
|
11
|
-
|
11
|
+
<%= simple_form_for @exporter, method: :get, url: exporter_download_path(@exporter), html: { class: 'form-inline bulkrax-p-align' } do |form| %>
|
12
12
|
<strong>Download:</strong>
|
13
|
-
<%=
|
14
|
-
|
13
|
+
<%= render 'downloads', exporter: @exporter, form: form %>
|
14
|
+
<%= form.button :submit, value: 'Download', data: { disable_with: false } %>
|
15
|
+
<% end %>
|
15
16
|
<% end %>
|
16
17
|
|
17
18
|
<p class='bulkrax-p-align'>
|
@@ -40,11 +41,6 @@
|
|
40
41
|
<% when 'collection' %>
|
41
42
|
<% collection = Collection.find(@exporter.export_source) %>
|
42
43
|
<%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
|
43
|
-
<% when 'collections metadata' %>
|
44
|
-
<% collections = Collection.all %>
|
45
|
-
<% collections.each_with_index do |c, i| %>
|
46
|
-
<%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
|
47
|
-
<% end %>
|
48
44
|
<% when 'importer' %>
|
49
45
|
<% importer = Bulkrax::Importer.find(@exporter.export_source) %>
|
50
46
|
<%= link_to importer.name, bulkrax.importer_path(importer.id) %>
|
@@ -135,10 +131,6 @@
|
|
135
131
|
<%= page_entries_info(@work_entries) %><br>
|
136
132
|
<%= paginate(@work_entries, param_name: :work_entries_page) %>
|
137
133
|
<br>
|
138
|
-
<% if File.exist?(@exporter.exporter_export_zip_path) %>
|
139
|
-
<%= link_to 'Download', exporter_download_path(@exporter) %>
|
140
|
-
|
|
141
|
-
<% end %>
|
142
134
|
<%= link_to 'Edit', edit_exporter_path(@exporter) %>
|
143
135
|
|
|
144
136
|
<%= link_to 'Back', exporters_path %>
|
data/lib/bulkrax/version.rb
CHANGED
@@ -1,6 +1,30 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
namespace :bulkrax do
|
4
|
+
desc "Remove old exported zips and create new ones with the new file structure"
|
5
|
+
task rerun_all_exporters: :environment do
|
6
|
+
if defined?(::Hyku)
|
7
|
+
Account.find_each do |account|
|
8
|
+
puts "=============== updating #{account.name} ============"
|
9
|
+
next if account.name == "search"
|
10
|
+
switch!(account)
|
11
|
+
|
12
|
+
rerun_exporters_and_delete_zips
|
13
|
+
|
14
|
+
puts "=============== finished updating #{account.name} ============"
|
15
|
+
end
|
16
|
+
else
|
17
|
+
rerun_exporters_and_delete_zips
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def rerun_exporters_and_delete_zips
|
22
|
+
begin
|
23
|
+
Bulkrax::Exporter.all.each { |e| Bulkrax::ExporterJob.perform_later(e.id) }
|
24
|
+
rescue => e
|
25
|
+
puts "(#{e.message})"
|
26
|
+
end
|
27
|
+
|
28
|
+
Dir["tmp/exports/**.zip"].each { |zip_path| FileUtils.rm_rf(zip_path) }
|
29
|
+
end
|
30
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -331,6 +331,7 @@ files:
|
|
331
331
|
- app/views/bulkrax/entries/_parsed_metadata.html.erb
|
332
332
|
- app/views/bulkrax/entries/_raw_metadata.html.erb
|
333
333
|
- app/views/bulkrax/entries/show.html.erb
|
334
|
+
- app/views/bulkrax/exporters/_downloads.html.erb
|
334
335
|
- app/views/bulkrax/exporters/_form.html.erb
|
335
336
|
- app/views/bulkrax/exporters/edit.html.erb
|
336
337
|
- app/views/bulkrax/exporters/index.html.erb
|