bulkrax 3.1.1 → 3.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 41dde3161532c80ff433be4697fa6c42f0f453dfea3547181c44914a68e3b466
- data.tar.gz: 67473e62b537f71aa77aa664c361ba1e86c60e77451e4813a44597a9f465010b
+ metadata.gz: 4c6719caedaf4adb707986e0b6771f1025ce1e08d0bf46afa78a85d99faded2b
+ data.tar.gz: 7af41a63f79c6d9792066cf545f35bb723c7667970f0ea1e3a87c04dceda28d0
  SHA512:
- metadata.gz: 3859148384111048a2a2c096cee9bde5a9c78cb8e5a73b5ddb1d31c3ddefe7a221d1ebff97e3b1f96f09e746ac60b3ec1c14e615ef162fe1a5e16f883a3a7ecf
- data.tar.gz: bbc83265d59d5026546e6e92ccab3b93d6d4bf35ae08145ab6b27112ed587449572d763fdf3d876ec391dfce43d8b5fa96154e8507053ff2fac7b81bc2409580
+ metadata.gz: 7ed10c67e81885b4ff3b40d2731b93b790495e19cad51fb9dfb0dcc5bebcd4c60fecc86fe0932f058d100a7937cedab43c1df050c98efd2107c086fdb2fd93a2
+ data.tar.gz: 5a81d4cd7d0289b5d27a13876f20c29b0b40f0989ecbac7caaf26599a1cbdea5c27a2473f759783690562345e8b5d7c515a4f4a0f92d5241803897887e2e9214
@@ -101,12 +101,12 @@ module Bulkrax
  def exporter_params
  params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
  if params[:exporter][:date_filter] == "1"
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
- :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
+ :include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
  :workflow_status, field_mapping: {})
  else
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
- :parser_klass, :limit, :work_visibility, :workflow_status,
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
+ :include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
  field_mapping: {}).merge(start_date: nil, finish_date: nil)
  end
  end
@@ -42,7 +42,7 @@ module Bulkrax
  def send_file_contents
  self.status = 200
  prepare_file_headers
- stream_body file.read
+ send_file file
  end

  def prepare_file_headers
@@ -53,9 +53,5 @@ module Bulkrax
  response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
  self.content_type = download_content_type
  end
-
- def stream_body(iostream)
- self.response_body = iostream
- end
  end
  end
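
The hand-rolled stream_body helper above is replaced by Rails' built-in send_file. A minimal sketch of the resulting controller flow, assuming an ordinary Rails controller and a hypothetical export path rather than the gem's actual DownloadBehavior module:

# Sketch only: approximates the post-change behavior, not Bulkrax's exact code.
class ExportDownloadsController < ApplicationController
  def show
    file_path = Rails.root.join('tmp', 'exports', 'export.zip').to_s # hypothetical location
    response.headers['Content-Disposition'] = %(attachment; filename="#{File.basename(file_path)}")
    response.headers['Content-Length'] = File.size(file_path).to_s
    send_file file_path # let Rails stream the file instead of assigning response_body by hand
  end
end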
@@ -49,13 +49,11 @@ module Bulkrax
  reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
  return false # stop current job from continuing to run after rescheduling
  end
- importer_id = ImporterRun.find(importer_run_id).importer_id
  @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
- importerexporter_id: importer_id,
+ importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
  importerexporter_type: "Bulkrax::Importer").first
  create_relationships
  pending_relationships.each(&:destroy)
- Bulkrax::Importer.find(importer_id).record_status
  rescue ::StandardError => e
  parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
  Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Bulkrax
+ class DeleteCollectionJob < DeleteJob; end
+ end
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Bulkrax
+ class DeleteFileSetJob < DeleteJob; end
+ end
@@ -0,0 +1,20 @@
+ # frozen_string_literal: true
+
+ module Bulkrax
+ class DeleteJob < ApplicationJob
+ queue_as :import
+
+ # rubocop:disable Rails/SkipsModelValidations
+ def perform(entry, importer_run)
+ obj = entry.factory.find
+ obj&.delete
+ ImporterRun.find(importer_run.id).increment!(:deleted_records)
+ ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
+ entry.save!
+ entry.importer.current_run = ImporterRun.find(importer_run.id)
+ entry.importer.record_status
+ entry.status_info("Deleted", ImporterRun.find(importer_run.id))
+ end
+ # rubocop:enable Rails/SkipsModelValidations
+ end
+ end
@@ -1,20 +1,5 @@
  # frozen_string_literal: true

  module Bulkrax
- class DeleteWorkJob < ApplicationJob
- queue_as :import
-
- # rubocop:disable Rails/SkipsModelValidations
- def perform(entry, importer_run)
- work = entry.factory.find
- work&.delete
- ImporterRun.find(importer_run.id).increment!(:deleted_records)
- ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
- entry.save!
- entry.importer.current_run = ImporterRun.find(importer_run.id)
- entry.importer.record_status
- entry.status_info("Deleted", ImporterRun.find(importer_run.id))
- end
- # rubocop:enable Rails/SkipsModelValidations
- end
+ class DeleteWorkJob < DeleteJob; end
  end
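
With DeleteCollectionJob and DeleteFileSetJob added alongside the slimmed-down DeleteWorkJob, all three are thin subclasses sharing one perform method from DeleteJob, and the CSV parser hunk further down builds the job class name from the entry type. A simplified sketch of that dispatch, assuming the Rails/ActiveJob context the gem already runs in:

# Simplified sketch of the dispatch pattern shown later in this diff.
# `type` is "work", "collection", or "file_set"; every resulting class
# inherits its perform method from Bulkrax::DeleteJob.
def enqueue_delete(entry, current_run, type)
  "Bulkrax::Delete#{type.camelize}Job".constantize.perform_later(entry, current_run)
end

# enqueue_delete(entry, run, "file_set") enqueues Bulkrax::DeleteFileSetJob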
@@ -93,17 +93,32 @@ module Bulkrax
  end

  def build_export_metadata
- # make_round_trippable
  self.parsed_metadata = {}
- self.parsed_metadata['id'] = hyrax_record.id
- self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
- self.parsed_metadata['model'] = hyrax_record.has_model.first
+
+ build_system_metadata
+ build_files_metadata unless hyrax_record.is_a?(Collection)
  build_relationship_metadata
  build_mapping_metadata
- build_files unless hyrax_record.is_a?(Collection)
+ self.save!
+
  self.parsed_metadata
  end

+ # Metadata required by Bulkrax for round-tripping
+ def build_system_metadata
+ self.parsed_metadata['id'] = hyrax_record.id
+ self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
+ self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
+ end
+
+ def build_files_metadata
+ file_mapping = key_for_export('file')
+ file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
+ filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
+
+ handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
+ end
+
  def build_relationship_metadata
  # Includes all relationship methods for all exportable record types (works, Collections, FileSets)
  relationship_methods = {
@@ -126,13 +141,12 @@ module Bulkrax
  end

  def build_mapping_metadata
+ mapping = fetch_field_mapping
  mapping.each do |key, value|
- next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
- next if key == "model"
- # relationships handled by #build_relationship_metadata
- next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
- next if key == 'file' # handled by #build_files
+ # these keys are handled by other methods
+ next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
  next if value['excluded']
+ next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)

  object_key = key if value.key?('object')
  next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
@@ -209,8 +223,19 @@ module Bulkrax
  file_mapping = mapping['file']&.[]('from')&.first || 'file'
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets

- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
+ filenames = map_file_sets(file_sets)
  handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
+ build_thumbnail_files if hyrax_record.work?
+ end
+
+ def build_thumbnail_files
+ return unless importerexporter.include_thumbnails
+
+ thumbnail_mapping = 'thumbnail_file'
+ file_sets = Array.wrap(hyrax_record.thumbnail)
+
+ filenames = map_file_sets(file_sets)
+ handle_join_on_export(thumbnail_mapping, filenames, false)
  end

  def handle_join_on_export(key, values, join)
@@ -224,16 +249,6 @@ module Bulkrax
  end
  end

- # In order for the existing exported hyrax_record, to be updated by a re-import
- # we need a unique value in system_identifier
- # add the existing hyrax_record id to system_identifier
- def make_round_trippable
- values = hyrax_record.send(work_identifier.to_s).to_a
- values << hyrax_record.id
- hyrax_record.send("#{work_identifier}=", values)
- hyrax_record.save
- end
-
  def record
  @record ||= raw_metadata
  end
@@ -258,12 +273,12 @@ module Bulkrax
  raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
  identifiers << matching_collection_entries.first&.identifier
  end
-
  @collection_identifiers = identifiers.compact.presence || []
  end

  def collections_created?
- collection_identifiers.length == self.collection_ids.length
+ # TODO: look into if this method is still needed after new relationships code
+ true
  end

  def find_collection_ids
@@ -288,5 +303,11 @@ module Bulkrax
  return f if File.exist?(f)
  raise "File #{f} does not exist"
  end
+
+ private
+
+ def map_file_sets(file_sets)
+ file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
+ end
  end
  end
@@ -4,6 +4,8 @@ module Bulkrax
  # Custom error class for collections_created?
  class CollectionsCreatedError < RuntimeError; end
  class OAIError < RuntimeError; end
+ # TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
+ class BagitZipError < RuntimeError; end
  class Entry < ApplicationRecord
  include Bulkrax::HasMatchers
  include Bulkrax::ImportBehavior
@@ -34,6 +36,7 @@ module Bulkrax
  delegate :client,
  :collection_name,
  :user,
+ :generated_metadata_mapping,
  :related_parents_raw_mapping,
  :related_parents_parsed_mapping,
  :related_children_raw_mapping,
@@ -70,6 +73,15 @@ module Bulkrax
  parser&.work_identifier&.to_s || 'source'
  end

+ # Returns field_mapping hash based on whether or not generated metadata should be included
+ def fetch_field_mapping
+ return self.mapping if importerexporter.generated_metadata
+
+ self.mapping.each do |key, value|
+ self.mapping.delete(key) if value[generated_metadata_mapping]
+ end
+ end
+
  def self.parent_field(parser)
  parser.related_parents_parsed_mapping
  end
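
Entry#fetch_field_mapping drops any mapping entry flagged with the generated-metadata key (which defaults to 'generated', per the ApplicationParser change below) unless the exporter has generated_metadata enabled. A sketch of what such a flag could look like in an application's Bulkrax initializer; the parser key, field names, and hash shape here are only illustrative, not the gem's shipped defaults:

# config/initializers/bulkrax.rb -- illustrative sketch only
Bulkrax.setup do |config|
  config.field_mappings = {
    "Bulkrax::CsvParser" => {
      "title" => { "from" => ["title"] },
      # Flagged as generated: dropped from exports unless the exporter's
      # "Include Generated Metadata?" checkbox is ticked.
      "date_modified" => { "from" => ["date_modified"], "generated" => true }
    }
  }
end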
@@ -51,6 +51,14 @@ module Bulkrax
  self.start_date.present? || self.finish_date.present?
  end

+ def include_thumbnails?
+ self.include_thumbnails
+ end
+
+ def generated_metadata?
+ self.generated_metadata
+ end
+
  def work_visibility_list
  [
  ['Any', ''],
@@ -12,7 +12,7 @@ module Bulkrax
  # check for our entry in our current importer first
  importer_id = ImporterRun.find(importer_run_id).importer_id
  default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
- record = Entry.find_by(default_scope, importerexporter_id: importer_id) || Entry.find_by(default_scope)
+ record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)

  # TODO(alishaevn): discuss whether we are only looking for Collection models here
  # use ActiveFedora::Base.find(identifier) instead?
@@ -7,6 +7,8 @@ module Bulkrax

  def build_for_exporter
  build_export_metadata
+ # TODO(alishaevn): determine if the line below is still necessary
+ # the csv and bagit parsers also have write_files methods
  write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
  rescue RSolr::Error::Http, CollectionsCreatedError => e
  raise e
@@ -28,6 +30,7 @@ module Bulkrax
  return if hyrax_record.is_a?(Collection)

  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
+ file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
  file_sets.each do |fs|
  path = File.join(exporter_export_path, 'files')
  FileUtils.mkdir_p(path)
@@ -42,19 +45,22 @@ module Bulkrax
  end
  end

- # Prepend the file_set id to ensure a unique filename
+ # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
  def filename(file_set)
  return if file_set.original_file.blank?
  fn = file_set.original_file.file_name.first
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
- if fn.include?(file_set.id) || importerexporter.metadata_only?
- return fn if mime.to_s == ext_mime.to_s
- return "#{fn}.#{mime.to_sym}"
+ if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
+ filename = "#{fn}.#{mime.to_sym}"
+ filename = fn if mime.to_s == ext_mime.to_s
  else
- return "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
- return "#{file_set.id}_#{fn}.#{mime.to_sym}"
+ filename = "#{file_set.id}_#{fn}.#{mime.to_sym}"
+ filename = "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
  end
+ # Remove extention truncate and reattach
+ ext = File.extname(filename)
+ "#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
  end
  end
  end
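
The new tail of filename keeps exported names safely under the common 255-character filesystem limit by truncating the basename and reattaching the extension. The same arithmetic in isolation, with a hypothetical helper name:

# Truncate a long filename to at most 220 characters while keeping its extension.
def truncate_filename(filename)
  ext = File.extname(filename)
  "#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
end

truncate_filename("abc123_#{'x' * 300}.tiff").length #=> 220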
@@ -28,5 +28,13 @@ module Bulkrax

  raise StandardError, 'File set must be related to at least one work'
  end
+
+ def parent_jobs
+ false # FileSet relationships are handled in ObjectFactory#create_file_set
+ end
+
+ def child_jobs
+ raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
+ end
  end
  end
@@ -51,6 +51,10 @@ module Bulkrax
  @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
  end

+ def generated_metadata_mapping
+ @generated_metadata_mapping ||= 'generated'
+ end
+
  def related_parents_raw_mapping
  @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
  end
@@ -242,15 +246,48 @@ module Bulkrax
  def write
  write_files
  zip
+ # uncomment next line to debug for faulty zipping during bagit export
+ bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
  end

  def unzip(file_to_unzip)
- WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path).unzip_file
+ Zip::File.open(file_to_unzip) do |zip_file|
+ zip_file.each do |entry|
+ entry_path = File.join(importer_unzip_path, entry.name)
+ FileUtils.mkdir_p(File.dirname(entry_path))
+ zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
+ end
+ end
  end

  def zip
  FileUtils.rm_rf(exporter_export_zip_path)
- WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path).create_zip
+ Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
+ Dir["#{exporter_export_path}/**/**"].each do |file|
+ zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
+ end
+ end
+ end
+
+ # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
+ def bagit_zip_file_size_check
+ Zip::File.open(exporter_export_zip_path) do |zip_file|
+ zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
+ Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
+ begin
+ raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
+ rescue BagitZipError => e
+ matched_entry_ids = importerexporter.entry_ids.select do |id|
+ Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
+ end
+ matched_entry_ids.each do |entry_id|
+ Bulkrax::Entry.find(entry_id).status_info(e)
+ status_info('Complete (with failures)')
+ end
+ end
+ end
+ end
+ end
  end

  # Is this a file?
@@ -272,7 +309,6 @@ module Bulkrax

  def real_import_file_path
  return importer_unzip_path if file? && zip?
-
  parser_fields['import_file_path']
  end
  end
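
ApplicationParser#zip and #unzip above now call rubyzip directly instead of WillowSword::ZipPackage. A minimal round-trip sketch using the same Zip::File calls, with hypothetical paths:

require 'zip'
require 'fileutils'

export_dir = '/tmp/bulkrax_export'      # hypothetical paths
zip_path   = '/tmp/bulkrax_export.zip'
unzip_dir  = '/tmp/bulkrax_unzipped'

# Zip a directory tree, mirroring #zip above.
Zip::File.open(zip_path, create: true) do |zip_file|
  Dir["#{export_dir}/**/**"].each do |file|
    zip_file.add(file.sub("#{export_dir}/", ''), file)
  end
end

# Unzip it again, mirroring #unzip above.
Zip::File.open(zip_path) do |zip_file|
  zip_file.each do |entry|
    entry_path = File.join(unzip_dir, entry.name)
    FileUtils.mkdir_p(File.dirname(entry_path))
    zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
  end
end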
@@ -1,9 +1,11 @@
  # frozen_string_literal: true

  module Bulkrax
- class BagitParser < ApplicationParser
+ class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
+ include ExportBehavior
+
  def self.export_supported?
- false # @todo will be supported
+ true
  end

  def valid_import?
@@ -14,19 +16,11 @@ module Bulkrax
  end

  def entry_class
- parser_fields['metadata_format'].constantize
- end
-
- def collection_entry_class
- parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
- rescue
- Entry
- end
-
- def file_set_entry_class
- csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
- csv_format ? CsvFileSetEntry : RdfFileSetEntry
+ rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
+ rdf_format ? RdfEntry : CsvEntry
  end
+ alias collection_entry_class entry_class
+ alias file_set_entry_class entry_class

  # Take a random sample of 10 metadata_paths and work out the import fields from that
  def import_fields
@@ -101,7 +95,185 @@ module Bulkrax
  end

  def total
- metadata_paths.count
+ importerexporter.entries.count
+ end
+
+ def extra_filters
+ output = ""
+ if importerexporter.start_date.present?
+ start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
+ finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
+ output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
+ end
+ output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
+ output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
+ output
+ end
+
+ def current_record_ids
+ @work_ids = []
+ @collection_ids = []
+ @file_set_ids = []
+
+ case importerexporter.export_from
+ when 'all'
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
+ @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
+ when 'collection'
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
+ when 'worktype'
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
+ when 'importer'
+ set_ids_for_exporting_from_importer
+ end
+
+ @work_ids + @collection_ids + @file_set_ids
+ end
+
+ # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
+ # @see #current_record_ids
+ def set_ids_for_exporting_from_importer
+ entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
+ complete_statuses = Status.latest_by_statusable
+ .includes(:statusable)
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
+
+ complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
+ extra_filters = extra_filters.presence || '*:*'
+
+ { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
+ instance_variable_set(instance_var, ActiveFedora::SolrService.post(
+ extra_filters.to_s,
+ fq: [
+ %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
+ "has_model_ssim:(#{models_to_search.join(' OR ')})"
+ ],
+ fl: 'id',
+ rows: 2_000_000_000
+ )['response']['docs'].map { |obj| obj['id'] })
+ end
+ end
+
+ def create_new_entries
+ current_record_ids.each_with_index do |id, index|
+ break if limit_reached?(limit, index)
+
+ this_entry_class = if @collection_ids.include?(id)
+ collection_entry_class
+ elsif @file_set_ids.include?(id)
+ file_set_entry_class
+ else
+ entry_class
+ end
+ new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
+
+ begin
+ entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
+ rescue => e
+ Rails.logger.info("#{e.message} was detected during export")
+ end
+
+ self.headers |= entry.parsed_metadata.keys if entry
+ end
+ end
+ alias create_from_collection create_new_entries
+ alias create_from_importer create_new_entries
+ alias create_from_worktype create_new_entries
+ alias create_from_all create_new_entries
+
+ # export methods
+
+ # rubocop:disable Metrics/AbcSize
+ def write_files
+ require 'open-uri'
+ require 'socket'
+ importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
+ bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
+ w = ActiveFedora::Base.find(e.identifier)
+ next unless Hyrax.config.curation_concerns.include?(w.class)
+
+ w.file_sets.each do |fs|
+ file_name = filename(fs)
+ next if file_name.blank?
+ io = open(fs.original_file.uri)
+ file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
+ file.write(io.read)
+ file.close
+ bag.add_file(file_name, file.path)
+ end
+ CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
+ csv << e.parsed_metadata
+ end
+ write_triples(e)
+ bag.manifest!(algo: 'sha256')
+ end
+ end
+ # rubocop:enable Metrics/AbcSize
+
+ def setup_csv_metadata_export_file(id)
+ File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
+ end
+
+ def key_allowed(key)
+ !Bulkrax.reserved_properties.include?(key) &&
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
+ key != source_identifier.to_s
+ end
+
+ # All possible column names
+ def export_headers
+ headers = sort_headers(self.headers)
+
+ # we don't want access_control_id exported and we want file at the end
+ headers.delete('access_control_id') if headers.include?('access_control_id')
+
+ # add the headers below at the beginning or end to maintain the preexisting export behavior
+ headers.prepend('model')
+ headers.prepend(source_identifier.to_s)
+ headers.prepend('id')
+
+ headers.uniq
+ end
+
+ def object_names
+ return @object_names if @object_names
+
+ @object_names = mapping.values.map { |value| value['object'] }
+ @object_names.uniq!.delete(nil)
+
+ @object_names
+ end
+
+ def sort_headers(headers)
+ # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
+ # while keeping objects grouped together
+ headers.sort_by do |item|
+ number = item.match(/\d+/)&.[](0) || 0.to_s
+ sort_number = number.rjust(4, "0")
+ object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
+ remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
+ "#{object_prefix}_#{sort_number}_#{remainder}"
+ end
+ end
+
+ def setup_triple_metadata_export_file(id)
+ File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
+ end
+
+ def setup_bagit_folder(id)
+ File.join(importerexporter.exporter_export_path, id)
+ end
+
+ def write_triples(e)
+ sd = SolrDocument.find(e.identifier)
+ return if sd.nil?
+
+ req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
+ rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
+ File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
+ triples.write(rdf)
+ end
  end

  def required_elements?(keys)
@@ -126,11 +298,7 @@ module Bulkrax
  def bags
  return @bags if @bags.present?
  new_bag = bag(import_file_path)
- @bags = if new_bag
- [new_bag]
- else
- Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
- end
+ @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
  @bags.delete(nil)
  raise StandardError, 'No valid bags found' if @bags.blank?
  return @bags
@@ -11,12 +11,15 @@ module Bulkrax
  end

  def records(_opts = {})
+ return @records if @records.present?
+
  file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
  # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
  csv_data = entry_class.read_data(file_for_import)
  importer.parser_fields['total'] = csv_data.count
  importer.save
- @records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
+
+ @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
  end

  def build_records
@@ -145,7 +148,6 @@ module Bulkrax
  'Bulkrax::Importer',
  current_record.to_h)
  if current_record[:delete].present?
- # TODO: create a "Delete" job for file_sets and collections
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
  else
  "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
@@ -60,6 +60,16 @@
  hint: 'leave blank or 0 for all records',
  label: t('bulkrax.exporter.labels.limit') %>

+ <%= form.input :generated_metadata?,
+ as: :boolean,
+ label: t('bulkrax.exporter.labels.generated_metadata'),
+ hint: t('bulkrax.exporter.hints.generated_metadata') %>
+
+ <%= form.input :include_thumbnails?,
+ as: :boolean,
+ label: t('bulkrax.exporter.labels.include_thumbnails'),
+ hint: t('bulkrax.exporter.hints.include_thumbnails') %>
+
  <%= form.input :date_filter,
  as: :boolean,
  label: t('bulkrax.exporter.labels.filter_by_date') %>
@@ -57,6 +57,18 @@
  <strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
  <%= @exporter.limit %>
  </p>
+
+ <p class='bulkrax-p-align'>
+ <strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
+ <%= @exporter.generated_metadata %>
+ </p>
+
+ <p class='bulkrax-p-align'>
+ <strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
+ <%= @exporter.include_thumbnails %>
+ </p>
+
+
  <%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>

  <%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
@@ -178,7 +178,7 @@
  <% elsif e.status == "Pending" %>
  <td><span class="glyphicon glyphicon-option-horizontal" style="color: blue;"></span> <%= e.status %></td>
  <% else %>
- <td><span class="glyphicon glyphicon-remove" style="color: red;"></span> <%= e.status %></td>
+ <td><span class="glyphicon glyphicon-remove" style="color: <%= e.status == 'Deleted' ? 'green' : 'red' %>;"></span> <%= e.status %></td>
  <% end %>
  <% if e.last_error.present? %>
  <td><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
@@ -16,6 +16,8 @@ en:
  filter_by_date: Filter By Date
  finish_date: End Date
  full: Metadata and Files
+ include_thumbnails: Include Thumbnails?
+ generated_metadata: Include Generated Metadata?
  importer: Importer
  limit: Limit
  metadata: Metadata Only
@@ -35,3 +37,6 @@ en:
  ingested: "Ingested"
  unapproved: "Unapproved"
  needs_repair: "Needs Repair"
+ hints:
+ include_thumbnails: "These exported fields currently cannot be imported."
+ generated_metadata: "These exported fields currently cannot be imported."
@@ -0,0 +1,5 @@
+ class AddIncludeThumbnailsToBulkraxExporters < ActiveRecord::Migration[5.2]
+ def change
+ add_column :bulkrax_exporters, :include_thumbnails, :boolean, default: false unless column_exists?(:bulkrax_exporters, :include_thumbnails)
+ end
+ end
@@ -0,0 +1,5 @@
+ class AddGeneratedMetadataToBulkraxExporters < ActiveRecord::Migration[5.2]
+ def change
+ add_column :bulkrax_exporters, :generated_metadata, :boolean, default: false unless column_exists?(:bulkrax_exporters, :generated_metadata)
+ end
+ end
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Bulkrax
- VERSION = '3.1.1'
+ VERSION = '3.3.0'
  end
data/lib/bulkrax.rb CHANGED
@@ -9,6 +9,7 @@ module Bulkrax
  :default_work_type,
  :default_field_mapping,
  :fill_in_blank_source_identifiers,
+ :generated_metadata_mapping,
  :related_children_field_mapping,
  :related_parents_field_mapping,
  :reserved_properties,
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: bulkrax
  version: !ruby/object:Gem::Version
- version: 3.1.1
+ version: 3.3.0
  platform: ruby
  authors:
  - Rob Kaufman
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-05-20 00:00:00.000000000 Z
+ date: 2022-06-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
@@ -253,6 +253,7 @@ files:
  - app/controllers/bulkrax/exporters_controller.rb
  - app/controllers/bulkrax/importers_controller.rb
  - app/controllers/concerns/bulkrax/api.rb
+ - app/controllers/concerns/bulkrax/download_behavior.rb
  - app/factories/bulkrax/object_factory.rb
  - app/helpers/bulkrax/application_helper.rb
  - app/helpers/bulkrax/exporters_helper.rb
@@ -260,6 +261,9 @@ files:
  - app/helpers/bulkrax/validation_helper.rb
  - app/jobs/bulkrax/application_job.rb
  - app/jobs/bulkrax/create_relationships_job.rb
+ - app/jobs/bulkrax/delete_collection_job.rb
+ - app/jobs/bulkrax/delete_file_set_job.rb
+ - app/jobs/bulkrax/delete_job.rb
  - app/jobs/bulkrax/delete_work_job.rb
  - app/jobs/bulkrax/download_cloud_file_job.rb
  - app/jobs/bulkrax/export_work_job.rb
@@ -294,7 +298,6 @@ files:
  - app/models/bulkrax/rdf_file_set_entry.rb
  - app/models/bulkrax/status.rb
  - app/models/bulkrax/xml_entry.rb
- - app/models/concerns/bulkrax/download_behavior.rb
  - app/models/concerns/bulkrax/dynamic_record_lookup.rb
  - app/models/concerns/bulkrax/errored_entries.rb
  - app/models/concerns/bulkrax/export_behavior.rb
@@ -369,6 +372,8 @@ files:
  - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
  - db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
  - db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
+ - db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
+ - db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
  - lib/bulkrax.rb
  - lib/bulkrax/engine.rb
  - lib/bulkrax/version.rb
@@ -384,7 +389,7 @@ homepage: https://github.com/samvera-labs/bulkrax
  licenses:
  - Apache-2.0
  metadata: {}
- post_install_message:
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -399,8 +404,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.1.4
- signing_key:
+ rubygems_version: 3.0.3
+ signing_key:
  specification_version: 4
  summary: Import and export tool for Hyrax and Hyku
  test_files: []