bulkrax 3.1.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 41dde3161532c80ff433be4697fa6c42f0f453dfea3547181c44914a68e3b466
4
- data.tar.gz: 67473e62b537f71aa77aa664c361ba1e86c60e77451e4813a44597a9f465010b
3
+ metadata.gz: 4c6719caedaf4adb707986e0b6771f1025ce1e08d0bf46afa78a85d99faded2b
4
+ data.tar.gz: 7af41a63f79c6d9792066cf545f35bb723c7667970f0ea1e3a87c04dceda28d0
5
5
  SHA512:
6
- metadata.gz: 3859148384111048a2a2c096cee9bde5a9c78cb8e5a73b5ddb1d31c3ddefe7a221d1ebff97e3b1f96f09e746ac60b3ec1c14e615ef162fe1a5e16f883a3a7ecf
7
- data.tar.gz: bbc83265d59d5026546e6e92ccab3b93d6d4bf35ae08145ab6b27112ed587449572d763fdf3d876ec391dfce43d8b5fa96154e8507053ff2fac7b81bc2409580
6
+ metadata.gz: 7ed10c67e81885b4ff3b40d2731b93b790495e19cad51fb9dfb0dcc5bebcd4c60fecc86fe0932f058d100a7937cedab43c1df050c98efd2107c086fdb2fd93a2
7
+ data.tar.gz: 5a81d4cd7d0289b5d27a13876f20c29b0b40f0989ecbac7caaf26599a1cbdea5c27a2473f759783690562345e8b5d7c515a4f4a0f92d5241803897887e2e9214
@@ -101,12 +101,12 @@ module Bulkrax
101
101
  def exporter_params
102
102
  params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
103
103
  if params[:exporter][:date_filter] == "1"
104
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
105
- :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
104
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
105
+ :include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
106
106
  :workflow_status, field_mapping: {})
107
107
  else
108
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
109
- :parser_klass, :limit, :work_visibility, :workflow_status,
108
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
109
+ :include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
110
110
  field_mapping: {}).merge(start_date: nil, finish_date: nil)
111
111
  end
112
112
  end
@@ -42,7 +42,7 @@ module Bulkrax
42
42
  def send_file_contents
43
43
  self.status = 200
44
44
  prepare_file_headers
45
- stream_body file.read
45
+ send_file file
46
46
  end
47
47
 
48
48
  def prepare_file_headers
@@ -53,9 +53,5 @@ module Bulkrax
53
53
  response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
54
54
  self.content_type = download_content_type
55
55
  end
56
-
57
- def stream_body(iostream)
58
- self.response_body = iostream
59
- end
60
56
  end
61
57
  end
@@ -49,13 +49,11 @@ module Bulkrax
49
49
  reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
50
50
  return false # stop current job from continuing to run after rescheduling
51
51
  end
52
- importer_id = ImporterRun.find(importer_run_id).importer_id
53
52
  @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
54
- importerexporter_id: importer_id,
53
+ importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
55
54
  importerexporter_type: "Bulkrax::Importer").first
56
55
  create_relationships
57
56
  pending_relationships.each(&:destroy)
58
- Bulkrax::Importer.find(importer_id).record_status
59
57
  rescue ::StandardError => e
60
58
  parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
61
59
  Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class DeleteCollectionJob < DeleteJob; end
5
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class DeleteFileSetJob < DeleteJob; end
5
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class DeleteJob < ApplicationJob
5
+ queue_as :import
6
+
7
+ # rubocop:disable Rails/SkipsModelValidations
8
+ def perform(entry, importer_run)
9
+ obj = entry.factory.find
10
+ obj&.delete
11
+ ImporterRun.find(importer_run.id).increment!(:deleted_records)
12
+ ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
13
+ entry.save!
14
+ entry.importer.current_run = ImporterRun.find(importer_run.id)
15
+ entry.importer.record_status
16
+ entry.status_info("Deleted", ImporterRun.find(importer_run.id))
17
+ end
18
+ # rubocop:enable Rails/SkipsModelValidations
19
+ end
20
+ end
@@ -1,20 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class DeleteWorkJob < ApplicationJob
5
- queue_as :import
6
-
7
- # rubocop:disable Rails/SkipsModelValidations
8
- def perform(entry, importer_run)
9
- work = entry.factory.find
10
- work&.delete
11
- ImporterRun.find(importer_run.id).increment!(:deleted_records)
12
- ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
13
- entry.save!
14
- entry.importer.current_run = ImporterRun.find(importer_run.id)
15
- entry.importer.record_status
16
- entry.status_info("Deleted", ImporterRun.find(importer_run.id))
17
- end
18
- # rubocop:enable Rails/SkipsModelValidations
19
- end
4
+ class DeleteWorkJob < DeleteJob; end
20
5
  end
@@ -93,17 +93,32 @@ module Bulkrax
93
93
  end
94
94
 
95
95
  def build_export_metadata
96
- # make_round_trippable
97
96
  self.parsed_metadata = {}
98
- self.parsed_metadata['id'] = hyrax_record.id
99
- self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
100
- self.parsed_metadata['model'] = hyrax_record.has_model.first
97
+
98
+ build_system_metadata
99
+ build_files_metadata unless hyrax_record.is_a?(Collection)
101
100
  build_relationship_metadata
102
101
  build_mapping_metadata
103
- build_files unless hyrax_record.is_a?(Collection)
102
+ self.save!
103
+
104
104
  self.parsed_metadata
105
105
  end
106
106
 
107
+ # Metadata required by Bulkrax for round-tripping
108
+ def build_system_metadata
109
+ self.parsed_metadata['id'] = hyrax_record.id
110
+ self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
111
+ self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
112
+ end
113
+
114
+ def build_files_metadata
115
+ file_mapping = key_for_export('file')
116
+ file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
117
+ filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
118
+
119
+ handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
120
+ end
121
+
107
122
  def build_relationship_metadata
108
123
  # Includes all relationship methods for all exportable record types (works, Collections, FileSets)
109
124
  relationship_methods = {
@@ -126,13 +141,12 @@ module Bulkrax
126
141
  end
127
142
 
128
143
  def build_mapping_metadata
144
+ mapping = fetch_field_mapping
129
145
  mapping.each do |key, value|
130
- next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
131
- next if key == "model"
132
- # relationships handled by #build_relationship_metadata
133
- next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
134
- next if key == 'file' # handled by #build_files
146
+ # these keys are handled by other methods
147
+ next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
135
148
  next if value['excluded']
149
+ next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
136
150
 
137
151
  object_key = key if value.key?('object')
138
152
  next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
@@ -209,8 +223,19 @@ module Bulkrax
209
223
  file_mapping = mapping['file']&.[]('from')&.first || 'file'
210
224
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
211
225
 
212
- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
226
+ filenames = map_file_sets(file_sets)
213
227
  handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
228
+ build_thumbnail_files if hyrax_record.work?
229
+ end
230
+
231
+ def build_thumbnail_files
232
+ return unless importerexporter.include_thumbnails
233
+
234
+ thumbnail_mapping = 'thumbnail_file'
235
+ file_sets = Array.wrap(hyrax_record.thumbnail)
236
+
237
+ filenames = map_file_sets(file_sets)
238
+ handle_join_on_export(thumbnail_mapping, filenames, false)
214
239
  end
215
240
 
216
241
  def handle_join_on_export(key, values, join)
@@ -224,16 +249,6 @@ module Bulkrax
224
249
  end
225
250
  end
226
251
 
227
- # In order for the existing exported hyrax_record, to be updated by a re-import
228
- # we need a unique value in system_identifier
229
- # add the existing hyrax_record id to system_identifier
230
- def make_round_trippable
231
- values = hyrax_record.send(work_identifier.to_s).to_a
232
- values << hyrax_record.id
233
- hyrax_record.send("#{work_identifier}=", values)
234
- hyrax_record.save
235
- end
236
-
237
252
  def record
238
253
  @record ||= raw_metadata
239
254
  end
@@ -258,12 +273,12 @@ module Bulkrax
258
273
  raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
259
274
  identifiers << matching_collection_entries.first&.identifier
260
275
  end
261
-
262
276
  @collection_identifiers = identifiers.compact.presence || []
263
277
  end
264
278
 
265
279
  def collections_created?
266
- collection_identifiers.length == self.collection_ids.length
280
+ # TODO: look into whether this method is still needed after the new relationships code
281
+ true
267
282
  end
268
283
 
269
284
  def find_collection_ids
@@ -288,5 +303,11 @@ module Bulkrax
288
303
  return f if File.exist?(f)
289
304
  raise "File #{f} does not exist"
290
305
  end
306
+
307
+ private
308
+
309
+ def map_file_sets(file_sets)
310
+ file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
311
+ end
291
312
  end
292
313
  end
@@ -4,6 +4,8 @@ module Bulkrax
4
4
  # Custom error class for collections_created?
5
5
  class CollectionsCreatedError < RuntimeError; end
6
6
  class OAIError < RuntimeError; end
7
+ # TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
8
+ class BagitZipError < RuntimeError; end
7
9
  class Entry < ApplicationRecord
8
10
  include Bulkrax::HasMatchers
9
11
  include Bulkrax::ImportBehavior
@@ -34,6 +36,7 @@ module Bulkrax
34
36
  delegate :client,
35
37
  :collection_name,
36
38
  :user,
39
+ :generated_metadata_mapping,
37
40
  :related_parents_raw_mapping,
38
41
  :related_parents_parsed_mapping,
39
42
  :related_children_raw_mapping,
@@ -70,6 +73,15 @@ module Bulkrax
70
73
  parser&.work_identifier&.to_s || 'source'
71
74
  end
72
75
 
76
+ # Returns field_mapping hash based on whether or not generated metadata should be included
77
+ def fetch_field_mapping
78
+ return self.mapping if importerexporter.generated_metadata
79
+
80
+ self.mapping.each do |key, value|
81
+ self.mapping.delete(key) if value[generated_metadata_mapping]
82
+ end
83
+ end
84
+
73
85
  def self.parent_field(parser)
74
86
  parser.related_parents_parsed_mapping
75
87
  end
@@ -51,6 +51,14 @@ module Bulkrax
51
51
  self.start_date.present? || self.finish_date.present?
52
52
  end
53
53
 
54
+ def include_thumbnails?
55
+ self.include_thumbnails
56
+ end
57
+
58
+ def generated_metadata?
59
+ self.generated_metadata
60
+ end
61
+
54
62
  def work_visibility_list
55
63
  [
56
64
  ['Any', ''],
@@ -12,7 +12,7 @@ module Bulkrax
12
12
  # check for our entry in our current importer first
13
13
  importer_id = ImporterRun.find(importer_run_id).importer_id
14
14
  default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
15
- record = Entry.find_by(default_scope, importerexporter_id: importer_id) || Entry.find_by(default_scope)
15
+ record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
16
16
 
17
17
  # TODO(alishaevn): discuss whether we are only looking for Collection models here
18
18
  # use ActiveFedora::Base.find(identifier) instead?
@@ -7,6 +7,8 @@ module Bulkrax
7
7
 
8
8
  def build_for_exporter
9
9
  build_export_metadata
10
+ # TODO(alishaevn): determine if the line below is still necessary
11
+ # the csv and bagit parsers also have write_files methods
10
12
  write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
11
13
  rescue RSolr::Error::Http, CollectionsCreatedError => e
12
14
  raise e
@@ -28,6 +30,7 @@ module Bulkrax
28
30
  return if hyrax_record.is_a?(Collection)
29
31
 
30
32
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
33
+ file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
31
34
  file_sets.each do |fs|
32
35
  path = File.join(exporter_export_path, 'files')
33
36
  FileUtils.mkdir_p(path)
@@ -42,19 +45,22 @@ module Bulkrax
42
45
  end
43
46
  end
44
47
 
45
- # Prepend the file_set id to ensure a unique filename
48
+ # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
46
49
  def filename(file_set)
47
50
  return if file_set.original_file.blank?
48
51
  fn = file_set.original_file.file_name.first
49
52
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
50
53
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
51
- if fn.include?(file_set.id) || importerexporter.metadata_only?
52
- return fn if mime.to_s == ext_mime.to_s
53
- return "#{fn}.#{mime.to_sym}"
54
+ if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
55
+ filename = "#{fn}.#{mime.to_sym}"
56
+ filename = fn if mime.to_s == ext_mime.to_s
54
57
  else
55
- return "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
56
- return "#{file_set.id}_#{fn}.#{mime.to_sym}"
58
+ filename = "#{file_set.id}_#{fn}.#{mime.to_sym}"
59
+ filename = "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
57
60
  end
61
+ # Remove the extension, truncate the base name, and reattach the extension
62
+ ext = File.extname(filename)
63
+ "#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
58
64
  end
59
65
  end
60
66
  end
@@ -28,5 +28,13 @@ module Bulkrax
28
28
 
29
29
  raise StandardError, 'File set must be related to at least one work'
30
30
  end
31
+
32
+ def parent_jobs
33
+ false # FileSet relationships are handled in ObjectFactory#create_file_set
34
+ end
35
+
36
+ def child_jobs
37
+ raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
38
+ end
31
39
  end
32
40
  end
@@ -51,6 +51,10 @@ module Bulkrax
51
51
  @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
52
52
  end
53
53
 
54
+ def generated_metadata_mapping
55
+ @generated_metadata_mapping ||= 'generated'
56
+ end
57
+
54
58
  def related_parents_raw_mapping
55
59
  @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
56
60
  end
@@ -242,15 +246,48 @@ module Bulkrax
242
246
  def write
243
247
  write_files
244
248
  zip
249
+ # debugging aid for faulty zipping during bagit export: compares zipped file sizes against the exported files
250
+ bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
245
251
  end
246
252
 
247
253
  def unzip(file_to_unzip)
248
- WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path).unzip_file
254
+ Zip::File.open(file_to_unzip) do |zip_file|
255
+ zip_file.each do |entry|
256
+ entry_path = File.join(importer_unzip_path, entry.name)
257
+ FileUtils.mkdir_p(File.dirname(entry_path))
258
+ zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
259
+ end
260
+ end
249
261
  end
250
262
 
251
263
  def zip
252
264
  FileUtils.rm_rf(exporter_export_zip_path)
253
- WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path).create_zip
265
+ Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
266
+ Dir["#{exporter_export_path}/**/**"].each do |file|
267
+ zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
268
+ end
269
+ end
270
+ end
271
+
272
+ # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
273
+ def bagit_zip_file_size_check
274
+ Zip::File.open(exporter_export_zip_path) do |zip_file|
275
+ zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
276
+ Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
277
+ begin
278
+ raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
279
+ rescue BagitZipError => e
280
+ matched_entry_ids = importerexporter.entry_ids.select do |id|
281
+ Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
282
+ end
283
+ matched_entry_ids.each do |entry_id|
284
+ Bulkrax::Entry.find(entry_id).status_info(e)
285
+ status_info('Complete (with failures)')
286
+ end
287
+ end
288
+ end
289
+ end
290
+ end
254
291
  end
255
292
 
256
293
  # Is this a file?
@@ -272,7 +309,6 @@ module Bulkrax
272
309
 
273
310
  def real_import_file_path
274
311
  return importer_unzip_path if file? && zip?
275
-
276
312
  parser_fields['import_file_path']
277
313
  end
278
314
  end
@@ -1,9 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class BagitParser < ApplicationParser
4
+ class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
5
+ include ExportBehavior
6
+
5
7
  def self.export_supported?
6
- false # @todo will be supported
8
+ true
7
9
  end
8
10
 
9
11
  def valid_import?
@@ -14,19 +16,11 @@ module Bulkrax
14
16
  end
15
17
 
16
18
  def entry_class
17
- parser_fields['metadata_format'].constantize
18
- end
19
-
20
- def collection_entry_class
21
- parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
22
- rescue
23
- Entry
24
- end
25
-
26
- def file_set_entry_class
27
- csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
28
- csv_format ? CsvFileSetEntry : RdfFileSetEntry
19
+ rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
20
+ rdf_format ? RdfEntry : CsvEntry
29
21
  end
22
+ alias collection_entry_class entry_class
23
+ alias file_set_entry_class entry_class
30
24
 
31
25
  # Take a random sample of 10 metadata_paths and work out the import fields from that
32
26
  def import_fields
@@ -101,7 +95,185 @@ module Bulkrax
101
95
  end
102
96
 
103
97
  def total
104
- metadata_paths.count
98
+ importerexporter.entries.count
99
+ end
100
+
101
+ def extra_filters
102
+ output = ""
103
+ if importerexporter.start_date.present?
104
+ start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
105
+ finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
106
+ output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
107
+ end
108
+ output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
109
+ output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
110
+ output
111
+ end
112
+
113
+ def current_record_ids
114
+ @work_ids = []
115
+ @collection_ids = []
116
+ @file_set_ids = []
117
+
118
+ case importerexporter.export_from
119
+ when 'all'
120
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
121
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
122
+ @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
123
+ when 'collection'
124
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
125
+ when 'worktype'
126
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
127
+ when 'importer'
128
+ set_ids_for_exporting_from_importer
129
+ end
130
+
131
+ @work_ids + @collection_ids + @file_set_ids
132
+ end
133
+
134
+ # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
135
+ # @see #current_record_ids
136
+ def set_ids_for_exporting_from_importer
137
+ entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
138
+ complete_statuses = Status.latest_by_statusable
139
+ .includes(:statusable)
140
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
141
+
142
+ complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
143
+ extra_filters = extra_filters.presence || '*:*'
144
+
145
+ { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
146
+ instance_variable_set(instance_var, ActiveFedora::SolrService.post(
147
+ extra_filters.to_s,
148
+ fq: [
149
+ %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
150
+ "has_model_ssim:(#{models_to_search.join(' OR ')})"
151
+ ],
152
+ fl: 'id',
153
+ rows: 2_000_000_000
154
+ )['response']['docs'].map { |obj| obj['id'] })
155
+ end
156
+ end
157
+
158
+ def create_new_entries
159
+ current_record_ids.each_with_index do |id, index|
160
+ break if limit_reached?(limit, index)
161
+
162
+ this_entry_class = if @collection_ids.include?(id)
163
+ collection_entry_class
164
+ elsif @file_set_ids.include?(id)
165
+ file_set_entry_class
166
+ else
167
+ entry_class
168
+ end
169
+ new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
170
+
171
+ begin
172
+ entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
173
+ rescue => e
174
+ Rails.logger.info("#{e.message} was detected during export")
175
+ end
176
+
177
+ self.headers |= entry.parsed_metadata.keys if entry
178
+ end
179
+ end
180
+ alias create_from_collection create_new_entries
181
+ alias create_from_importer create_new_entries
182
+ alias create_from_worktype create_new_entries
183
+ alias create_from_all create_new_entries
184
+
185
+ # export methods
186
+
187
+ # rubocop:disable Metrics/AbcSize
188
+ def write_files
189
+ require 'open-uri'
190
+ require 'socket'
191
+ importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
192
+ bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
193
+ w = ActiveFedora::Base.find(e.identifier)
194
+ next unless Hyrax.config.curation_concerns.include?(w.class)
195
+
196
+ w.file_sets.each do |fs|
197
+ file_name = filename(fs)
198
+ next if file_name.blank?
199
+ io = open(fs.original_file.uri)
200
+ file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
201
+ file.write(io.read)
202
+ file.close
203
+ bag.add_file(file_name, file.path)
204
+ end
205
+ CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
206
+ csv << e.parsed_metadata
207
+ end
208
+ write_triples(e)
209
+ bag.manifest!(algo: 'sha256')
210
+ end
211
+ end
212
+ # rubocop:enable Metrics/AbcSize
213
+
214
+ def setup_csv_metadata_export_file(id)
215
+ File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
216
+ end
217
+
218
+ def key_allowed(key)
219
+ !Bulkrax.reserved_properties.include?(key) &&
220
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
221
+ key != source_identifier.to_s
222
+ end
223
+
224
+ # All possible column names
225
+ def export_headers
226
+ headers = sort_headers(self.headers)
227
+
228
+ # we don't want access_control_id exported (note: this method does not reposition the 'file' header)
229
+ headers.delete('access_control_id') if headers.include?('access_control_id')
230
+
231
+ # add the headers below at the beginning or end to maintain the preexisting export behavior
232
+ headers.prepend('model')
233
+ headers.prepend(source_identifier.to_s)
234
+ headers.prepend('id')
235
+
236
+ headers.uniq
237
+ end
238
+
239
+ def object_names
240
+ return @object_names if @object_names
241
+
242
+ @object_names = mapping.values.map { |value| value['object'] }
243
+ @object_names.uniq!.delete(nil)
244
+
245
+ @object_names
246
+ end
247
+
248
+ def sort_headers(headers)
249
+ # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
250
+ # while keeping objects grouped together
251
+ headers.sort_by do |item|
252
+ number = item.match(/\d+/)&.[](0) || 0.to_s
253
+ sort_number = number.rjust(4, "0")
254
+ object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
255
+ remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
256
+ "#{object_prefix}_#{sort_number}_#{remainder}"
257
+ end
258
+ end
259
+
260
+ def setup_triple_metadata_export_file(id)
261
+ File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
262
+ end
263
+
264
+ def setup_bagit_folder(id)
265
+ File.join(importerexporter.exporter_export_path, id)
266
+ end
267
+
268
+ def write_triples(e)
269
+ sd = SolrDocument.find(e.identifier)
270
+ return if sd.nil?
271
+
272
+ req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
273
+ rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
274
+ File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
275
+ triples.write(rdf)
276
+ end
105
277
  end
106
278
 
107
279
  def required_elements?(keys)
@@ -126,11 +298,7 @@ module Bulkrax
126
298
  def bags
127
299
  return @bags if @bags.present?
128
300
  new_bag = bag(import_file_path)
129
- @bags = if new_bag
130
- [new_bag]
131
- else
132
- Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
133
- end
301
+ @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
134
302
  @bags.delete(nil)
135
303
  raise StandardError, 'No valid bags found' if @bags.blank?
136
304
  return @bags
@@ -11,12 +11,15 @@ module Bulkrax
11
11
  end
12
12
 
13
13
  def records(_opts = {})
14
+ return @records if @records.present?
15
+
14
16
  file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
15
17
  # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
16
18
  csv_data = entry_class.read_data(file_for_import)
17
19
  importer.parser_fields['total'] = csv_data.count
18
20
  importer.save
19
- @records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
21
+
22
+ @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
20
23
  end
21
24
 
22
25
  def build_records
@@ -145,7 +148,6 @@ module Bulkrax
145
148
  'Bulkrax::Importer',
146
149
  current_record.to_h)
147
150
  if current_record[:delete].present?
148
- # TODO: create a "Delete" job for file_sets and collections
149
151
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
150
152
  else
151
153
  "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
@@ -60,6 +60,16 @@
60
60
  hint: 'leave blank or 0 for all records',
61
61
  label: t('bulkrax.exporter.labels.limit') %>
62
62
 
63
+ <%= form.input :generated_metadata?,
64
+ as: :boolean,
65
+ label: t('bulkrax.exporter.labels.generated_metadata'),
66
+ hint: t('bulkrax.exporter.hints.generated_metadata') %>
67
+
68
+ <%= form.input :include_thumbnails?,
69
+ as: :boolean,
70
+ label: t('bulkrax.exporter.labels.include_thumbnails'),
71
+ hint: t('bulkrax.exporter.hints.include_thumbnails') %>
72
+
63
73
  <%= form.input :date_filter,
64
74
  as: :boolean,
65
75
  label: t('bulkrax.exporter.labels.filter_by_date') %>
@@ -57,6 +57,18 @@
57
57
  <strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
58
58
  <%= @exporter.limit %>
59
59
  </p>
60
+
61
+ <p class='bulkrax-p-align'>
62
+ <strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
63
+ <%= @exporter.generated_metadata %>
64
+ </p>
65
+
66
+ <p class='bulkrax-p-align'>
67
+ <strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
68
+ <%= @exporter.include_thumbnails %>
69
+ </p>
70
+
71
+
60
72
  <%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
61
73
 
62
74
  <%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
@@ -178,7 +178,7 @@
178
178
  <% elsif e.status == "Pending" %>
179
179
  <td><span class="glyphicon glyphicon-option-horizontal" style="color: blue;"></span> <%= e.status %></td>
180
180
  <% else %>
181
- <td><span class="glyphicon glyphicon-remove" style="color: red;"></span> <%= e.status %></td>
181
+ <td><span class="glyphicon glyphicon-remove" style="color: <%= e.status == 'Deleted' ? 'green' : 'red' %>;"></span> <%= e.status %></td>
182
182
  <% end %>
183
183
  <% if e.last_error.present? %>
184
184
  <td><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
@@ -16,6 +16,8 @@ en:
16
16
  filter_by_date: Filter By Date
17
17
  finish_date: End Date
18
18
  full: Metadata and Files
19
+ include_thumbnails: Include Thumbnails?
20
+ generated_metadata: Include Generated Metadata?
19
21
  importer: Importer
20
22
  limit: Limit
21
23
  metadata: Metadata Only
@@ -35,3 +37,6 @@ en:
35
37
  ingested: "Ingested"
36
38
  unapproved: "Unapproved"
37
39
  needs_repair: "Needs Repair"
40
+ hints:
41
+ include_thumbnails: "These exported fields currently cannot be imported."
42
+ generated_metadata: "These exported fields currently cannot be imported."
@@ -0,0 +1,5 @@
1
+ class AddIncludeThumbnailsToBulkraxExporters < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :bulkrax_exporters, :include_thumbnails, :boolean, default: false unless column_exists?(:bulkrax_exporters, :include_thumbnails)
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ class AddGeneratedMetadataToBulkraxExporters < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :bulkrax_exporters, :generated_metadata, :boolean, default: false unless column_exists?(:bulkrax_exporters, :generated_metadata)
4
+ end
5
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.1.1'
4
+ VERSION = '3.3.0'
5
5
  end
data/lib/bulkrax.rb CHANGED
@@ -9,6 +9,7 @@ module Bulkrax
9
9
  :default_work_type,
10
10
  :default_field_mapping,
11
11
  :fill_in_blank_source_identifiers,
12
+ :generated_metadata_mapping,
12
13
  :related_children_field_mapping,
13
14
  :related_parents_field_mapping,
14
15
  :reserved_properties,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-20 00:00:00.000000000 Z
11
+ date: 2022-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -253,6 +253,7 @@ files:
253
253
  - app/controllers/bulkrax/exporters_controller.rb
254
254
  - app/controllers/bulkrax/importers_controller.rb
255
255
  - app/controllers/concerns/bulkrax/api.rb
256
+ - app/controllers/concerns/bulkrax/download_behavior.rb
256
257
  - app/factories/bulkrax/object_factory.rb
257
258
  - app/helpers/bulkrax/application_helper.rb
258
259
  - app/helpers/bulkrax/exporters_helper.rb
@@ -260,6 +261,9 @@ files:
260
261
  - app/helpers/bulkrax/validation_helper.rb
261
262
  - app/jobs/bulkrax/application_job.rb
262
263
  - app/jobs/bulkrax/create_relationships_job.rb
264
+ - app/jobs/bulkrax/delete_collection_job.rb
265
+ - app/jobs/bulkrax/delete_file_set_job.rb
266
+ - app/jobs/bulkrax/delete_job.rb
263
267
  - app/jobs/bulkrax/delete_work_job.rb
264
268
  - app/jobs/bulkrax/download_cloud_file_job.rb
265
269
  - app/jobs/bulkrax/export_work_job.rb
@@ -294,7 +298,6 @@ files:
294
298
  - app/models/bulkrax/rdf_file_set_entry.rb
295
299
  - app/models/bulkrax/status.rb
296
300
  - app/models/bulkrax/xml_entry.rb
297
- - app/models/concerns/bulkrax/download_behavior.rb
298
301
  - app/models/concerns/bulkrax/dynamic_record_lookup.rb
299
302
  - app/models/concerns/bulkrax/errored_entries.rb
300
303
  - app/models/concerns/bulkrax/export_behavior.rb
@@ -369,6 +372,8 @@ files:
369
372
  - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
370
373
  - db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
371
374
  - db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
375
+ - db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
376
+ - db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
372
377
  - lib/bulkrax.rb
373
378
  - lib/bulkrax/engine.rb
374
379
  - lib/bulkrax/version.rb
@@ -384,7 +389,7 @@ homepage: https://github.com/samvera-labs/bulkrax
384
389
  licenses:
385
390
  - Apache-2.0
386
391
  metadata: {}
387
- post_install_message:
392
+ post_install_message:
388
393
  rdoc_options: []
389
394
  require_paths:
390
395
  - lib
@@ -399,8 +404,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
399
404
  - !ruby/object:Gem::Version
400
405
  version: '0'
401
406
  requirements: []
402
- rubygems_version: 3.1.4
403
- signing_key:
407
+ rubygems_version: 3.0.3
408
+ signing_key:
404
409
  specification_version: 4
405
410
  summary: Import and export tool for Hyrax and Hyku
406
411
  test_files: []