bulkrax 3.2.0 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49b7cfb404f1878429bbac77aad6d1167e97377fe28b54e2042cee7f6ca04e67
4
- data.tar.gz: c3789c193dc1610c99117e5e2c450ed477a7390995916c4b6b419900f5cf539b
3
+ metadata.gz: b136a8742a7f9b953b4c3ef86d700540b931b6fbf22798d719e9aa693ea61fa9
4
+ data.tar.gz: e1cc32eda55a606285cf6e080340db608a8867f0bd4de8ab1361ca4a3d21adf9
5
5
  SHA512:
6
- metadata.gz: a669e9b566770ad21e3a6242d24779f5c73847b2b97d4f7334f06209ac7de3634e7667550ba35ce6930f9ceaffda8efccb29cf3277b40f29d808ddbd9623a1b8
7
- data.tar.gz: 794e6de65d4ebb5665ab73fdac3eba2d4157ff1b07f3da6cc63a6323b2127dc40021d69ea193da2a356e276f08dde8ccc51dc1d8c563e5ba3e8ecb592e4b4c88
6
+ metadata.gz: 87e35f340faa583a9ae6ac156f95fa4958fcaf9d3ed09a9963a8dda39649a66307a8c494375a989e2103c2d1edef62343f5b69878eae2249d63cc4b8f65240a8
7
+ data.tar.gz: 21ddafc671eda822144b6b73abfb89892ffac954f0692952985299f40f7d0e1aba472077aac72a1f681da315ba35de795f43d95df323c2178576716d02eafc75
@@ -101,12 +101,12 @@ module Bulkrax
101
101
  def exporter_params
102
102
  params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
103
103
  if params[:exporter][:date_filter] == "1"
104
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
105
- :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
104
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
105
+ :include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
106
106
  :workflow_status, field_mapping: {})
107
107
  else
108
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
109
- :parser_klass, :limit, :work_visibility, :workflow_status,
108
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
109
+ :include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
110
110
  field_mapping: {}).merge(start_date: nil, finish_date: nil)
111
111
  end
112
112
  end
@@ -42,7 +42,7 @@ module Bulkrax
42
42
  def send_file_contents
43
43
  self.status = 200
44
44
  prepare_file_headers
45
- stream_body file.read
45
+ send_file file
46
46
  end
47
47
 
48
48
  def prepare_file_headers
@@ -53,9 +53,5 @@ module Bulkrax
53
53
  response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
54
54
  self.content_type = download_content_type
55
55
  end
56
-
57
- def stream_body(iostream)
58
- self.response_body = iostream
59
- end
60
56
  end
61
57
  end
@@ -61,6 +61,7 @@ module Bulkrax
61
61
  work_actor.update(environment(attrs))
62
62
  end
63
63
  end
64
+ object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
64
65
  log_updated(object)
65
66
  end
66
67
 
@@ -107,6 +108,7 @@ module Bulkrax
107
108
  end
108
109
  end
109
110
  end
111
+ object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
110
112
  log_created(object)
111
113
  end
112
114
 
@@ -141,7 +143,6 @@ module Bulkrax
141
143
  attrs = clean_attrs(attrs)
142
144
  attrs = collection_type(attrs)
143
145
  object.attributes = attrs
144
- object.apply_depositor_metadata(@user)
145
146
  object.save!
146
147
  end
147
148
 
@@ -33,7 +33,7 @@ module Bulkrax
33
33
  # is the child in the relationship, and vice versa if a child_identifier is passed.
34
34
  def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
35
35
  pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
36
- rel.bulkrax_importer_run_id == importer_run_id && rel.parent_id == parent_identifier
36
+ rel.importer_run_id == importer_run_id && rel.parent_id == parent_identifier
37
37
  end.sort_by(&:order)
38
38
 
39
39
  @importer_run_id = importer_run_id
@@ -12,6 +12,8 @@ module Bulkrax
12
12
  import(importer, only_updates_since_last_import)
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
+ rescue CSV::MalformedCSVError => e
16
+ importer.status_info(e)
15
17
  end
16
18
 
17
19
  def import(importer, only_updates_since_last_import)
@@ -99,6 +99,7 @@ module Bulkrax
99
99
  build_files_metadata unless hyrax_record.is_a?(Collection)
100
100
  build_relationship_metadata
101
101
  build_mapping_metadata
102
+ self.save!
102
103
 
103
104
  self.parsed_metadata
104
105
  end
@@ -113,9 +114,10 @@ module Bulkrax
113
114
  def build_files_metadata
114
115
  file_mapping = key_for_export('file')
115
116
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
116
- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
117
+ filenames = map_file_sets(file_sets)
117
118
 
118
119
  handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
120
+ build_thumbnail_files if hyrax_record.work?
119
121
  end
120
122
 
121
123
  def build_relationship_metadata
@@ -140,6 +142,7 @@ module Bulkrax
140
142
  end
141
143
 
142
144
  def build_mapping_metadata
145
+ mapping = fetch_field_mapping
143
146
  mapping.each do |key, value|
144
147
  # these keys are handled by other methods
145
148
  next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
@@ -217,6 +220,16 @@ module Bulkrax
217
220
  end
218
221
  end
219
222
 
223
+ def build_thumbnail_files
224
+ return unless importerexporter.include_thumbnails
225
+
226
+ thumbnail_mapping = 'thumbnail_file'
227
+ file_sets = Array.wrap(hyrax_record.thumbnail)
228
+
229
+ filenames = map_file_sets(file_sets)
230
+ handle_join_on_export(thumbnail_mapping, filenames, false)
231
+ end
232
+
220
233
  def handle_join_on_export(key, values, join)
221
234
  if join
222
235
  parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
@@ -252,7 +265,6 @@ module Bulkrax
252
265
  raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
253
266
  identifiers << matching_collection_entries.first&.identifier
254
267
  end
255
-
256
268
  @collection_identifiers = identifiers.compact.presence || []
257
269
  end
258
270
 
@@ -283,5 +295,11 @@ module Bulkrax
283
295
  return f if File.exist?(f)
284
296
  raise "File #{f} does not exist"
285
297
  end
298
+
299
+ private
300
+
301
+ def map_file_sets(file_sets)
302
+ file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
303
+ end
286
304
  end
287
305
  end
@@ -4,6 +4,8 @@ module Bulkrax
4
4
  # Custom error class for collections_created?
5
5
  class CollectionsCreatedError < RuntimeError; end
6
6
  class OAIError < RuntimeError; end
7
+ # TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
8
+ class BagitZipError < RuntimeError; end
7
9
  class Entry < ApplicationRecord
8
10
  include Bulkrax::HasMatchers
9
11
  include Bulkrax::ImportBehavior
@@ -34,6 +36,7 @@ module Bulkrax
34
36
  delegate :client,
35
37
  :collection_name,
36
38
  :user,
39
+ :generated_metadata_mapping,
37
40
  :related_parents_raw_mapping,
38
41
  :related_parents_parsed_mapping,
39
42
  :related_children_raw_mapping,
@@ -70,6 +73,15 @@ module Bulkrax
70
73
  parser&.work_identifier&.to_s || 'source'
71
74
  end
72
75
 
76
+ # Returns field_mapping hash based on whether or not generated metadata should be included
77
+ def fetch_field_mapping
78
+ return self.mapping if importerexporter.generated_metadata
79
+
80
+ self.mapping.each do |key, value|
81
+ self.mapping.delete(key) if value[generated_metadata_mapping]
82
+ end
83
+ end
84
+
73
85
  def self.parent_field(parser)
74
86
  parser.related_parents_parsed_mapping
75
87
  end
@@ -51,6 +51,14 @@ module Bulkrax
51
51
  self.start_date.present? || self.finish_date.present?
52
52
  end
53
53
 
54
+ def include_thumbnails?
55
+ self.include_thumbnails
56
+ end
57
+
58
+ def generated_metadata?
59
+ self.generated_metadata
60
+ end
61
+
54
62
  def work_visibility_list
55
63
  [
56
64
  ['Any', ''],
@@ -96,16 +96,19 @@ module Bulkrax
96
96
  end
97
97
 
98
98
  def current_run
99
- @current_run ||= if file? && zip?
100
- self.importer_runs.create!
101
- else
102
- entry_counts = {
103
- total_work_entries: self.limit || parser.works_total,
104
- total_collection_entries: parser.collections_total,
105
- total_file_set_entries: parser.file_sets_total
106
- }
107
- self.importer_runs.create!(entry_counts)
108
- end
99
+ return @current_run if @current_run.present?
100
+
101
+ @current_run = self.importer_runs.create!
102
+ return @current_run if file? && zip?
103
+
104
+ entry_counts = {
105
+ total_work_entries: self.limit || parser.works_total,
106
+ total_collection_entries: parser.collections_total,
107
+ total_file_set_entries: parser.file_sets_total
108
+ }
109
+ @current_run.update!(entry_counts)
110
+
111
+ @current_run
109
112
  end
110
113
 
111
114
  def last_run
@@ -4,9 +4,10 @@ module Bulkrax
4
4
  class ImporterRun < ApplicationRecord
5
5
  belongs_to :importer
6
6
  has_many :statuses, as: :runnable, dependent: :destroy
7
+ has_many :pending_relationships, dependent: :destroy
7
8
 
8
9
  def parents
9
- PendingRelationship.where(bulkrax_importer_run_id: id).pluck(:parent_id).uniq
10
+ pending_relationships.pluck(:parent_id).uniq
10
11
  end
11
12
  end
12
13
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bulkrax
4
4
  class PendingRelationship < ApplicationRecord
5
- belongs_to :bulkrax_importer_run, class_name: "::Bulkrax::ImporterRun"
5
+ belongs_to :importer_run
6
6
  end
7
7
  end
@@ -7,6 +7,8 @@ module Bulkrax
7
7
 
8
8
  def build_for_exporter
9
9
  build_export_metadata
10
+ # TODO(alishaevn): determine if the line below is still necessary
11
+ # the csv and bagit parsers also have write_files methods
10
12
  write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
11
13
  rescue RSolr::Error::Http, CollectionsCreatedError => e
12
14
  raise e
@@ -28,6 +30,7 @@ module Bulkrax
28
30
  return if hyrax_record.is_a?(Collection)
29
31
 
30
32
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
33
+ file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
31
34
  file_sets.each do |fs|
32
35
  path = File.join(exporter_export_path, 'files')
33
36
  FileUtils.mkdir_p(path)
@@ -48,7 +51,7 @@ module Bulkrax
48
51
  fn = file_set.original_file.file_name.first
49
52
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
50
53
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
51
- if fn.include?(file_set.id) || importerexporter.metadata_only?
54
+ if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
52
55
  filename = "#{fn}.#{mime.to_sym}"
53
56
  filename = fn if mime.to_s == ext_mime.to_s
54
57
  else
@@ -28,5 +28,13 @@ module Bulkrax
28
28
 
29
29
  raise StandardError, 'File set must be related to at least one work'
30
30
  end
31
+
32
+ def parent_jobs
33
+ false # FileSet relationships are handled in ObjectFactory#create_file_set
34
+ end
35
+
36
+ def child_jobs
37
+ raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
38
+ end
31
39
  end
32
40
  end
@@ -50,7 +50,7 @@ module Bulkrax
50
50
  self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
51
51
  next if parent_identifier.blank?
52
52
 
53
- PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
53
+ PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
54
54
  end
55
55
  end
56
56
 
@@ -58,7 +58,7 @@ module Bulkrax
58
58
  self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
59
59
  next if child_identifier.blank?
60
60
 
61
- PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
61
+ PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
62
62
  end
63
63
  end
64
64
 
@@ -18,7 +18,7 @@ module Bulkrax
18
18
  end
19
19
 
20
20
  def failed?
21
- current_status&.status_message&.match(/fail/i)
21
+ current_status&.status_message&.eql?('Failed')
22
22
  end
23
23
 
24
24
  def succeeded?
@@ -51,6 +51,10 @@ module Bulkrax
51
51
  @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
52
52
  end
53
53
 
54
+ def generated_metadata_mapping
55
+ @generated_metadata_mapping ||= 'generated'
56
+ end
57
+
54
58
  def related_parents_raw_mapping
55
59
  @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
56
60
  end
@@ -242,15 +246,49 @@ module Bulkrax
242
246
  def write
243
247
  write_files
244
248
  zip
249
+ # uncomment next line to debug for faulty zipping during bagit export
250
+ bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
245
251
  end
246
252
 
247
253
  def unzip(file_to_unzip)
248
- WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path).unzip_file
254
+ Zip::File.open(file_to_unzip) do |zip_file|
255
+ zip_file.each do |entry|
256
+ entry_path = File.join(importer_unzip_path, entry.name)
257
+ FileUtils.mkdir_p(File.dirname(entry_path))
258
+ zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
259
+ end
260
+ end
249
261
  end
250
262
 
251
263
  def zip
264
+ require 'zip'
252
265
  FileUtils.rm_rf(exporter_export_zip_path)
253
- WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path).create_zip
266
+ Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
267
+ Dir["#{exporter_export_path}/**/**"].each do |file|
268
+ zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
269
+ end
270
+ end
271
+ end
272
+
273
+ # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
274
+ def bagit_zip_file_size_check
275
+ Zip::File.open(exporter_export_zip_path) do |zip_file|
276
+ zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
277
+ Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
278
+ begin
279
+ raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
280
+ rescue BagitZipError => e
281
+ matched_entry_ids = importerexporter.entry_ids.select do |id|
282
+ Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
283
+ end
284
+ matched_entry_ids.each do |entry_id|
285
+ Bulkrax::Entry.find(entry_id).status_info(e)
286
+ status_info('Complete (with failures)')
287
+ end
288
+ end
289
+ end
290
+ end
291
+ end
254
292
  end
255
293
 
256
294
  # Is this a file?
@@ -272,7 +310,6 @@ module Bulkrax
272
310
 
273
311
  def real_import_file_path
274
312
  return importer_unzip_path if file? && zip?
275
-
276
313
  parser_fields['import_file_path']
277
314
  end
278
315
  end
@@ -1,9 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class BagitParser < ApplicationParser
4
+ class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
5
+ include ExportBehavior
6
+
5
7
  def self.export_supported?
6
- false # @todo will be supported
8
+ true
7
9
  end
8
10
 
9
11
  def valid_import?
@@ -14,19 +16,11 @@ module Bulkrax
14
16
  end
15
17
 
16
18
  def entry_class
17
- parser_fields['metadata_format'].constantize
18
- end
19
-
20
- def collection_entry_class
21
- parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
22
- rescue
23
- Entry
24
- end
25
-
26
- def file_set_entry_class
27
- csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
28
- csv_format ? CsvFileSetEntry : RdfFileSetEntry
19
+ rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
20
+ rdf_format ? RdfEntry : CsvEntry
29
21
  end
22
+ alias collection_entry_class entry_class
23
+ alias file_set_entry_class entry_class
30
24
 
31
25
  # Take a random sample of 10 metadata_paths and work out the import fields from that
32
26
  def import_fields
@@ -101,7 +95,185 @@ module Bulkrax
101
95
  end
102
96
 
103
97
  def total
104
- metadata_paths.count
98
+ importerexporter.entries.count
99
+ end
100
+
101
+ def extra_filters
102
+ output = ""
103
+ if importerexporter.start_date.present?
104
+ start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
105
+ finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
106
+ output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
107
+ end
108
+ output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
109
+ output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
110
+ output
111
+ end
112
+
113
+ def current_record_ids
114
+ @work_ids = []
115
+ @collection_ids = []
116
+ @file_set_ids = []
117
+
118
+ case importerexporter.export_from
119
+ when 'all'
120
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
121
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
122
+ @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
123
+ when 'collection'
124
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
125
+ when 'worktype'
126
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
127
+ when 'importer'
128
+ set_ids_for_exporting_from_importer
129
+ end
130
+
131
+ @work_ids + @collection_ids + @file_set_ids
132
+ end
133
+
134
+ # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
135
+ # @see #current_record_ids
136
+ def set_ids_for_exporting_from_importer
137
+ entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
138
+ complete_statuses = Status.latest_by_statusable
139
+ .includes(:statusable)
140
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
141
+
142
+ complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
143
+ extra_filters = extra_filters.presence || '*:*'
144
+
145
+ { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
146
+ instance_variable_set(instance_var, ActiveFedora::SolrService.post(
147
+ extra_filters.to_s,
148
+ fq: [
149
+ %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
150
+ "has_model_ssim:(#{models_to_search.join(' OR ')})"
151
+ ],
152
+ fl: 'id',
153
+ rows: 2_000_000_000
154
+ )['response']['docs'].map { |obj| obj['id'] })
155
+ end
156
+ end
157
+
158
+ def create_new_entries
159
+ current_record_ids.each_with_index do |id, index|
160
+ break if limit_reached?(limit, index)
161
+
162
+ this_entry_class = if @collection_ids.include?(id)
163
+ collection_entry_class
164
+ elsif @file_set_ids.include?(id)
165
+ file_set_entry_class
166
+ else
167
+ entry_class
168
+ end
169
+ new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
170
+
171
+ begin
172
+ entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
173
+ rescue => e
174
+ Rails.logger.info("#{e.message} was detected during export")
175
+ end
176
+
177
+ self.headers |= entry.parsed_metadata.keys if entry
178
+ end
179
+ end
180
+ alias create_from_collection create_new_entries
181
+ alias create_from_importer create_new_entries
182
+ alias create_from_worktype create_new_entries
183
+ alias create_from_all create_new_entries
184
+
185
+ # export methods
186
+
187
+ # rubocop:disable Metrics/AbcSize
188
+ def write_files
189
+ require 'open-uri'
190
+ require 'socket'
191
+ importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
192
+ bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
193
+ w = ActiveFedora::Base.find(e.identifier)
194
+ next unless Hyrax.config.curation_concerns.include?(w.class)
195
+
196
+ w.file_sets.each do |fs|
197
+ file_name = filename(fs)
198
+ next if file_name.blank?
199
+ io = open(fs.original_file.uri)
200
+ file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
201
+ file.write(io.read)
202
+ file.close
203
+ bag.add_file(file_name, file.path)
204
+ end
205
+ CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
206
+ csv << e.parsed_metadata
207
+ end
208
+ write_triples(e)
209
+ bag.manifest!(algo: 'sha256')
210
+ end
211
+ end
212
+ # rubocop:enable Metrics/AbcSize
213
+
214
+ def setup_csv_metadata_export_file(id)
215
+ File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
216
+ end
217
+
218
+ def key_allowed(key)
219
+ !Bulkrax.reserved_properties.include?(key) &&
220
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
221
+ key != source_identifier.to_s
222
+ end
223
+
224
+ # All possible column names
225
+ def export_headers
226
+ headers = sort_headers(self.headers)
227
+
228
+ # we don't want access_control_id exported and we want file at the end
229
+ headers.delete('access_control_id') if headers.include?('access_control_id')
230
+
231
+ # add the headers below at the beginning or end to maintain the preexisting export behavior
232
+ headers.prepend('model')
233
+ headers.prepend(source_identifier.to_s)
234
+ headers.prepend('id')
235
+
236
+ headers.uniq
237
+ end
238
+
239
+ def object_names
240
+ return @object_names if @object_names
241
+
242
+ @object_names = mapping.values.map { |value| value['object'] }
243
+ @object_names.uniq!.delete(nil)
244
+
245
+ @object_names
246
+ end
247
+
248
+ def sort_headers(headers)
249
+ # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
250
+ # while keeping objects grouped together
251
+ headers.sort_by do |item|
252
+ number = item.match(/\d+/)&.[](0) || 0.to_s
253
+ sort_number = number.rjust(4, "0")
254
+ object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
255
+ remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
256
+ "#{object_prefix}_#{sort_number}_#{remainder}"
257
+ end
258
+ end
259
+
260
+ def setup_triple_metadata_export_file(id)
261
+ File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
262
+ end
263
+
264
+ def setup_bagit_folder(id)
265
+ File.join(importerexporter.exporter_export_path, id)
266
+ end
267
+
268
+ def write_triples(e)
269
+ sd = SolrDocument.find(e.identifier)
270
+ return if sd.nil?
271
+
272
+ req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
273
+ rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
274
+ File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
275
+ triples.write(rdf)
276
+ end
105
277
  end
106
278
 
107
279
  def required_elements?(keys)
@@ -126,11 +298,7 @@ module Bulkrax
126
298
  def bags
127
299
  return @bags if @bags.present?
128
300
  new_bag = bag(import_file_path)
129
- @bags = if new_bag
130
- [new_bag]
131
- else
132
- Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
133
- end
301
+ @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
134
302
  @bags.delete(nil)
135
303
  raise StandardError, 'No valid bags found' if @bags.blank?
136
304
  return @bags
@@ -60,6 +60,16 @@
60
60
  hint: 'leave blank or 0 for all records',
61
61
  label: t('bulkrax.exporter.labels.limit') %>
62
62
 
63
+ <%= form.input :generated_metadata?,
64
+ as: :boolean,
65
+ label: t('bulkrax.exporter.labels.generated_metadata'),
66
+ hint: t('bulkrax.exporter.hints.generated_metadata') %>
67
+
68
+ <%= form.input :include_thumbnails?,
69
+ as: :boolean,
70
+ label: t('bulkrax.exporter.labels.include_thumbnails'),
71
+ hint: t('bulkrax.exporter.hints.include_thumbnails') %>
72
+
63
73
  <%= form.input :date_filter,
64
74
  as: :boolean,
65
75
  label: t('bulkrax.exporter.labels.filter_by_date') %>
@@ -57,6 +57,18 @@
57
57
  <strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
58
58
  <%= @exporter.limit %>
59
59
  </p>
60
+
61
+ <p class='bulkrax-p-align'>
62
+ <strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
63
+ <%= @exporter.generated_metadata %>
64
+ </p>
65
+
66
+ <p class='bulkrax-p-align'>
67
+ <strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
68
+ <%= @exporter.include_thumbnails %>
69
+ </p>
70
+
71
+
60
72
  <%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
61
73
 
62
74
  <%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
@@ -16,6 +16,8 @@ en:
16
16
  filter_by_date: Filter By Date
17
17
  finish_date: End Date
18
18
  full: Metadata and Files
19
+ include_thumbnails: Include Thumbnails?
20
+ generated_metadata: Include Generated Metadata?
19
21
  importer: Importer
20
22
  limit: Limit
21
23
  metadata: Metadata Only
@@ -35,3 +37,6 @@ en:
35
37
  ingested: "Ingested"
36
38
  unapproved: "Unapproved"
37
39
  needs_repair: "Needs Repair"
40
+ hints:
41
+ include_thumbnails: "These exported fields currently cannot be imported."
42
+ generated_metadata: "These exported fields currently cannot be imported."
@@ -0,0 +1,5 @@
1
+ class AddIncludeThumbnailsToBulkraxExporters < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :bulkrax_exporters, :include_thumbnails, :boolean, default: false unless column_exists?(:bulkrax_exporters, :include_thumbnails)
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ class AddGeneratedMetadataToBulkraxExporters < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :bulkrax_exporters, :generated_metadata, :boolean, default: false unless column_exists?(:bulkrax_exporters, :generated_metadata)
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
2
+ def change
3
+ if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
4
+ rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
5
+ end
6
+ end
7
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.2'
5
5
  end
data/lib/bulkrax.rb CHANGED
@@ -9,6 +9,7 @@ module Bulkrax
9
9
  :default_work_type,
10
10
  :default_field_mapping,
11
11
  :fill_in_blank_source_identifiers,
12
+ :generated_metadata_mapping,
12
13
  :related_children_field_mapping,
13
14
  :related_parents_field_mapping,
14
15
  :reserved_properties,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-27 00:00:00.000000000 Z
11
+ date: 2022-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -253,6 +253,7 @@ files:
253
253
  - app/controllers/bulkrax/exporters_controller.rb
254
254
  - app/controllers/bulkrax/importers_controller.rb
255
255
  - app/controllers/concerns/bulkrax/api.rb
256
+ - app/controllers/concerns/bulkrax/download_behavior.rb
256
257
  - app/factories/bulkrax/object_factory.rb
257
258
  - app/helpers/bulkrax/application_helper.rb
258
259
  - app/helpers/bulkrax/exporters_helper.rb
@@ -297,7 +298,6 @@ files:
297
298
  - app/models/bulkrax/rdf_file_set_entry.rb
298
299
  - app/models/bulkrax/status.rb
299
300
  - app/models/bulkrax/xml_entry.rb
300
- - app/models/concerns/bulkrax/download_behavior.rb
301
301
  - app/models/concerns/bulkrax/dynamic_record_lookup.rb
302
302
  - app/models/concerns/bulkrax/errored_entries.rb
303
303
  - app/models/concerns/bulkrax/export_behavior.rb
@@ -372,6 +372,9 @@ files:
372
372
  - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
373
373
  - db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
374
374
  - db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
375
+ - db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
376
+ - db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
377
+ - db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb
375
378
  - lib/bulkrax.rb
376
379
  - lib/bulkrax/engine.rb
377
380
  - lib/bulkrax/version.rb