bulkrax 3.2.0 → 3.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49b7cfb404f1878429bbac77aad6d1167e97377fe28b54e2042cee7f6ca04e67
4
- data.tar.gz: c3789c193dc1610c99117e5e2c450ed477a7390995916c4b6b419900f5cf539b
3
+ metadata.gz: b136a8742a7f9b953b4c3ef86d700540b931b6fbf22798d719e9aa693ea61fa9
4
+ data.tar.gz: e1cc32eda55a606285cf6e080340db608a8867f0bd4de8ab1361ca4a3d21adf9
5
5
  SHA512:
6
- metadata.gz: a669e9b566770ad21e3a6242d24779f5c73847b2b97d4f7334f06209ac7de3634e7667550ba35ce6930f9ceaffda8efccb29cf3277b40f29d808ddbd9623a1b8
7
- data.tar.gz: 794e6de65d4ebb5665ab73fdac3eba2d4157ff1b07f3da6cc63a6323b2127dc40021d69ea193da2a356e276f08dde8ccc51dc1d8c563e5ba3e8ecb592e4b4c88
6
+ metadata.gz: 87e35f340faa583a9ae6ac156f95fa4958fcaf9d3ed09a9963a8dda39649a66307a8c494375a989e2103c2d1edef62343f5b69878eae2249d63cc4b8f65240a8
7
+ data.tar.gz: 21ddafc671eda822144b6b73abfb89892ffac954f0692952985299f40f7d0e1aba472077aac72a1f681da315ba35de795f43d95df323c2178576716d02eafc75
@@ -101,12 +101,12 @@ module Bulkrax
101
101
  def exporter_params
102
102
  params[:exporter][:export_source] = params[:exporter]["export_source_#{params[:exporter][:export_from]}".to_sym]
103
103
  if params[:exporter][:date_filter] == "1"
104
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
105
- :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
104
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
105
+ :include_thumbnails, :parser_klass, :limit, :start_date, :finish_date, :work_visibility,
106
106
  :workflow_status, field_mapping: {})
107
107
  else
108
- params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type,
109
- :parser_klass, :limit, :work_visibility, :workflow_status,
108
+ params.fetch(:exporter).permit(:name, :user_id, :export_source, :export_from, :export_type, :generated_metadata,
109
+ :include_thumbnails, :parser_klass, :limit, :work_visibility, :workflow_status,
110
110
  field_mapping: {}).merge(start_date: nil, finish_date: nil)
111
111
  end
112
112
  end
@@ -42,7 +42,7 @@ module Bulkrax
42
42
  def send_file_contents
43
43
  self.status = 200
44
44
  prepare_file_headers
45
- stream_body file.read
45
+ send_file file
46
46
  end
47
47
 
48
48
  def prepare_file_headers
@@ -53,9 +53,5 @@ module Bulkrax
53
53
  response.headers['Last-Modified'] = File.mtime(file_path).utc.strftime("%a, %d %b %Y %T GMT")
54
54
  self.content_type = download_content_type
55
55
  end
56
-
57
- def stream_body(iostream)
58
- self.response_body = iostream
59
- end
60
56
  end
61
57
  end
@@ -61,6 +61,7 @@ module Bulkrax
61
61
  work_actor.update(environment(attrs))
62
62
  end
63
63
  end
64
+ object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
64
65
  log_updated(object)
65
66
  end
66
67
 
@@ -107,6 +108,7 @@ module Bulkrax
107
108
  end
108
109
  end
109
110
  end
111
+ object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
110
112
  log_created(object)
111
113
  end
112
114
 
@@ -141,7 +143,6 @@ module Bulkrax
141
143
  attrs = clean_attrs(attrs)
142
144
  attrs = collection_type(attrs)
143
145
  object.attributes = attrs
144
- object.apply_depositor_metadata(@user)
145
146
  object.save!
146
147
  end
147
148
 
@@ -33,7 +33,7 @@ module Bulkrax
33
33
  # is the child in the relationship, and vice versa if a child_identifier is passed.
34
34
  def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize
35
35
  pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
36
- rel.bulkrax_importer_run_id == importer_run_id && rel.parent_id == parent_identifier
36
+ rel.importer_run_id == importer_run_id && rel.parent_id == parent_identifier
37
37
  end.sort_by(&:order)
38
38
 
39
39
  @importer_run_id = importer_run_id
@@ -12,6 +12,8 @@ module Bulkrax
12
12
  import(importer, only_updates_since_last_import)
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
+ rescue CSV::MalformedCSVError => e
16
+ importer.status_info(e)
15
17
  end
16
18
 
17
19
  def import(importer, only_updates_since_last_import)
@@ -99,6 +99,7 @@ module Bulkrax
99
99
  build_files_metadata unless hyrax_record.is_a?(Collection)
100
100
  build_relationship_metadata
101
101
  build_mapping_metadata
102
+ self.save!
102
103
 
103
104
  self.parsed_metadata
104
105
  end
@@ -113,9 +114,10 @@ module Bulkrax
113
114
  def build_files_metadata
114
115
  file_mapping = key_for_export('file')
115
116
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
116
- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
117
+ filenames = map_file_sets(file_sets)
117
118
 
118
119
  handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
120
+ build_thumbnail_files if hyrax_record.work?
119
121
  end
120
122
 
121
123
  def build_relationship_metadata
@@ -140,6 +142,7 @@ module Bulkrax
140
142
  end
141
143
 
142
144
  def build_mapping_metadata
145
+ mapping = fetch_field_mapping
143
146
  mapping.each do |key, value|
144
147
  # these keys are handled by other methods
145
148
  next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
@@ -217,6 +220,16 @@ module Bulkrax
217
220
  end
218
221
  end
219
222
 
223
+ def build_thumbnail_files
224
+ return unless importerexporter.include_thumbnails
225
+
226
+ thumbnail_mapping = 'thumbnail_file'
227
+ file_sets = Array.wrap(hyrax_record.thumbnail)
228
+
229
+ filenames = map_file_sets(file_sets)
230
+ handle_join_on_export(thumbnail_mapping, filenames, false)
231
+ end
232
+
220
233
  def handle_join_on_export(key, values, join)
221
234
  if join
222
235
  parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
@@ -252,7 +265,6 @@ module Bulkrax
252
265
  raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
253
266
  identifiers << matching_collection_entries.first&.identifier
254
267
  end
255
-
256
268
  @collection_identifiers = identifiers.compact.presence || []
257
269
  end
258
270
 
@@ -283,5 +295,11 @@ module Bulkrax
283
295
  return f if File.exist?(f)
284
296
  raise "File #{f} does not exist"
285
297
  end
298
+
299
+ private
300
+
301
+ def map_file_sets(file_sets)
302
+ file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
303
+ end
286
304
  end
287
305
  end
@@ -4,6 +4,8 @@ module Bulkrax
4
4
  # Custom error class for collections_created?
5
5
  class CollectionsCreatedError < RuntimeError; end
6
6
  class OAIError < RuntimeError; end
7
+ # TODO: remove when ApplicationParser#bagit_zip_file_size_check is removed
8
+ class BagitZipError < RuntimeError; end
7
9
  class Entry < ApplicationRecord
8
10
  include Bulkrax::HasMatchers
9
11
  include Bulkrax::ImportBehavior
@@ -34,6 +36,7 @@ module Bulkrax
34
36
  delegate :client,
35
37
  :collection_name,
36
38
  :user,
39
+ :generated_metadata_mapping,
37
40
  :related_parents_raw_mapping,
38
41
  :related_parents_parsed_mapping,
39
42
  :related_children_raw_mapping,
@@ -70,6 +73,15 @@ module Bulkrax
70
73
  parser&.work_identifier&.to_s || 'source'
71
74
  end
72
75
 
76
+ # Returns field_mapping hash based on whether or not generated metadata should be included
77
+ def fetch_field_mapping
78
+ return self.mapping if importerexporter.generated_metadata
79
+
80
+ self.mapping.each do |key, value|
81
+ self.mapping.delete(key) if value[generated_metadata_mapping]
82
+ end
83
+ end
84
+
73
85
  def self.parent_field(parser)
74
86
  parser.related_parents_parsed_mapping
75
87
  end
@@ -51,6 +51,14 @@ module Bulkrax
51
51
  self.start_date.present? || self.finish_date.present?
52
52
  end
53
53
 
54
+ def include_thumbnails?
55
+ self.include_thumbnails
56
+ end
57
+
58
+ def generated_metadata?
59
+ self.generated_metadata
60
+ end
61
+
54
62
  def work_visibility_list
55
63
  [
56
64
  ['Any', ''],
@@ -96,16 +96,19 @@ module Bulkrax
96
96
  end
97
97
 
98
98
  def current_run
99
- @current_run ||= if file? && zip?
100
- self.importer_runs.create!
101
- else
102
- entry_counts = {
103
- total_work_entries: self.limit || parser.works_total,
104
- total_collection_entries: parser.collections_total,
105
- total_file_set_entries: parser.file_sets_total
106
- }
107
- self.importer_runs.create!(entry_counts)
108
- end
99
+ return @current_run if @current_run.present?
100
+
101
+ @current_run = self.importer_runs.create!
102
+ return @current_run if file? && zip?
103
+
104
+ entry_counts = {
105
+ total_work_entries: self.limit || parser.works_total,
106
+ total_collection_entries: parser.collections_total,
107
+ total_file_set_entries: parser.file_sets_total
108
+ }
109
+ @current_run.update!(entry_counts)
110
+
111
+ @current_run
109
112
  end
110
113
 
111
114
  def last_run
@@ -4,9 +4,10 @@ module Bulkrax
4
4
  class ImporterRun < ApplicationRecord
5
5
  belongs_to :importer
6
6
  has_many :statuses, as: :runnable, dependent: :destroy
7
+ has_many :pending_relationships, dependent: :destroy
7
8
 
8
9
  def parents
9
- PendingRelationship.where(bulkrax_importer_run_id: id).pluck(:parent_id).uniq
10
+ pending_relationships.pluck(:parent_id).uniq
10
11
  end
11
12
  end
12
13
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bulkrax
4
4
  class PendingRelationship < ApplicationRecord
5
- belongs_to :bulkrax_importer_run, class_name: "::Bulkrax::ImporterRun"
5
+ belongs_to :importer_run
6
6
  end
7
7
  end
@@ -7,6 +7,8 @@ module Bulkrax
7
7
 
8
8
  def build_for_exporter
9
9
  build_export_metadata
10
+ # TODO(alishaevn): determine if the line below is still necessary
11
+ # the csv and bagit parsers also have write_files methods
10
12
  write_files if export_type == 'full' && !importerexporter.parser_klass.include?('Bagit')
11
13
  rescue RSolr::Error::Http, CollectionsCreatedError => e
12
14
  raise e
@@ -28,6 +30,7 @@ module Bulkrax
28
30
  return if hyrax_record.is_a?(Collection)
29
31
 
30
32
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
33
+ file_sets << hyrax_record.thumbnail if hyrax_record.thumbnail.present? && hyrax_record.work? && exporter.include_thumbnails
31
34
  file_sets.each do |fs|
32
35
  path = File.join(exporter_export_path, 'files')
33
36
  FileUtils.mkdir_p(path)
@@ -48,7 +51,7 @@ module Bulkrax
48
51
  fn = file_set.original_file.file_name.first
49
52
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
50
53
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
51
- if fn.include?(file_set.id) || importerexporter.metadata_only?
54
+ if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
52
55
  filename = "#{fn}.#{mime.to_sym}"
53
56
  filename = fn if mime.to_s == ext_mime.to_s
54
57
  else
@@ -28,5 +28,13 @@ module Bulkrax
28
28
 
29
29
  raise StandardError, 'File set must be related to at least one work'
30
30
  end
31
+
32
+ def parent_jobs
33
+ false # FileSet relationships are handled in ObjectFactory#create_file_set
34
+ end
35
+
36
+ def child_jobs
37
+ raise ::StandardError, 'A FileSet cannot be a parent of a Collection, Work, or other FileSet'
38
+ end
31
39
  end
32
40
  end
@@ -50,7 +50,7 @@ module Bulkrax
50
50
  self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
51
51
  next if parent_identifier.blank?
52
52
 
53
- PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
53
+ PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
54
54
  end
55
55
  end
56
56
 
@@ -58,7 +58,7 @@ module Bulkrax
58
58
  self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
59
59
  next if child_identifier.blank?
60
60
 
61
- PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
61
+ PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, importer_run_id: importerexporter.last_run.id, order: self.id)
62
62
  end
63
63
  end
64
64
 
@@ -18,7 +18,7 @@ module Bulkrax
18
18
  end
19
19
 
20
20
  def failed?
21
- current_status&.status_message&.match(/fail/i)
21
+ current_status&.status_message&.eql?('Failed')
22
22
  end
23
23
 
24
24
  def succeeded?
@@ -51,6 +51,10 @@ module Bulkrax
51
51
  @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
52
52
  end
53
53
 
54
+ def generated_metadata_mapping
55
+ @generated_metadata_mapping ||= 'generated'
56
+ end
57
+
54
58
  def related_parents_raw_mapping
55
59
  @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
56
60
  end
@@ -242,15 +246,49 @@ module Bulkrax
242
246
  def write
243
247
  write_files
244
248
  zip
249
+ # uncomment next line to debug for faulty zipping during bagit export
250
+ bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
245
251
  end
246
252
 
247
253
  def unzip(file_to_unzip)
248
- WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path).unzip_file
254
+ Zip::File.open(file_to_unzip) do |zip_file|
255
+ zip_file.each do |entry|
256
+ entry_path = File.join(importer_unzip_path, entry.name)
257
+ FileUtils.mkdir_p(File.dirname(entry_path))
258
+ zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
259
+ end
260
+ end
249
261
  end
250
262
 
251
263
  def zip
264
+ require 'zip'
252
265
  FileUtils.rm_rf(exporter_export_zip_path)
253
- WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path).create_zip
266
+ Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
267
+ Dir["#{exporter_export_path}/**/**"].each do |file|
268
+ zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
269
+ end
270
+ end
271
+ end
272
+
273
+ # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
274
+ def bagit_zip_file_size_check
275
+ Zip::File.open(exporter_export_zip_path) do |zip_file|
276
+ zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
277
+ Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
278
+ begin
279
+ raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
280
+ rescue BagitZipError => e
281
+ matched_entry_ids = importerexporter.entry_ids.select do |id|
282
+ Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
283
+ end
284
+ matched_entry_ids.each do |entry_id|
285
+ Bulkrax::Entry.find(entry_id).status_info(e)
286
+ status_info('Complete (with failures)')
287
+ end
288
+ end
289
+ end
290
+ end
291
+ end
254
292
  end
255
293
 
256
294
  # Is this a file?
@@ -272,7 +310,6 @@ module Bulkrax
272
310
 
273
311
  def real_import_file_path
274
312
  return importer_unzip_path if file? && zip?
275
-
276
313
  parser_fields['import_file_path']
277
314
  end
278
315
  end
@@ -1,9 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class BagitParser < ApplicationParser
4
+ class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
5
+ include ExportBehavior
6
+
5
7
  def self.export_supported?
6
- false # @todo will be supported
8
+ true
7
9
  end
8
10
 
9
11
  def valid_import?
@@ -14,19 +16,11 @@ module Bulkrax
14
16
  end
15
17
 
16
18
  def entry_class
17
- parser_fields['metadata_format'].constantize
18
- end
19
-
20
- def collection_entry_class
21
- parser_fields['metadata_format'].gsub('Entry', 'CollectionEntry').constantize
22
- rescue
23
- Entry
24
- end
25
-
26
- def file_set_entry_class
27
- csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
28
- csv_format ? CsvFileSetEntry : RdfFileSetEntry
19
+ rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
20
+ rdf_format ? RdfEntry : CsvEntry
29
21
  end
22
+ alias collection_entry_class entry_class
23
+ alias file_set_entry_class entry_class
30
24
 
31
25
  # Take a random sample of 10 metadata_paths and work out the import fields from that
32
26
  def import_fields
@@ -101,7 +95,185 @@ module Bulkrax
101
95
  end
102
96
 
103
97
  def total
104
- metadata_paths.count
98
+ importerexporter.entries.count
99
+ end
100
+
101
+ def extra_filters
102
+ output = ""
103
+ if importerexporter.start_date.present?
104
+ start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
105
+ finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
106
+ output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
107
+ end
108
+ output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
109
+ output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
110
+ output
111
+ end
112
+
113
+ def current_record_ids
114
+ @work_ids = []
115
+ @collection_ids = []
116
+ @file_set_ids = []
117
+
118
+ case importerexporter.export_from
119
+ when 'all'
120
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
121
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
122
+ @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
123
+ when 'collection'
124
+ @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
125
+ when 'worktype'
126
+ @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
127
+ when 'importer'
128
+ set_ids_for_exporting_from_importer
129
+ end
130
+
131
+ @work_ids + @collection_ids + @file_set_ids
132
+ end
133
+
134
+ # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
135
+ # @see #current_record_ids
136
+ def set_ids_for_exporting_from_importer
137
+ entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
138
+ complete_statuses = Status.latest_by_statusable
139
+ .includes(:statusable)
140
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
141
+
142
+ complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
143
+ extra_filters = extra_filters.presence || '*:*'
144
+
145
+ { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
146
+ instance_variable_set(instance_var, ActiveFedora::SolrService.post(
147
+ extra_filters.to_s,
148
+ fq: [
149
+ %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
150
+ "has_model_ssim:(#{models_to_search.join(' OR ')})"
151
+ ],
152
+ fl: 'id',
153
+ rows: 2_000_000_000
154
+ )['response']['docs'].map { |obj| obj['id'] })
155
+ end
156
+ end
157
+
158
+ def create_new_entries
159
+ current_record_ids.each_with_index do |id, index|
160
+ break if limit_reached?(limit, index)
161
+
162
+ this_entry_class = if @collection_ids.include?(id)
163
+ collection_entry_class
164
+ elsif @file_set_ids.include?(id)
165
+ file_set_entry_class
166
+ else
167
+ entry_class
168
+ end
169
+ new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
170
+
171
+ begin
172
+ entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
173
+ rescue => e
174
+ Rails.logger.info("#{e.message} was detected during export")
175
+ end
176
+
177
+ self.headers |= entry.parsed_metadata.keys if entry
178
+ end
179
+ end
180
+ alias create_from_collection create_new_entries
181
+ alias create_from_importer create_new_entries
182
+ alias create_from_worktype create_new_entries
183
+ alias create_from_all create_new_entries
184
+
185
+ # export methods
186
+
187
+ # rubocop:disable Metrics/AbcSize
188
+ def write_files
189
+ require 'open-uri'
190
+ require 'socket'
191
+ importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
192
+ bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
193
+ w = ActiveFedora::Base.find(e.identifier)
194
+ next unless Hyrax.config.curation_concerns.include?(w.class)
195
+
196
+ w.file_sets.each do |fs|
197
+ file_name = filename(fs)
198
+ next if file_name.blank?
199
+ io = open(fs.original_file.uri)
200
+ file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
201
+ file.write(io.read)
202
+ file.close
203
+ bag.add_file(file_name, file.path)
204
+ end
205
+ CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
206
+ csv << e.parsed_metadata
207
+ end
208
+ write_triples(e)
209
+ bag.manifest!(algo: 'sha256')
210
+ end
211
+ end
212
+ # rubocop:enable Metrics/AbcSize
213
+
214
+ def setup_csv_metadata_export_file(id)
215
+ File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
216
+ end
217
+
218
+ def key_allowed(key)
219
+ !Bulkrax.reserved_properties.include?(key) &&
220
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
221
+ key != source_identifier.to_s
222
+ end
223
+
224
+ # All possible column names
225
+ def export_headers
226
+ headers = sort_headers(self.headers)
227
+
228
+ # we don't want access_control_id exported and we want file at the end
229
+ headers.delete('access_control_id') if headers.include?('access_control_id')
230
+
231
+ # add the headers below at the beginning or end to maintain the preexisting export behavior
232
+ headers.prepend('model')
233
+ headers.prepend(source_identifier.to_s)
234
+ headers.prepend('id')
235
+
236
+ headers.uniq
237
+ end
238
+
239
+ def object_names
240
+ return @object_names if @object_names
241
+
242
+ @object_names = mapping.values.map { |value| value['object'] }
243
+ @object_names.uniq!.delete(nil)
244
+
245
+ @object_names
246
+ end
247
+
248
+ def sort_headers(headers)
249
+ # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
250
+ # while keeping objects grouped together
251
+ headers.sort_by do |item|
252
+ number = item.match(/\d+/)&.[](0) || 0.to_s
253
+ sort_number = number.rjust(4, "0")
254
+ object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
255
+ remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
256
+ "#{object_prefix}_#{sort_number}_#{remainder}"
257
+ end
258
+ end
259
+
260
+ def setup_triple_metadata_export_file(id)
261
+ File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
262
+ end
263
+
264
+ def setup_bagit_folder(id)
265
+ File.join(importerexporter.exporter_export_path, id)
266
+ end
267
+
268
+ def write_triples(e)
269
+ sd = SolrDocument.find(e.identifier)
270
+ return if sd.nil?
271
+
272
+ req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
273
+ rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
274
+ File.open(setup_triple_metadata_export_file(e.identifier), "w") do |triples|
275
+ triples.write(rdf)
276
+ end
105
277
  end
106
278
 
107
279
  def required_elements?(keys)
@@ -126,11 +298,7 @@ module Bulkrax
126
298
  def bags
127
299
  return @bags if @bags.present?
128
300
  new_bag = bag(import_file_path)
129
- @bags = if new_bag
130
- [new_bag]
131
- else
132
- Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
133
- end
301
+ @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
134
302
  @bags.delete(nil)
135
303
  raise StandardError, 'No valid bags found' if @bags.blank?
136
304
  return @bags
@@ -60,6 +60,16 @@
60
60
  hint: 'leave blank or 0 for all records',
61
61
  label: t('bulkrax.exporter.labels.limit') %>
62
62
 
63
+ <%= form.input :generated_metadata?,
64
+ as: :boolean,
65
+ label: t('bulkrax.exporter.labels.generated_metadata'),
66
+ hint: t('bulkrax.exporter.hints.generated_metadata') %>
67
+
68
+ <%= form.input :include_thumbnails?,
69
+ as: :boolean,
70
+ label: t('bulkrax.exporter.labels.include_thumbnails'),
71
+ hint: t('bulkrax.exporter.hints.include_thumbnails') %>
72
+
63
73
  <%= form.input :date_filter,
64
74
  as: :boolean,
65
75
  label: t('bulkrax.exporter.labels.filter_by_date') %>
@@ -57,6 +57,18 @@
57
57
  <strong><%= t('bulkrax.exporter.labels.limit') %>:</strong>
58
58
  <%= @exporter.limit %>
59
59
  </p>
60
+
61
+ <p class='bulkrax-p-align'>
62
+ <strong><%= t('bulkrax.exporter.labels.generated_metadata') %>:</strong>
63
+ <%= @exporter.generated_metadata %>
64
+ </p>
65
+
66
+ <p class='bulkrax-p-align'>
67
+ <strong><%= t('bulkrax.exporter.labels.include_thumbnails') %>:</strong>
68
+ <%= @exporter.include_thumbnails %>
69
+ </p>
70
+
71
+
60
72
  <%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @exporter} %>
61
73
 
62
74
  <%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @exporter} %>
@@ -16,6 +16,8 @@ en:
16
16
  filter_by_date: Filter By Date
17
17
  finish_date: End Date
18
18
  full: Metadata and Files
19
+ include_thumbnails: Include Thumbnails?
20
+ generated_metadata: Include Generated Metadata?
19
21
  importer: Importer
20
22
  limit: Limit
21
23
  metadata: Metadata Only
@@ -35,3 +37,6 @@ en:
35
37
  ingested: "Ingested"
36
38
  unapproved: "Unapproved"
37
39
  needs_repair: "Needs Repair"
40
+ hints:
41
+ include_thumbnails: "These exported fields currently cannot be imported."
42
+ generated_metadata: "These exported fields currently cannot be imported."
@@ -0,0 +1,5 @@
1
+ class AddIncludeThumbnailsToBulkraxExporters < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :bulkrax_exporters, :include_thumbnails, :boolean, default: false unless column_exists?(:bulkrax_exporters, :include_thumbnails)
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ class AddGeneratedMetadataToBulkraxExporters < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :bulkrax_exporters, :generated_metadata, :boolean, default: false unless column_exists?(:bulkrax_exporters, :generated_metadata)
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
2
+ def change
3
+ if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
4
+ rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
5
+ end
6
+ end
7
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.2'
5
5
  end
data/lib/bulkrax.rb CHANGED
@@ -9,6 +9,7 @@ module Bulkrax
9
9
  :default_work_type,
10
10
  :default_field_mapping,
11
11
  :fill_in_blank_source_identifiers,
12
+ :generated_metadata_mapping,
12
13
  :related_children_field_mapping,
13
14
  :related_parents_field_mapping,
14
15
  :reserved_properties,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-27 00:00:00.000000000 Z
11
+ date: 2022-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -253,6 +253,7 @@ files:
253
253
  - app/controllers/bulkrax/exporters_controller.rb
254
254
  - app/controllers/bulkrax/importers_controller.rb
255
255
  - app/controllers/concerns/bulkrax/api.rb
256
+ - app/controllers/concerns/bulkrax/download_behavior.rb
256
257
  - app/factories/bulkrax/object_factory.rb
257
258
  - app/helpers/bulkrax/application_helper.rb
258
259
  - app/helpers/bulkrax/exporters_helper.rb
@@ -297,7 +298,6 @@ files:
297
298
  - app/models/bulkrax/rdf_file_set_entry.rb
298
299
  - app/models/bulkrax/status.rb
299
300
  - app/models/bulkrax/xml_entry.rb
300
- - app/models/concerns/bulkrax/download_behavior.rb
301
301
  - app/models/concerns/bulkrax/dynamic_record_lookup.rb
302
302
  - app/models/concerns/bulkrax/errored_entries.rb
303
303
  - app/models/concerns/bulkrax/export_behavior.rb
@@ -372,6 +372,9 @@ files:
372
372
  - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
373
373
  - db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
374
374
  - db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
375
+ - db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb
376
+ - db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb
377
+ - db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb
375
378
  - lib/bulkrax.rb
376
379
  - lib/bulkrax/engine.rb
377
380
  - lib/bulkrax/version.rb