bulkrax 3.3.1 → 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2525e4aed8b31a897668d556c311b29189ec1b7a951b4303604acd3ccb11fcc8
4
- data.tar.gz: d642de2af38c8c82108b113641ab1e191ae05b96b2ce3528dde7d7c908639291
3
+ metadata.gz: 0a80674a9f582c3b8e83f442318908edb6ca9f0b615c970d09b17d941cc8027d
4
+ data.tar.gz: a2a53116ef49e03dde1aa1df14d8259a2b4abf06a82cff63a9d4ba622ba6600a
5
5
  SHA512:
6
- metadata.gz: 6547c0283f75626cda95a8cc09cceff84ac576b7ca4034b72d729ab16d50c71657dac0e78739b5ef6f2d83aa85e1ddc8c45646f6c44408a0a989b4b33b93c487
7
- data.tar.gz: 9ae11d35d0b3caf9d45237590e42541c0004268e6c86e8d44e6754900b253894813c7e817b471ed9379b7eefad1ec3a27d2cfebbe94865727ecb69b6b8652480
6
+ metadata.gz: af3d75fb03105e37f7374f3a7f863c545d9cc9c95ab2f18bbbf7b4692024e09811f103a372327b4724c836568bad176ed0ad0b7be929ed556259aa9b0793fce6
7
+ data.tar.gz: 1117a185fbab2bae0746187f464bebea855759a5ecccf0d34f098ac55ad7a2952e663268372262ba8f97820c8c1f02bd29c74a388cfd8ea9cfed84a46dad94cf
@@ -42,10 +42,12 @@ module Bulkrax
42
42
  pending_relationships.each do |rel|
43
43
  raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
44
44
  @child_entry, child_record = find_record(rel.child_id, importer_run_id)
45
- child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
45
+ if child_record
46
+ child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
47
+ end
46
48
  end
47
49
 
48
- if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
50
+ if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.nil?
49
51
  reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
50
52
  return false # stop current job from continuing to run after rescheduling
51
53
  end
@@ -6,26 +6,26 @@ module Bulkrax
6
6
 
7
7
  def perform(*args)
8
8
  entry = Entry.find(args[0])
9
+ exporter_run = ExporterRun.find(args[1])
9
10
  begin
10
11
  entry.build
11
12
  entry.save
12
13
  rescue StandardError
13
14
  # rubocop:disable Rails/SkipsModelValidations
14
- ExporterRun.find(args[1]).increment!(:failed_records)
15
- ExporterRun.find(args[1]).decrement!(:enqueued_records)
15
+ exporter_run.increment!(:failed_records)
16
+ exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
16
17
  raise
17
18
  else
18
19
  if entry.failed?
19
- ExporterRun.find(args[1]).increment!(:failed_records)
20
- ExporterRun.find(args[1]).decrement!(:enqueued_records)
20
+ exporter_run.increment!(:failed_records)
21
+ exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
21
22
  raise entry.reload.current_status.error_class.constantize
22
23
  else
23
- ExporterRun.find(args[1]).increment!(:processed_records)
24
- ExporterRun.find(args[1]).decrement!(:enqueued_records)
24
+ exporter_run.increment!(:processed_records)
25
+ exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
25
26
  end
26
27
  # rubocop:enable Rails/SkipsModelValidations
27
28
  end
28
- exporter_run = ExporterRun.find(args[1])
29
29
  return entry if exporter_run.enqueued_records.positive?
30
30
 
31
31
  if exporter_run.failed_records.positive?
@@ -12,6 +12,8 @@ module Bulkrax
12
12
  import(importer, only_updates_since_last_import)
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
+ rescue CSV::MalformedCSVError => e
16
+ importer.status_info(e)
15
17
  end
16
18
 
17
19
  def import(importer, only_updates_since_last_import)
@@ -114,9 +114,10 @@ module Bulkrax
114
114
  def build_files_metadata
115
115
  file_mapping = key_for_export('file')
116
116
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
117
- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
117
+ filenames = map_file_sets(file_sets)
118
118
 
119
119
  handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
120
+ build_thumbnail_files if hyrax_record.work?
120
121
  end
121
122
 
122
123
  def build_relationship_metadata
@@ -219,15 +220,6 @@ module Bulkrax
219
220
  end
220
221
  end
221
222
 
222
- def build_files
223
- file_mapping = mapping['file']&.[]('from')&.first || 'file'
224
- file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
225
-
226
- filenames = map_file_sets(file_sets)
227
- handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
228
- build_thumbnail_files if hyrax_record.work?
229
- end
230
-
231
223
  def build_thumbnail_files
232
224
  return unless importerexporter.include_thumbnails
233
225
 
@@ -14,13 +14,15 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
21
21
  case self.export_from
22
22
  when 'collection'
23
23
  create_from_collection
24
+ when 'collections metadata'
25
+ create_from_collections_metadata
24
26
  when 'importer'
25
27
  create_from_importer
26
28
  when 'worktype'
@@ -87,6 +89,7 @@ module Bulkrax
87
89
  [
88
90
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
89
91
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
92
+ [I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
90
93
  [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
91
94
  [I18n.t('bulkrax.exporter.labels.all'), 'all']
92
95
  ]
@@ -96,16 +96,19 @@ module Bulkrax
96
96
  end
97
97
 
98
98
  def current_run
99
- @current_run ||= if file? && zip?
100
- self.importer_runs.create!
101
- else
102
- entry_counts = {
103
- total_work_entries: self.limit || parser.works_total,
104
- total_collection_entries: parser.collections_total,
105
- total_file_set_entries: parser.file_sets_total
106
- }
107
- self.importer_runs.create!(entry_counts)
108
- end
99
+ return @current_run if @current_run.present?
100
+
101
+ @current_run = self.importer_runs.create!
102
+ return @current_run if file? && zip?
103
+
104
+ entry_counts = {
105
+ total_work_entries: self.limit || parser.works_total,
106
+ total_collection_entries: parser.collections_total,
107
+ total_file_set_entries: parser.file_sets_total
108
+ }
109
+ @current_run.update!(entry_counts)
110
+
111
+ @current_run
109
112
  end
110
113
 
111
114
  def last_run
@@ -12,15 +12,14 @@ module Bulkrax
12
12
  # check for our entry in our current importer first
13
13
  importer_id = ImporterRun.find(importer_run_id).importer_id
14
14
  default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
15
- record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
16
15
 
17
- # TODO(alishaevn): discuss whether we are only looking for Collection models here
18
- # use ActiveFedora::Base.find(identifier) instead?
19
- record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy
20
- if record.blank?
21
- available_work_types.each do |work_type|
22
- record ||= work_type.where(id: identifier).first # rubocop:disable Rails/FindBy
23
- end
16
+ begin
17
+ # the identifier parameter can be a :source_identifier or the id of an object
18
+ record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
19
+ record ||= ActiveFedora::Base.find(identifier)
20
+ # NameError for if ActiveFedora isn't installed
21
+ rescue NameError, ActiveFedora::ObjectNotFoundError
22
+ record = nil
24
23
  end
25
24
 
26
25
  # return the found entry here instead of searching for it again in the CreateRelationshipsJob
@@ -51,7 +51,7 @@ module Bulkrax
51
51
  fn = file_set.original_file.file_name.first
52
52
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
53
53
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
54
- if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
54
+ if fn.include?(file_set.id) || importerexporter.metadata_only?
55
55
  filename = "#{fn}.#{mime.to_sym}"
56
56
  filename = fn if mime.to_s == ext_mime.to_s
57
57
  else
@@ -8,10 +8,14 @@ module Bulkrax
8
8
 
9
9
  def add_path_to_file
10
10
  parsed_metadata['file'].each_with_index do |filename, i|
11
- path_to_file = ::File.join(parser.path_to_files, filename)
11
+ next if filename.blank?
12
+
13
+ path_to_file = parser.path_to_files(filename: filename)
12
14
 
13
15
  parsed_metadata['file'][i] = path_to_file
14
16
  end
17
+ parsed_metadata['file'].delete('')
18
+
15
19
  raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
20
 
17
21
  parsed_metadata['file']
@@ -12,8 +12,8 @@ module Bulkrax
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
14
  add_user_to_permission_templates! if self.class.to_s.include?("Collection")
15
- parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
16
- child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
15
+ parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
+ child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
18
18
  rescue RSolr::Error::Http, CollectionsCreatedError => e
19
19
  raise e
@@ -18,7 +18,7 @@ module Bulkrax
18
18
  end
19
19
 
20
20
  def failed?
21
- current_status&.status_message&.match(/fail/i)
21
+ current_status&.status_message&.eql?('Failed')
22
22
  end
23
23
 
24
24
  def succeeded?
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'zip'
2
3
 
3
4
  module Bulkrax
4
5
  class ApplicationParser # rubocop:disable Metrics/ClassLength
@@ -261,7 +262,6 @@ module Bulkrax
261
262
  end
262
263
 
263
264
  def zip
264
- require 'zip'
265
265
  FileUtils.rm_rf(exporter_export_zip_path)
266
266
  Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
267
267
  Dir["#{exporter_export_path}/**/**"].each do |file|
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
4
+ class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
5
5
  include ExportBehavior
6
6
 
7
7
  def self.export_supported?
@@ -19,8 +19,10 @@ module Bulkrax
19
19
  rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
20
20
  rdf_format ? RdfEntry : CsvEntry
21
21
  end
22
- alias collection_entry_class entry_class
23
- alias file_set_entry_class entry_class
22
+
23
+ def path_to_files(filename:)
24
+ @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
25
+ end
24
26
 
25
27
  # Take a random sample of 10 metadata_paths and work out the import fields from that
26
28
  def import_fields
@@ -30,39 +32,41 @@ module Bulkrax
30
32
  end.flatten.compact.uniq
31
33
  end
32
34
 
33
- # Assume a single metadata record per path
34
- # Create an Array of all metadata records, one per file
35
+ # Create an Array of all metadata records
35
36
  def records(_opts = {})
36
37
  raise StandardError, 'No BagIt records were found' if bags.blank?
37
38
  @records ||= bags.map do |bag|
38
39
  path = metadata_path(bag)
39
40
  raise StandardError, 'No metadata files were found' if path.blank?
40
41
  data = entry_class.read_data(path)
41
- data = entry_class.data_for_entry(data, source_identifier, self)
42
- data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
43
- data
42
+ get_data(bag, data)
44
43
  end
44
+
45
+ @records = @records.flatten
45
46
  end
46
47
 
47
- # Find or create collections referenced by works
48
- # If the import data also contains records for these works, they will be updated
49
- # during create works
50
- def create_collections
51
- collections.each_with_index do |collection, index|
52
- next if collection.blank?
53
- metadata = {
54
- title: [collection],
55
- work_identifier => [collection],
56
- visibility: 'open',
57
- collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
58
- }
59
- new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
60
- ImportCollectionJob.perform_now(new_entry.id, current_run.id)
61
- increment_counters(index, collection: true)
48
+ def get_data(bag, data)
49
+ if entry_class == CsvEntry
50
+ data = data.map do |data_row|
51
+ record_data = entry_class.data_for_entry(data_row, source_identifier, self)
52
+ next record_data if importerexporter.metadata_only?
53
+
54
+ record_data[:file] = bag.bag_files.join('|') if ::Hyrax.config.curation_concerns.include? record_data[:model]&.constantize
55
+ record_data
56
+ end
57
+ else
58
+ data = entry_class.data_for_entry(data, source_identifier, self)
59
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
62
60
  end
61
+
62
+ data
63
63
  end
64
64
 
65
65
  def create_works
66
+ entry_class == CsvEntry ? super : create_rdf_works
67
+ end
68
+
69
+ def create_rdf_works
66
70
  records.each_with_index do |record, index|
67
71
  next unless record_has_source_identifier(record, index)
68
72
  break if limit_reached?(limit, index)
@@ -81,33 +85,16 @@ module Bulkrax
81
85
  status_info(e)
82
86
  end
83
87
 
84
- def collections
85
- records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
86
- end
87
-
88
- def collections_total
89
- collections.size
90
- end
91
-
92
- # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
93
- def works_total
94
- total
95
- end
96
-
97
88
  def total
98
- importerexporter.entries.count
99
- end
89
+ @total = importer.parser_fields['total'] || 0 if importer?
100
90
 
101
- def extra_filters
102
- output = ""
103
- if importerexporter.start_date.present?
104
- start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
105
- finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
106
- output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
107
- end
108
- output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
109
- output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
110
- output
91
+ @total = if exporter?
92
+ limit.nil? || limit.zero? ? current_record_ids.count : limit
93
+ end
94
+
95
+ return @total || 0
96
+ rescue StandardError
97
+ @total = 0
111
98
  end
112
99
 
113
100
  def current_record_ids
@@ -118,7 +105,6 @@ module Bulkrax
118
105
  case importerexporter.export_from
119
106
  when 'all'
120
107
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
121
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
122
108
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
123
109
  when 'collection'
124
110
  @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
@@ -127,89 +113,49 @@ module Bulkrax
127
113
  when 'importer'
128
114
  set_ids_for_exporting_from_importer
129
115
  end
130
-
131
116
  @work_ids + @collection_ids + @file_set_ids
132
117
  end
133
118
 
134
- # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
135
- # @see #current_record_ids
136
- def set_ids_for_exporting_from_importer
137
- entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
138
- complete_statuses = Status.latest_by_statusable
139
- .includes(:statusable)
140
- .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
141
-
142
- complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
143
- extra_filters = extra_filters.presence || '*:*'
144
-
145
- { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
146
- instance_variable_set(instance_var, ActiveFedora::SolrService.post(
147
- extra_filters.to_s,
148
- fq: [
149
- %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
150
- "has_model_ssim:(#{models_to_search.join(' OR ')})"
151
- ],
152
- fl: 'id',
153
- rows: 2_000_000_000
154
- )['response']['docs'].map { |obj| obj['id'] })
155
- end
156
- end
157
-
158
- def create_new_entries
159
- current_record_ids.each_with_index do |id, index|
160
- break if limit_reached?(limit, index)
161
-
162
- this_entry_class = if @collection_ids.include?(id)
163
- collection_entry_class
164
- elsif @file_set_ids.include?(id)
165
- file_set_entry_class
166
- else
167
- entry_class
168
- end
169
- new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
170
-
171
- begin
172
- entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
173
- rescue => e
174
- Rails.logger.info("#{e.message} was detected during export")
175
- end
176
-
177
- self.headers |= entry.parsed_metadata.keys if entry
178
- end
179
- end
180
- alias create_from_collection create_new_entries
181
- alias create_from_importer create_new_entries
182
- alias create_from_worktype create_new_entries
183
- alias create_from_all create_new_entries
184
-
185
119
  # export methods
186
120
 
187
- # rubocop:disable Metrics/AbcSize
121
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
188
122
  def write_files
189
123
  require 'open-uri'
190
124
  require 'socket'
191
- importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
192
- bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
193
- w = ActiveFedora::Base.find(e.identifier)
194
- next unless Hyrax.config.curation_concerns.include?(w.class)
125
+ importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |entry|
126
+ record = ActiveFedora::Base.find(entry.identifier)
127
+ next unless Hyrax.config.curation_concerns.include?(record.class)
128
+ bag = BagIt::Bag.new setup_bagit_folder(entry.identifier)
129
+ bag_entries = [entry]
130
+
131
+ record.file_sets.each do |fs|
132
+ if @file_set_ids.present?
133
+ file_set_entry = Bulkrax::CsvFileSetEntry.where("parsed_metadata LIKE '%#{fs.id}%'").first
134
+ bag_entries << file_set_entry unless file_set_entry.nil?
135
+ end
195
136
 
196
- w.file_sets.each do |fs|
197
137
  file_name = filename(fs)
198
138
  next if file_name.blank?
199
139
  io = open(fs.original_file.uri)
200
140
  file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
201
141
  file.write(io.read)
202
142
  file.close
203
- bag.add_file(file_name, file.path)
143
+ begin
144
+ bag.add_file(file_name, file.path)
145
+ rescue => e
146
+ entry.status_info(e)
147
+ status_info(e)
148
+ end
204
149
  end
205
- CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
206
- csv << e.parsed_metadata
150
+
151
+ CSV.open(setup_csv_metadata_export_file(entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
152
+ bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
207
153
  end
208
- write_triples(e)
154
+ write_triples(entry)
209
155
  bag.manifest!(algo: 'sha256')
210
156
  end
211
157
  end
212
- # rubocop:enable Metrics/AbcSize
158
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
213
159
 
214
160
  def setup_csv_metadata_export_file(id)
215
161
  File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
@@ -221,42 +167,6 @@ module Bulkrax
221
167
  key != source_identifier.to_s
222
168
  end
223
169
 
224
- # All possible column names
225
- def export_headers
226
- headers = sort_headers(self.headers)
227
-
228
- # we don't want access_control_id exported and we want file at the end
229
- headers.delete('access_control_id') if headers.include?('access_control_id')
230
-
231
- # add the headers below at the beginning or end to maintain the preexisting export behavior
232
- headers.prepend('model')
233
- headers.prepend(source_identifier.to_s)
234
- headers.prepend('id')
235
-
236
- headers.uniq
237
- end
238
-
239
- def object_names
240
- return @object_names if @object_names
241
-
242
- @object_names = mapping.values.map { |value| value['object'] }
243
- @object_names.uniq!.delete(nil)
244
-
245
- @object_names
246
- end
247
-
248
- def sort_headers(headers)
249
- # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
250
- # while keeping objects grouped together
251
- headers.sort_by do |item|
252
- number = item.match(/\d+/)&.[](0) || 0.to_s
253
- sort_number = number.rjust(4, "0")
254
- object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
255
- remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
256
- "#{object_prefix}_#{sort_number}_#{remainder}"
257
- end
258
- end
259
-
260
170
  def setup_triple_metadata_export_file(id)
261
171
  File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
262
172
  end
@@ -276,11 +186,6 @@ module Bulkrax
276
186
  end
277
187
  end
278
188
 
279
- def required_elements?(keys)
280
- return if keys.blank?
281
- !required_elements.map { |el| keys.map(&:to_s).include?(el) }.include?(false)
282
- end
283
-
284
189
  # @todo - investigate getting directory structure
285
190
  # @todo - investigate using perform_later, and having the importer check for
286
191
  # DownloadCloudFileJob before it starts
@@ -331,5 +236,11 @@ module Bulkrax
331
236
  return nil unless bag.valid?
332
237
  bag
333
238
  end
239
+
240
+ # use the version of this method from the application parser instead
241
+ def real_import_file_path
242
+ return importer_unzip_path if file? && zip?
243
+ parser_fields['import_file_path']
244
+ end
334
245
  end
335
246
  end
@@ -195,6 +195,9 @@ module Bulkrax
195
195
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
196
196
  when 'collection'
197
197
  @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
198
+ @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
199
+ when 'collections metadata'
200
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
198
201
  when 'worktype'
199
202
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
200
203
  when 'importer'
@@ -251,6 +254,7 @@ module Bulkrax
251
254
  end
252
255
  end
253
256
  alias create_from_collection create_new_entries
257
+ alias create_from_collections_metadata create_new_entries
254
258
  alias create_from_importer create_new_entries
255
259
  alias create_from_worktype create_new_entries
256
260
  alias create_from_all create_new_entries
@@ -268,8 +272,8 @@ module Bulkrax
268
272
  CsvFileSetEntry
269
273
  end
270
274
 
271
- # See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory
272
- # Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
275
+ # TODO: figure out why using the version of this method that's in the bagit parser
276
+ # breaks specs for the "if importer?" line
273
277
  def total
274
278
  @total = importer.parser_fields['total'] || 0 if importer?
275
279
  @total = limit || current_record_ids.count if exporter?
@@ -378,10 +382,11 @@ module Bulkrax
378
382
  end
379
383
 
380
384
  # Retrieve the path where we expect to find the files
381
- def path_to_files
385
+ def path_to_files(**args)
386
+ filename = args.fetch(:filename, '')
387
+
382
388
  @path_to_files ||= File.join(
383
- zip? ? importer_unzip_path : File.dirname(import_file_path),
384
- 'files'
389
+ zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
385
390
  )
386
391
  end
387
392
 
@@ -15,20 +15,21 @@
15
15
 
16
16
  <%= form.hidden_field :user_id, value: current_user.id %>
17
17
 
18
- <%= form.input :export_type,
19
- collection: form.object.export_type_list,
20
- label: t('bulkrax.exporter.labels.export_type'),
18
+ <%= form.input :export_type,
19
+ collection: form.object.export_type_list,
20
+ label: t('bulkrax.exporter.labels.export_type'),
21
21
  required: true,
22
22
  prompt: 'Please select an export type' %>
23
23
 
24
- <%= form.input :export_from,
25
- collection: form.object.export_from_list,
26
- label: t('bulkrax.exporter.labels.export_from'),
24
+ <%= form.input :export_from,
25
+ collection: form.object.export_from_list,
26
+ label: t('bulkrax.exporter.labels.export_from'),
27
27
  required: true,
28
28
  prompt: 'Please select an export source' %>
29
29
 
30
30
  <%= form.input :export_source_importer,
31
31
  label: t('bulkrax.exporter.labels.importer'),
32
+ required: true,
32
33
  prompt: 'Select from the list',
33
34
  label_html: { class: 'importer export-source-option hidden' },
34
35
  input_html: { class: 'importer export-source-option hidden' },
@@ -37,6 +38,7 @@
37
38
  <%= form.input :export_source_collection,
38
39
  prompt: 'Start typing ...',
39
40
  label: t('bulkrax.exporter.labels.collection'),
41
+ required: true,
40
42
  placeholder: @collection&.title&.first,
41
43
  label_html: { class: 'collection export-source-option hidden' },
42
44
  input_html: {
@@ -50,13 +52,14 @@
50
52
 
51
53
  <%= form.input :export_source_worktype,
52
54
  label: t('bulkrax.exporter.labels.worktype'),
55
+ required: true,
53
56
  prompt: 'Select from the list',
54
57
  label_html: { class: 'worktype export-source-option hidden' },
55
58
  input_html: { class: 'worktype export-source-option hidden' },
56
59
  collection: Hyrax.config.curation_concerns.map {|cc| [cc.to_s, cc.to_s] } %>
57
60
 
58
- <%= form.input :limit,
59
- as: :integer,
61
+ <%= form.input :limit,
62
+ as: :integer,
60
63
  hint: 'leave blank or 0 for all records',
61
64
  label: t('bulkrax.exporter.labels.limit') %>
62
65
 
@@ -90,8 +93,8 @@
90
93
  collection: form.object.workflow_status_list,
91
94
  label: t('bulkrax.exporter.labels.status') %>
92
95
 
93
- <%= form.input :parser_klass,
94
- collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
96
+ <%= form.input :parser_klass,
97
+ collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
95
98
  label: t('bulkrax.exporter.labels.export_format') %>
96
99
  </div>
97
100
 
@@ -40,6 +40,11 @@
40
40
  <% when 'collection' %>
41
41
  <% collection = Collection.find(@exporter.export_source) %>
42
42
  <%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
43
+ <% when 'collections metadata' %>
44
+ <% collections = Collection.all %>
45
+ <% collections.each_with_index do |c, i| %>
46
+ <%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
47
+ <% end %>
43
48
  <% when 'importer' %>
44
49
  <% importer = Bulkrax::Importer.find(@exporter.export_source) %>
45
50
  <%= link_to importer.name, bulkrax.importer_path(importer.id) %>
@@ -8,6 +8,7 @@ en:
8
8
  labels:
9
9
  all: All
10
10
  collection: Collection
11
+ collections_metadata: All Collections' Metadata (only)
11
12
  export_format: Export Format
12
13
  export_from: Export From
13
14
  export_source: Export Source
@@ -1,7 +1,17 @@
1
1
  class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
2
- def change
2
+ def up
3
3
  if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
4
+ remove_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs
5
+ remove_index :bulkrax_pending_relationships, column: :bulkrax_importer_run_id
6
+
4
7
  rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
8
+
9
+ add_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs, column: :importer_run_id
10
+ add_index :bulkrax_pending_relationships, :importer_run_id, name: 'index_bulkrax_pending_relationships_on_importer_run_id'
5
11
  end
6
12
  end
13
+
14
+ def down
15
+ rename_column :bulkrax_pending_relationships, :importer_run_id, :bulkrax_importer_run_id
16
+ end
7
17
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.3.1'
4
+ VERSION = '3.5.0'
5
5
  end
@@ -10,7 +10,7 @@ class Bulkrax::InstallGenerator < Rails::Generators::Base
10
10
  end
11
11
 
12
12
  def add_to_gemfile
13
- gem 'willow_sword', github: 'notch8/willow_sword'
13
+ gem 'bulkrax'
14
14
 
15
15
  Bundler.with_clean_env do
16
16
  run "bundle install"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-09 00:00:00.000000000 Z
11
+ date: 2022-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -25,81 +25,81 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 5.1.6
27
27
  - !ruby/object:Gem::Dependency
28
- name: loofah
28
+ name: bagit
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.2.3
33
+ version: '0.4'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.2.3
40
+ version: '0.4'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rack
42
+ name: coderay
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 2.0.6
47
+ version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: 2.0.6
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: simple_form
56
+ name: iso8601
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: 0.9.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: 0.9.0
69
69
  - !ruby/object:Gem::Dependency
70
- name: iso8601
70
+ name: kaminari
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - "~>"
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 0.9.0
75
+ version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - "~>"
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
- version: 0.9.0
82
+ version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: oai
84
+ name: language_list
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0.4'
90
- - - "<"
89
+ version: '1.2'
90
+ - - ">="
91
91
  - !ruby/object:Gem::Version
92
- version: 2.x
92
+ version: 1.2.1
93
93
  type: :runtime
94
94
  prerelease: false
95
95
  version_requirements: !ruby/object:Gem::Requirement
96
96
  requirements:
97
- - - ">="
97
+ - - "~>"
98
98
  - !ruby/object:Gem::Version
99
- version: '0.4'
100
- - - "<"
99
+ version: '1.2'
100
+ - - ">="
101
101
  - !ruby/object:Gem::Version
102
- version: 2.x
102
+ version: 1.2.1
103
103
  - !ruby/object:Gem::Dependency
104
104
  name: libxml-ruby
105
105
  requirement: !ruby/object:Gem::Requirement
@@ -115,61 +115,75 @@ dependencies:
115
115
  - !ruby/object:Gem::Version
116
116
  version: 3.1.0
117
117
  - !ruby/object:Gem::Dependency
118
- name: language_list
118
+ name: loofah
119
119
  requirement: !ruby/object:Gem::Requirement
120
120
  requirements:
121
- - - "~>"
122
- - !ruby/object:Gem::Version
123
- version: '1.2'
124
121
  - - ">="
125
122
  - !ruby/object:Gem::Version
126
- version: 1.2.1
123
+ version: 2.2.3
127
124
  type: :runtime
128
125
  prerelease: false
129
126
  version_requirements: !ruby/object:Gem::Requirement
130
127
  requirements:
131
- - - "~>"
132
- - !ruby/object:Gem::Version
133
- version: '1.2'
134
128
  - - ">="
135
129
  - !ruby/object:Gem::Version
136
- version: 1.2.1
130
+ version: 2.2.3
137
131
  - !ruby/object:Gem::Dependency
138
- name: rdf
132
+ name: oai
139
133
  requirement: !ruby/object:Gem::Requirement
140
134
  requirements:
141
135
  - - ">="
142
136
  - !ruby/object:Gem::Version
143
- version: 2.0.2
137
+ version: '0.4'
144
138
  - - "<"
145
139
  - !ruby/object:Gem::Version
146
- version: '4.0'
140
+ version: 2.x
147
141
  type: :runtime
148
142
  prerelease: false
149
143
  version_requirements: !ruby/object:Gem::Requirement
150
144
  requirements:
151
145
  - - ">="
152
146
  - !ruby/object:Gem::Version
153
- version: 2.0.2
147
+ version: '0.4'
154
148
  - - "<"
155
149
  - !ruby/object:Gem::Version
156
- version: '4.0'
150
+ version: 2.x
157
151
  - !ruby/object:Gem::Dependency
158
- name: bagit
152
+ name: rack
159
153
  requirement: !ruby/object:Gem::Requirement
160
154
  requirements:
161
- - - "~>"
155
+ - - ">="
162
156
  - !ruby/object:Gem::Version
163
- version: '0.4'
157
+ version: 2.0.6
164
158
  type: :runtime
165
159
  prerelease: false
166
160
  version_requirements: !ruby/object:Gem::Requirement
167
161
  requirements:
168
- - - "~>"
162
+ - - ">="
169
163
  - !ruby/object:Gem::Version
170
- version: '0.4'
164
+ version: 2.0.6
171
165
  - !ruby/object:Gem::Dependency
172
- name: coderay
166
+ name: rdf
167
+ requirement: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - ">="
170
+ - !ruby/object:Gem::Version
171
+ version: 2.0.2
172
+ - - "<"
173
+ - !ruby/object:Gem::Version
174
+ version: '4.0'
175
+ type: :runtime
176
+ prerelease: false
177
+ version_requirements: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ version: 2.0.2
182
+ - - "<"
183
+ - !ruby/object:Gem::Version
184
+ version: '4.0'
185
+ - !ruby/object:Gem::Dependency
186
+ name: rubyzip
173
187
  requirement: !ruby/object:Gem::Requirement
174
188
  requirements:
175
189
  - - ">="
@@ -183,7 +197,7 @@ dependencies:
183
197
  - !ruby/object:Gem::Version
184
198
  version: '0'
185
199
  - !ruby/object:Gem::Dependency
186
- name: kaminari
200
+ name: simple_form
187
201
  requirement: !ruby/object:Gem::Requirement
188
202
  requirements:
189
203
  - - ">="
@@ -390,7 +404,7 @@ homepage: https://github.com/samvera-labs/bulkrax
390
404
  licenses:
391
405
  - Apache-2.0
392
406
  metadata: {}
393
- post_install_message:
407
+ post_install_message:
394
408
  rdoc_options: []
395
409
  require_paths:
396
410
  - lib
@@ -405,8 +419,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
405
419
  - !ruby/object:Gem::Version
406
420
  version: '0'
407
421
  requirements: []
408
- rubygems_version: 3.1.4
409
- signing_key:
422
+ rubygems_version: 3.0.3
423
+ signing_key:
410
424
  specification_version: 4
411
425
  summary: Import and export tool for Hyrax and Hyku
412
426
  test_files: []