bulkrax 3.3.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2525e4aed8b31a897668d556c311b29189ec1b7a951b4303604acd3ccb11fcc8
4
- data.tar.gz: d642de2af38c8c82108b113641ab1e191ae05b96b2ce3528dde7d7c908639291
3
+ metadata.gz: 0a80674a9f582c3b8e83f442318908edb6ca9f0b615c970d09b17d941cc8027d
4
+ data.tar.gz: a2a53116ef49e03dde1aa1df14d8259a2b4abf06a82cff63a9d4ba622ba6600a
5
5
  SHA512:
6
- metadata.gz: 6547c0283f75626cda95a8cc09cceff84ac576b7ca4034b72d729ab16d50c71657dac0e78739b5ef6f2d83aa85e1ddc8c45646f6c44408a0a989b4b33b93c487
7
- data.tar.gz: 9ae11d35d0b3caf9d45237590e42541c0004268e6c86e8d44e6754900b253894813c7e817b471ed9379b7eefad1ec3a27d2cfebbe94865727ecb69b6b8652480
6
+ metadata.gz: af3d75fb03105e37f7374f3a7f863c545d9cc9c95ab2f18bbbf7b4692024e09811f103a372327b4724c836568bad176ed0ad0b7be929ed556259aa9b0793fce6
7
+ data.tar.gz: 1117a185fbab2bae0746187f464bebea855759a5ecccf0d34f098ac55ad7a2952e663268372262ba8f97820c8c1f02bd29c74a388cfd8ea9cfed84a46dad94cf
@@ -42,10 +42,12 @@ module Bulkrax
42
42
  pending_relationships.each do |rel|
43
43
  raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
44
44
  @child_entry, child_record = find_record(rel.child_id, importer_run_id)
45
- child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
45
+ if child_record
46
+ child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
47
+ end
46
48
  end
47
49
 
48
- if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
50
+ if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.nil?
49
51
  reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
50
52
  return false # stop current job from continuing to run after rescheduling
51
53
  end
@@ -6,26 +6,26 @@ module Bulkrax
6
6
 
7
7
  def perform(*args)
8
8
  entry = Entry.find(args[0])
9
+ exporter_run = ExporterRun.find(args[1])
9
10
  begin
10
11
  entry.build
11
12
  entry.save
12
13
  rescue StandardError
13
14
  # rubocop:disable Rails/SkipsModelValidations
14
- ExporterRun.find(args[1]).increment!(:failed_records)
15
- ExporterRun.find(args[1]).decrement!(:enqueued_records)
15
+ exporter_run.increment!(:failed_records)
16
+ exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
16
17
  raise
17
18
  else
18
19
  if entry.failed?
19
- ExporterRun.find(args[1]).increment!(:failed_records)
20
- ExporterRun.find(args[1]).decrement!(:enqueued_records)
20
+ exporter_run.increment!(:failed_records)
21
+ exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
21
22
  raise entry.reload.current_status.error_class.constantize
22
23
  else
23
- ExporterRun.find(args[1]).increment!(:processed_records)
24
- ExporterRun.find(args[1]).decrement!(:enqueued_records)
24
+ exporter_run.increment!(:processed_records)
25
+ exporter_run.decrement!(:enqueued_records) unless exporter_run.enqueued_records <= 0
25
26
  end
26
27
  # rubocop:enable Rails/SkipsModelValidations
27
28
  end
28
- exporter_run = ExporterRun.find(args[1])
29
29
  return entry if exporter_run.enqueued_records.positive?
30
30
 
31
31
  if exporter_run.failed_records.positive?
@@ -12,6 +12,8 @@ module Bulkrax
12
12
  import(importer, only_updates_since_last_import)
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
+ rescue CSV::MalformedCSVError => e
16
+ importer.status_info(e)
15
17
  end
16
18
 
17
19
  def import(importer, only_updates_since_last_import)
@@ -114,9 +114,10 @@ module Bulkrax
114
114
  def build_files_metadata
115
115
  file_mapping = key_for_export('file')
116
116
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
117
- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
117
+ filenames = map_file_sets(file_sets)
118
118
 
119
119
  handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
120
+ build_thumbnail_files if hyrax_record.work?
120
121
  end
121
122
 
122
123
  def build_relationship_metadata
@@ -219,15 +220,6 @@ module Bulkrax
219
220
  end
220
221
  end
221
222
 
222
- def build_files
223
- file_mapping = mapping['file']&.[]('from')&.first || 'file'
224
- file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
225
-
226
- filenames = map_file_sets(file_sets)
227
- handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
228
- build_thumbnail_files if hyrax_record.work?
229
- end
230
-
231
223
  def build_thumbnail_files
232
224
  return unless importerexporter.include_thumbnails
233
225
 
@@ -14,13 +14,15 @@ module Bulkrax
14
14
  validates :name, presence: true
15
15
  validates :parser_klass, presence: true
16
16
 
17
- delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
17
+ delegate :write, :create_from_collection, :create_from_collections_metadata, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
21
21
  case self.export_from
22
22
  when 'collection'
23
23
  create_from_collection
24
+ when 'collections metadata'
25
+ create_from_collections_metadata
24
26
  when 'importer'
25
27
  create_from_importer
26
28
  when 'worktype'
@@ -87,6 +89,7 @@ module Bulkrax
87
89
  [
88
90
  [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
89
91
  [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
92
+ [I18n.t('bulkrax.exporter.labels.collections_metadata'), 'collections metadata'],
90
93
  [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
91
94
  [I18n.t('bulkrax.exporter.labels.all'), 'all']
92
95
  ]
@@ -96,16 +96,19 @@ module Bulkrax
96
96
  end
97
97
 
98
98
  def current_run
99
- @current_run ||= if file? && zip?
100
- self.importer_runs.create!
101
- else
102
- entry_counts = {
103
- total_work_entries: self.limit || parser.works_total,
104
- total_collection_entries: parser.collections_total,
105
- total_file_set_entries: parser.file_sets_total
106
- }
107
- self.importer_runs.create!(entry_counts)
108
- end
99
+ return @current_run if @current_run.present?
100
+
101
+ @current_run = self.importer_runs.create!
102
+ return @current_run if file? && zip?
103
+
104
+ entry_counts = {
105
+ total_work_entries: self.limit || parser.works_total,
106
+ total_collection_entries: parser.collections_total,
107
+ total_file_set_entries: parser.file_sets_total
108
+ }
109
+ @current_run.update!(entry_counts)
110
+
111
+ @current_run
109
112
  end
110
113
 
111
114
  def last_run
@@ -12,15 +12,14 @@ module Bulkrax
12
12
  # check for our entry in our current importer first
13
13
  importer_id = ImporterRun.find(importer_run_id).importer_id
14
14
  default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
15
- record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
16
15
 
17
- # TODO(alishaevn): discuss whether we are only looking for Collection models here
18
- # use ActiveFedora::Base.find(identifier) instead?
19
- record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy
20
- if record.blank?
21
- available_work_types.each do |work_type|
22
- record ||= work_type.where(id: identifier).first # rubocop:disable Rails/FindBy
23
- end
16
+ begin
17
+ # the identifier parameter can be a :source_identifier or the id of an object
18
+ record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
19
+ record ||= ActiveFedora::Base.find(identifier)
20
+ # NameError for if ActiveFedora isn't installed
21
+ rescue NameError, ActiveFedora::ObjectNotFoundError
22
+ record = nil
24
23
  end
25
24
 
26
25
  # return the found entry here instead of searching for it again in the CreateRelationshipsJob
@@ -51,7 +51,7 @@ module Bulkrax
51
51
  fn = file_set.original_file.file_name.first
52
52
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
53
53
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
54
- if fn.include?(file_set.id) || importerexporter.metadata_only? || importerexporter.parser_klass.include?('Bagit')
54
+ if fn.include?(file_set.id) || importerexporter.metadata_only?
55
55
  filename = "#{fn}.#{mime.to_sym}"
56
56
  filename = fn if mime.to_s == ext_mime.to_s
57
57
  else
@@ -8,10 +8,14 @@ module Bulkrax
8
8
 
9
9
  def add_path_to_file
10
10
  parsed_metadata['file'].each_with_index do |filename, i|
11
- path_to_file = ::File.join(parser.path_to_files, filename)
11
+ next if filename.blank?
12
+
13
+ path_to_file = parser.path_to_files(filename: filename)
12
14
 
13
15
  parsed_metadata['file'][i] = path_to_file
14
16
  end
17
+ parsed_metadata['file'].delete('')
18
+
15
19
  raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
20
 
17
21
  parsed_metadata['file']
@@ -12,8 +12,8 @@ module Bulkrax
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
14
  add_user_to_permission_templates! if self.class.to_s.include?("Collection")
15
- parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
16
- child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
15
+ parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
+ child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
18
18
  rescue RSolr::Error::Http, CollectionsCreatedError => e
19
19
  raise e
@@ -18,7 +18,7 @@ module Bulkrax
18
18
  end
19
19
 
20
20
  def failed?
21
- current_status&.status_message&.match(/fail/i)
21
+ current_status&.status_message&.eql?('Failed')
22
22
  end
23
23
 
24
24
  def succeeded?
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'zip'
2
3
 
3
4
  module Bulkrax
4
5
  class ApplicationParser # rubocop:disable Metrics/ClassLength
@@ -261,7 +262,6 @@ module Bulkrax
261
262
  end
262
263
 
263
264
  def zip
264
- require 'zip'
265
265
  FileUtils.rm_rf(exporter_export_zip_path)
266
266
  Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
267
267
  Dir["#{exporter_export_path}/**/**"].each do |file|
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class BagitParser < ApplicationParser # rubocop:disable Metrics/ClassLength
4
+ class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
5
5
  include ExportBehavior
6
6
 
7
7
  def self.export_supported?
@@ -19,8 +19,10 @@ module Bulkrax
19
19
  rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
20
20
  rdf_format ? RdfEntry : CsvEntry
21
21
  end
22
- alias collection_entry_class entry_class
23
- alias file_set_entry_class entry_class
22
+
23
+ def path_to_files(filename:)
24
+ @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
25
+ end
24
26
 
25
27
  # Take a random sample of 10 metadata_paths and work out the import fields from that
26
28
  def import_fields
@@ -30,39 +32,41 @@ module Bulkrax
30
32
  end.flatten.compact.uniq
31
33
  end
32
34
 
33
- # Assume a single metadata record per path
34
- # Create an Array of all metadata records, one per file
35
+ # Create an Array of all metadata records
35
36
  def records(_opts = {})
36
37
  raise StandardError, 'No BagIt records were found' if bags.blank?
37
38
  @records ||= bags.map do |bag|
38
39
  path = metadata_path(bag)
39
40
  raise StandardError, 'No metadata files were found' if path.blank?
40
41
  data = entry_class.read_data(path)
41
- data = entry_class.data_for_entry(data, source_identifier, self)
42
- data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
43
- data
42
+ get_data(bag, data)
44
43
  end
44
+
45
+ @records = @records.flatten
45
46
  end
46
47
 
47
- # Find or create collections referenced by works
48
- # If the import data also contains records for these works, they will be updated
49
- # during create works
50
- def create_collections
51
- collections.each_with_index do |collection, index|
52
- next if collection.blank?
53
- metadata = {
54
- title: [collection],
55
- work_identifier => [collection],
56
- visibility: 'open',
57
- collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
58
- }
59
- new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
60
- ImportCollectionJob.perform_now(new_entry.id, current_run.id)
61
- increment_counters(index, collection: true)
48
+ def get_data(bag, data)
49
+ if entry_class == CsvEntry
50
+ data = data.map do |data_row|
51
+ record_data = entry_class.data_for_entry(data_row, source_identifier, self)
52
+ next record_data if importerexporter.metadata_only?
53
+
54
+ record_data[:file] = bag.bag_files.join('|') if ::Hyrax.config.curation_concerns.include? record_data[:model]&.constantize
55
+ record_data
56
+ end
57
+ else
58
+ data = entry_class.data_for_entry(data, source_identifier, self)
59
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
62
60
  end
61
+
62
+ data
63
63
  end
64
64
 
65
65
  def create_works
66
+ entry_class == CsvEntry ? super : create_rdf_works
67
+ end
68
+
69
+ def create_rdf_works
66
70
  records.each_with_index do |record, index|
67
71
  next unless record_has_source_identifier(record, index)
68
72
  break if limit_reached?(limit, index)
@@ -81,33 +85,16 @@ module Bulkrax
81
85
  status_info(e)
82
86
  end
83
87
 
84
- def collections
85
- records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
86
- end
87
-
88
- def collections_total
89
- collections.size
90
- end
91
-
92
- # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
93
- def works_total
94
- total
95
- end
96
-
97
88
  def total
98
- importerexporter.entries.count
99
- end
89
+ @total = importer.parser_fields['total'] || 0 if importer?
100
90
 
101
- def extra_filters
102
- output = ""
103
- if importerexporter.start_date.present?
104
- start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
105
- finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
106
- output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
107
- end
108
- output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
109
- output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
110
- output
91
+ @total = if exporter?
92
+ limit.nil? || limit.zero? ? current_record_ids.count : limit
93
+ end
94
+
95
+ return @total || 0
96
+ rescue StandardError
97
+ @total = 0
111
98
  end
112
99
 
113
100
  def current_record_ids
@@ -118,7 +105,6 @@ module Bulkrax
118
105
  case importerexporter.export_from
119
106
  when 'all'
120
107
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
121
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
122
108
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
123
109
  when 'collection'
124
110
  @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
@@ -127,89 +113,49 @@ module Bulkrax
127
113
  when 'importer'
128
114
  set_ids_for_exporting_from_importer
129
115
  end
130
-
131
116
  @work_ids + @collection_ids + @file_set_ids
132
117
  end
133
118
 
134
- # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
135
- # @see #current_record_ids
136
- def set_ids_for_exporting_from_importer
137
- entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
138
- complete_statuses = Status.latest_by_statusable
139
- .includes(:statusable)
140
- .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
141
-
142
- complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
143
- extra_filters = extra_filters.presence || '*:*'
144
-
145
- { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
146
- instance_variable_set(instance_var, ActiveFedora::SolrService.post(
147
- extra_filters.to_s,
148
- fq: [
149
- %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
150
- "has_model_ssim:(#{models_to_search.join(' OR ')})"
151
- ],
152
- fl: 'id',
153
- rows: 2_000_000_000
154
- )['response']['docs'].map { |obj| obj['id'] })
155
- end
156
- end
157
-
158
- def create_new_entries
159
- current_record_ids.each_with_index do |id, index|
160
- break if limit_reached?(limit, index)
161
-
162
- this_entry_class = if @collection_ids.include?(id)
163
- collection_entry_class
164
- elsif @file_set_ids.include?(id)
165
- file_set_entry_class
166
- else
167
- entry_class
168
- end
169
- new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
170
-
171
- begin
172
- entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
173
- rescue => e
174
- Rails.logger.info("#{e.message} was detected during export")
175
- end
176
-
177
- self.headers |= entry.parsed_metadata.keys if entry
178
- end
179
- end
180
- alias create_from_collection create_new_entries
181
- alias create_from_importer create_new_entries
182
- alias create_from_worktype create_new_entries
183
- alias create_from_all create_new_entries
184
-
185
119
  # export methods
186
120
 
187
- # rubocop:disable Metrics/AbcSize
121
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
188
122
  def write_files
189
123
  require 'open-uri'
190
124
  require 'socket'
191
- importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |e|
192
- bag = BagIt::Bag.new setup_bagit_folder(e.identifier)
193
- w = ActiveFedora::Base.find(e.identifier)
194
- next unless Hyrax.config.curation_concerns.include?(w.class)
125
+ importerexporter.entries.where(identifier: current_record_ids)[0..limit || total].each do |entry|
126
+ record = ActiveFedora::Base.find(entry.identifier)
127
+ next unless Hyrax.config.curation_concerns.include?(record.class)
128
+ bag = BagIt::Bag.new setup_bagit_folder(entry.identifier)
129
+ bag_entries = [entry]
130
+
131
+ record.file_sets.each do |fs|
132
+ if @file_set_ids.present?
133
+ file_set_entry = Bulkrax::CsvFileSetEntry.where("parsed_metadata LIKE '%#{fs.id}%'").first
134
+ bag_entries << file_set_entry unless file_set_entry.nil?
135
+ end
195
136
 
196
- w.file_sets.each do |fs|
197
137
  file_name = filename(fs)
198
138
  next if file_name.blank?
199
139
  io = open(fs.original_file.uri)
200
140
  file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
201
141
  file.write(io.read)
202
142
  file.close
203
- bag.add_file(file_name, file.path)
143
+ begin
144
+ bag.add_file(file_name, file.path)
145
+ rescue => e
146
+ entry.status_info(e)
147
+ status_info(e)
148
+ end
204
149
  end
205
- CSV.open(setup_csv_metadata_export_file(e.identifier), "w", headers: export_headers, write_headers: true) do |csv|
206
- csv << e.parsed_metadata
150
+
151
+ CSV.open(setup_csv_metadata_export_file(entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
152
+ bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
207
153
  end
208
- write_triples(e)
154
+ write_triples(entry)
209
155
  bag.manifest!(algo: 'sha256')
210
156
  end
211
157
  end
212
- # rubocop:enable Metrics/AbcSize
158
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
213
159
 
214
160
  def setup_csv_metadata_export_file(id)
215
161
  File.join(importerexporter.exporter_export_path, id, 'metadata.csv')
@@ -221,42 +167,6 @@ module Bulkrax
221
167
  key != source_identifier.to_s
222
168
  end
223
169
 
224
- # All possible column names
225
- def export_headers
226
- headers = sort_headers(self.headers)
227
-
228
- # we don't want access_control_id exported and we want file at the end
229
- headers.delete('access_control_id') if headers.include?('access_control_id')
230
-
231
- # add the headers below at the beginning or end to maintain the preexisting export behavior
232
- headers.prepend('model')
233
- headers.prepend(source_identifier.to_s)
234
- headers.prepend('id')
235
-
236
- headers.uniq
237
- end
238
-
239
- def object_names
240
- return @object_names if @object_names
241
-
242
- @object_names = mapping.values.map { |value| value['object'] }
243
- @object_names.uniq!.delete(nil)
244
-
245
- @object_names
246
- end
247
-
248
- def sort_headers(headers)
249
- # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
250
- # while keeping objects grouped together
251
- headers.sort_by do |item|
252
- number = item.match(/\d+/)&.[](0) || 0.to_s
253
- sort_number = number.rjust(4, "0")
254
- object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
255
- remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
256
- "#{object_prefix}_#{sort_number}_#{remainder}"
257
- end
258
- end
259
-
260
170
  def setup_triple_metadata_export_file(id)
261
171
  File.join(importerexporter.exporter_export_path, id, 'metadata.nt')
262
172
  end
@@ -276,11 +186,6 @@ module Bulkrax
276
186
  end
277
187
  end
278
188
 
279
- def required_elements?(keys)
280
- return if keys.blank?
281
- !required_elements.map { |el| keys.map(&:to_s).include?(el) }.include?(false)
282
- end
283
-
284
189
  # @todo - investigate getting directory structure
285
190
  # @todo - investigate using perform_later, and having the importer check for
286
191
  # DownloadCloudFileJob before it starts
@@ -331,5 +236,11 @@ module Bulkrax
331
236
  return nil unless bag.valid?
332
237
  bag
333
238
  end
239
+
240
+ # use the version of this method from the application parser instead
241
+ def real_import_file_path
242
+ return importer_unzip_path if file? && zip?
243
+ parser_fields['import_file_path']
244
+ end
334
245
  end
335
246
  end
@@ -195,6 +195,9 @@ module Bulkrax
195
195
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
196
196
  when 'collection'
197
197
  @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
198
+ @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
199
+ when 'collections metadata'
200
+ @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
198
201
  when 'worktype'
199
202
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
200
203
  when 'importer'
@@ -251,6 +254,7 @@ module Bulkrax
251
254
  end
252
255
  end
253
256
  alias create_from_collection create_new_entries
257
+ alias create_from_collections_metadata create_new_entries
254
258
  alias create_from_importer create_new_entries
255
259
  alias create_from_worktype create_new_entries
256
260
  alias create_from_all create_new_entries
@@ -268,8 +272,8 @@ module Bulkrax
268
272
  CsvFileSetEntry
269
273
  end
270
274
 
271
- # See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory
272
- # Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
275
+ # TODO: figure out why using the version of this method that's in the bagit parser
276
+ # breaks specs for the "if importer?" line
273
277
  def total
274
278
  @total = importer.parser_fields['total'] || 0 if importer?
275
279
  @total = limit || current_record_ids.count if exporter?
@@ -378,10 +382,11 @@ module Bulkrax
378
382
  end
379
383
 
380
384
  # Retrieve the path where we expect to find the files
381
- def path_to_files
385
+ def path_to_files(**args)
386
+ filename = args.fetch(:filename, '')
387
+
382
388
  @path_to_files ||= File.join(
383
- zip? ? importer_unzip_path : File.dirname(import_file_path),
384
- 'files'
389
+ zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
385
390
  )
386
391
  end
387
392
 
@@ -15,20 +15,21 @@
15
15
 
16
16
  <%= form.hidden_field :user_id, value: current_user.id %>
17
17
 
18
- <%= form.input :export_type,
19
- collection: form.object.export_type_list,
20
- label: t('bulkrax.exporter.labels.export_type'),
18
+ <%= form.input :export_type,
19
+ collection: form.object.export_type_list,
20
+ label: t('bulkrax.exporter.labels.export_type'),
21
21
  required: true,
22
22
  prompt: 'Please select an export type' %>
23
23
 
24
- <%= form.input :export_from,
25
- collection: form.object.export_from_list,
26
- label: t('bulkrax.exporter.labels.export_from'),
24
+ <%= form.input :export_from,
25
+ collection: form.object.export_from_list,
26
+ label: t('bulkrax.exporter.labels.export_from'),
27
27
  required: true,
28
28
  prompt: 'Please select an export source' %>
29
29
 
30
30
  <%= form.input :export_source_importer,
31
31
  label: t('bulkrax.exporter.labels.importer'),
32
+ required: true,
32
33
  prompt: 'Select from the list',
33
34
  label_html: { class: 'importer export-source-option hidden' },
34
35
  input_html: { class: 'importer export-source-option hidden' },
@@ -37,6 +38,7 @@
37
38
  <%= form.input :export_source_collection,
38
39
  prompt: 'Start typing ...',
39
40
  label: t('bulkrax.exporter.labels.collection'),
41
+ required: true,
40
42
  placeholder: @collection&.title&.first,
41
43
  label_html: { class: 'collection export-source-option hidden' },
42
44
  input_html: {
@@ -50,13 +52,14 @@
50
52
 
51
53
  <%= form.input :export_source_worktype,
52
54
  label: t('bulkrax.exporter.labels.worktype'),
55
+ required: true,
53
56
  prompt: 'Select from the list',
54
57
  label_html: { class: 'worktype export-source-option hidden' },
55
58
  input_html: { class: 'worktype export-source-option hidden' },
56
59
  collection: Hyrax.config.curation_concerns.map {|cc| [cc.to_s, cc.to_s] } %>
57
60
 
58
- <%= form.input :limit,
59
- as: :integer,
61
+ <%= form.input :limit,
62
+ as: :integer,
60
63
  hint: 'leave blank or 0 for all records',
61
64
  label: t('bulkrax.exporter.labels.limit') %>
62
65
 
@@ -90,8 +93,8 @@
90
93
  collection: form.object.workflow_status_list,
91
94
  label: t('bulkrax.exporter.labels.status') %>
92
95
 
93
- <%= form.input :parser_klass,
94
- collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
96
+ <%= form.input :parser_klass,
97
+ collection: Bulkrax.parsers.map {|p| [p[:name], p[:class_name], {'data-partial' => p[:partial]}] if p[:class_name].constantize.export_supported? }.compact,
95
98
  label: t('bulkrax.exporter.labels.export_format') %>
96
99
  </div>
97
100
 
@@ -40,6 +40,11 @@
40
40
  <% when 'collection' %>
41
41
  <% collection = Collection.find(@exporter.export_source) %>
42
42
  <%= link_to collection&.title&.first, hyrax.dashboard_collection_path(collection.id) %>
43
+ <% when 'collections metadata' %>
44
+ <% collections = Collection.all %>
45
+ <% collections.each_with_index do |c, i| %>
46
+ <%= link_to c&.title&.first, hyrax.dashboard_collection_path(c.id) %><%= ',' if i != collections.count - 1 %>
47
+ <% end %>
43
48
  <% when 'importer' %>
44
49
  <% importer = Bulkrax::Importer.find(@exporter.export_source) %>
45
50
  <%= link_to importer.name, bulkrax.importer_path(importer.id) %>
@@ -8,6 +8,7 @@ en:
8
8
  labels:
9
9
  all: All
10
10
  collection: Collection
11
+ collections_metadata: All Collections' Metadata (only)
11
12
  export_format: Export Format
12
13
  export_from: Export From
13
14
  export_source: Export Source
@@ -1,7 +1,17 @@
1
1
  class RenameBulkraxImporterRunToImporterRun < ActiveRecord::Migration[5.2]
2
- def change
2
+ def up
3
3
  if column_exists?(:bulkrax_pending_relationships, :bulkrax_importer_run_id)
4
+ remove_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs
5
+ remove_index :bulkrax_pending_relationships, column: :bulkrax_importer_run_id
6
+
4
7
  rename_column :bulkrax_pending_relationships, :bulkrax_importer_run_id, :importer_run_id
8
+
9
+ add_foreign_key :bulkrax_pending_relationships, :bulkrax_importer_runs, column: :importer_run_id
10
+ add_index :bulkrax_pending_relationships, :importer_run_id, name: 'index_bulkrax_pending_relationships_on_importer_run_id'
5
11
  end
6
12
  end
13
+
14
+ def down
15
+ rename_column :bulkrax_pending_relationships, :importer_run_id, :bulkrax_importer_run_id
16
+ end
7
17
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.3.1'
4
+ VERSION = '3.5.0'
5
5
  end
@@ -10,7 +10,7 @@ class Bulkrax::InstallGenerator < Rails::Generators::Base
10
10
  end
11
11
 
12
12
  def add_to_gemfile
13
- gem 'willow_sword', github: 'notch8/willow_sword'
13
+ gem 'bulkrax'
14
14
 
15
15
  Bundler.with_clean_env do
16
16
  run "bundle install"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-09 00:00:00.000000000 Z
11
+ date: 2022-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -25,81 +25,81 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 5.1.6
27
27
  - !ruby/object:Gem::Dependency
28
- name: loofah
28
+ name: bagit
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.2.3
33
+ version: '0.4'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.2.3
40
+ version: '0.4'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rack
42
+ name: coderay
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 2.0.6
47
+ version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: 2.0.6
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: simple_form
56
+ name: iso8601
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: 0.9.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: 0.9.0
69
69
  - !ruby/object:Gem::Dependency
70
- name: iso8601
70
+ name: kaminari
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - "~>"
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 0.9.0
75
+ version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - "~>"
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
- version: 0.9.0
82
+ version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: oai
84
+ name: language_list
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0.4'
90
- - - "<"
89
+ version: '1.2'
90
+ - - ">="
91
91
  - !ruby/object:Gem::Version
92
- version: 2.x
92
+ version: 1.2.1
93
93
  type: :runtime
94
94
  prerelease: false
95
95
  version_requirements: !ruby/object:Gem::Requirement
96
96
  requirements:
97
- - - ">="
97
+ - - "~>"
98
98
  - !ruby/object:Gem::Version
99
- version: '0.4'
100
- - - "<"
99
+ version: '1.2'
100
+ - - ">="
101
101
  - !ruby/object:Gem::Version
102
- version: 2.x
102
+ version: 1.2.1
103
103
  - !ruby/object:Gem::Dependency
104
104
  name: libxml-ruby
105
105
  requirement: !ruby/object:Gem::Requirement
@@ -115,61 +115,75 @@ dependencies:
115
115
  - !ruby/object:Gem::Version
116
116
  version: 3.1.0
117
117
  - !ruby/object:Gem::Dependency
118
- name: language_list
118
+ name: loofah
119
119
  requirement: !ruby/object:Gem::Requirement
120
120
  requirements:
121
- - - "~>"
122
- - !ruby/object:Gem::Version
123
- version: '1.2'
124
121
  - - ">="
125
122
  - !ruby/object:Gem::Version
126
- version: 1.2.1
123
+ version: 2.2.3
127
124
  type: :runtime
128
125
  prerelease: false
129
126
  version_requirements: !ruby/object:Gem::Requirement
130
127
  requirements:
131
- - - "~>"
132
- - !ruby/object:Gem::Version
133
- version: '1.2'
134
128
  - - ">="
135
129
  - !ruby/object:Gem::Version
136
- version: 1.2.1
130
+ version: 2.2.3
137
131
  - !ruby/object:Gem::Dependency
138
- name: rdf
132
+ name: oai
139
133
  requirement: !ruby/object:Gem::Requirement
140
134
  requirements:
141
135
  - - ">="
142
136
  - !ruby/object:Gem::Version
143
- version: 2.0.2
137
+ version: '0.4'
144
138
  - - "<"
145
139
  - !ruby/object:Gem::Version
146
- version: '4.0'
140
+ version: 2.x
147
141
  type: :runtime
148
142
  prerelease: false
149
143
  version_requirements: !ruby/object:Gem::Requirement
150
144
  requirements:
151
145
  - - ">="
152
146
  - !ruby/object:Gem::Version
153
- version: 2.0.2
147
+ version: '0.4'
154
148
  - - "<"
155
149
  - !ruby/object:Gem::Version
156
- version: '4.0'
150
+ version: 2.x
157
151
  - !ruby/object:Gem::Dependency
158
- name: bagit
152
+ name: rack
159
153
  requirement: !ruby/object:Gem::Requirement
160
154
  requirements:
161
- - - "~>"
155
+ - - ">="
162
156
  - !ruby/object:Gem::Version
163
- version: '0.4'
157
+ version: 2.0.6
164
158
  type: :runtime
165
159
  prerelease: false
166
160
  version_requirements: !ruby/object:Gem::Requirement
167
161
  requirements:
168
- - - "~>"
162
+ - - ">="
169
163
  - !ruby/object:Gem::Version
170
- version: '0.4'
164
+ version: 2.0.6
171
165
  - !ruby/object:Gem::Dependency
172
- name: coderay
166
+ name: rdf
167
+ requirement: !ruby/object:Gem::Requirement
168
+ requirements:
169
+ - - ">="
170
+ - !ruby/object:Gem::Version
171
+ version: 2.0.2
172
+ - - "<"
173
+ - !ruby/object:Gem::Version
174
+ version: '4.0'
175
+ type: :runtime
176
+ prerelease: false
177
+ version_requirements: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ version: 2.0.2
182
+ - - "<"
183
+ - !ruby/object:Gem::Version
184
+ version: '4.0'
185
+ - !ruby/object:Gem::Dependency
186
+ name: rubyzip
173
187
  requirement: !ruby/object:Gem::Requirement
174
188
  requirements:
175
189
  - - ">="
@@ -183,7 +197,7 @@ dependencies:
183
197
  - !ruby/object:Gem::Version
184
198
  version: '0'
185
199
  - !ruby/object:Gem::Dependency
186
- name: kaminari
200
+ name: simple_form
187
201
  requirement: !ruby/object:Gem::Requirement
188
202
  requirements:
189
203
  - - ">="
@@ -390,7 +404,7 @@ homepage: https://github.com/samvera-labs/bulkrax
390
404
  licenses:
391
405
  - Apache-2.0
392
406
  metadata: {}
393
- post_install_message:
407
+ post_install_message:
394
408
  rdoc_options: []
395
409
  require_paths:
396
410
  - lib
@@ -405,8 +419,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
405
419
  - !ruby/object:Gem::Version
406
420
  version: '0'
407
421
  requirements: []
408
- rubygems_version: 3.1.4
409
- signing_key:
422
+ rubygems_version: 3.0.3
423
+ signing_key:
410
424
  specification_version: 4
411
425
  summary: Import and export tool for Hyrax and Hyku
412
426
  test_files: []