bulkrax 7.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  4. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  5. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  6. data/app/factories/bulkrax/object_factory.rb +135 -163
  7. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  8. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  9. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  10. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  11. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  12. data/app/jobs/bulkrax/delete_job.rb +3 -2
  13. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  14. data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
  15. data/app/jobs/bulkrax/importer_job.rb +18 -2
  16. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  17. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  18. data/app/models/bulkrax/csv_entry.rb +7 -6
  19. data/app/models/bulkrax/entry.rb +7 -11
  20. data/app/models/bulkrax/exporter.rb +2 -2
  21. data/app/models/bulkrax/importer.rb +1 -3
  22. data/app/models/bulkrax/oai_entry.rb +0 -3
  23. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  24. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  25. data/app/models/bulkrax/rdf_entry.rb +70 -69
  26. data/app/models/bulkrax/xml_entry.rb +0 -1
  27. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  28. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  29. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  30. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  32. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  33. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  34. data/app/parsers/bulkrax/application_parser.rb +31 -7
  35. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  36. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  37. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  38. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  39. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  40. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  41. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  42. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  43. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  44. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  45. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  46. data/app/views/bulkrax/entries/show.html.erb +9 -8
  47. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  48. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  49. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  50. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  51. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  52. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  53. data/app/views/bulkrax/importers/new.html.erb +1 -1
  54. data/app/views/bulkrax/importers/show.html.erb +1 -1
  55. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  56. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  57. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  58. data/config/locales/bulkrax.en.yml +7 -0
  59. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  60. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  61. data/lib/bulkrax/engine.rb +23 -6
  62. data/lib/bulkrax/version.rb +1 -1
  63. data/lib/bulkrax.rb +54 -52
  64. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  65. data/lib/tasks/bulkrax_tasks.rake +1 -0
  66. data/lib/tasks/reset.rake +4 -4
  67. metadata +24 -8
  68. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  69. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  70. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,223 +1,224 @@
1
1
  # frozen_string_literal: true
2
- require 'bagit'
3
2
 
4
- module Bulkrax
5
- class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
6
- include ExportBehavior
3
+ unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
4
+ module Bulkrax
5
+ class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
6
+ include ExportBehavior
7
7
 
8
- def self.export_supported?
9
- true
10
- end
11
-
12
- def valid_import?
13
- return true if import_fields.present?
14
- rescue => e
15
- set_status_info(e)
16
- false
17
- end
8
+ def self.export_supported?
9
+ true
10
+ end
18
11
 
19
- def entry_class
20
- rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
21
- rdf_format ? RdfEntry : CsvEntry
22
- end
12
+ def valid_import?
13
+ return true if import_fields.present?
14
+ rescue => e
15
+ set_status_info(e)
16
+ false
17
+ end
23
18
 
24
- def path_to_files(filename:)
25
- @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
26
- end
19
+ def entry_class
20
+ rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
21
+ rdf_format ? RdfEntry : CsvEntry
22
+ end
27
23
 
28
- # Take a random sample of 10 metadata_paths and work out the import fields from that
29
- def import_fields
30
- raise StandardError, 'No metadata files were found' if metadata_paths.blank?
31
- @import_fields ||= metadata_paths.sample(10).map do |path|
32
- entry_class.fields_from_data(entry_class.read_data(path))
33
- end.flatten.compact.uniq
34
- end
24
+ def path_to_files(filename:)
25
+ @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
26
+ end
35
27
 
36
- # Create an Array of all metadata records
37
- def records(_opts = {})
38
- raise StandardError, 'No BagIt records were found' if bags.blank?
39
- @records ||= bags.map do |bag|
40
- path = metadata_path(bag)
41
- raise StandardError, 'No metadata files were found' if path.blank?
42
- data = entry_class.read_data(path)
43
- get_data(bag, data)
28
+ # Take a random sample of 10 metadata_paths and work out the import fields from that
29
+ def import_fields
30
+ raise StandardError, 'No metadata files were found' if metadata_paths.blank?
31
+ @import_fields ||= metadata_paths.sample(10).map do |path|
32
+ entry_class.fields_from_data(entry_class.read_data(path))
33
+ end.flatten.compact.uniq
44
34
  end
45
35
 
46
- @records = @records.flatten
47
- end
36
+ # Create an Array of all metadata records
37
+ def records(_opts = {})
38
+ raise StandardError, 'No BagIt records were found' if bags.blank?
39
+ @records ||= bags.map do |bag|
40
+ path = metadata_path(bag)
41
+ raise StandardError, 'No metadata files were found' if path.blank?
42
+ data = entry_class.read_data(path)
43
+ get_data(bag, data)
44
+ end
45
+
46
+ @records = @records.flatten
47
+ end
48
48
 
49
- def get_data(bag, data)
50
- if entry_class == CsvEntry
51
- data = data.map do |data_row|
52
- record_data = entry_class.data_for_entry(data_row, source_identifier, self)
53
- next record_data if importerexporter.metadata_only?
49
+ def get_data(bag, data)
50
+ if entry_class == CsvEntry
51
+ data = data.map do |data_row|
52
+ record_data = entry_class.data_for_entry(data_row, source_identifier, self)
53
+ next record_data if importerexporter.metadata_only?
54
54
 
55
- record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
56
- record_data
55
+ record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
56
+ record_data
57
+ end
58
+ else
59
+ data = entry_class.data_for_entry(data, source_identifier, self)
60
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
57
61
  end
58
- else
59
- data = entry_class.data_for_entry(data, source_identifier, self)
60
- data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
62
+
63
+ data
61
64
  end
62
65
 
63
- data
64
- end
66
+ # export methods
65
67
 
66
- # export methods
68
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
69
+ def write_files
70
+ require 'open-uri'
71
+ require 'socket'
67
72
 
68
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
69
- def write_files
70
- require 'open-uri'
71
- require 'socket'
73
+ folder_count = 1
74
+ records_in_folder = 0
75
+ work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
76
+ collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
77
+ file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
72
78
 
73
- folder_count = 1
74
- records_in_folder = 0
75
- work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
76
- collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
77
- file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
79
+ work_entries[0..limit || total].each do |entry|
80
+ record = Bulkrax.object_factory.find(entry.identifier)
81
+ next unless record
78
82
 
79
- work_entries[0..limit || total].each do |entry|
80
- record = ActiveFedora::Base.find(entry.identifier)
81
- next unless record
83
+ bag_entries = [entry]
82
84
 
83
- bag_entries = [entry]
85
+ if record.member_of_collection_ids.present?
86
+ collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
87
+ end
84
88
 
85
- if record.member_of_collection_ids.present?
86
- collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
87
- end
89
+ if record.file_sets.present?
90
+ file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
91
+ end
88
92
 
89
- if record.file_sets.present?
90
- file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
91
- end
93
+ records_in_folder += bag_entries.count
94
+ if records_in_folder > records_split_count
95
+ folder_count += 1
96
+ records_in_folder = bag_entries.count
97
+ end
92
98
 
93
- records_in_folder += bag_entries.count
94
- if records_in_folder > records_split_count
95
- folder_count += 1
96
- records_in_folder = bag_entries.count
97
- end
99
+ bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
100
+
101
+ record.file_sets.each do |fs|
102
+ file_name = filename(fs)
103
+ next if file_name.blank? || fs.original_file.blank?
104
+
105
+ io = open(fs.original_file.uri)
106
+ file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
107
+ file.write(io.read)
108
+ file.close
109
+ begin
110
+ bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
111
+ rescue => e
112
+ entry.set_status_info(e)
113
+ set_status_info(e)
114
+ end
115
+ end
98
116
 
99
- bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
100
-
101
- record.file_sets.each do |fs|
102
- file_name = filename(fs)
103
- next if file_name.blank? || fs.original_file.blank?
104
-
105
- io = open(fs.original_file.uri)
106
- file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
107
- file.write(io.read)
108
- file.close
109
- begin
110
- bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
111
- rescue => e
112
- entry.set_status_info(e)
113
- set_status_info(e)
117
+ CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
118
+ bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
114
119
  end
115
- end
116
120
 
117
- CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
118
- bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
121
+ write_triples(folder_count, entry)
122
+ bag.manifest!(algo: 'sha256')
119
123
  end
120
-
121
- write_triples(folder_count, entry)
122
- bag.manifest!(algo: 'sha256')
123
124
  end
124
- end
125
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
125
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
126
126
 
127
- def setup_csv_metadata_export_file(folder_count, id)
128
- path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
129
- FileUtils.mkdir_p(path) unless File.exist?(path)
127
+ def setup_csv_metadata_export_file(folder_count, id)
128
+ path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
129
+ FileUtils.mkdir_p(path) unless File.exist?(path)
130
130
 
131
- File.join(path, id, 'metadata.csv')
132
- end
131
+ File.join(path, id, 'metadata.csv')
132
+ end
133
133
 
134
- def key_allowed(key)
135
- !Bulkrax.reserved_properties.include?(key) &&
136
- new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
137
- key != source_identifier.to_s
138
- end
134
+ def key_allowed(key)
135
+ !Bulkrax.reserved_properties.include?(key) &&
136
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
137
+ key != source_identifier.to_s
138
+ end
139
139
 
140
- def setup_triple_metadata_export_file(folder_count, id)
141
- path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
142
- FileUtils.mkdir_p(path) unless File.exist?(path)
140
+ def setup_triple_metadata_export_file(folder_count, id)
141
+ path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
142
+ FileUtils.mkdir_p(path) unless File.exist?(path)
143
143
 
144
- File.join(path, id, 'metadata.nt')
145
- end
144
+ File.join(path, id, 'metadata.nt')
145
+ end
146
146
 
147
- def setup_bagit_folder(folder_count, id)
148
- path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
149
- FileUtils.mkdir_p(path) unless File.exist?(path)
147
+ def setup_bagit_folder(folder_count, id)
148
+ path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
149
+ FileUtils.mkdir_p(path) unless File.exist?(path)
150
150
 
151
- File.join(path, id)
152
- end
151
+ File.join(path, id)
152
+ end
153
153
 
154
- # @todo(bjustice) - remove hyrax reference
155
- def write_triples(folder_count, e)
156
- sd = SolrDocument.find(e.identifier)
157
- return if sd.nil?
154
+ # @todo(bjustice) - remove hyrax reference
155
+ def write_triples(folder_count, e)
156
+ sd = SolrDocument.find(e.identifier)
157
+ return if sd.nil?
158
158
 
159
- req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
160
- rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
161
- File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
162
- triples.write(rdf)
159
+ req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
160
+ rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
161
+ File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
162
+ triples.write(rdf)
163
+ end
163
164
  end
164
- end
165
165
 
166
- # @todo - investigate getting directory structure
167
- # @todo - investigate using perform_later, and having the importer check for
168
- # DownloadCloudFileJob before it starts
169
- def retrieve_cloud_files(files)
170
- # There should only be one zip file for Bagit, take the first
171
- return if files['0'].blank?
172
- target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
173
- # Now because we want the files in place before the importer runs
174
- Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
175
- return target_file
176
- end
166
+ # @todo - investigate getting directory structure
167
+ # @todo - investigate using perform_later, and having the importer check for
168
+ # DownloadCloudFileJob before it starts
169
+ def retrieve_cloud_files(files, _importer)
170
+ # There should only be one zip file for Bagit, take the first
171
+ return if files['0'].blank?
172
+ target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
173
+ # Now because we want the files in place before the importer runs
174
+ Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
175
+ return target_file
176
+ end
177
177
 
178
- private
178
+ private
179
179
 
180
- def bags
181
- return @bags if @bags.present?
182
- new_bag = bag(import_file_path)
183
- @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
184
- @bags.delete(nil)
185
- raise StandardError, 'No valid bags found' if @bags.blank?
186
- return @bags
187
- end
180
+ def bags
181
+ return @bags if @bags.present?
182
+ new_bag = bag(import_file_path)
183
+ @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
184
+ @bags.delete(nil)
185
+ raise StandardError, 'No valid bags found' if @bags.blank?
186
+ return @bags
187
+ end
188
188
 
189
- # Gather the paths to all bags; skip any stray files
190
- def bag_paths
191
- bags.map(&:bag_dir)
192
- end
189
+ # Gather the paths to all bags; skip any stray files
190
+ def bag_paths
191
+ bags.map(&:bag_dir)
192
+ end
193
193
 
194
- def metadata_file_name
195
- raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
196
- parser_fields['metadata_file_name']
197
- end
194
+ def metadata_file_name
195
+ raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
196
+ parser_fields['metadata_file_name']
197
+ end
198
198
 
199
- # Gather the paths to all metadata files matching the metadata_file_name
200
- def metadata_paths
201
- @metadata_paths ||= bag_paths.map do |b|
202
- Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
203
- end.flatten.compact
204
- end
199
+ # Gather the paths to all metadata files matching the metadata_file_name
200
+ def metadata_paths
201
+ @metadata_paths ||= bag_paths.map do |b|
202
+ Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
203
+ end.flatten.compact
204
+ end
205
205
 
206
- def metadata_path(bag)
207
- Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
208
- end
206
+ def metadata_path(bag)
207
+ Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
208
+ end
209
209
 
210
- def bag(path)
211
- return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
212
- bag = BagIt::Bag.new(path)
213
- return nil unless bag.valid?
214
- bag
215
- end
210
+ def bag(path)
211
+ return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
212
+ bag = BagIt::Bag.new(path)
213
+ return nil unless bag.valid?
214
+ bag
215
+ end
216
216
 
217
- # use the version of this method from the application parser instead
218
- def real_import_file_path
219
- return importer_unzip_path if file? && zip?
220
- parser_fields['import_file_path']
217
+ # use the version of this method from the application parser instead
218
+ def real_import_file_path
219
+ return importer_unzip_path if file? && zip?
220
+ parser_fields['import_file_path']
221
+ end
221
222
  end
222
223
  end
223
224
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
3
  module Bulkrax
5
4
  class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
6
5
  include ErroredEntries
@@ -23,6 +22,7 @@ module Bulkrax
23
22
  @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
24
23
  end
25
24
 
25
+ # rubocop:disable Metrics/AbcSize
26
26
  def build_records
27
27
  @collections = []
28
28
  @works = []
@@ -34,7 +34,9 @@ module Bulkrax
34
34
  next unless r.key?(model_mapping)
35
35
 
36
36
  model = r[model_mapping].nil? ? "" : r[model_mapping].strip
37
- if model.casecmp('collection').zero?
37
+ # TODO: Eventually this should be refactored to use Hyrax.config.collection_model
38
+ # We aren't right now because so many Bulkrax users are in between Fedora and Valkyrie
39
+ if model.casecmp('collection').zero? || model.casecmp('collectionresource').zero?
38
40
  @collections << r
39
41
  elsif model.casecmp('fileset').zero?
40
42
  @file_sets << r
@@ -52,6 +54,7 @@ module Bulkrax
52
54
 
53
55
  true
54
56
  end
57
+ # rubocop:enable Metrics/AbcSize
55
58
 
56
59
  def collections
57
60
  build_records if @collections.nil?
@@ -190,9 +193,10 @@ module Bulkrax
190
193
  # @todo - investigate getting directory structure
191
194
  # @todo - investigate using perform_later, and having the importer check for
192
195
  # DownloadCloudFileJob before it starts
193
- def retrieve_cloud_files(files)
196
+ def retrieve_cloud_files(files, importer)
194
197
  files_path = File.join(path_for_import, 'files')
195
198
  FileUtils.mkdir_p(files_path) unless File.exist?(files_path)
199
+ target_files = []
196
200
  files.each_pair do |_key, file|
197
201
  # fixes bug where auth headers do not get attached properly
198
202
  if file['auth_header'].present?
@@ -201,10 +205,12 @@ module Bulkrax
201
205
  end
202
206
  # this only works for uniquely named files
203
207
  target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
208
+ target_files << target_file
204
209
  # Now because we want the files in place before the importer runs
205
210
  # Problematic for a large upload
206
- Bulkrax::DownloadCloudFileJob.perform_now(file, target_file)
211
+ Bulkrax::DownloadCloudFileJob.perform_later(file, target_file)
207
212
  end
213
+ importer[:parser_fields]['original_file_paths'] = target_files
208
214
  return nil
209
215
  end
210
216
 
@@ -225,6 +231,7 @@ module Bulkrax
225
231
  CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
226
232
  group.each do |entry|
227
233
  csv << entry.parsed_metadata
234
+ # TODO: This is precarious when we have descendents of Bulkrax::CsvCollectionEntry
228
235
  next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
229
236
 
230
237
  store_files(entry.identifier, folder_count.to_s)
@@ -234,7 +241,7 @@ module Bulkrax
234
241
  end
235
242
 
236
243
  def store_files(identifier, folder_count)
237
- record = ActiveFedora::Base.find(identifier)
244
+ record = Bulkrax.object_factory.find(identifier)
238
245
  return unless record
239
246
 
240
247
  file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
@@ -286,6 +293,9 @@ module Bulkrax
286
293
 
287
294
  def sort_entries(entries)
288
295
  # always export models in the same order: work, collection, file set
296
+ #
297
+ # TODO: This is a problem in that only these classes are compared. Instead
298
+ # We should add a comparison operator to the classes.
289
299
  entries.sort_by do |entry|
290
300
  case entry.type
291
301
  when 'Bulkrax::CsvCollectionEntry'
@@ -105,6 +105,24 @@ module Bulkrax
105
105
  set_status_info(e)
106
106
  end
107
107
 
108
+ def create_file_sets; end
109
+
110
+ def create_relationships
111
+ ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
112
+ end
113
+
114
+ def record_raw_metadata(_record)
115
+ nil
116
+ end
117
+
118
+ def record_deleted?(_record)
119
+ false
120
+ end
121
+
122
+ def record_remove_and_rerun?(_record)
123
+ false
124
+ end
125
+
108
126
  # oai records do not let us set the source identifier easily
109
127
  def record_has_source_identifier(record, index)
110
128
  identifier = record.send(source_identifier)
@@ -149,12 +149,12 @@ module Bulkrax
149
149
  end
150
150
 
151
151
  def works
152
- @works ||= ActiveFedora::SolrService.query(works_query, **works_query_kwargs)
152
+ @works ||= Bulkrax.object_factory.query(works_query, **works_query_kwargs)
153
153
  end
154
154
 
155
155
  def collections
156
156
  @collections ||= if collections_query
157
- ActiveFedora::SolrService.query(collections_query, **collections_query_kwargs)
157
+ Bulkrax.object_factory.query(collections_query, **collections_query_kwargs)
158
158
  else
159
159
  []
160
160
  end
@@ -173,43 +173,39 @@ module Bulkrax
173
173
  # @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
174
174
  def file_sets
175
175
  @file_sets ||= ParserExportRecordSet.in_batches(candidate_file_set_ids) do |batch_of_ids|
176
- fsq = "has_model_ssim:#{Bulkrax.file_model_class} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
176
+ fsq = "has_model_ssim:#{Bulkrax.file_model_internal_resource} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
177
177
  fsq += extra_filters if extra_filters.present?
178
- ActiveFedora::SolrService.query(
178
+ Bulkrax.object_factory.query(
179
179
  fsq,
180
- { fl: "id", method: :post, rows: batch_of_ids.size }
180
+ fl: "id", method: :post, rows: batch_of_ids.size
181
181
  )
182
182
  end
183
183
  end
184
184
 
185
185
  def solr_name(base_name)
186
- if Module.const_defined?(:Solrizer)
187
- ::Solrizer.solr_name(base_name)
188
- else
189
- ::ActiveFedora.index_field_mapper.solr_name(base_name)
190
- end
186
+ Bulkrax.object_factory.solr_name(base_name)
191
187
  end
192
188
  end
193
189
 
194
190
  class All < Base
195
191
  def works_query
196
- "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')}) #{extra_filters}"
192
+ "has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')}) #{extra_filters}"
197
193
  end
198
194
 
199
195
  def collections_query
200
- "has_model_ssim:Collection #{extra_filters}"
196
+ "has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}"
201
197
  end
202
198
  end
203
199
 
204
200
  class Collection < Base
205
201
  def works_query
206
202
  "member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
207
- "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
203
+ "has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
208
204
  end
209
205
 
210
206
  def collections_query
211
207
  "(id:#{importerexporter.export_source} #{extra_filters}) OR " \
212
- "(has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
208
+ "(has_model_ssim:#{Bulkrax.collection_model_internal_resource} AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
213
209
  end
214
210
  end
215
211
 
@@ -247,12 +243,12 @@ module Bulkrax
247
243
 
248
244
  def works
249
245
  @works ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
250
- ActiveFedora::SolrService.query(
246
+ Bulkrax.object_factory.query(
251
247
  extra_filters.to_s,
252
248
  **query_kwargs.merge(
253
249
  fq: [
254
250
  %(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
255
- "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
251
+ "has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
256
252
  ],
257
253
  fl: 'id'
258
254
  )
@@ -262,12 +258,12 @@ module Bulkrax
262
258
 
263
259
  def collections
264
260
  @collections ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
265
- ActiveFedora::SolrService.query(
266
- "has_model_ssim:Collection #{extra_filters}",
261
+ Bulkrax.object_factory.query(
262
+ "has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}",
267
263
  **query_kwargs.merge(
268
264
  fq: [
269
265
  %(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
270
- "has_model_ssim:Collection"
266
+ "has_model_ssim:#{Bulkrax.collection_model_internal_resource}"
271
267
  ],
272
268
  fl: "id"
273
269
  )
@@ -281,12 +277,12 @@ module Bulkrax
281
277
  # @see Bulkrax::ParserExportRecordSet::Base#file_sets
282
278
  def file_sets
283
279
  @file_sets ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
284
- ActiveFedora::SolrService.query(
280
+ Bulkrax.object_factory.query(
285
281
  extra_filters,
286
- query_kwargs.merge(
282
+ **query_kwargs.merge(
287
283
  fq: [
288
284
  %(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
289
- "has_model_ssim:#{Bulkrax.file_model_class}"
285
+ "has_model_ssim:#{Bulkrax.file_model_internal_resource}"
290
286
  ],
291
287
  fl: 'id'
292
288
  )
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
-
4
2
  module Bulkrax
5
3
  class XmlParser < ApplicationParser
6
4
  def entry_class
@@ -29,6 +29,8 @@ module Bulkrax
29
29
  def self.call(name, suffix: SUFFIX)
30
30
  if name.end_with?(suffix)
31
31
  name.constantize
32
+ elsif name == "FileSet"
33
+ Bulkrax.file_model_class
32
34
  else
33
35
  begin
34
36
  "#{name}#{suffix}".constantize