bulkrax 7.0.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/datatables.js +1 -1
  3. data/app/concerns/loggable.rb +25 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +1 -1
  5. data/app/controllers/bulkrax/importers_controller.rb +2 -1
  6. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
  7. data/app/factories/bulkrax/object_factory.rb +135 -163
  8. data/app/factories/bulkrax/object_factory_interface.rb +483 -0
  9. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  10. data/app/factories/bulkrax/valkyrize-hyku.code-workspace +19 -0
  11. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  12. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  13. data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
  14. data/app/jobs/bulkrax/delete_job.rb +3 -2
  15. data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
  16. data/app/jobs/bulkrax/import_file_set_job.rb +23 -19
  17. data/app/jobs/bulkrax/importer_job.rb +18 -2
  18. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  19. data/app/models/bulkrax/csv_collection_entry.rb +1 -1
  20. data/app/models/bulkrax/csv_entry.rb +7 -6
  21. data/app/models/bulkrax/entry.rb +7 -11
  22. data/app/models/bulkrax/exporter.rb +2 -2
  23. data/app/models/bulkrax/importer.rb +1 -3
  24. data/app/models/bulkrax/oai_entry.rb +0 -3
  25. data/app/models/bulkrax/oai_set_entry.rb +1 -1
  26. data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
  27. data/app/models/bulkrax/rdf_entry.rb +70 -69
  28. data/app/models/bulkrax/xml_entry.rb +0 -1
  29. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  30. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  31. data/app/models/concerns/bulkrax/file_factory.rb +178 -118
  32. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
  33. data/app/models/concerns/bulkrax/has_matchers.rb +39 -25
  34. data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
  35. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  36. data/app/parsers/bulkrax/application_parser.rb +31 -7
  37. data/app/parsers/bulkrax/bagit_parser.rb +175 -174
  38. data/app/parsers/bulkrax/csv_parser.rb +15 -5
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
  41. data/app/parsers/bulkrax/xml_parser.rb +0 -2
  42. data/app/services/bulkrax/factory_class_finder.rb +2 -0
  43. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  44. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  45. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  46. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  47. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  48. data/app/views/bulkrax/entries/show.html.erb +9 -8
  49. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  50. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  51. data/app/views/bulkrax/exporters/show.html.erb +4 -2
  52. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  53. data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
  54. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  55. data/app/views/bulkrax/importers/new.html.erb +1 -1
  56. data/app/views/bulkrax/importers/show.html.erb +1 -1
  57. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  58. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  59. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  60. data/config/locales/bulkrax.en.yml +7 -0
  61. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  62. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  63. data/lib/bulkrax/engine.rb +23 -6
  64. data/lib/bulkrax/version.rb +1 -1
  65. data/lib/bulkrax.rb +54 -52
  66. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  67. data/lib/tasks/bulkrax_tasks.rake +1 -0
  68. data/lib/tasks/reset.rake +4 -4
  69. metadata +25 -7
  70. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
  71. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
  72. data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,223 +1,224 @@
1
1
  # frozen_string_literal: true
2
- require 'bagit'
3
2
 
4
- module Bulkrax
5
- class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
6
- include ExportBehavior
3
+ unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
4
+ module Bulkrax
5
+ class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
6
+ include ExportBehavior
7
7
 
8
- def self.export_supported?
9
- true
10
- end
11
-
12
- def valid_import?
13
- return true if import_fields.present?
14
- rescue => e
15
- set_status_info(e)
16
- false
17
- end
8
+ def self.export_supported?
9
+ true
10
+ end
18
11
 
19
- def entry_class
20
- rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
21
- rdf_format ? RdfEntry : CsvEntry
22
- end
12
+ def valid_import?
13
+ return true if import_fields.present?
14
+ rescue => e
15
+ set_status_info(e)
16
+ false
17
+ end
23
18
 
24
- def path_to_files(filename:)
25
- @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
26
- end
19
+ def entry_class
20
+ rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
21
+ rdf_format ? RdfEntry : CsvEntry
22
+ end
27
23
 
28
- # Take a random sample of 10 metadata_paths and work out the import fields from that
29
- def import_fields
30
- raise StandardError, 'No metadata files were found' if metadata_paths.blank?
31
- @import_fields ||= metadata_paths.sample(10).map do |path|
32
- entry_class.fields_from_data(entry_class.read_data(path))
33
- end.flatten.compact.uniq
34
- end
24
+ def path_to_files(filename:)
25
+ @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
26
+ end
35
27
 
36
- # Create an Array of all metadata records
37
- def records(_opts = {})
38
- raise StandardError, 'No BagIt records were found' if bags.blank?
39
- @records ||= bags.map do |bag|
40
- path = metadata_path(bag)
41
- raise StandardError, 'No metadata files were found' if path.blank?
42
- data = entry_class.read_data(path)
43
- get_data(bag, data)
28
+ # Take a random sample of 10 metadata_paths and work out the import fields from that
29
+ def import_fields
30
+ raise StandardError, 'No metadata files were found' if metadata_paths.blank?
31
+ @import_fields ||= metadata_paths.sample(10).map do |path|
32
+ entry_class.fields_from_data(entry_class.read_data(path))
33
+ end.flatten.compact.uniq
44
34
  end
45
35
 
46
- @records = @records.flatten
47
- end
36
+ # Create an Array of all metadata records
37
+ def records(_opts = {})
38
+ raise StandardError, 'No BagIt records were found' if bags.blank?
39
+ @records ||= bags.map do |bag|
40
+ path = metadata_path(bag)
41
+ raise StandardError, 'No metadata files were found' if path.blank?
42
+ data = entry_class.read_data(path)
43
+ get_data(bag, data)
44
+ end
45
+
46
+ @records = @records.flatten
47
+ end
48
48
 
49
- def get_data(bag, data)
50
- if entry_class == CsvEntry
51
- data = data.map do |data_row|
52
- record_data = entry_class.data_for_entry(data_row, source_identifier, self)
53
- next record_data if importerexporter.metadata_only?
49
+ def get_data(bag, data)
50
+ if entry_class == CsvEntry
51
+ data = data.map do |data_row|
52
+ record_data = entry_class.data_for_entry(data_row, source_identifier, self)
53
+ next record_data if importerexporter.metadata_only?
54
54
 
55
- record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
56
- record_data
55
+ record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
56
+ record_data
57
+ end
58
+ else
59
+ data = entry_class.data_for_entry(data, source_identifier, self)
60
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
57
61
  end
58
- else
59
- data = entry_class.data_for_entry(data, source_identifier, self)
60
- data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
62
+
63
+ data
61
64
  end
62
65
 
63
- data
64
- end
66
+ # export methods
65
67
 
66
- # export methods
68
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
69
+ def write_files
70
+ require 'open-uri'
71
+ require 'socket'
67
72
 
68
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
69
- def write_files
70
- require 'open-uri'
71
- require 'socket'
73
+ folder_count = 1
74
+ records_in_folder = 0
75
+ work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
76
+ collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
77
+ file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
72
78
 
73
- folder_count = 1
74
- records_in_folder = 0
75
- work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
76
- collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
77
- file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
79
+ work_entries[0..limit || total].each do |entry|
80
+ record = Bulkrax.object_factory.find(entry.identifier)
81
+ next unless record
78
82
 
79
- work_entries[0..limit || total].each do |entry|
80
- record = ActiveFedora::Base.find(entry.identifier)
81
- next unless record
83
+ bag_entries = [entry]
82
84
 
83
- bag_entries = [entry]
85
+ if record.member_of_collection_ids.present?
86
+ collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
87
+ end
84
88
 
85
- if record.member_of_collection_ids.present?
86
- collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
87
- end
89
+ if record.file_sets.present?
90
+ file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
91
+ end
88
92
 
89
- if record.file_sets.present?
90
- file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
91
- end
93
+ records_in_folder += bag_entries.count
94
+ if records_in_folder > records_split_count
95
+ folder_count += 1
96
+ records_in_folder = bag_entries.count
97
+ end
92
98
 
93
- records_in_folder += bag_entries.count
94
- if records_in_folder > records_split_count
95
- folder_count += 1
96
- records_in_folder = bag_entries.count
97
- end
99
+ bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
100
+
101
+ record.file_sets.each do |fs|
102
+ file_name = filename(fs)
103
+ next if file_name.blank? || fs.original_file.blank?
104
+
105
+ io = open(fs.original_file.uri)
106
+ file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
107
+ file.write(io.read)
108
+ file.close
109
+ begin
110
+ bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
111
+ rescue => e
112
+ entry.set_status_info(e)
113
+ set_status_info(e)
114
+ end
115
+ end
98
116
 
99
- bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
100
-
101
- record.file_sets.each do |fs|
102
- file_name = filename(fs)
103
- next if file_name.blank? || fs.original_file.blank?
104
-
105
- io = open(fs.original_file.uri)
106
- file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
107
- file.write(io.read)
108
- file.close
109
- begin
110
- bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
111
- rescue => e
112
- entry.set_status_info(e)
113
- set_status_info(e)
117
+ CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
118
+ bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
114
119
  end
115
- end
116
120
 
117
- CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
118
- bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
121
+ write_triples(folder_count, entry)
122
+ bag.manifest!(algo: 'sha256')
119
123
  end
120
-
121
- write_triples(folder_count, entry)
122
- bag.manifest!(algo: 'sha256')
123
124
  end
124
- end
125
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
125
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
126
126
 
127
- def setup_csv_metadata_export_file(folder_count, id)
128
- path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
129
- FileUtils.mkdir_p(path) unless File.exist?(path)
127
+ def setup_csv_metadata_export_file(folder_count, id)
128
+ path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
129
+ FileUtils.mkdir_p(path) unless File.exist?(path)
130
130
 
131
- File.join(path, id, 'metadata.csv')
132
- end
131
+ File.join(path, id, 'metadata.csv')
132
+ end
133
133
 
134
- def key_allowed(key)
135
- !Bulkrax.reserved_properties.include?(key) &&
136
- new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
137
- key != source_identifier.to_s
138
- end
134
+ def key_allowed(key)
135
+ !Bulkrax.reserved_properties.include?(key) &&
136
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
137
+ key != source_identifier.to_s
138
+ end
139
139
 
140
- def setup_triple_metadata_export_file(folder_count, id)
141
- path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
142
- FileUtils.mkdir_p(path) unless File.exist?(path)
140
+ def setup_triple_metadata_export_file(folder_count, id)
141
+ path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
142
+ FileUtils.mkdir_p(path) unless File.exist?(path)
143
143
 
144
- File.join(path, id, 'metadata.nt')
145
- end
144
+ File.join(path, id, 'metadata.nt')
145
+ end
146
146
 
147
- def setup_bagit_folder(folder_count, id)
148
- path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
149
- FileUtils.mkdir_p(path) unless File.exist?(path)
147
+ def setup_bagit_folder(folder_count, id)
148
+ path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
149
+ FileUtils.mkdir_p(path) unless File.exist?(path)
150
150
 
151
- File.join(path, id)
152
- end
151
+ File.join(path, id)
152
+ end
153
153
 
154
- # @todo(bjustice) - remove hyrax reference
155
- def write_triples(folder_count, e)
156
- sd = SolrDocument.find(e.identifier)
157
- return if sd.nil?
154
+ # @todo(bjustice) - remove hyrax reference
155
+ def write_triples(folder_count, e)
156
+ sd = SolrDocument.find(e.identifier)
157
+ return if sd.nil?
158
158
 
159
- req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
160
- rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
161
- File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
162
- triples.write(rdf)
159
+ req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
160
+ rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
161
+ File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
162
+ triples.write(rdf)
163
+ end
163
164
  end
164
- end
165
165
 
166
- # @todo - investigate getting directory structure
167
- # @todo - investigate using perform_later, and having the importer check for
168
- # DownloadCloudFileJob before it starts
169
- def retrieve_cloud_files(files)
170
- # There should only be one zip file for Bagit, take the first
171
- return if files['0'].blank?
172
- target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
173
- # Now because we want the files in place before the importer runs
174
- Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
175
- return target_file
176
- end
166
+ # @todo - investigate getting directory structure
167
+ # @todo - investigate using perform_later, and having the importer check for
168
+ # DownloadCloudFileJob before it starts
169
+ def retrieve_cloud_files(files, _importer)
170
+ # There should only be one zip file for Bagit, take the first
171
+ return if files['0'].blank?
172
+ target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
173
+ # Now because we want the files in place before the importer runs
174
+ Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
175
+ return target_file
176
+ end
177
177
 
178
- private
178
+ private
179
179
 
180
- def bags
181
- return @bags if @bags.present?
182
- new_bag = bag(import_file_path)
183
- @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
184
- @bags.delete(nil)
185
- raise StandardError, 'No valid bags found' if @bags.blank?
186
- return @bags
187
- end
180
+ def bags
181
+ return @bags if @bags.present?
182
+ new_bag = bag(import_file_path)
183
+ @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
184
+ @bags.delete(nil)
185
+ raise StandardError, 'No valid bags found' if @bags.blank?
186
+ return @bags
187
+ end
188
188
 
189
- # Gather the paths to all bags; skip any stray files
190
- def bag_paths
191
- bags.map(&:bag_dir)
192
- end
189
+ # Gather the paths to all bags; skip any stray files
190
+ def bag_paths
191
+ bags.map(&:bag_dir)
192
+ end
193
193
 
194
- def metadata_file_name
195
- raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
196
- parser_fields['metadata_file_name']
197
- end
194
+ def metadata_file_name
195
+ raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
196
+ parser_fields['metadata_file_name']
197
+ end
198
198
 
199
- # Gather the paths to all metadata files matching the metadata_file_name
200
- def metadata_paths
201
- @metadata_paths ||= bag_paths.map do |b|
202
- Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
203
- end.flatten.compact
204
- end
199
+ # Gather the paths to all metadata files matching the metadata_file_name
200
+ def metadata_paths
201
+ @metadata_paths ||= bag_paths.map do |b|
202
+ Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
203
+ end.flatten.compact
204
+ end
205
205
 
206
- def metadata_path(bag)
207
- Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
208
- end
206
+ def metadata_path(bag)
207
+ Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
208
+ end
209
209
 
210
- def bag(path)
211
- return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
212
- bag = BagIt::Bag.new(path)
213
- return nil unless bag.valid?
214
- bag
215
- end
210
+ def bag(path)
211
+ return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
212
+ bag = BagIt::Bag.new(path)
213
+ return nil unless bag.valid?
214
+ bag
215
+ end
216
216
 
217
- # use the version of this method from the application parser instead
218
- def real_import_file_path
219
- return importer_unzip_path if file? && zip?
220
- parser_fields['import_file_path']
217
+ # use the version of this method from the application parser instead
218
+ def real_import_file_path
219
+ return importer_unzip_path if file? && zip?
220
+ parser_fields['import_file_path']
221
+ end
221
222
  end
222
223
  end
223
224
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
3
  module Bulkrax
5
4
  class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
6
5
  include ErroredEntries
@@ -23,6 +22,7 @@ module Bulkrax
23
22
  @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
24
23
  end
25
24
 
25
+ # rubocop:disable Metrics/AbcSize
26
26
  def build_records
27
27
  @collections = []
28
28
  @works = []
@@ -34,7 +34,9 @@ module Bulkrax
34
34
  next unless r.key?(model_mapping)
35
35
 
36
36
  model = r[model_mapping].nil? ? "" : r[model_mapping].strip
37
- if model.casecmp('collection').zero?
37
+ # TODO: Eventually this should be refactored to use Hyrax.config.collection_model
38
+ # We aren't right now because so many Bulkrax users are in between Fedora and Valkyrie
39
+ if model.casecmp('collection').zero? || model.casecmp('collectionresource').zero?
38
40
  @collections << r
39
41
  elsif model.casecmp('fileset').zero?
40
42
  @file_sets << r
@@ -52,6 +54,7 @@ module Bulkrax
52
54
 
53
55
  true
54
56
  end
57
+ # rubocop:enable Metrics/AbcSize
55
58
 
56
59
  def collections
57
60
  build_records if @collections.nil?
@@ -190,9 +193,10 @@ module Bulkrax
190
193
  # @todo - investigate getting directory structure
191
194
  # @todo - investigate using perform_later, and having the importer check for
192
195
  # DownloadCloudFileJob before it starts
193
- def retrieve_cloud_files(files)
196
+ def retrieve_cloud_files(files, importer)
194
197
  files_path = File.join(path_for_import, 'files')
195
198
  FileUtils.mkdir_p(files_path) unless File.exist?(files_path)
199
+ target_files = []
196
200
  files.each_pair do |_key, file|
197
201
  # fixes bug where auth headers do not get attached properly
198
202
  if file['auth_header'].present?
@@ -201,10 +205,12 @@ module Bulkrax
201
205
  end
202
206
  # this only works for uniquely named files
203
207
  target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
208
+ target_files << target_file
204
209
  # Now because we want the files in place before the importer runs
205
210
  # Problematic for a large upload
206
- Bulkrax::DownloadCloudFileJob.perform_now(file, target_file)
211
+ Bulkrax::DownloadCloudFileJob.perform_later(file, target_file)
207
212
  end
213
+ importer[:parser_fields]['original_file_paths'] = target_files
208
214
  return nil
209
215
  end
210
216
 
@@ -225,6 +231,7 @@ module Bulkrax
225
231
  CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
226
232
  group.each do |entry|
227
233
  csv << entry.parsed_metadata
234
+ # TODO: This is precarious when we have descendants of Bulkrax::CsvCollectionEntry
228
235
  next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
229
236
 
230
237
  store_files(entry.identifier, folder_count.to_s)
@@ -234,7 +241,7 @@ module Bulkrax
234
241
  end
235
242
 
236
243
  def store_files(identifier, folder_count)
237
- record = ActiveFedora::Base.find(identifier)
244
+ record = Bulkrax.object_factory.find(identifier)
238
245
  return unless record
239
246
 
240
247
  file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
@@ -286,6 +293,9 @@ module Bulkrax
286
293
 
287
294
  def sort_entries(entries)
288
295
  # always export models in the same order: work, collection, file set
296
+ #
297
+ # TODO: This is a problem in that only these classes are compared. Instead
298
+ # We should add a comparison operator to the classes.
289
299
  entries.sort_by do |entry|
290
300
  case entry.type
291
301
  when 'Bulkrax::CsvCollectionEntry'
@@ -105,6 +105,24 @@ module Bulkrax
105
105
  set_status_info(e)
106
106
  end
107
107
 
108
+ def create_file_sets; end
109
+
110
+ def create_relationships
111
+ ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
112
+ end
113
+
114
+ def record_raw_metadata(_record)
115
+ nil
116
+ end
117
+
118
+ def record_deleted?(_record)
119
+ false
120
+ end
121
+
122
+ def record_remove_and_rerun?(_record)
123
+ false
124
+ end
125
+
108
126
  # oai records do not let us set the source identifier easily
109
127
  def record_has_source_identifier(record, index)
110
128
  identifier = record.send(source_identifier)
@@ -149,12 +149,12 @@ module Bulkrax
149
149
  end
150
150
 
151
151
  def works
152
- @works ||= ActiveFedora::SolrService.query(works_query, **works_query_kwargs)
152
+ @works ||= Bulkrax.object_factory.query(works_query, **works_query_kwargs)
153
153
  end
154
154
 
155
155
  def collections
156
156
  @collections ||= if collections_query
157
- ActiveFedora::SolrService.query(collections_query, **collections_query_kwargs)
157
+ Bulkrax.object_factory.query(collections_query, **collections_query_kwargs)
158
158
  else
159
159
  []
160
160
  end
@@ -173,43 +173,39 @@ module Bulkrax
173
173
  # @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
174
174
  def file_sets
175
175
  @file_sets ||= ParserExportRecordSet.in_batches(candidate_file_set_ids) do |batch_of_ids|
176
- fsq = "has_model_ssim:#{Bulkrax.file_model_class} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
176
+ fsq = "has_model_ssim:#{Bulkrax.file_model_internal_resource} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
177
177
  fsq += extra_filters if extra_filters.present?
178
- ActiveFedora::SolrService.query(
178
+ Bulkrax.object_factory.query(
179
179
  fsq,
180
- { fl: "id", method: :post, rows: batch_of_ids.size }
180
+ fl: "id", method: :post, rows: batch_of_ids.size
181
181
  )
182
182
  end
183
183
  end
184
184
 
185
185
  def solr_name(base_name)
186
- if Module.const_defined?(:Solrizer)
187
- ::Solrizer.solr_name(base_name)
188
- else
189
- ::ActiveFedora.index_field_mapper.solr_name(base_name)
190
- end
186
+ Bulkrax.object_factory.solr_name(base_name)
191
187
  end
192
188
  end
193
189
 
194
190
  class All < Base
195
191
  def works_query
196
- "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')}) #{extra_filters}"
192
+ "has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')}) #{extra_filters}"
197
193
  end
198
194
 
199
195
  def collections_query
200
- "has_model_ssim:Collection #{extra_filters}"
196
+ "has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}"
201
197
  end
202
198
  end
203
199
 
204
200
  class Collection < Base
205
201
  def works_query
206
202
  "member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
207
- "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
203
+ "has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
208
204
  end
209
205
 
210
206
  def collections_query
211
207
  "(id:#{importerexporter.export_source} #{extra_filters}) OR " \
212
- "(has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
208
+ "(has_model_ssim:#{Bulkrax.collection_model_internal_resource} AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
213
209
  end
214
210
  end
215
211
 
@@ -247,12 +243,12 @@ module Bulkrax
247
243
 
248
244
  def works
249
245
  @works ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
250
- ActiveFedora::SolrService.query(
246
+ Bulkrax.object_factory.query(
251
247
  extra_filters.to_s,
252
248
  **query_kwargs.merge(
253
249
  fq: [
254
250
  %(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
255
- "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
251
+ "has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
256
252
  ],
257
253
  fl: 'id'
258
254
  )
@@ -262,12 +258,12 @@ module Bulkrax
262
258
 
263
259
  def collections
264
260
  @collections ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
265
- ActiveFedora::SolrService.query(
266
- "has_model_ssim:Collection #{extra_filters}",
261
+ Bulkrax.object_factory.query(
262
+ "has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}",
267
263
  **query_kwargs.merge(
268
264
  fq: [
269
265
  %(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
270
- "has_model_ssim:Collection"
266
+ "has_model_ssim:#{Bulkrax.collection_model_internal_resource}"
271
267
  ],
272
268
  fl: "id"
273
269
  )
@@ -281,12 +277,12 @@ module Bulkrax
281
277
  # @see Bulkrax::ParserExportRecordSet::Base#file_sets
282
278
  def file_sets
283
279
  @file_sets ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
284
- ActiveFedora::SolrService.query(
280
+ Bulkrax.object_factory.query(
285
281
  extra_filters,
286
- query_kwargs.merge(
282
+ **query_kwargs.merge(
287
283
  fq: [
288
284
  %(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
289
- "has_model_ssim:#{Bulkrax.file_model_class}"
285
+ "has_model_ssim:#{Bulkrax.file_model_internal_resource}"
290
286
  ],
291
287
  fl: 'id'
292
288
  )
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
-
4
2
  module Bulkrax
5
3
  class XmlParser < ApplicationParser
6
4
  def entry_class
@@ -29,6 +29,8 @@ module Bulkrax
29
29
  def self.call(name, suffix: SUFFIX)
30
30
  if name.end_with?(suffix)
31
31
  name.constantize
32
+ elsif name == "FileSet"
33
+ Bulkrax.file_model_class
32
34
  else
33
35
  begin
34
36
  "#{name}#{suffix}".constantize