bulkrax 7.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +24 -8
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,223 +1,224 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'bagit'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
|
4
|
+
module Bulkrax
|
5
|
+
class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
|
6
|
+
include ExportBehavior
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def valid_import?
|
13
|
-
return true if import_fields.present?
|
14
|
-
rescue => e
|
15
|
-
set_status_info(e)
|
16
|
-
false
|
17
|
-
end
|
8
|
+
def self.export_supported?
|
9
|
+
true
|
10
|
+
end
|
18
11
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
12
|
+
def valid_import?
|
13
|
+
return true if import_fields.present?
|
14
|
+
rescue => e
|
15
|
+
set_status_info(e)
|
16
|
+
false
|
17
|
+
end
|
23
18
|
|
24
|
-
|
25
|
-
|
26
|
-
|
19
|
+
def entry_class
|
20
|
+
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
21
|
+
rdf_format ? RdfEntry : CsvEntry
|
22
|
+
end
|
27
23
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
@import_fields ||= metadata_paths.sample(10).map do |path|
|
32
|
-
entry_class.fields_from_data(entry_class.read_data(path))
|
33
|
-
end.flatten.compact.uniq
|
34
|
-
end
|
24
|
+
def path_to_files(filename:)
|
25
|
+
@path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
|
26
|
+
end
|
35
27
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
data = entry_class.read_data(path)
|
43
|
-
get_data(bag, data)
|
28
|
+
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
29
|
+
def import_fields
|
30
|
+
raise StandardError, 'No metadata files were found' if metadata_paths.blank?
|
31
|
+
@import_fields ||= metadata_paths.sample(10).map do |path|
|
32
|
+
entry_class.fields_from_data(entry_class.read_data(path))
|
33
|
+
end.flatten.compact.uniq
|
44
34
|
end
|
45
35
|
|
46
|
-
|
47
|
-
|
36
|
+
# Create an Array of all metadata records
|
37
|
+
def records(_opts = {})
|
38
|
+
raise StandardError, 'No BagIt records were found' if bags.blank?
|
39
|
+
@records ||= bags.map do |bag|
|
40
|
+
path = metadata_path(bag)
|
41
|
+
raise StandardError, 'No metadata files were found' if path.blank?
|
42
|
+
data = entry_class.read_data(path)
|
43
|
+
get_data(bag, data)
|
44
|
+
end
|
45
|
+
|
46
|
+
@records = @records.flatten
|
47
|
+
end
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
49
|
+
def get_data(bag, data)
|
50
|
+
if entry_class == CsvEntry
|
51
|
+
data = data.map do |data_row|
|
52
|
+
record_data = entry_class.data_for_entry(data_row, source_identifier, self)
|
53
|
+
next record_data if importerexporter.metadata_only?
|
54
54
|
|
55
|
-
|
56
|
-
|
55
|
+
record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
|
56
|
+
record_data
|
57
|
+
end
|
58
|
+
else
|
59
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
60
|
+
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
57
61
|
end
|
58
|
-
|
59
|
-
data
|
60
|
-
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
62
|
+
|
63
|
+
data
|
61
64
|
end
|
62
65
|
|
63
|
-
|
64
|
-
end
|
66
|
+
# export methods
|
65
67
|
|
66
|
-
|
68
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
69
|
+
def write_files
|
70
|
+
require 'open-uri'
|
71
|
+
require 'socket'
|
67
72
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
73
|
+
folder_count = 1
|
74
|
+
records_in_folder = 0
|
75
|
+
work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
|
76
|
+
collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
|
77
|
+
file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
|
72
78
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
|
77
|
-
file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
|
79
|
+
work_entries[0..limit || total].each do |entry|
|
80
|
+
record = Bulkrax.object_factory.find(entry.identifier)
|
81
|
+
next unless record
|
78
82
|
|
79
|
-
|
80
|
-
record = ActiveFedora::Base.find(entry.identifier)
|
81
|
-
next unless record
|
83
|
+
bag_entries = [entry]
|
82
84
|
|
83
|
-
|
85
|
+
if record.member_of_collection_ids.present?
|
86
|
+
collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
|
87
|
+
end
|
84
88
|
|
85
|
-
|
86
|
-
|
87
|
-
|
89
|
+
if record.file_sets.present?
|
90
|
+
file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
|
91
|
+
end
|
88
92
|
|
89
|
-
|
90
|
-
|
91
|
-
|
93
|
+
records_in_folder += bag_entries.count
|
94
|
+
if records_in_folder > records_split_count
|
95
|
+
folder_count += 1
|
96
|
+
records_in_folder = bag_entries.count
|
97
|
+
end
|
92
98
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
99
|
+
bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
|
100
|
+
|
101
|
+
record.file_sets.each do |fs|
|
102
|
+
file_name = filename(fs)
|
103
|
+
next if file_name.blank? || fs.original_file.blank?
|
104
|
+
|
105
|
+
io = open(fs.original_file.uri)
|
106
|
+
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
107
|
+
file.write(io.read)
|
108
|
+
file.close
|
109
|
+
begin
|
110
|
+
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
111
|
+
rescue => e
|
112
|
+
entry.set_status_info(e)
|
113
|
+
set_status_info(e)
|
114
|
+
end
|
115
|
+
end
|
98
116
|
|
99
|
-
|
100
|
-
|
101
|
-
record.file_sets.each do |fs|
|
102
|
-
file_name = filename(fs)
|
103
|
-
next if file_name.blank? || fs.original_file.blank?
|
104
|
-
|
105
|
-
io = open(fs.original_file.uri)
|
106
|
-
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
107
|
-
file.write(io.read)
|
108
|
-
file.close
|
109
|
-
begin
|
110
|
-
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
111
|
-
rescue => e
|
112
|
-
entry.set_status_info(e)
|
113
|
-
set_status_info(e)
|
117
|
+
CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
118
|
+
bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
|
114
119
|
end
|
115
|
-
end
|
116
120
|
|
117
|
-
|
118
|
-
|
121
|
+
write_triples(folder_count, entry)
|
122
|
+
bag.manifest!(algo: 'sha256')
|
119
123
|
end
|
120
|
-
|
121
|
-
write_triples(folder_count, entry)
|
122
|
-
bag.manifest!(algo: 'sha256')
|
123
124
|
end
|
124
|
-
|
125
|
-
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
125
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
126
126
|
|
127
|
-
|
128
|
-
|
129
|
-
|
127
|
+
def setup_csv_metadata_export_file(folder_count, id)
|
128
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
129
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
130
130
|
|
131
|
-
|
132
|
-
|
131
|
+
File.join(path, id, 'metadata.csv')
|
132
|
+
end
|
133
133
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
134
|
+
def key_allowed(key)
|
135
|
+
!Bulkrax.reserved_properties.include?(key) &&
|
136
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
137
|
+
key != source_identifier.to_s
|
138
|
+
end
|
139
139
|
|
140
|
-
|
141
|
-
|
142
|
-
|
140
|
+
def setup_triple_metadata_export_file(folder_count, id)
|
141
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
142
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
143
143
|
|
144
|
-
|
145
|
-
|
144
|
+
File.join(path, id, 'metadata.nt')
|
145
|
+
end
|
146
146
|
|
147
|
-
|
148
|
-
|
149
|
-
|
147
|
+
def setup_bagit_folder(folder_count, id)
|
148
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
149
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
150
150
|
|
151
|
-
|
152
|
-
|
151
|
+
File.join(path, id)
|
152
|
+
end
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
154
|
+
# @todo(bjustice) - remove hyrax reference
|
155
|
+
def write_triples(folder_count, e)
|
156
|
+
sd = SolrDocument.find(e.identifier)
|
157
|
+
return if sd.nil?
|
158
158
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
159
|
+
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
160
|
+
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
161
|
+
File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
|
162
|
+
triples.write(rdf)
|
163
|
+
end
|
163
164
|
end
|
164
|
-
end
|
165
165
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
166
|
+
# @todo - investigate getting directory structure
|
167
|
+
# @todo - investigate using perform_later, and having the importer check for
|
168
|
+
# DownloadCloudFileJob before it starts
|
169
|
+
def retrieve_cloud_files(files, _importer)
|
170
|
+
# There should only be one zip file for Bagit, take the first
|
171
|
+
return if files['0'].blank?
|
172
|
+
target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
|
173
|
+
# Now because we want the files in place before the importer runs
|
174
|
+
Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
|
175
|
+
return target_file
|
176
|
+
end
|
177
177
|
|
178
|
-
|
178
|
+
private
|
179
179
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
180
|
+
def bags
|
181
|
+
return @bags if @bags.present?
|
182
|
+
new_bag = bag(import_file_path)
|
183
|
+
@bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
184
|
+
@bags.delete(nil)
|
185
|
+
raise StandardError, 'No valid bags found' if @bags.blank?
|
186
|
+
return @bags
|
187
|
+
end
|
188
188
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
189
|
+
# Gather the paths to all bags; skip any stray files
|
190
|
+
def bag_paths
|
191
|
+
bags.map(&:bag_dir)
|
192
|
+
end
|
193
193
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
194
|
+
def metadata_file_name
|
195
|
+
raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
|
196
|
+
parser_fields['metadata_file_name']
|
197
|
+
end
|
198
198
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
199
|
+
# Gather the paths to all metadata files matching the metadata_file_name
|
200
|
+
def metadata_paths
|
201
|
+
@metadata_paths ||= bag_paths.map do |b|
|
202
|
+
Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
|
203
|
+
end.flatten.compact
|
204
|
+
end
|
205
205
|
|
206
|
-
|
207
|
-
|
208
|
-
|
206
|
+
def metadata_path(bag)
|
207
|
+
Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
|
208
|
+
end
|
209
209
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
210
|
+
def bag(path)
|
211
|
+
return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
|
212
|
+
bag = BagIt::Bag.new(path)
|
213
|
+
return nil unless bag.valid?
|
214
|
+
bag
|
215
|
+
end
|
216
216
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
217
|
+
# use the version of this method from the application parser instead
|
218
|
+
def real_import_file_path
|
219
|
+
return importer_unzip_path if file? && zip?
|
220
|
+
parser_fields['import_file_path']
|
221
|
+
end
|
221
222
|
end
|
222
223
|
end
|
223
224
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'csv'
|
4
3
|
module Bulkrax
|
5
4
|
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
5
|
include ErroredEntries
|
@@ -23,6 +22,7 @@ module Bulkrax
|
|
23
22
|
@records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
24
23
|
end
|
25
24
|
|
25
|
+
# rubocop:disable Metrics/AbcSize
|
26
26
|
def build_records
|
27
27
|
@collections = []
|
28
28
|
@works = []
|
@@ -34,7 +34,9 @@ module Bulkrax
|
|
34
34
|
next unless r.key?(model_mapping)
|
35
35
|
|
36
36
|
model = r[model_mapping].nil? ? "" : r[model_mapping].strip
|
37
|
-
|
37
|
+
# TODO: Eventually this should be refactored to use Hyrax.config.collection_model
|
38
|
+
# We aren't right now because so many Bulkrax users are in between Fedora and Valkyrie
|
39
|
+
if model.casecmp('collection').zero? || model.casecmp('collectionresource').zero?
|
38
40
|
@collections << r
|
39
41
|
elsif model.casecmp('fileset').zero?
|
40
42
|
@file_sets << r
|
@@ -52,6 +54,7 @@ module Bulkrax
|
|
52
54
|
|
53
55
|
true
|
54
56
|
end
|
57
|
+
# rubocop:enable Metrics/AbcSize
|
55
58
|
|
56
59
|
def collections
|
57
60
|
build_records if @collections.nil?
|
@@ -190,9 +193,10 @@ module Bulkrax
|
|
190
193
|
# @todo - investigate getting directory structure
|
191
194
|
# @todo - investigate using perform_later, and having the importer check for
|
192
195
|
# DownloadCloudFileJob before it starts
|
193
|
-
def retrieve_cloud_files(files)
|
196
|
+
def retrieve_cloud_files(files, importer)
|
194
197
|
files_path = File.join(path_for_import, 'files')
|
195
198
|
FileUtils.mkdir_p(files_path) unless File.exist?(files_path)
|
199
|
+
target_files = []
|
196
200
|
files.each_pair do |_key, file|
|
197
201
|
# fixes bug where auth headers do not get attached properly
|
198
202
|
if file['auth_header'].present?
|
@@ -201,10 +205,12 @@ module Bulkrax
|
|
201
205
|
end
|
202
206
|
# this only works for uniquely named files
|
203
207
|
target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
|
208
|
+
target_files << target_file
|
204
209
|
# Now because we want the files in place before the importer runs
|
205
210
|
# Problematic for a large upload
|
206
|
-
Bulkrax::DownloadCloudFileJob.
|
211
|
+
Bulkrax::DownloadCloudFileJob.perform_later(file, target_file)
|
207
212
|
end
|
213
|
+
importer[:parser_fields]['original_file_paths'] = target_files
|
208
214
|
return nil
|
209
215
|
end
|
210
216
|
|
@@ -225,6 +231,7 @@ module Bulkrax
|
|
225
231
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
226
232
|
group.each do |entry|
|
227
233
|
csv << entry.parsed_metadata
|
234
|
+
# TODO: This is precarious when we have descendants of Bulkrax::CsvCollectionEntry
|
228
235
|
next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
|
229
236
|
|
230
237
|
store_files(entry.identifier, folder_count.to_s)
|
@@ -234,7 +241,7 @@ module Bulkrax
|
|
234
241
|
end
|
235
242
|
|
236
243
|
def store_files(identifier, folder_count)
|
237
|
-
record =
|
244
|
+
record = Bulkrax.object_factory.find(identifier)
|
238
245
|
return unless record
|
239
246
|
|
240
247
|
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
@@ -286,6 +293,9 @@ module Bulkrax
|
|
286
293
|
|
287
294
|
def sort_entries(entries)
|
288
295
|
# always export models in the same order: work, collection, file set
|
296
|
+
#
|
297
|
+
# TODO: This is a problem in that only these classes are compared. Instead
|
298
|
+
# We should add a comparison operator to the classes.
|
289
299
|
entries.sort_by do |entry|
|
290
300
|
case entry.type
|
291
301
|
when 'Bulkrax::CsvCollectionEntry'
|
@@ -105,6 +105,24 @@ module Bulkrax
|
|
105
105
|
set_status_info(e)
|
106
106
|
end
|
107
107
|
|
108
|
+
def create_file_sets; end
|
109
|
+
|
110
|
+
def create_relationships
|
111
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
112
|
+
end
|
113
|
+
|
114
|
+
def record_raw_metadata(_record)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
|
118
|
+
def record_deleted?(_record)
|
119
|
+
false
|
120
|
+
end
|
121
|
+
|
122
|
+
def record_remove_and_rerun?(_record)
|
123
|
+
false
|
124
|
+
end
|
125
|
+
|
108
126
|
# oai records do not let us set the source identifier easily
|
109
127
|
def record_has_source_identifier(record, index)
|
110
128
|
identifier = record.send(source_identifier)
|
@@ -149,12 +149,12 @@ module Bulkrax
|
|
149
149
|
end
|
150
150
|
|
151
151
|
def works
|
152
|
-
@works ||=
|
152
|
+
@works ||= Bulkrax.object_factory.query(works_query, **works_query_kwargs)
|
153
153
|
end
|
154
154
|
|
155
155
|
def collections
|
156
156
|
@collections ||= if collections_query
|
157
|
-
|
157
|
+
Bulkrax.object_factory.query(collections_query, **collections_query_kwargs)
|
158
158
|
else
|
159
159
|
[]
|
160
160
|
end
|
@@ -173,43 +173,39 @@ module Bulkrax
|
|
173
173
|
# @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
|
174
174
|
def file_sets
|
175
175
|
@file_sets ||= ParserExportRecordSet.in_batches(candidate_file_set_ids) do |batch_of_ids|
|
176
|
-
fsq = "has_model_ssim:#{Bulkrax.
|
176
|
+
fsq = "has_model_ssim:#{Bulkrax.file_model_internal_resource} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
|
177
177
|
fsq += extra_filters if extra_filters.present?
|
178
|
-
|
178
|
+
Bulkrax.object_factory.query(
|
179
179
|
fsq,
|
180
|
-
|
180
|
+
fl: "id", method: :post, rows: batch_of_ids.size
|
181
181
|
)
|
182
182
|
end
|
183
183
|
end
|
184
184
|
|
185
185
|
def solr_name(base_name)
|
186
|
-
|
187
|
-
::Solrizer.solr_name(base_name)
|
188
|
-
else
|
189
|
-
::ActiveFedora.index_field_mapper.solr_name(base_name)
|
190
|
-
end
|
186
|
+
Bulkrax.object_factory.solr_name(base_name)
|
191
187
|
end
|
192
188
|
end
|
193
189
|
|
194
190
|
class All < Base
|
195
191
|
def works_query
|
196
|
-
"has_model_ssim:(#{Bulkrax.
|
192
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')}) #{extra_filters}"
|
197
193
|
end
|
198
194
|
|
199
195
|
def collections_query
|
200
|
-
"has_model_ssim
|
196
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}"
|
201
197
|
end
|
202
198
|
end
|
203
199
|
|
204
200
|
class Collection < Base
|
205
201
|
def works_query
|
206
202
|
"member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
|
207
|
-
"has_model_ssim:(#{Bulkrax.
|
203
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
|
208
204
|
end
|
209
205
|
|
210
206
|
def collections_query
|
211
207
|
"(id:#{importerexporter.export_source} #{extra_filters}) OR " \
|
212
|
-
"(has_model_ssim
|
208
|
+
"(has_model_ssim:#{Bulkrax.collection_model_internal_resource} AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
|
213
209
|
end
|
214
210
|
end
|
215
211
|
|
@@ -247,12 +243,12 @@ module Bulkrax
|
|
247
243
|
|
248
244
|
def works
|
249
245
|
@works ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
250
|
-
|
246
|
+
Bulkrax.object_factory.query(
|
251
247
|
extra_filters.to_s,
|
252
248
|
**query_kwargs.merge(
|
253
249
|
fq: [
|
254
250
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
255
|
-
"has_model_ssim:(#{Bulkrax.
|
251
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
|
256
252
|
],
|
257
253
|
fl: 'id'
|
258
254
|
)
|
@@ -262,12 +258,12 @@ module Bulkrax
|
|
262
258
|
|
263
259
|
def collections
|
264
260
|
@collections ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
265
|
-
|
266
|
-
"has_model_ssim
|
261
|
+
Bulkrax.object_factory.query(
|
262
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}",
|
267
263
|
**query_kwargs.merge(
|
268
264
|
fq: [
|
269
265
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
270
|
-
"has_model_ssim
|
266
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource}"
|
271
267
|
],
|
272
268
|
fl: "id"
|
273
269
|
)
|
@@ -281,12 +277,12 @@ module Bulkrax
|
|
281
277
|
# @see Bulkrax::ParserExportRecordSet::Base#file_sets
|
282
278
|
def file_sets
|
283
279
|
@file_sets ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
284
|
-
|
280
|
+
Bulkrax.object_factory.query(
|
285
281
|
extra_filters,
|
286
|
-
query_kwargs.merge(
|
282
|
+
**query_kwargs.merge(
|
287
283
|
fq: [
|
288
284
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
289
|
-
"has_model_ssim:#{Bulkrax.
|
285
|
+
"has_model_ssim:#{Bulkrax.file_model_internal_resource}"
|
290
286
|
],
|
291
287
|
fl: 'id'
|
292
288
|
)
|