bulkrax 7.0.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +24 -8
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,223 +1,224 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'bagit'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
|
4
|
+
module Bulkrax
|
5
|
+
class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
|
6
|
+
include ExportBehavior
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def valid_import?
|
13
|
-
return true if import_fields.present?
|
14
|
-
rescue => e
|
15
|
-
set_status_info(e)
|
16
|
-
false
|
17
|
-
end
|
8
|
+
def self.export_supported?
|
9
|
+
true
|
10
|
+
end
|
18
11
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
12
|
+
def valid_import?
|
13
|
+
return true if import_fields.present?
|
14
|
+
rescue => e
|
15
|
+
set_status_info(e)
|
16
|
+
false
|
17
|
+
end
|
23
18
|
|
24
|
-
|
25
|
-
|
26
|
-
|
19
|
+
def entry_class
|
20
|
+
rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
|
21
|
+
rdf_format ? RdfEntry : CsvEntry
|
22
|
+
end
|
27
23
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
@import_fields ||= metadata_paths.sample(10).map do |path|
|
32
|
-
entry_class.fields_from_data(entry_class.read_data(path))
|
33
|
-
end.flatten.compact.uniq
|
34
|
-
end
|
24
|
+
def path_to_files(filename:)
|
25
|
+
@path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
|
26
|
+
end
|
35
27
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
data = entry_class.read_data(path)
|
43
|
-
get_data(bag, data)
|
28
|
+
# Take a random sample of 10 metadata_paths and work out the import fields from that
|
29
|
+
def import_fields
|
30
|
+
raise StandardError, 'No metadata files were found' if metadata_paths.blank?
|
31
|
+
@import_fields ||= metadata_paths.sample(10).map do |path|
|
32
|
+
entry_class.fields_from_data(entry_class.read_data(path))
|
33
|
+
end.flatten.compact.uniq
|
44
34
|
end
|
45
35
|
|
46
|
-
|
47
|
-
|
36
|
+
# Create an Array of all metadata records
|
37
|
+
def records(_opts = {})
|
38
|
+
raise StandardError, 'No BagIt records were found' if bags.blank?
|
39
|
+
@records ||= bags.map do |bag|
|
40
|
+
path = metadata_path(bag)
|
41
|
+
raise StandardError, 'No metadata files were found' if path.blank?
|
42
|
+
data = entry_class.read_data(path)
|
43
|
+
get_data(bag, data)
|
44
|
+
end
|
45
|
+
|
46
|
+
@records = @records.flatten
|
47
|
+
end
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
49
|
+
def get_data(bag, data)
|
50
|
+
if entry_class == CsvEntry
|
51
|
+
data = data.map do |data_row|
|
52
|
+
record_data = entry_class.data_for_entry(data_row, source_identifier, self)
|
53
|
+
next record_data if importerexporter.metadata_only?
|
54
54
|
|
55
|
-
|
56
|
-
|
55
|
+
record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
|
56
|
+
record_data
|
57
|
+
end
|
58
|
+
else
|
59
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
60
|
+
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
57
61
|
end
|
58
|
-
|
59
|
-
data
|
60
|
-
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
62
|
+
|
63
|
+
data
|
61
64
|
end
|
62
65
|
|
63
|
-
|
64
|
-
end
|
66
|
+
# export methods
|
65
67
|
|
66
|
-
|
68
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
69
|
+
def write_files
|
70
|
+
require 'open-uri'
|
71
|
+
require 'socket'
|
67
72
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
73
|
+
folder_count = 1
|
74
|
+
records_in_folder = 0
|
75
|
+
work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
|
76
|
+
collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
|
77
|
+
file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
|
72
78
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
|
77
|
-
file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
|
79
|
+
work_entries[0..limit || total].each do |entry|
|
80
|
+
record = Bulkrax.object_factory.find(entry.identifier)
|
81
|
+
next unless record
|
78
82
|
|
79
|
-
|
80
|
-
record = ActiveFedora::Base.find(entry.identifier)
|
81
|
-
next unless record
|
83
|
+
bag_entries = [entry]
|
82
84
|
|
83
|
-
|
85
|
+
if record.member_of_collection_ids.present?
|
86
|
+
collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
|
87
|
+
end
|
84
88
|
|
85
|
-
|
86
|
-
|
87
|
-
|
89
|
+
if record.file_sets.present?
|
90
|
+
file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
|
91
|
+
end
|
88
92
|
|
89
|
-
|
90
|
-
|
91
|
-
|
93
|
+
records_in_folder += bag_entries.count
|
94
|
+
if records_in_folder > records_split_count
|
95
|
+
folder_count += 1
|
96
|
+
records_in_folder = bag_entries.count
|
97
|
+
end
|
92
98
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
99
|
+
bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
|
100
|
+
|
101
|
+
record.file_sets.each do |fs|
|
102
|
+
file_name = filename(fs)
|
103
|
+
next if file_name.blank? || fs.original_file.blank?
|
104
|
+
|
105
|
+
io = open(fs.original_file.uri)
|
106
|
+
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
107
|
+
file.write(io.read)
|
108
|
+
file.close
|
109
|
+
begin
|
110
|
+
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
111
|
+
rescue => e
|
112
|
+
entry.set_status_info(e)
|
113
|
+
set_status_info(e)
|
114
|
+
end
|
115
|
+
end
|
98
116
|
|
99
|
-
|
100
|
-
|
101
|
-
record.file_sets.each do |fs|
|
102
|
-
file_name = filename(fs)
|
103
|
-
next if file_name.blank? || fs.original_file.blank?
|
104
|
-
|
105
|
-
io = open(fs.original_file.uri)
|
106
|
-
file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
|
107
|
-
file.write(io.read)
|
108
|
-
file.close
|
109
|
-
begin
|
110
|
-
bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
|
111
|
-
rescue => e
|
112
|
-
entry.set_status_info(e)
|
113
|
-
set_status_info(e)
|
117
|
+
CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
|
118
|
+
bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
|
114
119
|
end
|
115
|
-
end
|
116
120
|
|
117
|
-
|
118
|
-
|
121
|
+
write_triples(folder_count, entry)
|
122
|
+
bag.manifest!(algo: 'sha256')
|
119
123
|
end
|
120
|
-
|
121
|
-
write_triples(folder_count, entry)
|
122
|
-
bag.manifest!(algo: 'sha256')
|
123
124
|
end
|
124
|
-
|
125
|
-
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
125
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
126
126
|
|
127
|
-
|
128
|
-
|
129
|
-
|
127
|
+
def setup_csv_metadata_export_file(folder_count, id)
|
128
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
129
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
130
130
|
|
131
|
-
|
132
|
-
|
131
|
+
File.join(path, id, 'metadata.csv')
|
132
|
+
end
|
133
133
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
134
|
+
def key_allowed(key)
|
135
|
+
!Bulkrax.reserved_properties.include?(key) &&
|
136
|
+
new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
|
137
|
+
key != source_identifier.to_s
|
138
|
+
end
|
139
139
|
|
140
|
-
|
141
|
-
|
142
|
-
|
140
|
+
def setup_triple_metadata_export_file(folder_count, id)
|
141
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
142
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
143
143
|
|
144
|
-
|
145
|
-
|
144
|
+
File.join(path, id, 'metadata.nt')
|
145
|
+
end
|
146
146
|
|
147
|
-
|
148
|
-
|
149
|
-
|
147
|
+
def setup_bagit_folder(folder_count, id)
|
148
|
+
path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
|
149
|
+
FileUtils.mkdir_p(path) unless File.exist?(path)
|
150
150
|
|
151
|
-
|
152
|
-
|
151
|
+
File.join(path, id)
|
152
|
+
end
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
154
|
+
# @todo(bjustice) - remove hyrax reference
|
155
|
+
def write_triples(folder_count, e)
|
156
|
+
sd = SolrDocument.find(e.identifier)
|
157
|
+
return if sd.nil?
|
158
158
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
159
|
+
req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
|
160
|
+
rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
|
161
|
+
File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
|
162
|
+
triples.write(rdf)
|
163
|
+
end
|
163
164
|
end
|
164
|
-
end
|
165
165
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
166
|
+
# @todo - investigate getting directory structure
|
167
|
+
# @todo - investigate using perform_later, and having the importer check for
|
168
|
+
# DownloadCloudFileJob before it starts
|
169
|
+
def retrieve_cloud_files(files, _importer)
|
170
|
+
# There should only be one zip file for Bagit, take the first
|
171
|
+
return if files['0'].blank?
|
172
|
+
target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
|
173
|
+
# Now because we want the files in place before the importer runs
|
174
|
+
Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
|
175
|
+
return target_file
|
176
|
+
end
|
177
177
|
|
178
|
-
|
178
|
+
private
|
179
179
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
180
|
+
def bags
|
181
|
+
return @bags if @bags.present?
|
182
|
+
new_bag = bag(import_file_path)
|
183
|
+
@bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
|
184
|
+
@bags.delete(nil)
|
185
|
+
raise StandardError, 'No valid bags found' if @bags.blank?
|
186
|
+
return @bags
|
187
|
+
end
|
188
188
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
189
|
+
# Gather the paths to all bags; skip any stray files
|
190
|
+
def bag_paths
|
191
|
+
bags.map(&:bag_dir)
|
192
|
+
end
|
193
193
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
194
|
+
def metadata_file_name
|
195
|
+
raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
|
196
|
+
parser_fields['metadata_file_name']
|
197
|
+
end
|
198
198
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
199
|
+
# Gather the paths to all metadata files matching the metadata_file_name
|
200
|
+
def metadata_paths
|
201
|
+
@metadata_paths ||= bag_paths.map do |b|
|
202
|
+
Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
|
203
|
+
end.flatten.compact
|
204
|
+
end
|
205
205
|
|
206
|
-
|
207
|
-
|
208
|
-
|
206
|
+
def metadata_path(bag)
|
207
|
+
Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
|
208
|
+
end
|
209
209
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
210
|
+
def bag(path)
|
211
|
+
return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
|
212
|
+
bag = BagIt::Bag.new(path)
|
213
|
+
return nil unless bag.valid?
|
214
|
+
bag
|
215
|
+
end
|
216
216
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
217
|
+
# use the version of this method from the application parser instead
|
218
|
+
def real_import_file_path
|
219
|
+
return importer_unzip_path if file? && zip?
|
220
|
+
parser_fields['import_file_path']
|
221
|
+
end
|
221
222
|
end
|
222
223
|
end
|
223
224
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'csv'
|
4
3
|
module Bulkrax
|
5
4
|
class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
5
|
include ErroredEntries
|
@@ -23,6 +22,7 @@ module Bulkrax
|
|
23
22
|
@records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
24
23
|
end
|
25
24
|
|
25
|
+
# rubocop:disable Metrics/AbcSize
|
26
26
|
def build_records
|
27
27
|
@collections = []
|
28
28
|
@works = []
|
@@ -34,7 +34,9 @@ module Bulkrax
|
|
34
34
|
next unless r.key?(model_mapping)
|
35
35
|
|
36
36
|
model = r[model_mapping].nil? ? "" : r[model_mapping].strip
|
37
|
-
|
37
|
+
# TODO: Eventually this should be refactored to use Hyrax.config.collection_model
|
38
|
+
# We aren't right now because so many Bulkrax users are in between Fedora and Valkyrie
|
39
|
+
if model.casecmp('collection').zero? || model.casecmp('collectionresource').zero?
|
38
40
|
@collections << r
|
39
41
|
elsif model.casecmp('fileset').zero?
|
40
42
|
@file_sets << r
|
@@ -52,6 +54,7 @@ module Bulkrax
|
|
52
54
|
|
53
55
|
true
|
54
56
|
end
|
57
|
+
# rubocop:enable Metrics/AbcSize
|
55
58
|
|
56
59
|
def collections
|
57
60
|
build_records if @collections.nil?
|
@@ -190,9 +193,10 @@ module Bulkrax
|
|
190
193
|
# @todo - investigate getting directory structure
|
191
194
|
# @todo - investigate using perform_later, and having the importer check for
|
192
195
|
# DownloadCloudFileJob before it starts
|
193
|
-
def retrieve_cloud_files(files)
|
196
|
+
def retrieve_cloud_files(files, importer)
|
194
197
|
files_path = File.join(path_for_import, 'files')
|
195
198
|
FileUtils.mkdir_p(files_path) unless File.exist?(files_path)
|
199
|
+
target_files = []
|
196
200
|
files.each_pair do |_key, file|
|
197
201
|
# fixes bug where auth headers do not get attached properly
|
198
202
|
if file['auth_header'].present?
|
@@ -201,10 +205,12 @@ module Bulkrax
|
|
201
205
|
end
|
202
206
|
# this only works for uniquely named files
|
203
207
|
target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
|
208
|
+
target_files << target_file
|
204
209
|
# Now because we want the files in place before the importer runs
|
205
210
|
# Problematic for a large upload
|
206
|
-
Bulkrax::DownloadCloudFileJob.
|
211
|
+
Bulkrax::DownloadCloudFileJob.perform_later(file, target_file)
|
207
212
|
end
|
213
|
+
importer[:parser_fields]['original_file_paths'] = target_files
|
208
214
|
return nil
|
209
215
|
end
|
210
216
|
|
@@ -225,6 +231,7 @@ module Bulkrax
|
|
225
231
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
226
232
|
group.each do |entry|
|
227
233
|
csv << entry.parsed_metadata
|
234
|
+
# TODO: This is precarious when we have descendents of Bulkrax::CsvCollectionEntry
|
228
235
|
next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'
|
229
236
|
|
230
237
|
store_files(entry.identifier, folder_count.to_s)
|
@@ -234,7 +241,7 @@ module Bulkrax
|
|
234
241
|
end
|
235
242
|
|
236
243
|
def store_files(identifier, folder_count)
|
237
|
-
record =
|
244
|
+
record = Bulkrax.object_factory.find(identifier)
|
238
245
|
return unless record
|
239
246
|
|
240
247
|
file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
|
@@ -286,6 +293,9 @@ module Bulkrax
|
|
286
293
|
|
287
294
|
def sort_entries(entries)
|
288
295
|
# always export models in the same order: work, collection, file set
|
296
|
+
#
|
297
|
+
# TODO: This is a problem in that only these classes are compared. Instead
|
298
|
+
# We should add a comparison operator to the classes.
|
289
299
|
entries.sort_by do |entry|
|
290
300
|
case entry.type
|
291
301
|
when 'Bulkrax::CsvCollectionEntry'
|
@@ -105,6 +105,24 @@ module Bulkrax
|
|
105
105
|
set_status_info(e)
|
106
106
|
end
|
107
107
|
|
108
|
+
def create_file_sets; end
|
109
|
+
|
110
|
+
def create_relationships
|
111
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
112
|
+
end
|
113
|
+
|
114
|
+
def record_raw_metadata(_record)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
|
118
|
+
def record_deleted?(_record)
|
119
|
+
false
|
120
|
+
end
|
121
|
+
|
122
|
+
def record_remove_and_rerun?(_record)
|
123
|
+
false
|
124
|
+
end
|
125
|
+
|
108
126
|
# OAI records do not let us set the source identifier easily
|
109
127
|
def record_has_source_identifier(record, index)
|
110
128
|
identifier = record.send(source_identifier)
|
@@ -149,12 +149,12 @@ module Bulkrax
|
|
149
149
|
end
|
150
150
|
|
151
151
|
def works
|
152
|
-
@works ||=
|
152
|
+
@works ||= Bulkrax.object_factory.query(works_query, **works_query_kwargs)
|
153
153
|
end
|
154
154
|
|
155
155
|
def collections
|
156
156
|
@collections ||= if collections_query
|
157
|
-
|
157
|
+
Bulkrax.object_factory.query(collections_query, **collections_query_kwargs)
|
158
158
|
else
|
159
159
|
[]
|
160
160
|
end
|
@@ -173,43 +173,39 @@ module Bulkrax
|
|
173
173
|
# @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
|
174
174
|
def file_sets
|
175
175
|
@file_sets ||= ParserExportRecordSet.in_batches(candidate_file_set_ids) do |batch_of_ids|
|
176
|
-
fsq = "has_model_ssim:#{Bulkrax.
|
176
|
+
fsq = "has_model_ssim:#{Bulkrax.file_model_internal_resource} AND id:(\"" + batch_of_ids.join('" OR "') + "\")"
|
177
177
|
fsq += extra_filters if extra_filters.present?
|
178
|
-
|
178
|
+
Bulkrax.object_factory.query(
|
179
179
|
fsq,
|
180
|
-
|
180
|
+
fl: "id", method: :post, rows: batch_of_ids.size
|
181
181
|
)
|
182
182
|
end
|
183
183
|
end
|
184
184
|
|
185
185
|
def solr_name(base_name)
|
186
|
-
|
187
|
-
::Solrizer.solr_name(base_name)
|
188
|
-
else
|
189
|
-
::ActiveFedora.index_field_mapper.solr_name(base_name)
|
190
|
-
end
|
186
|
+
Bulkrax.object_factory.solr_name(base_name)
|
191
187
|
end
|
192
188
|
end
|
193
189
|
|
194
190
|
class All < Base
|
195
191
|
def works_query
|
196
|
-
"has_model_ssim:(#{Bulkrax.
|
192
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')}) #{extra_filters}"
|
197
193
|
end
|
198
194
|
|
199
195
|
def collections_query
|
200
|
-
"has_model_ssim
|
196
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}"
|
201
197
|
end
|
202
198
|
end
|
203
199
|
|
204
200
|
class Collection < Base
|
205
201
|
def works_query
|
206
202
|
"member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
|
207
|
-
"has_model_ssim:(#{Bulkrax.
|
203
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
|
208
204
|
end
|
209
205
|
|
210
206
|
def collections_query
|
211
207
|
"(id:#{importerexporter.export_source} #{extra_filters}) OR " \
|
212
|
-
"(has_model_ssim
|
208
|
+
"(has_model_ssim:#{Bulkrax.collection_model_internal_resource} AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
|
213
209
|
end
|
214
210
|
end
|
215
211
|
|
@@ -247,12 +243,12 @@ module Bulkrax
|
|
247
243
|
|
248
244
|
def works
|
249
245
|
@works ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
250
|
-
|
246
|
+
Bulkrax.object_factory.query(
|
251
247
|
extra_filters.to_s,
|
252
248
|
**query_kwargs.merge(
|
253
249
|
fq: [
|
254
250
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
255
|
-
"has_model_ssim:(#{Bulkrax.
|
251
|
+
"has_model_ssim:(#{Bulkrax.curation_concern_internal_resources.join(' OR ')})"
|
256
252
|
],
|
257
253
|
fl: 'id'
|
258
254
|
)
|
@@ -262,12 +258,12 @@ module Bulkrax
|
|
262
258
|
|
263
259
|
def collections
|
264
260
|
@collections ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
265
|
-
|
266
|
-
"has_model_ssim
|
261
|
+
Bulkrax.object_factory.query(
|
262
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource} #{extra_filters}",
|
267
263
|
**query_kwargs.merge(
|
268
264
|
fq: [
|
269
265
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
270
|
-
"has_model_ssim
|
266
|
+
"has_model_ssim:#{Bulkrax.collection_model_internal_resource}"
|
271
267
|
],
|
272
268
|
fl: "id"
|
273
269
|
)
|
@@ -281,12 +277,12 @@ module Bulkrax
|
|
281
277
|
# @see Bulkrax::ParserExportRecordSet::Base#file_sets
|
282
278
|
def file_sets
|
283
279
|
@file_sets ||= ParserExportRecordSet.in_batches(complete_entry_identifiers) do |ids|
|
284
|
-
|
280
|
+
Bulkrax.object_factory.query(
|
285
281
|
extra_filters,
|
286
|
-
query_kwargs.merge(
|
282
|
+
**query_kwargs.merge(
|
287
283
|
fq: [
|
288
284
|
%(#{solr_name(work_identifier)}:("#{ids.join('" OR "')}")),
|
289
|
-
"has_model_ssim:#{Bulkrax.
|
285
|
+
"has_model_ssim:#{Bulkrax.file_model_internal_resource}"
|
290
286
|
],
|
291
287
|
fl: 'id'
|
292
288
|
)
|