bulkrax 5.0.0 → 5.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/entries_controller.rb +4 -2
- data/app/controllers/bulkrax/exporters_controller.rb +13 -9
- data/app/controllers/bulkrax/importers_controller.rb +10 -10
- data/app/helpers/bulkrax/application_helper.rb +1 -1
- data/app/helpers/bulkrax/importers_helper.rb +2 -2
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +78 -59
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/export_work_job.rb +2 -2
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +20 -7
- data/app/jobs/bulkrax/importer_job.rb +2 -2
- data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
- data/app/matchers/bulkrax/application_matcher.rb +1 -0
- data/app/models/bulkrax/csv_entry.rb +93 -24
- data/app/models/bulkrax/exporter.rb +18 -19
- data/app/models/bulkrax/importer.rb +5 -5
- data/app/models/bulkrax/importer_run.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +14 -2
- data/app/models/bulkrax/pending_relationship.rb +4 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +3 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
- data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
- data/app/models/concerns/bulkrax/status_info.rb +9 -4
- data/app/parsers/bulkrax/application_parser.rb +14 -16
- data/app/parsers/bulkrax/bagit_parser.rb +6 -17
- data/app/parsers/bulkrax/csv_parser.rb +43 -111
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
- data/app/parsers/bulkrax/xml_parser.rb +9 -5
- data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
- data/app/views/bulkrax/entries/show.html.erb +1 -1
- data/app/views/bulkrax/exporters/_form.html.erb +60 -45
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/index.html.erb +2 -2
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +3 -3
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_form.html.erb +5 -5
- data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
- data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
- data/app/views/bulkrax/importers/show.html.erb +18 -16
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
- data/config/locales/bulkrax.en.yml +26 -0
- data/lib/bulkrax/entry_spec_helper.rb +190 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +124 -45
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
- data/lib/tasks/reset.rake +1 -1
- metadata +5 -3
@@ -84,23 +84,31 @@ module Bulkrax
|
|
84
84
|
@import_fields ||= records.inject(:merge).keys.compact.uniq
|
85
85
|
end
|
86
86
|
|
87
|
-
def required_elements?(
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
87
|
+
def required_elements?(record)
|
88
|
+
missing_elements(record).blank?
|
89
|
+
end
|
90
|
+
|
91
|
+
def missing_elements(record)
|
92
|
+
keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
|
93
|
+
keys = []
|
94
|
+
# Because we're persisting the mapping in the database, these are likely string keys.
|
95
|
+
# However, there's no guarantee. So, we need to ensure that by running stringify.
|
96
|
+
importerexporter.mapping.stringify_keys.map do |k, v|
|
97
|
+
Array.wrap(v['from']).each do |vf|
|
98
|
+
keys << k if keys_from_record.include?(vf)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
|
94
102
|
end
|
95
103
|
|
96
104
|
def valid_import?
|
97
|
-
|
98
|
-
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(
|
99
|
-
raise StandardError, error_alert unless required_elements?(
|
105
|
+
compressed_record = records.flat_map(&:to_a).partition { |_, v| !v }.flatten(1).to_h
|
106
|
+
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
|
107
|
+
raise StandardError, error_alert unless required_elements?(compressed_record)
|
100
108
|
|
101
109
|
file_paths.is_a?(Array)
|
102
110
|
rescue StandardError => e
|
103
|
-
|
111
|
+
set_status_info(e)
|
104
112
|
false
|
105
113
|
end
|
106
114
|
|
@@ -140,7 +148,7 @@ module Bulkrax
|
|
140
148
|
end
|
141
149
|
true
|
142
150
|
rescue StandardError => e
|
143
|
-
|
151
|
+
set_status_info(e)
|
144
152
|
end
|
145
153
|
|
146
154
|
def create_entry_and_job(current_record, type)
|
@@ -167,102 +175,17 @@ module Bulkrax
|
|
167
175
|
path
|
168
176
|
end
|
169
177
|
|
170
|
-
def
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
176
|
-
end
|
177
|
-
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
178
|
-
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
179
|
-
output
|
180
|
-
end
|
181
|
-
|
182
|
-
def current_work_ids
|
183
|
-
ActiveSupport::Deprecation.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
|
184
|
-
current_record_ids
|
185
|
-
end
|
186
|
-
|
187
|
-
# rubocop:disable Metrics/AbcSize
|
188
|
-
def current_record_ids
|
189
|
-
@work_ids = []
|
190
|
-
@collection_ids = []
|
191
|
-
@file_set_ids = []
|
192
|
-
|
193
|
-
case importerexporter.export_from
|
194
|
-
when 'all'
|
195
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
|
-
when 'collection'
|
199
|
-
@work_ids = ActiveFedora::SolrService.query(
|
200
|
-
"member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
|
201
|
-
).map(&:id)
|
202
|
-
# get the parent collection and child collections
|
203
|
-
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
204
|
-
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
205
|
-
rows: 2_147_483_647).map(&:id)
|
206
|
-
find_child_file_sets(@work_ids)
|
207
|
-
when 'worktype'
|
208
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
209
|
-
find_child_file_sets(@work_ids)
|
210
|
-
when 'importer'
|
211
|
-
set_ids_for_exporting_from_importer
|
212
|
-
end
|
213
|
-
|
214
|
-
@work_ids + @collection_ids + @file_set_ids
|
215
|
-
end
|
216
|
-
# rubocop:enable Metrics/AbcSize
|
217
|
-
|
218
|
-
# find the related file set ids so entries can be made for export
|
219
|
-
def find_child_file_sets(work_ids)
|
220
|
-
work_ids.each do |id|
|
221
|
-
ActiveFedora::Base.find(id).file_set_ids.each { |fs_id| @file_set_ids << fs_id }
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
226
|
-
# @see #current_record_ids
|
227
|
-
def set_ids_for_exporting_from_importer
|
228
|
-
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
229
|
-
complete_statuses = Status.latest_by_statusable
|
230
|
-
.includes(:statusable)
|
231
|
-
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
232
|
-
|
233
|
-
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
234
|
-
extra_filters = extra_filters.presence || '*:*'
|
235
|
-
|
236
|
-
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
237
|
-
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
238
|
-
extra_filters.to_s,
|
239
|
-
fq: [
|
240
|
-
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
241
|
-
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
242
|
-
],
|
243
|
-
fl: 'id',
|
244
|
-
rows: 2_000_000_000
|
245
|
-
)['response']['docs'].map { |obj| obj['id'] })
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
def solr_name(base_name)
|
250
|
-
Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
|
178
|
+
def current_records_for_export
|
179
|
+
@current_records_for_export ||= Bulkrax::ParserExportRecordSet.for(
|
180
|
+
parser: self,
|
181
|
+
export_from: importerexporter.export_from
|
182
|
+
)
|
251
183
|
end
|
252
184
|
|
253
185
|
def create_new_entries
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
this_entry_class = if @collection_ids.include?(id)
|
258
|
-
collection_entry_class
|
259
|
-
elsif @file_set_ids.include?(id)
|
260
|
-
file_set_entry_class
|
261
|
-
else
|
262
|
-
entry_class
|
263
|
-
end
|
264
|
-
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
265
|
-
|
186
|
+
# NOTE: The each method enforces the limit, as it can best optimize the underlying queries.
|
187
|
+
current_records_for_export.each do |id, entry_class|
|
188
|
+
new_entry = find_or_create_entry(entry_class, id, 'Bulkrax::Exporter')
|
266
189
|
begin
|
267
190
|
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
268
191
|
rescue => e
|
@@ -291,16 +214,22 @@ module Bulkrax
|
|
291
214
|
end
|
292
215
|
|
293
216
|
def valid_entry_types
|
294
|
-
[
|
217
|
+
[collection_entry_class.to_s, file_set_entry_class.to_s, entry_class.to_s]
|
295
218
|
end
|
296
219
|
|
297
220
|
# TODO: figure out why using the version of this method that's in the bagit parser
|
298
221
|
# breaks specs for the "if importer?" line
|
299
222
|
def total
|
300
|
-
@total =
|
301
|
-
|
223
|
+
@total =
|
224
|
+
if importer?
|
225
|
+
importer.parser_fields['total'] || 0
|
226
|
+
elsif exporter?
|
227
|
+
limit.to_i.zero? ? current_records_for_export.count : limit.to_i
|
228
|
+
else
|
229
|
+
0
|
230
|
+
end
|
302
231
|
|
303
|
-
return @total
|
232
|
+
return @total
|
304
233
|
rescue StandardError
|
305
234
|
@total = 0
|
306
235
|
end
|
@@ -335,10 +264,13 @@ module Bulkrax
|
|
335
264
|
def write_files
|
336
265
|
require 'open-uri'
|
337
266
|
folder_count = 0
|
267
|
+
# TODO: This is also not performant; unclear how to address, but lower priority as of
|
268
|
+
# <2023-02-21 Tue>.
|
338
269
|
sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
|
339
270
|
.select { |e| valid_entry_types.include?(e.type) }
|
340
271
|
|
341
|
-
|
272
|
+
group_size = limit.to_i.zero? ? total : limit.to_i
|
273
|
+
sorted_entries[0..group_size].in_groups_of(records_split_count, false) do |group|
|
342
274
|
folder_count += 1
|
343
275
|
|
344
276
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
@@ -398,7 +330,7 @@ module Bulkrax
|
|
398
330
|
return @object_names if @object_names
|
399
331
|
|
400
332
|
@object_names = mapping.values.map { |value| value['object'] }
|
401
|
-
@object_names.uniq
|
333
|
+
@object_names.uniq!&.delete(nil)
|
402
334
|
|
403
335
|
@object_names
|
404
336
|
end
|
@@ -63,9 +63,9 @@ module Bulkrax
|
|
63
63
|
|
64
64
|
def create_collections
|
65
65
|
metadata = {
|
66
|
-
visibility: 'open'
|
67
|
-
collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
66
|
+
visibility: 'open'
|
68
67
|
}
|
68
|
+
metadata[:collection_type_gid] = Hyrax::CollectionType.find_or_create_default_collection_type.gid if defined?(::Hyrax)
|
69
69
|
|
70
70
|
collections.each_with_index do |set, index|
|
71
71
|
next unless collection_name == 'all' || collection_name == set.spec
|
@@ -0,0 +1,281 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
# This module is responsible for providing the means of querying Solr for the appropriate works,
|
5
|
+
# collections, and file sets for an export of entries.
|
6
|
+
#
|
7
|
+
# @see .for
|
8
|
+
module ParserExportRecordSet
|
9
|
+
# @api public
|
10
|
+
#
|
11
|
+
# A factory method for returning an object that can yield each id and associated entry_class as
|
12
|
+
# well as return the count of objects in the record set.
|
13
|
+
#
|
14
|
+
# @param parser [Bulkrax::ApplicationParser]
|
15
|
+
# @param export_from [String]
|
16
|
+
#
|
17
|
+
# @return [#each, #count] An object, likely a descendant of
|
18
|
+
# {Bulkrax::ParserExportRecordSet::Base} that responds to {Base#count} and
|
19
|
+
# {Base#each}.
|
20
|
+
def self.for(parser:, export_from:)
|
21
|
+
"Bulkrax::ParserExportRecordSet::#{export_from.classify}".constantize.new(parser: parser)
|
22
|
+
end
|
23
|
+
|
24
|
+
# @abstract
|
25
|
+
#
|
26
|
+
# @note This has {#each} and {#count} but is not an Enumerable. But because it has these two
|
27
|
+
# methods that echo {Array}, we can do some lovely mocking and stubbing in those classes
|
28
|
+
# dependent on this file. :)
|
29
|
+
class Base
|
30
|
+
def initialize(parser:)
|
31
|
+
@parser = parser
|
32
|
+
end
|
33
|
+
attr_reader :parser
|
34
|
+
private :parser
|
35
|
+
|
36
|
+
delegate :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter, to: :parser
|
37
|
+
private :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter
|
38
|
+
|
39
|
+
# @return [Integer]
|
40
|
+
def count
|
41
|
+
sum = works.count + collections.count + file_sets.count
|
42
|
+
return sum if limit.zero?
|
43
|
+
return limit if sum > limit
|
44
|
+
return sum
|
45
|
+
end
|
46
|
+
|
47
|
+
# Yield first the works, then collections, then file sets. Once we've yielded as many times
|
48
|
+
# as the parser's limit, we break the iteration and return.
|
49
|
+
#
|
50
|
+
# @yieldparam id [String] The ID of the work/collection/file_set
|
51
|
+
# @yieldparam entry_class [Class] The parser associated entry class for the
|
52
|
+
# work/collection/file_set.
|
53
|
+
#
|
54
|
+
# @note The order of what we yield has been previously determined.
|
55
|
+
def each
|
56
|
+
counter = 0
|
57
|
+
|
58
|
+
works.each do |work|
|
59
|
+
break if limit_reached?(limit, counter)
|
60
|
+
yield(work.fetch('id'), work_entry_class)
|
61
|
+
counter += 1
|
62
|
+
end
|
63
|
+
|
64
|
+
return if limit_reached?(limit, counter)
|
65
|
+
|
66
|
+
collections.each do |collection|
|
67
|
+
break if limit_reached?(limit, counter)
|
68
|
+
yield(collection.fetch('id'), collection_entry_class)
|
69
|
+
counter += 1
|
70
|
+
end
|
71
|
+
|
72
|
+
return if limit_reached?(limit, counter)
|
73
|
+
|
74
|
+
file_sets.each do |file_set|
|
75
|
+
break if limit_reached?(limit, counter)
|
76
|
+
yield(file_set.fetch('id'), file_set_entry_class)
|
77
|
+
counter += 1
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
# Why call these candidates and not the actual file_set_ids? Because of implementation
|
84
|
+
# details of Hyrax. What are those details? The upstream application (as of v2.9.x) puts
|
85
|
+
# child works into the `file_set_ids_ssim` field. So we have a mix of file sets and works in
|
86
|
+
# that property.
|
87
|
+
#
|
88
|
+
# @see #file_sets
|
89
|
+
def candidate_file_set_ids
|
90
|
+
@candidate_file_set_ids ||= works.flat_map { |work| work.fetch("#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", []) }
|
91
|
+
end
|
92
|
+
|
93
|
+
# @note Specifically not memoizing this so we can merge values without changing the object.
|
94
|
+
#
|
95
|
+
# No sense attempting to query for more than the limit.
|
96
|
+
def query_kwargs
|
97
|
+
{ fl: "id,#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", method: :post, rows: row_limit }
|
98
|
+
end
|
99
|
+
|
100
|
+
# If we have a limit, we need not query beyond that limit
|
101
|
+
def row_limit
|
102
|
+
return 2_147_483_647 if limit.zero?
|
103
|
+
limit
|
104
|
+
end
|
105
|
+
|
106
|
+
def limit
|
107
|
+
parser.limit.to_i
|
108
|
+
end
|
109
|
+
|
110
|
+
alias works_query_kwargs query_kwargs
|
111
|
+
alias collections_query_kwargs query_kwargs
|
112
|
+
|
113
|
+
def extra_filters
|
114
|
+
output = ""
|
115
|
+
if importerexporter.start_date.present?
|
116
|
+
start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
|
117
|
+
finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
|
118
|
+
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
119
|
+
end
|
120
|
+
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
121
|
+
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
122
|
+
output
|
123
|
+
end
|
124
|
+
|
125
|
+
def works
|
126
|
+
@works ||= ActiveFedora::SolrService.query(works_query, **works_query_kwargs)
|
127
|
+
end
|
128
|
+
|
129
|
+
def collections
|
130
|
+
@collections ||= if collections_query
|
131
|
+
ActiveFedora::SolrService.query(collections_query, **collections_query_kwargs)
|
132
|
+
else
|
133
|
+
[]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
SOLR_QUERY_PAGE_SIZE = 512
|
138
|
+
|
139
|
+
# @note In most cases, when we don't have any candidate file sets, there is no need to query SOLR.
|
140
|
+
#
|
141
|
+
# @see Bulkrax::ParserExportRecordSet::Importer#file_sets
|
142
|
+
#
|
143
|
+
# Why can't we just use the candidate_file_set_ids? Because Hyrax is pushing child works into the
|
144
|
+
# `file_set_ids_ssim` field.
|
145
|
+
#
|
146
|
+
# For v2.9.x of Hyrax; perhaps this is resolved.
|
147
|
+
#
|
148
|
+
# @see https://github.com/scientist-softserv/britishlibrary/issues/289
|
149
|
+
# @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
|
150
|
+
def file_sets
|
151
|
+
@file_sets ||= if candidate_file_set_ids.empty?
|
152
|
+
[]
|
153
|
+
else
|
154
|
+
results = []
|
155
|
+
candidate_file_set_ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |ids|
|
156
|
+
fsq = "has_model_ssim:#{Bulkrax.file_model_class} AND id:(\"" + ids.join('" OR "') + "\")"
|
157
|
+
fsq += extra_filters if extra_filters.present?
|
158
|
+
results += ActiveFedora::SolrService.query(
|
159
|
+
fsq,
|
160
|
+
{ fl: "id", method: :post, rows: ids.size }
|
161
|
+
)
|
162
|
+
end
|
163
|
+
results
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def solr_name(base_name)
|
168
|
+
if Module.const_defined?(:Solrizer)
|
169
|
+
::Solrizer.solr_name(base_name)
|
170
|
+
else
|
171
|
+
::ActiveFedora.index_field_mapper.solr_name(base_name)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
class All < Base
|
177
|
+
def works_query
|
178
|
+
"has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')}) #{extra_filters}"
|
179
|
+
end
|
180
|
+
|
181
|
+
def collections_query
|
182
|
+
"has_model_ssim:Collection #{extra_filters}"
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
class Collection < Base
|
187
|
+
def works_query
|
188
|
+
"member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
|
189
|
+
"has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
|
190
|
+
end
|
191
|
+
|
192
|
+
def collections_query
|
193
|
+
"(id:#{importerexporter.export_source} #{extra_filters}) OR " \
|
194
|
+
"(has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
class Worktype < Base
|
199
|
+
def works_query
|
200
|
+
"has_model_ssim:#{importerexporter.export_source} #{extra_filters}"
|
201
|
+
end
|
202
|
+
|
203
|
+
def collections_query
|
204
|
+
nil
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
class Importer < Base
|
209
|
+
private
|
210
|
+
|
211
|
+
delegate :work_identifier, to: :parser
|
212
|
+
private :work_identifier
|
213
|
+
|
214
|
+
def extra_filters
|
215
|
+
'*:*' + super
|
216
|
+
end
|
217
|
+
|
218
|
+
def complete_entry_identifiers
|
219
|
+
@complete_entry_identifiers ||=
|
220
|
+
begin
|
221
|
+
entry_ids ||= Bulkrax::Importer.find(importerexporter.export_source).entries.pluck(:id)
|
222
|
+
complete_statuses ||= Bulkrax::Status.latest_by_statusable
|
223
|
+
.includes(:statusable)
|
224
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
225
|
+
|
226
|
+
complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
def works_query_kwargs
|
231
|
+
query_kwargs.merge(
|
232
|
+
fq: [
|
233
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
234
|
+
"has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
|
235
|
+
],
|
236
|
+
fl: 'id'
|
237
|
+
)
|
238
|
+
end
|
239
|
+
|
240
|
+
def works_query
|
241
|
+
extra_filters.to_s
|
242
|
+
end
|
243
|
+
|
244
|
+
def collections_query_kwargs
|
245
|
+
query_kwargs.merge(
|
246
|
+
fq: [
|
247
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
248
|
+
"has_model_ssim:Collection"
|
249
|
+
],
|
250
|
+
fl: 'id'
|
251
|
+
)
|
252
|
+
end
|
253
|
+
|
254
|
+
def collections_query
|
255
|
+
"has_model_ssim:Collection #{extra_filters}"
|
256
|
+
end
|
257
|
+
|
258
|
+
# This is an exception; we don't know how many candidate file sets there might be. So we will instead
|
259
|
+
# make the query (assuming that there are {#complete_entry_identifiers}).
|
260
|
+
#
|
261
|
+
# @see Bulkrax::ParserExportRecordSet::Base#file_sets
|
262
|
+
def file_sets
|
263
|
+
@file_sets ||= ActiveFedora::SolrService.query(file_sets_query, **file_sets_query_kwargs)
|
264
|
+
end
|
265
|
+
|
266
|
+
def file_sets_query_kwargs
|
267
|
+
query_kwargs.merge(
|
268
|
+
fq: [
|
269
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
270
|
+
"has_model_ssim:#{Bulkrax.file_model_class}"
|
271
|
+
],
|
272
|
+
fl: 'id'
|
273
|
+
)
|
274
|
+
end
|
275
|
+
|
276
|
+
def file_sets_query
|
277
|
+
extra_filters
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
281
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'marcel'
|
2
3
|
|
3
4
|
module Bulkrax
|
4
5
|
class XmlParser < ApplicationParser
|
@@ -31,7 +32,7 @@ module Bulkrax
|
|
31
32
|
raise StandardError, 'No records found' if records.blank?
|
32
33
|
true
|
33
34
|
rescue StandardError => e
|
34
|
-
|
35
|
+
set_status_info(e)
|
35
36
|
false
|
36
37
|
end
|
37
38
|
|
@@ -78,16 +79,19 @@ module Bulkrax
|
|
78
79
|
# Otherwise return all xml files in the given folder
|
79
80
|
def metadata_paths
|
80
81
|
@metadata_paths ||=
|
81
|
-
if file? &&
|
82
|
+
if file? && good_file_type?(import_file_path)
|
82
83
|
[import_file_path]
|
83
84
|
else
|
84
85
|
file_paths.select do |f|
|
85
|
-
|
86
|
-
f.include?("import_#{importerexporter.id}")
|
86
|
+
good_file_type?(f) && f.include?("import_#{importerexporter.id}")
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
def good_file_type?(path)
|
92
|
+
%w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml')
|
93
|
+
end
|
94
|
+
|
91
95
|
def create_works
|
92
96
|
records.each_with_index do |record, index|
|
93
97
|
next unless record_has_source_identifier(record, index)
|
@@ -104,7 +108,7 @@ module Bulkrax
|
|
104
108
|
end
|
105
109
|
importer.record_status
|
106
110
|
rescue StandardError => e
|
107
|
-
|
111
|
+
set_status_info(e)
|
108
112
|
end
|
109
113
|
|
110
114
|
def total
|
@@ -55,7 +55,7 @@ module Bulkrax
|
|
55
55
|
progress_bar.increment
|
56
56
|
|
57
57
|
obj = entry.factory.find
|
58
|
-
next if obj.is_a?(
|
58
|
+
next if obj.is_a?(Bulkrax.file_model_class) # FileSets must be attached to a Work
|
59
59
|
|
60
60
|
if obj.is_a?(Collection)
|
61
61
|
remove_relationships_from_collection(obj)
|
@@ -63,7 +63,7 @@ module Bulkrax
|
|
63
63
|
remove_relationships_from_work(obj)
|
64
64
|
end
|
65
65
|
|
66
|
-
obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
66
|
+
obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) if defined?(Hyrax)
|
67
67
|
obj.save!
|
68
68
|
end
|
69
69
|
end
|
@@ -75,6 +75,8 @@ module Bulkrax
|
|
75
75
|
work.save! if change.present?
|
76
76
|
end
|
77
77
|
|
78
|
+
return if defined?(Hyrax)
|
79
|
+
|
78
80
|
# Remove parent collection relationships
|
79
81
|
collection.member_of_collections.each do |parent_col|
|
80
82
|
Hyrax::Collections::NestedCollectionPersistenceService
|
@@ -48,7 +48,7 @@
|
|
48
48
|
<% record = @entry&.hyrax_record %>
|
49
49
|
<% if record.present? && @entry.factory_class %>
|
50
50
|
<strong><%= record.class.to_s %> Link:</strong>
|
51
|
-
<% if record.is_a?(Collection) %>
|
51
|
+
<% if defined?(Collection) && record.is_a?(Collection) %>
|
52
52
|
<%= link_to record.class.to_s, hyrax.polymorphic_path(record) %>
|
53
53
|
<% else %>
|
54
54
|
<%= link_to record.class.to_s, main_app.polymorphic_path(record) %>
|