bulkrax 5.0.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/bulkrax/entries_controller.rb +4 -2
- data/app/controllers/bulkrax/exporters_controller.rb +13 -9
- data/app/controllers/bulkrax/importers_controller.rb +10 -10
- data/app/helpers/bulkrax/application_helper.rb +1 -1
- data/app/helpers/bulkrax/importers_helper.rb +2 -2
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +78 -59
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/export_work_job.rb +2 -2
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +20 -7
- data/app/jobs/bulkrax/importer_job.rb +2 -2
- data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
- data/app/matchers/bulkrax/application_matcher.rb +1 -0
- data/app/models/bulkrax/csv_entry.rb +93 -24
- data/app/models/bulkrax/exporter.rb +18 -19
- data/app/models/bulkrax/importer.rb +5 -5
- data/app/models/bulkrax/importer_run.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +14 -2
- data/app/models/bulkrax/pending_relationship.rb +4 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +3 -1
- data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
- data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
- data/app/models/concerns/bulkrax/status_info.rb +9 -4
- data/app/parsers/bulkrax/application_parser.rb +14 -16
- data/app/parsers/bulkrax/bagit_parser.rb +6 -17
- data/app/parsers/bulkrax/csv_parser.rb +43 -111
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
- data/app/parsers/bulkrax/xml_parser.rb +9 -5
- data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
- data/app/views/bulkrax/entries/show.html.erb +1 -1
- data/app/views/bulkrax/exporters/_form.html.erb +60 -45
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/index.html.erb +2 -2
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +3 -3
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
- data/app/views/bulkrax/importers/_form.html.erb +5 -5
- data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
- data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
- data/app/views/bulkrax/importers/show.html.erb +18 -16
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
- data/config/locales/bulkrax.en.yml +26 -0
- data/lib/bulkrax/entry_spec_helper.rb +190 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +124 -45
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
- data/lib/tasks/reset.rake +1 -1
- metadata +5 -3
@@ -84,23 +84,31 @@ module Bulkrax
|
|
84
84
|
@import_fields ||= records.inject(:merge).keys.compact.uniq
|
85
85
|
end
|
86
86
|
|
87
|
-
def required_elements?(
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
87
|
+
def required_elements?(record)
|
88
|
+
missing_elements(record).blank?
|
89
|
+
end
|
90
|
+
|
91
|
+
def missing_elements(record)
|
92
|
+
keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
|
93
|
+
keys = []
|
94
|
+
# Because we're persisting the mapping in the database, these are likely string keys.
|
95
|
+
# However, there's no guarantee. So, we need to ensure that by running stringify.
|
96
|
+
importerexporter.mapping.stringify_keys.map do |k, v|
|
97
|
+
Array.wrap(v['from']).each do |vf|
|
98
|
+
keys << k if keys_from_record.include?(vf)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
|
94
102
|
end
|
95
103
|
|
96
104
|
def valid_import?
|
97
|
-
|
98
|
-
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(
|
99
|
-
raise StandardError, error_alert unless required_elements?(
|
105
|
+
compressed_record = records.flat_map(&:to_a).partition { |_, v| !v }.flatten(1).to_h
|
106
|
+
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
|
107
|
+
raise StandardError, error_alert unless required_elements?(compressed_record)
|
100
108
|
|
101
109
|
file_paths.is_a?(Array)
|
102
110
|
rescue StandardError => e
|
103
|
-
|
111
|
+
set_status_info(e)
|
104
112
|
false
|
105
113
|
end
|
106
114
|
|
@@ -140,7 +148,7 @@ module Bulkrax
|
|
140
148
|
end
|
141
149
|
true
|
142
150
|
rescue StandardError => e
|
143
|
-
|
151
|
+
set_status_info(e)
|
144
152
|
end
|
145
153
|
|
146
154
|
def create_entry_and_job(current_record, type)
|
@@ -167,102 +175,17 @@ module Bulkrax
|
|
167
175
|
path
|
168
176
|
end
|
169
177
|
|
170
|
-
def
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
176
|
-
end
|
177
|
-
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
178
|
-
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
179
|
-
output
|
180
|
-
end
|
181
|
-
|
182
|
-
def current_work_ids
|
183
|
-
ActiveSupport::Deprecation.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
|
184
|
-
current_record_ids
|
185
|
-
end
|
186
|
-
|
187
|
-
# rubocop:disable Metrics/AbcSize
|
188
|
-
def current_record_ids
|
189
|
-
@work_ids = []
|
190
|
-
@collection_ids = []
|
191
|
-
@file_set_ids = []
|
192
|
-
|
193
|
-
case importerexporter.export_from
|
194
|
-
when 'all'
|
195
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
196
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
|
-
when 'collection'
|
199
|
-
@work_ids = ActiveFedora::SolrService.query(
|
200
|
-
"member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
|
201
|
-
).map(&:id)
|
202
|
-
# get the parent collection and child collections
|
203
|
-
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
204
|
-
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
205
|
-
rows: 2_147_483_647).map(&:id)
|
206
|
-
find_child_file_sets(@work_ids)
|
207
|
-
when 'worktype'
|
208
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
209
|
-
find_child_file_sets(@work_ids)
|
210
|
-
when 'importer'
|
211
|
-
set_ids_for_exporting_from_importer
|
212
|
-
end
|
213
|
-
|
214
|
-
@work_ids + @collection_ids + @file_set_ids
|
215
|
-
end
|
216
|
-
# rubocop:enable Metrics/AbcSize
|
217
|
-
|
218
|
-
# find the related file set ids so entries can be made for export
|
219
|
-
def find_child_file_sets(work_ids)
|
220
|
-
work_ids.each do |id|
|
221
|
-
ActiveFedora::Base.find(id).file_set_ids.each { |fs_id| @file_set_ids << fs_id }
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
# Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
|
226
|
-
# @see #current_record_ids
|
227
|
-
def set_ids_for_exporting_from_importer
|
228
|
-
entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
|
229
|
-
complete_statuses = Status.latest_by_statusable
|
230
|
-
.includes(:statusable)
|
231
|
-
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
232
|
-
|
233
|
-
complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
234
|
-
extra_filters = extra_filters.presence || '*:*'
|
235
|
-
|
236
|
-
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
237
|
-
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
238
|
-
extra_filters.to_s,
|
239
|
-
fq: [
|
240
|
-
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
241
|
-
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
242
|
-
],
|
243
|
-
fl: 'id',
|
244
|
-
rows: 2_000_000_000
|
245
|
-
)['response']['docs'].map { |obj| obj['id'] })
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
def solr_name(base_name)
|
250
|
-
Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
|
178
|
+
def current_records_for_export
|
179
|
+
@current_records_for_export ||= Bulkrax::ParserExportRecordSet.for(
|
180
|
+
parser: self,
|
181
|
+
export_from: importerexporter.export_from
|
182
|
+
)
|
251
183
|
end
|
252
184
|
|
253
185
|
def create_new_entries
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
this_entry_class = if @collection_ids.include?(id)
|
258
|
-
collection_entry_class
|
259
|
-
elsif @file_set_ids.include?(id)
|
260
|
-
file_set_entry_class
|
261
|
-
else
|
262
|
-
entry_class
|
263
|
-
end
|
264
|
-
new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
|
265
|
-
|
186
|
+
# NOTE: The each method enforces the limit, as it can best optimize the underlying queries.
|
187
|
+
current_records_for_export.each do |id, entry_class|
|
188
|
+
new_entry = find_or_create_entry(entry_class, id, 'Bulkrax::Exporter')
|
266
189
|
begin
|
267
190
|
entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
|
268
191
|
rescue => e
|
@@ -291,16 +214,22 @@ module Bulkrax
|
|
291
214
|
end
|
292
215
|
|
293
216
|
def valid_entry_types
|
294
|
-
[
|
217
|
+
[collection_entry_class.to_s, file_set_entry_class.to_s, entry_class.to_s]
|
295
218
|
end
|
296
219
|
|
297
220
|
# TODO: figure out why using the version of this method that's in the bagit parser
|
298
221
|
# breaks specs for the "if importer?" line
|
299
222
|
def total
|
300
|
-
@total =
|
301
|
-
|
223
|
+
@total =
|
224
|
+
if importer?
|
225
|
+
importer.parser_fields['total'] || 0
|
226
|
+
elsif exporter?
|
227
|
+
limit.to_i.zero? ? current_records_for_export.count : limit.to_i
|
228
|
+
else
|
229
|
+
0
|
230
|
+
end
|
302
231
|
|
303
|
-
return @total
|
232
|
+
return @total
|
304
233
|
rescue StandardError
|
305
234
|
@total = 0
|
306
235
|
end
|
@@ -335,10 +264,13 @@ module Bulkrax
|
|
335
264
|
def write_files
|
336
265
|
require 'open-uri'
|
337
266
|
folder_count = 0
|
267
|
+
# TODO: This is not performant either; unclear how to address, but lower priority as of
|
268
|
+
# <2023-02-21 Tue>.
|
338
269
|
sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
|
339
270
|
.select { |e| valid_entry_types.include?(e.type) }
|
340
271
|
|
341
|
-
|
272
|
+
group_size = limit.to_i.zero? ? total : limit.to_i
|
273
|
+
sorted_entries[0..group_size].in_groups_of(records_split_count, false) do |group|
|
342
274
|
folder_count += 1
|
343
275
|
|
344
276
|
CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
|
@@ -398,7 +330,7 @@ module Bulkrax
|
|
398
330
|
return @object_names if @object_names
|
399
331
|
|
400
332
|
@object_names = mapping.values.map { |value| value['object'] }
|
401
|
-
@object_names.uniq
|
333
|
+
@object_names.uniq!&.delete(nil)
|
402
334
|
|
403
335
|
@object_names
|
404
336
|
end
|
@@ -63,9 +63,9 @@ module Bulkrax
|
|
63
63
|
|
64
64
|
def create_collections
|
65
65
|
metadata = {
|
66
|
-
visibility: 'open'
|
67
|
-
collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
66
|
+
visibility: 'open'
|
68
67
|
}
|
68
|
+
metadata[:collection_type_gid] = Hyrax::CollectionType.find_or_create_default_collection_type.gid if defined?(::Hyrax)
|
69
69
|
|
70
70
|
collections.each_with_index do |set, index|
|
71
71
|
next unless collection_name == 'all' || collection_name == set.spec
|
@@ -0,0 +1,281 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
# This module is responsible for providing the means of querying Solr for the appropriate works,
|
5
|
+
# collections, and file sets for an export of entries.
|
6
|
+
#
|
7
|
+
# @see .for
|
8
|
+
module ParserExportRecordSet
|
9
|
+
# @api public
|
10
|
+
#
|
11
|
+
# A factory method for returning an object that can yield each id and associated entry_class as
|
12
|
+
# well as return the count of objects in the record set.
|
13
|
+
#
|
14
|
+
# @param parser [Bulkrax::ApplicationParser]
|
15
|
+
# @param export_from [String]
|
16
|
+
#
|
17
|
+
# @return [#each, #count] An object, likely a descendant of
|
18
|
+
# {Bulkrax::ParserExportRecordSet::Base} that responds to {Base#count} and
|
19
|
+
# {Base#each}.
|
20
|
+
def self.for(parser:, export_from:)
|
21
|
+
"Bulkrax::ParserExportRecordSet::#{export_from.classify}".constantize.new(parser: parser)
|
22
|
+
end
|
23
|
+
|
24
|
+
# @abstract
|
25
|
+
#
|
26
|
+
# @note This has {#each} and {#count} but is not an Enumerable. But because it has these two
|
27
|
+
# methods that echo {Array}, we can do some lovely mocking and stubbing in those classes
|
28
|
+
# dependent on this file. :)
|
29
|
+
class Base
|
30
|
+
def initialize(parser:)
|
31
|
+
@parser = parser
|
32
|
+
end
|
33
|
+
attr_reader :parser
|
34
|
+
private :parser
|
35
|
+
|
36
|
+
delegate :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter, to: :parser
|
37
|
+
private :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter
|
38
|
+
|
39
|
+
# @return [Integer]
|
40
|
+
def count
|
41
|
+
sum = works.count + collections.count + file_sets.count
|
42
|
+
return sum if limit.zero?
|
43
|
+
return limit if sum > limit
|
44
|
+
return sum
|
45
|
+
end
|
46
|
+
|
47
|
+
# Yield first the works, then collections, then file sets. Once we've yielded as many times
|
48
|
+
# as the parser's limit, we break the iteration and return.
|
49
|
+
#
|
50
|
+
# @yieldparam id [String] The ID of the work/collection/file_set
|
51
|
+
# @yieldparam entry_class [Class] The parser associated entry class for the
|
52
|
+
# work/collection/file_set.
|
53
|
+
#
|
54
|
+
# @note The order of what we yield has been previously determined.
|
55
|
+
def each
|
56
|
+
counter = 0
|
57
|
+
|
58
|
+
works.each do |work|
|
59
|
+
break if limit_reached?(limit, counter)
|
60
|
+
yield(work.fetch('id'), work_entry_class)
|
61
|
+
counter += 1
|
62
|
+
end
|
63
|
+
|
64
|
+
return if limit_reached?(limit, counter)
|
65
|
+
|
66
|
+
collections.each do |collection|
|
67
|
+
break if limit_reached?(limit, counter)
|
68
|
+
yield(collection.fetch('id'), collection_entry_class)
|
69
|
+
counter += 1
|
70
|
+
end
|
71
|
+
|
72
|
+
return if limit_reached?(limit, counter)
|
73
|
+
|
74
|
+
file_sets.each do |file_set|
|
75
|
+
break if limit_reached?(limit, counter)
|
76
|
+
yield(file_set.fetch('id'), file_set_entry_class)
|
77
|
+
counter += 1
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
# Why call these candidates and not the actual file_set_ids? Because of implementation
|
84
|
+
# details of Hyrax. What are those details? The upstream application (as of v2.9.x) puts
|
85
|
+
# child works into the `file_set_ids_ssim` field. So we have a mix of file sets and works in
|
86
|
+
# that property.
|
87
|
+
#
|
88
|
+
# @see #file_sets
|
89
|
+
def candidate_file_set_ids
|
90
|
+
@candidate_file_set_ids ||= works.flat_map { |work| work.fetch("#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", []) }
|
91
|
+
end
|
92
|
+
|
93
|
+
# @note Specifically not memoizing this so we can merge values without changing the object.
|
94
|
+
#
|
95
|
+
# No sense attempting to query for more than the limit.
|
96
|
+
def query_kwargs
|
97
|
+
{ fl: "id,#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", method: :post, rows: row_limit }
|
98
|
+
end
|
99
|
+
|
100
|
+
# If we have a limit, we need not query beyond that limit
|
101
|
+
def row_limit
|
102
|
+
return 2_147_483_647 if limit.zero?
|
103
|
+
limit
|
104
|
+
end
|
105
|
+
|
106
|
+
def limit
|
107
|
+
parser.limit.to_i
|
108
|
+
end
|
109
|
+
|
110
|
+
alias works_query_kwargs query_kwargs
|
111
|
+
alias collections_query_kwargs query_kwargs
|
112
|
+
|
113
|
+
def extra_filters
|
114
|
+
output = ""
|
115
|
+
if importerexporter.start_date.present?
|
116
|
+
start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
|
117
|
+
finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
|
118
|
+
output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
|
119
|
+
end
|
120
|
+
output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
|
121
|
+
output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
|
122
|
+
output
|
123
|
+
end
|
124
|
+
|
125
|
+
def works
|
126
|
+
@works ||= ActiveFedora::SolrService.query(works_query, **works_query_kwargs)
|
127
|
+
end
|
128
|
+
|
129
|
+
def collections
|
130
|
+
@collections ||= if collections_query
|
131
|
+
ActiveFedora::SolrService.query(collections_query, **collections_query_kwargs)
|
132
|
+
else
|
133
|
+
[]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
SOLR_QUERY_PAGE_SIZE = 512
|
138
|
+
|
139
|
+
# @note In most cases, when we don't have any candidate file sets, there is no need to query SOLR.
|
140
|
+
#
|
141
|
+
# @see Bulkrax::ParserExportRecordSet::Importer#file_sets
|
142
|
+
#
|
143
|
+
# Why can't we just use the candidate_file_set_ids? Because Hyrax is pushing child works into the
|
144
|
+
# `file_set_ids_ssim` field.
|
145
|
+
#
|
146
|
+
# For v2.9.x of Hyrax; perhaps this is resolved.
|
147
|
+
#
|
148
|
+
# @see https://github.com/scientist-softserv/britishlibrary/issues/289
|
149
|
+
# @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
|
150
|
+
def file_sets
|
151
|
+
@file_sets ||= if candidate_file_set_ids.empty?
|
152
|
+
[]
|
153
|
+
else
|
154
|
+
results = []
|
155
|
+
candidate_file_set_ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |ids|
|
156
|
+
fsq = "has_model_ssim:#{Bulkrax.file_model_class} AND id:(\"" + ids.join('" OR "') + "\")"
|
157
|
+
fsq += extra_filters if extra_filters.present?
|
158
|
+
results += ActiveFedora::SolrService.query(
|
159
|
+
fsq,
|
160
|
+
{ fl: "id", method: :post, rows: ids.size }
|
161
|
+
)
|
162
|
+
end
|
163
|
+
results
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def solr_name(base_name)
|
168
|
+
if Module.const_defined?(:Solrizer)
|
169
|
+
::Solrizer.solr_name(base_name)
|
170
|
+
else
|
171
|
+
::ActiveFedora.index_field_mapper.solr_name(base_name)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
class All < Base
|
177
|
+
def works_query
|
178
|
+
"has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')}) #{extra_filters}"
|
179
|
+
end
|
180
|
+
|
181
|
+
def collections_query
|
182
|
+
"has_model_ssim:Collection #{extra_filters}"
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
class Collection < Base
|
187
|
+
def works_query
|
188
|
+
"member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
|
189
|
+
"has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
|
190
|
+
end
|
191
|
+
|
192
|
+
def collections_query
|
193
|
+
"(id:#{importerexporter.export_source} #{extra_filters}) OR " \
|
194
|
+
"(has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
class Worktype < Base
|
199
|
+
def works_query
|
200
|
+
"has_model_ssim:#{importerexporter.export_source} #{extra_filters}"
|
201
|
+
end
|
202
|
+
|
203
|
+
def collections_query
|
204
|
+
nil
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
class Importer < Base
|
209
|
+
private
|
210
|
+
|
211
|
+
delegate :work_identifier, to: :parser
|
212
|
+
private :work_identifier
|
213
|
+
|
214
|
+
def extra_filters
|
215
|
+
'*:*' + super
|
216
|
+
end
|
217
|
+
|
218
|
+
def complete_entry_identifiers
|
219
|
+
@complete_entry_identifiers ||=
|
220
|
+
begin
|
221
|
+
entry_ids ||= Bulkrax::Importer.find(importerexporter.export_source).entries.pluck(:id)
|
222
|
+
complete_statuses ||= Bulkrax::Status.latest_by_statusable
|
223
|
+
.includes(:statusable)
|
224
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
|
225
|
+
|
226
|
+
complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
def works_query_kwargs
|
231
|
+
query_kwargs.merge(
|
232
|
+
fq: [
|
233
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
234
|
+
"has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
|
235
|
+
],
|
236
|
+
fl: 'id'
|
237
|
+
)
|
238
|
+
end
|
239
|
+
|
240
|
+
def works_query
|
241
|
+
extra_filters.to_s
|
242
|
+
end
|
243
|
+
|
244
|
+
def collections_query_kwargs
|
245
|
+
query_kwargs.merge(
|
246
|
+
fq: [
|
247
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
248
|
+
"has_model_ssim:Collection"
|
249
|
+
],
|
250
|
+
fl: 'id'
|
251
|
+
)
|
252
|
+
end
|
253
|
+
|
254
|
+
def collections_query
|
255
|
+
"has_model_ssim:Collection #{extra_filters}"
|
256
|
+
end
|
257
|
+
|
258
|
+
# This is an exception; we don't know how many candidate file sets there might be. So we will instead
|
259
|
+
# make the query (assuming that there are {#complete_entry_identifiers}).
|
260
|
+
#
|
261
|
+
# @see Bulkrax::ParserExportRecordSet::Base#file_sets
|
262
|
+
def file_sets
|
263
|
+
@file_sets ||= ActiveFedora::SolrService.query(file_sets_query, **file_sets_query_kwargs)
|
264
|
+
end
|
265
|
+
|
266
|
+
def file_sets_query_kwargs
|
267
|
+
query_kwargs.merge(
|
268
|
+
fq: [
|
269
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
270
|
+
"has_model_ssim:#{Bulkrax.file_model_class}"
|
271
|
+
],
|
272
|
+
fl: 'id'
|
273
|
+
)
|
274
|
+
end
|
275
|
+
|
276
|
+
def file_sets_query
|
277
|
+
extra_filters
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
281
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'marcel'
|
2
3
|
|
3
4
|
module Bulkrax
|
4
5
|
class XmlParser < ApplicationParser
|
@@ -31,7 +32,7 @@ module Bulkrax
|
|
31
32
|
raise StandardError, 'No records found' if records.blank?
|
32
33
|
true
|
33
34
|
rescue StandardError => e
|
34
|
-
|
35
|
+
set_status_info(e)
|
35
36
|
false
|
36
37
|
end
|
37
38
|
|
@@ -78,16 +79,19 @@ module Bulkrax
|
|
78
79
|
# Otherwise return all xml files in the given folder
|
79
80
|
def metadata_paths
|
80
81
|
@metadata_paths ||=
|
81
|
-
if file? &&
|
82
|
+
if file? && good_file_type?(import_file_path)
|
82
83
|
[import_file_path]
|
83
84
|
else
|
84
85
|
file_paths.select do |f|
|
85
|
-
|
86
|
-
f.include?("import_#{importerexporter.id}")
|
86
|
+
good_file_type?(f) && f.include?("import_#{importerexporter.id}")
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
def good_file_type?(path)
|
92
|
+
%w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml')
|
93
|
+
end
|
94
|
+
|
91
95
|
def create_works
|
92
96
|
records.each_with_index do |record, index|
|
93
97
|
next unless record_has_source_identifier(record, index)
|
@@ -104,7 +108,7 @@ module Bulkrax
|
|
104
108
|
end
|
105
109
|
importer.record_status
|
106
110
|
rescue StandardError => e
|
107
|
-
|
111
|
+
set_status_info(e)
|
108
112
|
end
|
109
113
|
|
110
114
|
def total
|
@@ -55,7 +55,7 @@ module Bulkrax
|
|
55
55
|
progress_bar.increment
|
56
56
|
|
57
57
|
obj = entry.factory.find
|
58
|
-
next if obj.is_a?(
|
58
|
+
next if obj.is_a?(Bulkrax.file_model_class) # FileSets must be attached to a Work
|
59
59
|
|
60
60
|
if obj.is_a?(Collection)
|
61
61
|
remove_relationships_from_collection(obj)
|
@@ -63,7 +63,7 @@ module Bulkrax
|
|
63
63
|
remove_relationships_from_work(obj)
|
64
64
|
end
|
65
65
|
|
66
|
-
obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
66
|
+
obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) if defined?(Hyrax)
|
67
67
|
obj.save!
|
68
68
|
end
|
69
69
|
end
|
@@ -75,6 +75,8 @@ module Bulkrax
|
|
75
75
|
work.save! if change.present?
|
76
76
|
end
|
77
77
|
|
78
|
+
return if defined?(Hyrax)
|
79
|
+
|
78
80
|
# Remove parent collection relationships
|
79
81
|
collection.member_of_collections.each do |parent_col|
|
80
82
|
Hyrax::Collections::NestedCollectionPersistenceService
|
@@ -48,7 +48,7 @@
|
|
48
48
|
<% record = @entry&.hyrax_record %>
|
49
49
|
<% if record.present? && @entry.factory_class %>
|
50
50
|
<strong><%= record.class.to_s %> Link:</strong>
|
51
|
-
<% if record.is_a?(Collection) %>
|
51
|
+
<% if defined?(Collection) && record.is_a?(Collection) %>
|
52
52
|
<%= link_to record.class.to_s, hyrax.polymorphic_path(record) %>
|
53
53
|
<% else %>
|
54
54
|
<%= link_to record.class.to_s, main_app.polymorphic_path(record) %>
|