bulkrax 5.0.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/app/controllers/bulkrax/entries_controller.rb +4 -2
  3. data/app/controllers/bulkrax/exporters_controller.rb +13 -9
  4. data/app/controllers/bulkrax/importers_controller.rb +10 -10
  5. data/app/helpers/bulkrax/application_helper.rb +1 -1
  6. data/app/helpers/bulkrax/importers_helper.rb +2 -2
  7. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  8. data/app/jobs/bulkrax/create_relationships_job.rb +78 -59
  9. data/app/jobs/bulkrax/delete_job.rb +1 -1
  10. data/app/jobs/bulkrax/export_work_job.rb +2 -2
  11. data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
  12. data/app/jobs/bulkrax/import_work_job.rb +20 -7
  13. data/app/jobs/bulkrax/importer_job.rb +2 -2
  14. data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
  15. data/app/matchers/bulkrax/application_matcher.rb +1 -0
  16. data/app/models/bulkrax/csv_entry.rb +93 -24
  17. data/app/models/bulkrax/exporter.rb +18 -19
  18. data/app/models/bulkrax/importer.rb +5 -5
  19. data/app/models/bulkrax/importer_run.rb +6 -0
  20. data/app/models/bulkrax/oai_entry.rb +14 -2
  21. data/app/models/bulkrax/pending_relationship.rb +4 -0
  22. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +3 -1
  23. data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
  24. data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
  25. data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
  26. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
  27. data/app/models/concerns/bulkrax/status_info.rb +9 -4
  28. data/app/parsers/bulkrax/application_parser.rb +14 -16
  29. data/app/parsers/bulkrax/bagit_parser.rb +6 -17
  30. data/app/parsers/bulkrax/csv_parser.rb +43 -111
  31. data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
  32. data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
  33. data/app/parsers/bulkrax/xml_parser.rb +9 -5
  34. data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
  35. data/app/views/bulkrax/entries/show.html.erb +1 -1
  36. data/app/views/bulkrax/exporters/_form.html.erb +60 -45
  37. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  38. data/app/views/bulkrax/exporters/index.html.erb +2 -2
  39. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  40. data/app/views/bulkrax/exporters/show.html.erb +3 -3
  41. data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
  42. data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
  43. data/app/views/bulkrax/importers/_form.html.erb +5 -5
  44. data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
  45. data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
  46. data/app/views/bulkrax/importers/show.html.erb +18 -16
  47. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
  48. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
  49. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
  50. data/config/locales/bulkrax.en.yml +26 -0
  51. data/lib/bulkrax/entry_spec_helper.rb +190 -0
  52. data/lib/bulkrax/version.rb +1 -1
  53. data/lib/bulkrax.rb +124 -45
  54. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
  55. data/lib/tasks/reset.rake +1 -1
  56. metadata +5 -3
@@ -84,23 +84,31 @@ module Bulkrax
84
84
  @import_fields ||= records.inject(:merge).keys.compact.uniq
85
85
  end
86
86
 
87
- def required_elements?(keys)
88
- return if keys.blank?
89
- missing_elements(keys).blank?
90
- end
91
-
92
- def missing_elements(keys)
93
- required_elements.map(&:to_s) - keys.map(&:to_s)
87
+ def required_elements?(record)
88
+ missing_elements(record).blank?
89
+ end
90
+
91
+ def missing_elements(record)
92
+ keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
93
+ keys = []
94
+ # Because we're persisting the mapping in the database, these are likely string keys.
95
+ # However, there's no guarantee. So, we need to ensure that by running stringify.
96
+ importerexporter.mapping.stringify_keys.map do |k, v|
97
+ Array.wrap(v['from']).each do |vf|
98
+ keys << k if keys_from_record.include?(vf)
99
+ end
100
+ end
101
+ required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
94
102
  end
95
103
 
96
104
  def valid_import?
97
- import_strings = keys_without_numbers(import_fields.map(&:to_s))
98
- error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_strings).join(', ')}"
99
- raise StandardError, error_alert unless required_elements?(import_strings)
105
+ compressed_record = records.flat_map(&:to_a).partition { |_, v| !v }.flatten(1).to_h
106
+ error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
107
+ raise StandardError, error_alert unless required_elements?(compressed_record)
100
108
 
101
109
  file_paths.is_a?(Array)
102
110
  rescue StandardError => e
103
- status_info(e)
111
+ set_status_info(e)
104
112
  false
105
113
  end
106
114
 
@@ -140,7 +148,7 @@ module Bulkrax
140
148
  end
141
149
  true
142
150
  rescue StandardError => e
143
- status_info(e)
151
+ set_status_info(e)
144
152
  end
145
153
 
146
154
  def create_entry_and_job(current_record, type)
@@ -167,102 +175,17 @@ module Bulkrax
167
175
  path
168
176
  end
169
177
 
170
- def extra_filters
171
- output = ""
172
- if importerexporter.start_date.present?
173
- start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
174
- finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
175
- output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
176
- end
177
- output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
178
- output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
179
- output
180
- end
181
-
182
- def current_work_ids
183
- ActiveSupport::Deprecation.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
184
- current_record_ids
185
- end
186
-
187
- # rubocop:disable Metrics/AbcSize
188
- def current_record_ids
189
- @work_ids = []
190
- @collection_ids = []
191
- @file_set_ids = []
192
-
193
- case importerexporter.export_from
194
- when 'all'
195
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
196
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
197
- @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
198
- when 'collection'
199
- @work_ids = ActiveFedora::SolrService.query(
200
- "member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
201
- ).map(&:id)
202
- # get the parent collection and child collections
203
- @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
204
- @collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
205
- rows: 2_147_483_647).map(&:id)
206
- find_child_file_sets(@work_ids)
207
- when 'worktype'
208
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
209
- find_child_file_sets(@work_ids)
210
- when 'importer'
211
- set_ids_for_exporting_from_importer
212
- end
213
-
214
- @work_ids + @collection_ids + @file_set_ids
215
- end
216
- # rubocop:enable Metrics/AbcSize
217
-
218
- # find the related file set ids so entries can be made for export
219
- def find_child_file_sets(work_ids)
220
- work_ids.each do |id|
221
- ActiveFedora::Base.find(id).file_set_ids.each { |fs_id| @file_set_ids << fs_id }
222
- end
223
- end
224
-
225
- # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
226
- # @see #current_record_ids
227
- def set_ids_for_exporting_from_importer
228
- entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
229
- complete_statuses = Status.latest_by_statusable
230
- .includes(:statusable)
231
- .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
232
-
233
- complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
234
- extra_filters = extra_filters.presence || '*:*'
235
-
236
- { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
237
- instance_variable_set(instance_var, ActiveFedora::SolrService.post(
238
- extra_filters.to_s,
239
- fq: [
240
- %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
241
- "has_model_ssim:(#{models_to_search.join(' OR ')})"
242
- ],
243
- fl: 'id',
244
- rows: 2_000_000_000
245
- )['response']['docs'].map { |obj| obj['id'] })
246
- end
247
- end
248
-
249
- def solr_name(base_name)
250
- Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
178
+ def current_records_for_export
179
+ @current_records_for_export ||= Bulkrax::ParserExportRecordSet.for(
180
+ parser: self,
181
+ export_from: importerexporter.export_from
182
+ )
251
183
  end
252
184
 
253
185
  def create_new_entries
254
- current_record_ids.each_with_index do |id, index|
255
- break if limit_reached?(limit, index)
256
-
257
- this_entry_class = if @collection_ids.include?(id)
258
- collection_entry_class
259
- elsif @file_set_ids.include?(id)
260
- file_set_entry_class
261
- else
262
- entry_class
263
- end
264
- new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
265
-
186
+ # NOTE: The each method enforces the limit, as it can best optimize the underlying queries.
187
+ current_records_for_export.each do |id, entry_class|
188
+ new_entry = find_or_create_entry(entry_class, id, 'Bulkrax::Exporter')
266
189
  begin
267
190
  entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
268
191
  rescue => e
@@ -291,16 +214,22 @@ module Bulkrax
291
214
  end
292
215
 
293
216
  def valid_entry_types
294
- ['Bulkrax::CsvCollectionEntry', 'Bulkrax::CsvFileSetEntry', 'Bulkrax::CsvEntry']
217
+ [collection_entry_class.to_s, file_set_entry_class.to_s, entry_class.to_s]
295
218
  end
296
219
 
297
220
  # TODO: figure out why using the version of this method that's in the bagit parser
298
221
  # breaks specs for the "if importer?" line
299
222
  def total
300
- @total = importer.parser_fields['total'] || 0 if importer?
301
- @total = limit || current_record_ids.count if exporter?
223
+ @total =
224
+ if importer?
225
+ importer.parser_fields['total'] || 0
226
+ elsif exporter?
227
+ limit.to_i.zero? ? current_records_for_export.count : limit.to_i
228
+ else
229
+ 0
230
+ end
302
231
 
303
- return @total || 0
232
+ return @total
304
233
  rescue StandardError
305
234
  @total = 0
306
235
  end
@@ -335,10 +264,13 @@ module Bulkrax
335
264
  def write_files
336
265
  require 'open-uri'
337
266
  folder_count = 0
267
+ # TODO: This is not performant as well; unclear how to address, but lower priority as of
268
+ # <2023-02-21 Tue>.
338
269
  sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
339
270
  .select { |e| valid_entry_types.include?(e.type) }
340
271
 
341
- sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
272
+ group_size = limit.to_i.zero? ? total : limit.to_i
273
+ sorted_entries[0..group_size].in_groups_of(records_split_count, false) do |group|
342
274
  folder_count += 1
343
275
 
344
276
  CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
@@ -398,7 +330,7 @@ module Bulkrax
398
330
  return @object_names if @object_names
399
331
 
400
332
  @object_names = mapping.values.map { |value| value['object'] }
401
- @object_names.uniq!.delete(nil)
333
+ @object_names.uniq!&.delete(nil)
402
334
 
403
335
  @object_names
404
336
  end
@@ -63,9 +63,9 @@ module Bulkrax
63
63
 
64
64
  def create_collections
65
65
  metadata = {
66
- visibility: 'open',
67
- collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
66
+ visibility: 'open'
68
67
  }
68
+ metadata[:collection_type_gid] = Hyrax::CollectionType.find_or_create_default_collection_type.gid if defined?(::Hyrax)
69
69
 
70
70
  collections.each_with_index do |set, index|
71
71
  next unless collection_name == 'all' || collection_name == set.spec
@@ -0,0 +1,281 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ # This module is responsible for providing the means of querying Solr for the appropriate works,
5
+ # collections, and file sets for an export of entries.
6
+ #
7
+ # @see .for
8
+ module ParserExportRecordSet
9
+ # @api public
10
+ #
11
+ # A factory method for returning an object that can yield each id and associated entry_class as
12
+ # well as return the count of objects in the record set.
13
+ #
14
+ # @param parser [Bulkrax::ApplicationParser]
15
+ # @param export_from [String]
16
+ #
17
+ # @return [#each, #count] An object, likely a descendant of
18
+ # {Bulkrax::CurrentParserRecordSet::Base} that responds to {Base#count} and
19
+ # {Base#each}.
20
+ def self.for(parser:, export_from:)
21
+ "Bulkrax::ParserExportRecordSet::#{export_from.classify}".constantize.new(parser: parser)
22
+ end
23
+
24
+ # @abstract
25
+ #
26
+ # @note This has {#each} and {#count} but is not an Enumerable. But because it has these two
27
+ # methods that echo {Array}, we can do some lovely mocking and stubbing in those classes
28
+ # dependent on this file. :)
29
+ class Base
30
+ def initialize(parser:)
31
+ @parser = parser
32
+ end
33
+ attr_reader :parser
34
+ private :parser
35
+
36
+ delegate :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter, to: :parser
37
+ private :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter
38
+
39
+ # @return [Integer]
40
+ def count
41
+ sum = works.count + collections.count + file_sets.count
42
+ return sum if limit.zero?
43
+ return limit if sum > limit
44
+ return sum
45
+ end
46
+
47
+ # Yield first the works, then collections, then file sets. Once we've yielded as many times
48
+ # as the parser's limit, we break the iteration and return.
49
+ #
50
+ # @yieldparam id [String] The ID of the work/collection/file_set
51
+ # @yieldparam entry_class [Class] The parser associated entry class for the
52
+ # work/collection/file_set.
53
+ #
54
+ # @note The order of what we yield has been previously determined.
55
+ def each
56
+ counter = 0
57
+
58
+ works.each do |work|
59
+ break if limit_reached?(limit, counter)
60
+ yield(work.fetch('id'), work_entry_class)
61
+ counter += 1
62
+ end
63
+
64
+ return if limit_reached?(limit, counter)
65
+
66
+ collections.each do |collection|
67
+ break if limit_reached?(limit, counter)
68
+ yield(collection.fetch('id'), collection_entry_class)
69
+ counter += 1
70
+ end
71
+
72
+ return if limit_reached?(limit, counter)
73
+
74
+ file_sets.each do |file_set|
75
+ break if limit_reached?(limit, counter)
76
+ yield(file_set.fetch('id'), file_set_entry_class)
77
+ counter += 1
78
+ end
79
+ end
80
+
81
+ private
82
+
83
+ # Why call these candidates and not the actual file_set_ids? Because of implementation
84
+ # details of Hyrax. What are those details? The upstream application (as of v2.9.x) puts
85
+ # child works into the `file_set_ids_ssim` field. So we have a mix of file sets and works in
86
+ # that property.
87
+ #
88
+ # @see #file_sets
89
+ def candidate_file_set_ids
90
+ @candidate_file_set_ids ||= works.flat_map { |work| work.fetch("#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", []) }
91
+ end
92
+
93
+ # @note Specifically not memoizing this so we can merge values without changing the object.
94
+ #
95
+ # No sense attempting to query for more than the limit.
96
+ def query_kwargs
97
+ { fl: "id,#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", method: :post, rows: row_limit }
98
+ end
99
+
100
+ # If we have a limit, we need not query beyond that limit
101
+ def row_limit
102
+ return 2_147_483_647 if limit.zero?
103
+ limit
104
+ end
105
+
106
+ def limit
107
+ parser.limit.to_i
108
+ end
109
+
110
+ alias works_query_kwargs query_kwargs
111
+ alias collections_query_kwargs query_kwargs
112
+
113
+ def extra_filters
114
+ output = ""
115
+ if importerexporter.start_date.present?
116
+ start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
117
+ finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
118
+ output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
119
+ end
120
+ output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
121
+ output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
122
+ output
123
+ end
124
+
125
+ def works
126
+ @works ||= ActiveFedora::SolrService.query(works_query, **works_query_kwargs)
127
+ end
128
+
129
+ def collections
130
+ @collections ||= if collections_query
131
+ ActiveFedora::SolrService.query(collections_query, **collections_query_kwargs)
132
+ else
133
+ []
134
+ end
135
+ end
136
+
137
+ SOLR_QUERY_PAGE_SIZE = 512
138
+
139
+ # @note In most cases, when we don't have any candidate file sets, there is no need to query SOLR.
140
+ #
141
+ # @see Bulkrax::ParserExportRecordSet::Importer#file_sets
142
+ #
143
+ # Why can't we just use the candidate_file_set_ids? Because Hyrax is pushing child works into the
144
+ # `file_set_ids_ssim` field.
145
+ #
146
 + # For v2.9.x of Hyrax; perhaps this is resolved.
147
+ #
148
+ # @see https://github.com/scientist-softserv/britishlibrary/issues/289
149
+ # @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
150
+ def file_sets
151
+ @file_sets ||= if candidate_file_set_ids.empty?
152
+ []
153
+ else
154
+ results = []
155
+ candidate_file_set_ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |ids|
156
+ fsq = "has_model_ssim:#{Bulkrax.file_model_class} AND id:(\"" + ids.join('" OR "') + "\")"
157
+ fsq += extra_filters if extra_filters.present?
158
+ results += ActiveFedora::SolrService.query(
159
+ fsq,
160
+ { fl: "id", method: :post, rows: ids.size }
161
+ )
162
+ end
163
+ results
164
+ end
165
+ end
166
+
167
+ def solr_name(base_name)
168
+ if Module.const_defined?(:Solrizer)
169
+ ::Solrizer.solr_name(base_name)
170
+ else
171
+ ::ActiveFedora.index_field_mapper.solr_name(base_name)
172
+ end
173
+ end
174
+ end
175
+
176
+ class All < Base
177
+ def works_query
178
+ "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')}) #{extra_filters}"
179
+ end
180
+
181
+ def collections_query
182
+ "has_model_ssim:Collection #{extra_filters}"
183
+ end
184
+ end
185
+
186
+ class Collection < Base
187
+ def works_query
188
+ "member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
189
+ "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
190
+ end
191
+
192
+ def collections_query
193
+ "(id:#{importerexporter.export_source} #{extra_filters}) OR " \
194
+ "(has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
195
+ end
196
+ end
197
+
198
+ class Worktype < Base
199
+ def works_query
200
+ "has_model_ssim:#{importerexporter.export_source} #{extra_filters}"
201
+ end
202
+
203
+ def collections_query
204
+ nil
205
+ end
206
+ end
207
+
208
+ class Importer < Base
209
+ private
210
+
211
+ delegate :work_identifier, to: :parser
212
+ private :work_identifier
213
+
214
+ def extra_filters
215
+ '*:*' + super
216
+ end
217
+
218
+ def complete_entry_identifiers
219
+ @complete_entry_identifiers ||=
220
+ begin
221
+ entry_ids ||= Bulkrax::Importer.find(importerexporter.export_source).entries.pluck(:id)
222
+ complete_statuses ||= Bulkrax::Status.latest_by_statusable
223
+ .includes(:statusable)
224
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
225
+
226
+ complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
227
+ end
228
+ end
229
+
230
+ def works_query_kwargs
231
+ query_kwargs.merge(
232
+ fq: [
233
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
234
+ "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
235
+ ],
236
+ fl: 'id'
237
+ )
238
+ end
239
+
240
+ def works_query
241
+ extra_filters.to_s
242
+ end
243
+
244
+ def collections_query_kwargs
245
+ query_kwargs.merge(
246
+ fq: [
247
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
248
+ "has_model_ssim:Collection"
249
+ ],
250
+ fl: 'id'
251
+ )
252
+ end
253
+
254
+ def collections_query
255
+ "has_model_ssim:Collection #{extra_filters}"
256
+ end
257
+
258
+ # This is an exception; we don't know how many candidate file sets there might be. So we will instead
259
+ # make the query (assuming that there are {#complete_entry_identifiers}).
260
+ #
261
+ # @see Bulkrax::ParserExportRecordSet::Base#file_sets
262
+ def file_sets
263
+ @file_sets ||= ActiveFedora::SolrService.query(file_sets_query, **file_sets_query_kwargs)
264
+ end
265
+
266
+ def file_sets_query_kwargs
267
+ query_kwargs.merge(
268
+ fq: [
269
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
270
+ "has_model_ssim:#{Bulkrax.file_model_class}"
271
+ ],
272
+ fl: 'id'
273
+ )
274
+ end
275
+
276
+ def file_sets_query
277
+ extra_filters
278
+ end
279
+ end
280
+ end
281
+ end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'marcel'
2
3
 
3
4
  module Bulkrax
4
5
  class XmlParser < ApplicationParser
@@ -31,7 +32,7 @@ module Bulkrax
31
32
  raise StandardError, 'No records found' if records.blank?
32
33
  true
33
34
  rescue StandardError => e
34
- status_info(e)
35
+ set_status_info(e)
35
36
  false
36
37
  end
37
38
 
@@ -78,16 +79,19 @@ module Bulkrax
78
79
  # Otherwise return all xml files in the given folder
79
80
  def metadata_paths
80
81
  @metadata_paths ||=
81
- if file? && MIME::Types.type_for(import_file_path).include?('application/xml')
82
+ if file? && good_file_type?(import_file_path)
82
83
  [import_file_path]
83
84
  else
84
85
  file_paths.select do |f|
85
- MIME::Types.type_for(f).include?('application/xml') &&
86
- f.include?("import_#{importerexporter.id}")
86
+ good_file_type?(f) && f.include?("import_#{importerexporter.id}")
87
87
  end
88
88
  end
89
89
  end
90
90
 
91
+ def good_file_type?(path)
92
+ %w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml')
93
+ end
94
+
91
95
  def create_works
92
96
  records.each_with_index do |record, index|
93
97
  next unless record_has_source_identifier(record, index)
@@ -104,7 +108,7 @@ module Bulkrax
104
108
  end
105
109
  importer.record_status
106
110
  rescue StandardError => e
107
- status_info(e)
111
+ set_status_info(e)
108
112
  end
109
113
 
110
114
  def total
@@ -55,7 +55,7 @@ module Bulkrax
55
55
  progress_bar.increment
56
56
 
57
57
  obj = entry.factory.find
58
- next if obj.is_a?(FileSet) # FileSets must be attached to a Work
58
+ next if obj.is_a?(Bulkrax.file_model_class) # FileSets must be attached to a Work
59
59
 
60
60
  if obj.is_a?(Collection)
61
61
  remove_relationships_from_collection(obj)
@@ -63,7 +63,7 @@ module Bulkrax
63
63
  remove_relationships_from_work(obj)
64
64
  end
65
65
 
66
- obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
66
+ obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) if defined?(Hyrax)
67
67
  obj.save!
68
68
  end
69
69
  end
@@ -75,6 +75,8 @@ module Bulkrax
75
75
  work.save! if change.present?
76
76
  end
77
77
 
78
+ return if defined?(Hyrax)
79
+
78
80
  # Remove parent collection relationships
79
81
  collection.member_of_collections.each do |parent_col|
80
82
  Hyrax::Collections::NestedCollectionPersistenceService
@@ -48,7 +48,7 @@
48
48
  <% record = @entry&.hyrax_record %>
49
49
  <% if record.present? && @entry.factory_class %>
50
50
  <strong><%= record.class.to_s %> Link:</strong>
51
- <% if record.is_a?(Collection) %>
51
+ <% if defined?(Collection) && record.is_a?(Collection) %>
52
52
  <%= link_to record.class.to_s, hyrax.polymorphic_path(record) %>
53
53
  <% else %>
54
54
  <%= link_to record.class.to_s, main_app.polymorphic_path(record) %>