bulkrax 5.1.0 → 5.2.1

Files changed (43)
  1. checksums.yaml +4 -4
  2. data/app/controllers/bulkrax/importers_controller.rb +3 -4
  3. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  4. data/app/jobs/bulkrax/create_relationships_job.rb +3 -0
  5. data/app/jobs/bulkrax/import_work_job.rb +20 -7
  6. data/app/jobs/bulkrax/importer_job.rb +1 -1
  7. data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
  8. data/app/matchers/bulkrax/application_matcher.rb +1 -0
  9. data/app/models/bulkrax/csv_entry.rb +93 -24
  10. data/app/models/bulkrax/exporter.rb +3 -12
  11. data/app/models/bulkrax/importer.rb +1 -1
  12. data/app/models/bulkrax/pending_relationship.rb +1 -1
  13. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
  14. data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
  15. data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
  16. data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
  17. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
  18. data/app/parsers/bulkrax/application_parser.rb +14 -16
  19. data/app/parsers/bulkrax/bagit_parser.rb +9 -19
  20. data/app/parsers/bulkrax/csv_parser.rb +43 -111
  21. data/app/parsers/bulkrax/oai_dc_parser.rb +4 -2
  22. data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
  23. data/app/parsers/bulkrax/xml_parser.rb +9 -5
  24. data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
  25. data/app/views/bulkrax/entries/show.html.erb +1 -1
  26. data/app/views/bulkrax/exporters/_form.html.erb +32 -33
  27. data/app/views/bulkrax/exporters/index.html.erb +2 -2
  28. data/app/views/bulkrax/exporters/show.html.erb +3 -3
  29. data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
  30. data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
  31. data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
  32. data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
  33. data/app/views/bulkrax/importers/show.html.erb +18 -16
  34. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
  35. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
  36. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
  37. data/config/locales/bulkrax.en.yml +26 -0
  38. data/lib/bulkrax/entry_spec_helper.rb +17 -0
  39. data/lib/bulkrax/version.rb +1 -1
  40. data/lib/bulkrax.rb +119 -46
  41. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
  42. data/lib/tasks/reset.rake +1 -1
  43. metadata +7 -6
@@ -84,23 +84,31 @@ module Bulkrax
  @import_fields ||= records.inject(:merge).keys.compact.uniq
  end

- def required_elements?(keys)
- return if keys.blank?
- missing_elements(keys).blank?
- end
-
- def missing_elements(keys)
- required_elements.map(&:to_s) - keys.map(&:to_s)
+ def required_elements?(record)
+ missing_elements(record).blank?
+ end
+
+ def missing_elements(record)
+ keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
+ keys = []
+ # Because we're persisting the mapping in the database, these are likely string keys.
+ # However, there's no guarantee. So, we need to ensure that by running stringify.
+ importerexporter.mapping.stringify_keys.map do |k, v|
+ Array.wrap(v['from']).each do |vf|
+ keys << k if keys_from_record.include?(vf)
+ end
+ end
+ required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
  end

  def valid_import?
- import_strings = keys_without_numbers(import_fields.map(&:to_s))
- error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_strings).join(', ')}"
- raise StandardError, error_alert unless required_elements?(import_strings)
+ compressed_record = records.flat_map(&:to_a).partition { |_, v| !v }.flatten(1).to_h
+ error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
+ raise StandardError, error_alert unless required_elements?(compressed_record)

  file_paths.is_a?(Array)
  rescue StandardError => e
- status_info(e)
+ set_status_info(e)
  false
  end

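With this change, required-element validation runs against the record's non-blank values, mapped back through the importer's field mapping, rather than against raw header names. A minimal standalone sketch of that logic, assuming a hypothetical mapping and record (plain Ruby stands in for the ActiveSupport helpers used in the diff):

    required_elements = ['title']
    mapping = { 'title' => { 'from' => ['dc_title'] } }
    record  = { 'dc_title' => 'My Work', 'creator_1' => '' } # blank values are ignored

    keys_from_record = record.reject { |_, v| v.to_s.empty? }.keys
    found   = mapping.select { |_, v| (Array(v['from']) & keys_from_record).any? }.keys
    missing = required_elements - found
    # => [] -- 'title' is satisfied because the record supplies the mapped 'dc_title' column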
@@ -140,7 +148,7 @@ module Bulkrax
  end
  true
  rescue StandardError => e
- status_info(e)
+ set_status_info(e)
  end

  def create_entry_and_job(current_record, type)
@@ -167,102 +175,17 @@ module Bulkrax
  path
  end

- def extra_filters
- output = ""
- if importerexporter.start_date.present?
- start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
- finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
- output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
- end
- output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
- output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
- output
- end
-
- def current_work_ids
- ActiveSupport::Deprecation.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
- current_record_ids
- end
-
- # rubocop:disable Metrics/AbcSize
- def current_record_ids
- @work_ids = []
- @collection_ids = []
- @file_set_ids = []
-
- case importerexporter.export_from
- when 'all'
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
- @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
- @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
- when 'collection'
- @work_ids = ActiveFedora::SolrService.query(
- "member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
- ).map(&:id)
- # get the parent collection and child collections
- @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
- @collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
- rows: 2_147_483_647).map(&:id)
- find_child_file_sets(@work_ids)
- when 'worktype'
- @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
- find_child_file_sets(@work_ids)
- when 'importer'
- set_ids_for_exporting_from_importer
- end
-
- @work_ids + @collection_ids + @file_set_ids
- end
- # rubocop:enable Metrics/AbcSize
-
- # find the related file set ids so entries can be made for export
- def find_child_file_sets(work_ids)
- work_ids.each do |id|
- ActiveFedora::Base.find(id).file_set_ids.each { |fs_id| @file_set_ids << fs_id }
- end
- end
-
- # Set the following instance variables: @work_ids, @collection_ids, @file_set_ids
- # @see #current_record_ids
- def set_ids_for_exporting_from_importer
- entry_ids = Importer.find(importerexporter.export_source).entries.pluck(:id)
- complete_statuses = Status.latest_by_statusable
- .includes(:statusable)
- .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
-
- complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
- extra_filters = extra_filters.presence || '*:*'
-
- { :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
- instance_variable_set(instance_var, ActiveFedora::SolrService.post(
- extra_filters.to_s,
- fq: [
- %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
- "has_model_ssim:(#{models_to_search.join(' OR ')})"
- ],
- fl: 'id',
- rows: 2_000_000_000
- )['response']['docs'].map { |obj| obj['id'] })
- end
- end
-
- def solr_name(base_name)
- Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
+ def current_records_for_export
+ @current_records_for_export ||= Bulkrax::ParserExportRecordSet.for(
+ parser: self,
+ export_from: importerexporter.export_from
+ )
  end

  def create_new_entries
- current_record_ids.each_with_index do |id, index|
- break if limit_reached?(limit, index)
-
- this_entry_class = if @collection_ids.include?(id)
- collection_entry_class
- elsif @file_set_ids.include?(id)
- file_set_entry_class
- else
- entry_class
- end
- new_entry = find_or_create_entry(this_entry_class, id, 'Bulkrax::Exporter')
-
+ # NOTE: The each method enforces the limit, as it can best optimize the underlying queries.
+ current_records_for_export.each do |id, entry_class|
+ new_entry = find_or_create_entry(entry_class, id, 'Bulkrax::Exporter')
  begin
  entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
  rescue => e
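The per-type Solr querying that used to live in these removed CsvParser methods now lives in the new Bulkrax::ParserExportRecordSet classes (added later in this diff); the parser only asks the factory for a record set and walks id/entry_class pairs. A rough usage sketch, with `parser` standing in for a CsvParser instance:

    record_set = Bulkrax::ParserExportRecordSet.for(
      parser: parser,
      export_from: parser.importerexporter.export_from # 'all', 'collection', 'worktype', or 'importer'
    )

    record_set.count # respects the exporter's limit, per Base#count below
    record_set.each do |id, entry_class|
      # entry_class is the parser's collection, file set, or work entry class for this id,
      # e.g. find_or_create_entry(entry_class, id, 'Bulkrax::Exporter') as create_new_entries does above
    end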
@@ -291,16 +214,22 @@ module Bulkrax
  end

  def valid_entry_types
- ['Bulkrax::CsvCollectionEntry', 'Bulkrax::CsvFileSetEntry', 'Bulkrax::CsvEntry']
+ [collection_entry_class.to_s, file_set_entry_class.to_s, entry_class.to_s]
  end

  # TODO: figure out why using the version of this method that's in the bagit parser
  # breaks specs for the "if importer?" line
  def total
- @total = importer.parser_fields['total'] || 0 if importer?
- @total = limit || current_record_ids.count if exporter?
+ @total =
+ if importer?
+ importer.parser_fields['total'] || 0
+ elsif exporter?
+ limit.to_i.zero? ? current_records_for_export.count : limit.to_i
+ else
+ 0
+ end

- return @total || 0
+ return @total
  rescue StandardError
  @total = 0
  end
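The rewritten total treats an unset limit and a zero limit the same way: because nil.to_i is 0, an exporter with no limit falls through to the full record-set count instead of nil arithmetic. In plain Ruby:

    limit = nil
    limit.to_i          # => 0
    limit.to_i.zero?    # => true, so @total falls back to current_records_for_export.count

    limit = 25
    limit.to_i.zero?    # => false, so @total is capped at 25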
@@ -335,10 +264,13 @@ module Bulkrax
  def write_files
  require 'open-uri'
  folder_count = 0
+ # TODO: This is not performant as well; unclear how to address, but lower priority as of
+ # <2023-02-21 Tue>.
  sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
  .select { |e| valid_entry_types.include?(e.type) }

- sorted_entries[0..limit || total].in_groups_of(records_split_count, false) do |group|
+ group_size = limit.to_i.zero? ? total : limit.to_i
+ sorted_entries[0..group_size].in_groups_of(records_split_count, false) do |group|
  folder_count += 1

  CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
@@ -398,7 +330,7 @@ module Bulkrax
  return @object_names if @object_names

  @object_names = mapping.values.map { |value| value['object'] }
- @object_names.uniq!.delete(nil)
+ @object_names.uniq!&.delete(nil)

  @object_names
  end
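The `&.` added above guards against a quirk of Array#uniq!: unlike uniq, it returns nil when nothing was removed, so the old chained delete(nil) raised NoMethodError whenever the object names were already unique. For example:

    ['work', 'work', nil].uniq!          # => ["work", nil]
    ['work', nil].uniq!                  # => nil (no duplicates to remove)
    ['work', nil].uniq!&.delete(nil)     # => nil, instead of raising NoMethodError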
@@ -30,6 +30,8 @@ module Bulkrax
  OaiSetEntry
  end

+ def file_set_entry_class; end
+
  def records(opts = {})
  opts[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
  opts[:set] = collection_name unless collection_name == 'all'
@@ -63,9 +65,9 @@ module Bulkrax

  def create_collections
  metadata = {
- visibility: 'open',
- collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
+ visibility: 'open'
  }
+ metadata[:collection_type_gid] = Hyrax::CollectionType.find_or_create_default_collection_type.gid if defined?(::Hyrax)

  collections.each_with_index do |set, index|
  next unless collection_name == 'all' || collection_name == set.spec
@@ -0,0 +1,281 @@
+ # frozen_string_literal: true
+
+ module Bulkrax
+ # This module is responsible for providing the means of querying Solr for the appropriate works,
+ # collections, and file sets for an export of entries.
+ #
+ # @see .for
+ module ParserExportRecordSet
+ # @api public
+ #
+ # A factory method for returning an object that can yield each id and associated entry_class as
+ # well as return the count of objects in the record set.
+ #
+ # @param parser [Bulkrax::ApplicationParser]
+ # @param export_from [String]
+ #
+ # @return [#each, #count] An object, likely a descendant of
+ # {Bulkrax::CurrentParserRecordSet::Base} that responds to {Base#count} and
+ # {Base#each}.
+ def self.for(parser:, export_from:)
+ "Bulkrax::ParserExportRecordSet::#{export_from.classify}".constantize.new(parser: parser)
+ end
+
+ # @abstract
+ #
+ # @note This has {#each} and {#count} but is not an Enumerable. But because it has these two
+ # methods that echo {Array}, we can do some lovely mocking and stubbing in those classes
+ # dependent on this file. :)
+ class Base
+ def initialize(parser:)
+ @parser = parser
+ end
+ attr_reader :parser
+ private :parser
+
+ delegate :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter, to: :parser
+ private :limit_reached?, :work_entry_class, :collection_entry_class, :file_set_entry_class, :importerexporter
+
+ # @return [Integer]
+ def count
+ sum = works.count + collections.count + file_sets.count
+ return sum if limit.zero?
+ return limit if sum > limit
+ return sum
+ end
+
+ # Yield first the works, then collections, then file sets. Once we've yielded as many times
+ # as the parser's limit, we break the iteration and return.
+ #
+ # @yieldparam id [String] The ID of the work/collection/file_set
+ # @yieldparam entry_class [Class] The parser associated entry class for the
+ # work/collection/file_set.
+ #
+ # @note The order of what we yield has been previously determined.
+ def each
+ counter = 0
+
+ works.each do |work|
+ break if limit_reached?(limit, counter)
+ yield(work.fetch('id'), work_entry_class)
+ counter += 1
+ end
+
+ return if limit_reached?(limit, counter)
+
+ collections.each do |collection|
+ break if limit_reached?(limit, counter)
+ yield(collection.fetch('id'), collection_entry_class)
+ counter += 1
+ end
+
+ return if limit_reached?(limit, counter)
+
+ file_sets.each do |file_set|
+ break if limit_reached?(limit, counter)
+ yield(file_set.fetch('id'), file_set_entry_class)
+ counter += 1
+ end
+ end
+
+ private
+
+ # Why call these candidates and not the actual file_set_ids? Because of implementation
+ # details of Hyrax. What are those details? The upstream application (as of v2.9.x) puts
+ # child works into the `file_set_ids_ssim` field. So we have a mix of file sets and works in
+ # that property.
+ #
+ # @see #file_sets
+ def candidate_file_set_ids
+ @candidate_file_set_ids ||= works.flat_map { |work| work.fetch("#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", []) }
+ end
+
+ # @note Specifically not memoizing this so we can merge values without changing the object.
+ #
+ # No sense attempting to query for more than the limit.
+ def query_kwargs
+ { fl: "id,#{Bulkrax.file_model_class.to_s.underscore}_ids_ssim", method: :post, rows: row_limit }
+ end
+
+ # If we have a limit, we need not query beyond that limit
+ def row_limit
+ return 2_147_483_647 if limit.zero?
+ limit
+ end
+
+ def limit
+ parser.limit.to_i
+ end
+
+ alias works_query_kwargs query_kwargs
+ alias collections_query_kwargs query_kwargs
+
+ def extra_filters
+ output = ""
+ if importerexporter.start_date.present?
+ start_dt = importerexporter.start_date.to_datetime.strftime('%FT%TZ')
+ finish_dt = importerexporter.finish_date.present? ? importerexporter.finish_date.to_datetime.end_of_day.strftime('%FT%TZ') : "NOW"
+ output += " AND system_modified_dtsi:[#{start_dt} TO #{finish_dt}]"
+ end
+ output += importerexporter.work_visibility.present? ? " AND visibility_ssi:#{importerexporter.work_visibility}" : ""
+ output += importerexporter.workflow_status.present? ? " AND workflow_state_name_ssim:#{importerexporter.workflow_status}" : ""
+ output
+ end
+
+ def works
+ @works ||= ActiveFedora::SolrService.query(works_query, **works_query_kwargs)
+ end
+
+ def collections
+ @collections ||= if collections_query
+ ActiveFedora::SolrService.query(collections_query, **collections_query_kwargs)
+ else
+ []
+ end
+ end
+
+ SOLR_QUERY_PAGE_SIZE = 512
+
+ # @note In most cases, when we don't have any candidate file sets, there is no need to query SOLR.
+ #
+ # @see Bulkrax::ParserExportRecordSet::Importer#file_sets
+ #
+ # Why can't we just use the candidate_file_set_ids? Because Hyrax is pushing child works into the
+ # `file_set_ids_ssim` field.
+ #
+ # For v2.9.x of Hryax; perhaps this is resolved.
+ #
+ # @see https://github.com/scientist-softserv/britishlibrary/issues/289
+ # @see https://github.com/samvera/hyrax/blob/64c0bbf0dc0d3e1b49f040b50ea70d177cc9d8f6/app/indexers/hyrax/work_indexer.rb#L15-L18
+ def file_sets
+ @file_sets ||= if candidate_file_set_ids.empty?
+ []
+ else
+ results = []
+ candidate_file_set_ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |ids|
+ fsq = "has_model_ssim:#{Bulkrax.file_model_class} AND id:(\"" + ids.join('" OR "') + "\")"
+ fsq += extra_filters if extra_filters.present?
+ results += ActiveFedora::SolrService.query(
+ fsq,
+ { fl: "id", method: :post, rows: ids.size }
+ )
+ end
+ results
+ end
+ end
+
+ def solr_name(base_name)
+ if Module.const_defined?(:Solrizer)
+ ::Solrizer.solr_name(base_name)
+ else
+ ::ActiveFedora.index_field_mapper.solr_name(base_name)
+ end
+ end
+ end
+
+ class All < Base
+ def works_query
+ "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')}) #{extra_filters}"
+ end
+
+ def collections_query
+ "has_model_ssim:Collection #{extra_filters}"
+ end
+ end
+
+ class Collection < Base
+ def works_query
+ "member_of_collection_ids_ssim:#{importerexporter.export_source} #{extra_filters} AND " \
+ "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
+ end
+
+ def collections_query
+ "(id:#{importerexporter.export_source} #{extra_filters}) OR " \
+ "(has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source})"
+ end
+ end
+
+ class Worktype < Base
+ def works_query
+ "has_model_ssim:#{importerexporter.export_source} #{extra_filters}"
+ end
+
+ def collections_query
+ nil
+ end
+ end
+
+ class Importer < Base
+ private
+
+ delegate :work_identifier, to: :parser
+ private :work_identifier
+
+ def extra_filters
+ '*:*' + super
+ end
+
+ def complete_entry_identifiers
+ @complete_entry_identifiers ||=
+ begin
+ entry_ids ||= Bulkrax::Importer.find(importerexporter.export_source).entries.pluck(:id)
+ complete_statuses ||= Bulkrax::Status.latest_by_statusable
+ .includes(:statusable)
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
+
+ complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
+ end
+ end
+
+ def works_query_kwargs
+ query_kwargs.merge(
+ fq: [
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
+ "has_model_ssim:(#{Bulkrax.curation_concerns.join(' OR ')})"
+ ],
+ fl: 'id'
+ )
+ end
+
+ def works_query
+ extra_filters.to_s
+ end
+
+ def collections_query_kwargs
+ query_kwargs.merge(
+ fq: [
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
+ "has_model_ssim:Collection"
+ ],
+ fl: 'id'
+ )
+ end
+
+ def collections_query
+ "has_model_ssim:Collection #{extra_filters}"
+ end
+
+ # This is an exception; we don't know how many candidate file sets there might be. So we will instead
+ # make the query (assuming that there are {#complete_entry_identifiers}).
+ #
+ # @see Bulkrax::ParserExportRecordSet::Base#file_sets
+ def file_sets
+ @file_sets ||= ActiveFedora::SolrService.query(file_sets_query, **file_sets_query_kwargs)
+ end
+
+ def file_sets_query_kwargs
+ query_kwargs.merge(
+ fq: [
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
+ "has_model_ssim:#{Bulkrax.file_model_class}"
+ ],
+ fl: 'id'
+ )
+ end
+
+ def file_sets_query
+ extra_filters
+ end
+ end
+ end
+ end
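The factory's classify/constantize call is what ties the exporter's export_from value to one of the subclasses above; each subclass only supplies its Solr queries, while Base handles counting, the limit, and the works-then-collections-then-file-sets yield order. For example (String#classify comes from ActiveSupport):

    'all'.classify        # => "All"        -> Bulkrax::ParserExportRecordSet::All
    'collection'.classify # => "Collection" -> Bulkrax::ParserExportRecordSet::Collection
    'worktype'.classify   # => "Worktype"   -> Bulkrax::ParserExportRecordSet::Worktype
    'importer'.classify   # => "Importer"   -> Bulkrax::ParserExportRecordSet::Importer

    Bulkrax::ParserExportRecordSet.for(parser: parser, export_from: 'worktype')
    # => a Worktype record set, which supplies works_query and returns nil from
    #    collections_query, so Base#collections yields nothing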
@@ -1,4 +1,5 @@
  # frozen_string_literal: true
+ require 'marcel'

  module Bulkrax
  class XmlParser < ApplicationParser
@@ -31,7 +32,7 @@ module Bulkrax
  raise StandardError, 'No records found' if records.blank?
  true
  rescue StandardError => e
- status_info(e)
+ set_status_info(e)
  false
  end

@@ -78,16 +79,19 @@ module Bulkrax
  # Otherwise return all xml files in the given folder
  def metadata_paths
  @metadata_paths ||=
- if file? && MIME::Types.type_for(import_file_path).include?('application/xml')
+ if file? && good_file_type?(import_file_path)
  [import_file_path]
  else
  file_paths.select do |f|
- MIME::Types.type_for(f).include?('application/xml') &&
- f.include?("import_#{importerexporter.id}")
+ good_file_type?(f) && f.include?("import_#{importerexporter.id}")
  end
  end
  end

+ def good_file_type?(path)
+ %w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml')
+ end
+
  def create_works
  records.each_with_index do |record, index|
  next unless record_has_source_identifier(record, index)
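The XML parser now accepts files by extension or by content sniffing through Marcel (hence the new require 'marcel' above) instead of relying on MIME::Types. A small sketch of the new predicate in isolation; the path below is hypothetical, and the || short-circuits on the extension check, so Marcel is only consulted for files without a recognized extension:

    require 'marcel'

    def good_file_type?(path)
      %w[.xml .xls .xsd].include?(File.extname(path)) ||
        ::Marcel::MimeType.for(path).include?('application/xml')
    end

    good_file_type?('tmp/import_1/metadata.xml') # => true via the extension check alone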
@@ -104,7 +108,7 @@ module Bulkrax
  end
  importer.record_status
  rescue StandardError => e
- status_info(e)
+ set_status_info(e)
  end

  def total
@@ -55,7 +55,7 @@ module Bulkrax
  progress_bar.increment

  obj = entry.factory.find
- next if obj.is_a?(FileSet) # FileSets must be attached to a Work
+ next if obj.is_a?(Bulkrax.file_model_class) # FileSets must be attached to a Work

  if obj.is_a?(Collection)
  remove_relationships_from_collection(obj)
@@ -63,7 +63,7 @@ module Bulkrax
  remove_relationships_from_work(obj)
  end

- obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
+ obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) if defined?(Hyrax)
  obj.save!
  end
  end
@@ -75,6 +75,8 @@ module Bulkrax
  work.save! if change.present?
  end

+ return if defined?(Hyrax)
+
  # Remove parent collection relationships
  collection.member_of_collections.each do |parent_col|
  Hyrax::Collections::NestedCollectionPersistenceService
@@ -48,7 +48,7 @@
  <% record = @entry&.hyrax_record %>
  <% if record.present? && @entry.factory_class %>
  <strong><%= record.class.to_s %> Link:</strong>
- <% if record.is_a?(Collection) %>
+ <% if defined?(Collection) && record.is_a?(Collection) %>
  <%= link_to record.class.to_s, hyrax.polymorphic_path(record) %>
  <% else %>
  <%= link_to record.class.to_s, main_app.polymorphic_path(record) %>
@@ -52,16 +52,13 @@
  }
  %>

- <% if defined?(::Hyrax) %>
- <%= form.input :export_source_worktype,
+ <%= form.input :export_source_worktype,
  label: t('bulkrax.exporter.labels.worktype'),
  required: true,
  prompt: 'Select from the list',
  label_html: { class: 'worktype export-source-option hidden' },
  input_html: { class: 'worktype export-source-option hidden form-control' },
- collection: Hyrax.config.curation_concerns.map {|cc| [cc.to_s, cc.to_s] } %>
- <% end %>
-
+ collection: Bulkrax.curation_concerns.map { |cc| [cc.to_s, cc.to_s] } %>

  <%= form.input :limit,
  as: :integer,
@@ -117,33 +114,35 @@
  <%# Find definitions for the functions called in this script in
  app/assets/javascripts/bulkrax/exporters.js %>
  <script>
- $(function() {
- // show the selected export_source option
- var selectedVal = $('.exporter_export_from option:selected').val();
- hideUnhide(selectedVal);
-
- // Select2 dropdowns don't like taking a value param. Thus,
- // if export_source_collection is present, we populate the input.
- var selectedCollectionId = "<%= @collection&.id %>"
- if (selectedCollectionId.length > 0) {
- $('#exporter_export_source_collection').val(selectedCollectionId)
- }
-
- // get the selected export_from option and show the corresponding export_source
- $('.exporter_export_from').change(function() {
- var selectedVal = $('.exporter_export_from option:selected').val();
- hideUnhide(selectedVal);
+ document.addEventListener("DOMContentLoaded", function() {
+ $(function () {
+ // show the selected export_source option
+ var selectedVal = $('.exporter_export_from option:selected').val();
+ hideUnhide(selectedVal);
+
+ // Select2 dropdowns don't like taking a value param. Thus,
+ // if export_source_collection is present, we populate the input.
+ var selectedCollectionId = "<%= @collection&.id %>"
+ if (selectedCollectionId.length > 0) {
+ $('#exporter_export_source_collection').val(selectedCollectionId)
+ }
+
+ // get the selected export_from option and show the corresponding export_source
+ $('.exporter_export_from').change(function () {
+ var selectedVal = $('.exporter_export_from option:selected').val();
+ hideUnhide(selectedVal);
+ });
+
+ // get the date filter option and show the corresponding date selectors
+ $('.exporter_date_filter').change(function () {
+ if ($('.exporter_date_filter').find(".boolean").is(":checked"))
+ $('#date_filter_picker').removeClass('hidden');
+ else
+ $('#date_filter_picker').addClass('hidden');
+ });
+
+ if ($('.exporter_date_filter').find(".boolean").is(":checked"))
+ $('#date_filter_picker').removeClass('hidden');
+ });
  });
-
- // get the date filter option and show the corresponding date selectors
- $('.exporter_date_filter').change(function() {
- if($('.exporter_date_filter').find(".boolean").is(":checked"))
- $('#date_filter_picker').removeClass('hidden');
- else
- $('#date_filter_picker').addClass('hidden');
- });
-
- if($('.exporter_date_filter').find(".boolean").is(":checked"))
- $('#date_filter_picker').removeClass('hidden');
- });
  </script>