bulkrax 4.3.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/bulkrax/exporters.js +12 -0
  3. data/app/controllers/bulkrax/entries_controller.rb +5 -0
  4. data/app/controllers/bulkrax/exporters_controller.rb +5 -0
  5. data/app/controllers/bulkrax/importers_controller.rb +9 -1
  6. data/app/factories/bulkrax/object_factory.rb +87 -11
  7. data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
  8. data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
  9. data/app/jobs/bulkrax/import_work_job.rb +23 -13
  10. data/app/matchers/bulkrax/application_matcher.rb +5 -3
  11. data/app/models/bulkrax/csv_entry.rb +20 -8
  12. data/app/models/bulkrax/entry.rb +2 -1
  13. data/app/models/bulkrax/importer.rb +20 -15
  14. data/app/models/bulkrax/oai_entry.rb +42 -9
  15. data/app/models/bulkrax/rdf_entry.rb +1 -1
  16. data/app/models/bulkrax/xml_entry.rb +54 -12
  17. data/app/models/concerns/bulkrax/file_factory.rb +9 -3
  18. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -1
  19. data/app/models/concerns/bulkrax/import_behavior.rb +27 -19
  20. data/app/parsers/bulkrax/application_parser.rb +90 -13
  21. data/app/parsers/bulkrax/csv_parser.rb +13 -6
  22. data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
  23. data/app/parsers/bulkrax/xml_parser.rb +6 -0
  24. data/app/services/bulkrax/remove_relationships_for_importer.rb +107 -0
  25. data/app/views/bulkrax/exporters/_form.html.erb +3 -3
  26. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +3 -3
  27. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +9 -5
  28. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +18 -7
  29. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +1 -1
  30. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +1 -1
  31. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +1 -1
  32. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +1 -1
  33. data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +1 -1
  34. data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +1 -1
  35. data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +1 -1
  36. data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +1 -1
  37. data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +1 -1
  38. data/lib/bulkrax/version.rb +1 -1
  39. data/lib/bulkrax.rb +85 -11
  40. data/lib/generators/bulkrax/install_generator.rb +20 -0
  41. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +13 -1
  42. data/lib/tasks/reset.rake +65 -0
  43. metadata +6 -4
@@ -4,7 +4,7 @@ require 'nokogiri'
4
4
  module Bulkrax
5
5
  # Generic XML Entry
6
6
  class XmlEntry < Entry
7
- serialize :raw_metadata, JSON
7
+ serialize :raw_metadata, Bulkrax::NormalizedJson
8
8
 
9
9
  def self.fields_from_data(data); end
10
10
 
@@ -43,15 +43,14 @@ module Bulkrax
43
43
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
44
44
  self.parsed_metadata = {}
45
45
  self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
46
- xml_elements.each do |element_name|
47
- elements = record.xpath("//*[name()='#{element_name}']")
48
- next if elements.blank?
49
- elements.each do |el|
50
- el.children.map(&:content).each do |content|
51
- add_metadata(element_name, content) if content.present?
52
- end
53
- end
54
- end
46
+
47
+ # We need to establish the #factory_class before we proceed with the metadata. See
48
+ # https://github.com/samvera-labs/bulkrax/issues/702 for further details.
49
+ #
50
+ # tl;dr - if we don't have the right factory_class we might skip properties that are
51
+ # specifically assigned to the factory class
52
+ establish_factory_class
53
+ add_metadata_from_record
55
54
  add_visibility
56
55
  add_rights_statement
57
56
  add_admin_set_id
@@ -63,11 +62,54 @@ module Bulkrax
63
62
  self.parsed_metadata
64
63
  end
65
64
 
66
- # Grab the class from the real parser
67
- def xml_elements
65
+ def establish_factory_class
66
+ model_field_names = parser.model_field_mappings
67
+
68
+ each_candidate_metadata_node_name_and_content(elements: parser.model_field_mappings) do |name, content|
69
+ next unless model_field_names.include?(name)
70
+ add_metadata(name, content)
71
+ end
72
+ end
73
+
74
+ def add_metadata_from_record
75
+ each_candidate_metadata_node_name_and_content do |name, content|
76
+ add_metadata(name, content)
77
+ end
78
+ end
79
+
80
+ def each_candidate_metadata_node_name_and_content(elements: field_mapping_from_values_for_xml_element_names)
81
+ elements.each do |name|
82
+ # NOTE: the XML element name's case matters
83
+ nodes = record.xpath("//*[name()='#{name}']")
84
+ next if nodes.empty?
85
+
86
+ nodes.each do |node|
87
+ node.children.each do |content|
88
+ next if content.to_s.blank?
89
+
90
+ yield(name, content.to_s)
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ # Returns the explicitly declared "from" key's value of each parser's element's value. (Yes, I
97
+ # would like a proper class for the thing I just tried to describe.)
98
+ #
99
+ # @return [Array<String>]
100
+ #
101
+ # @todo Additionally, we may want to revisit the XML parser fundamental logic; namely we only
102
+ # parse nodes that are explicitly declared within the `from`. This is a bit different
103
+ # than other parsers, in that they will make assumptions about each encountered column (in
104
+ # the case of CSV) or node (in the case of OAI). tl;dr - Here there be dragons.
105
+ def field_mapping_from_values_for_xml_element_names
68
106
  Bulkrax.field_mappings[self.importerexporter.parser_klass].map do |_k, v|
69
107
  v[:from]
70
108
  end.flatten.compact.uniq
71
109
  end
110
+
111
+ # Included for potential downstream adopters
112
+ alias xml_elements field_mapping_from_values_for_xml_element_names
113
+ deprecation_deprecate xml_elements: "Use '#{self}#field_mapping_from_values_for_xml_element_names' instead"
72
114
  end
73
115
  end
@@ -45,9 +45,15 @@ module Bulkrax
45
45
  end
46
46
 
47
47
  def new_remote_files
48
- return if object.is_a? FileSet
49
-
50
- @new_remote_files ||= if object.present? && object.file_sets.present?
48
+ @new_remote_files ||= if object.is_a? FileSet
49
+ parsed_remote_files.select do |file|
50
+ # is the url valid?
51
+ is_valid = file[:url]&.match(URI::ABS_URI)
52
+ # does the file already exist
53
+ is_existing = object.import_url && object.import_url == file[:url]
54
+ is_valid && !is_existing
55
+ end
56
+ elsif object.present? && object.file_sets.present?
51
57
  parsed_remote_files.select do |file|
52
58
  # is the url valid?
53
59
  is_valid = file[:url]&.match(URI::ABS_URI)
@@ -6,7 +6,14 @@ module Bulkrax
6
6
  ::FileSet
7
7
  end
8
8
 
9
+ def file_reference
10
+ return 'file' if parsed_metadata&.[]('file')&.map(&:present?)&.any?
11
+ return 'remote_files' if parsed_metadata&.[]('remote_files')&.map(&:present?)&.any?
12
+ end
13
+
9
14
  def add_path_to_file
15
+ return unless file_reference == 'file'
16
+
10
17
  parsed_metadata['file'].each_with_index do |filename, i|
11
18
  next if filename.blank?
12
19
 
@@ -22,7 +29,7 @@ module Bulkrax
22
29
  end
23
30
 
24
31
  def validate_presence_of_filename!
25
- return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
32
+ return if parsed_metadata&.[](file_reference)&.map(&:present?)&.any?
26
33
 
27
34
  raise StandardError, 'File set must have a filename'
28
35
  end
@@ -117,23 +117,30 @@ module Bulkrax
117
117
  Bulkrax.qa_controlled_properties.each do |field|
118
118
  next if parsed_metadata[field].blank?
119
119
 
120
- parsed_metadata[field].each_with_index do |value, i|
121
- next if value.blank?
122
-
123
- if (validated_uri_value = validate_value(value, field))
124
- parsed_metadata[field][i] = validated_uri_value
125
- else
126
- debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
127
- Rails.logger.debug(debug_msg)
128
- error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
129
- raise ::StandardError, error_msg
120
+ if multiple?(field)
121
+ parsed_metadata[field].each_with_index do |value, i|
122
+ next if value.blank?
123
+ parsed_metadata[field][i] = sanitize_controlled_uri_value(field, value)
130
124
  end
125
+ else
126
+ parsed_metadata[field] = sanitize_controlled_uri_value(field, parsed_metadata[field])
131
127
  end
132
128
  end
133
129
 
134
130
  true
135
131
  end
136
132
 
133
+ def sanitize_controlled_uri_value(field, value)
134
+ if (validated_uri_value = validate_value(value, field))
135
+ validated_uri_value
136
+ else
137
+ debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
138
+ Rails.logger.debug(debug_msg)
139
+ error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
140
+ raise ::StandardError, error_msg
141
+ end
142
+ end
143
+
137
144
  # @param value [String] value to validate
138
145
  # @param field [String] name of the controlled property
139
146
  # @return [String, nil] validated URI value or nil
@@ -165,15 +172,16 @@ module Bulkrax
165
172
  end
166
173
 
167
174
  def factory
168
- @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
169
- source_identifier_value: identifier,
170
- work_identifier: parser.work_identifier,
171
- related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
172
- replace_files: replace_files,
173
- user: user,
174
- klass: factory_class,
175
- importer_run_id: importerexporter.last_run.id,
176
- update_files: update_files)
175
+ of = Bulkrax.object_factory || Bulkrax::ObjectFactory
176
+ @factory ||= of.new(attributes: self.parsed_metadata,
177
+ source_identifier_value: identifier,
178
+ work_identifier: parser.work_identifier,
179
+ related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
180
+ replace_files: replace_files,
181
+ user: user,
182
+ klass: factory_class,
183
+ importer_run_id: importerexporter.last_run.id,
184
+ update_files: update_files)
177
185
  end
178
186
 
179
187
  def factory_class
@@ -2,6 +2,9 @@
2
2
  require 'zip'
3
3
 
4
4
  module Bulkrax
5
+ # An abstract class that establishes the API for Bulkrax's import and export parsing.
6
+ #
7
+ # @abstract Subclass the Bulkrax::ApplicationParser to create a parser that handles a specific format (e.g. CSV, Bagit, XML, etc).
5
8
  class ApplicationParser # rubocop:disable Metrics/ClassLength
6
9
  attr_accessor :importerexporter, :headers
7
10
  alias importer importerexporter
@@ -12,14 +15,21 @@ module Bulkrax
12
15
  :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
13
16
  to: :importerexporter
14
17
 
18
+ # @todo Convert to `class_attribute :parser_fields, default: {}`
15
19
  def self.parser_fields
16
20
  {}
17
21
  end
18
22
 
23
+ # @return [TrueClass,FalseClass] this parser does or does not support exports.
24
+ #
25
+ # @todo Convert to `class_attribute :export_supported, default: false, instance_predicate: true` and `class << self; alias export_supported? export_supported; end`
19
26
  def self.export_supported?
20
27
  false
21
28
  end
22
29
 
30
+ # @return [TrueClass,FalseClass] this parser does or does not support imports.
31
+ #
32
+ # @todo Convert to `class_attribute :import_supported, default: false, instance_predicate: true` and `class << self; alias import_supported? import_supported; end`
23
33
  def self.import_supported?
24
34
  true
25
35
  end
@@ -29,49 +39,76 @@ module Bulkrax
29
39
  @headers = []
30
40
  end
31
41
 
32
- # @api
42
+ # @api public
43
+ # @abstract Subclass and override {#entry_class} to implement behavior for the parser.
33
44
  def entry_class
34
- raise StandardError, 'must be defined'
45
+ raise NotImplementedError, 'must be defined'
35
46
  end
36
47
 
37
- # @api
48
+ # @api public
49
+ # @abstract Subclass and override {#collection_entry_class} to implement behavior for the parser.
38
50
  def collection_entry_class
39
- raise StandardError, 'must be defined'
51
+ raise NotImplementedError, 'must be defined'
40
52
  end
41
53
 
42
- # @api
54
+ # @api public
55
+ # @abstract Subclass and override {#file_set_entry_class} to implement behavior for the parser.
56
+ def file_set_entry_class
57
+ raise NotImplementedError, 'must be defined'
58
+ end
59
+
60
+ # @api public
61
+ # @abstract Subclass and override {#records} to implement behavior for the parser.
43
62
  def records(_opts = {})
44
- raise StandardError, 'must be defined'
63
+ raise NotImplementedError, 'must be defined'
45
64
  end
46
65
 
66
+ # @return [Symbol] the name of the identifying property in the source system from which we're
67
+ # importing (i.e. *not* the application that mounts *this* Bulkrax engine).
68
+ #
69
+ # @see #work_identifier
70
+ # @see https://github.com/samvera-labs/bulkrax/wiki/CSV-Importer#source-identifier Bulkrax Wiki regarding source identifier
47
71
  def source_identifier
48
72
  @source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
49
73
  end
50
74
 
75
+ # @return [Symbol] the name of the identifying property for the system which we're importing
76
+ # into (e.g. the application that mounts *this* Bulkrax engine)
77
+ # @see #source_identifier
51
78
  def work_identifier
52
79
  @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
53
80
  end
54
81
 
82
+ # @return [String]
55
83
  def generated_metadata_mapping
56
84
  @generated_metadata_mapping ||= 'generated'
57
85
  end
58
86
 
87
+ # @return [String, NilClass]
88
+ # @see #related_parents_parsed_mapping
59
89
  def related_parents_raw_mapping
60
90
  @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
61
91
  end
62
92
 
93
+ # @return [String]
94
+ # @see #related_parents_raw_mapping
63
95
  def related_parents_parsed_mapping
64
96
  @related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents')
65
97
  end
66
98
 
99
+ # @return [String, NilClass]
100
+ # @see #related_children_parsed_mapping
67
101
  def related_children_raw_mapping
68
102
  @related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
69
103
  end
70
104
 
105
+ # @return [String]
106
+ # @see #related_children_raw_mapping
71
107
  def related_children_parsed_mapping
72
108
  @related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children')
73
109
  end
74
110
 
111
+ # @api private
75
112
  def get_field_mapping_hash_for(key)
76
113
  return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
77
114
 
@@ -85,6 +122,7 @@ module Bulkrax
85
122
  instance_variable_get("@#{key}_hash")
86
123
  end
87
124
 
125
+ # @return [Array<String>]
88
126
  def model_field_mappings
89
127
  model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
90
128
  model_mappings |= ['model']
@@ -92,6 +130,7 @@ module Bulkrax
92
130
  model_mappings
93
131
  end
94
132
 
133
+ # @return [String]
95
134
  def perform_method
96
135
  if self.validate_only
97
136
  'perform_now'
@@ -100,29 +139,55 @@ module Bulkrax
100
139
  end
101
140
  end
102
141
 
142
+ # The visibility of the record. Acceptable values are: "open", "embargo", "lease", "authenticated", "restricted". The default is "open".
143
+ #
144
+ # @return [String]
145
+ # @see https://github.com/samvera/hydra-head/blob/main/hydra-access-controls/app/models/concerns/hydra/access_controls/access_right.rb Hydra::AccessControls::AccessRight for details on the range of values.
146
+ # @see https://github.com/samvera/hyrax/blob/bd2bcffc33e183904be2c175367648815f25bc2b/app/services/hyrax/visibility_intention.rb Hyrax::VisibilityIntention for how we process the visibility.
103
147
  def visibility
104
148
  @visibility ||= self.parser_fields['visibility'] || 'open'
105
149
  end
106
150
 
151
+ # @api public
152
+ #
153
+ # @param types [Array<Symbol>] the types of objects that we'll create.
154
+ #
155
+ # @see Bulkrax::Importer::DEFAULT_OBJECT_TYPES
156
+ # @see #create_collections
157
+ # @see #create_works
158
+ # @see #create_file_sets
159
+ # @see #create_relationships
160
+ def create_objects(types = [])
161
+ types.each do |object_type|
162
+ send("create_#{object_type.pluralize}")
163
+ end
164
+ end
165
+
166
+ # @abstract Subclass and override {#create_collections} to implement behavior for the parser.
107
167
  def create_collections
108
- raise StandardError, 'must be defined' if importer?
168
+ raise NotImplementedError, 'must be defined' if importer?
109
169
  end
110
170
 
171
+ # @abstract Subclass and override {#create_works} to implement behavior for the parser.
111
172
  def create_works
112
- raise StandardError, 'must be defined' if importer?
173
+ raise NotImplementedError, 'must be defined' if importer?
113
174
  end
114
175
 
176
+ # @abstract Subclass and override {#create_file_sets} to implement behavior for the parser.
115
177
  def create_file_sets
116
- raise StandardError, 'must be defined' if importer?
178
+ raise NotImplementedError, 'must be defined' if importer?
117
179
  end
118
180
 
181
+ # @abstract Subclass and override {#create_relationships} to implement behavior for the parser.
119
182
  def create_relationships
120
- raise StandardError, 'must be defined' if importer?
183
+ raise NotImplementedError, 'must be defined' if importer?
121
184
  end
122
185
 
123
186
  # Optional, define if using browse everything for file upload
124
187
  def retrieve_cloud_files(files); end
125
188
 
189
+ # @param file [#path, #original_filename] the file object with the relevant data for the
190
+ # import.
126
191
  def write_import_file(file)
127
192
  path = File.join(path_for_import, file.original_filename)
128
193
  FileUtils.mv(
@@ -133,6 +198,8 @@ module Bulkrax
133
198
  end
134
199
 
135
200
  # Base path for imported and exported files
201
+ # @param type [String] defaults to 'import'
202
+ # @return [String] the base path for files that this parser will "parse"
136
203
  def base_path(type = 'import')
137
204
  # account for multiple versions of hyku
138
205
  is_multitenant = ENV['HYKU_MULTITENANT'] == 'true' || ENV['SETTINGS__MULTITENANCY__ENABLED'] == 'true'
@@ -141,41 +208,48 @@ module Bulkrax
141
208
 
142
209
  # Path where we'll store the import metadata and files
143
210
  # this is used for uploaded and cloud files
211
+ # @return [String]
144
212
  def path_for_import
145
213
  @path_for_import = File.join(base_path, importerexporter.path_string)
146
214
  FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
147
215
  @path_for_import
148
216
  end
149
217
 
218
+ # @abstract Subclass and override {#setup_export_file} to implement behavior for the parser.
150
219
  def setup_export_file
151
- raise StandardError, 'must be defined' if exporter?
220
+ raise NotImplementedError, 'must be defined' if exporter?
152
221
  end
153
222
 
223
+ # @abstract Subclass and override {#write_files} to implement behavior for the parser.
154
224
  def write_files
155
- raise StandardError, 'must be defined' if exporter?
225
+ raise NotImplementedError, 'must be defined' if exporter?
156
226
  end
157
227
 
228
+ # @return [TrueClass,FalseClass]
158
229
  def importer?
159
230
  importerexporter.is_a?(Bulkrax::Importer)
160
231
  end
161
232
 
233
+ # @return [TrueClass,FalseClass]
162
234
  def exporter?
163
235
  importerexporter.is_a?(Bulkrax::Exporter)
164
236
  end
165
237
 
166
238
  # @param limit [Integer] limit set on the importerexporter
167
239
  # @param index [Integer] index of current iteration
168
- # @return [boolean]
240
+ # @return [TrueClass,FalseClass]
169
241
  def limit_reached?(limit, index)
170
242
  return false if limit.nil? || limit.zero? # no limit
171
243
  index >= limit
172
244
  end
173
245
 
174
246
  # Override to add specific validations
247
+ # @return [TrueClass,FalseClass]
175
248
  def valid_import?
176
249
  true
177
250
  end
178
251
 
252
+ # @return [TrueClass,FalseClass]
179
253
  def record_has_source_identifier(record, index)
180
254
  if record[source_identifier].blank?
181
255
  if Bulkrax.fill_in_blank_source_identifiers.present?
@@ -199,6 +273,7 @@ module Bulkrax
199
273
  end
200
274
  # rubocop:enable Rails/SkipsModelValidations
201
275
 
276
+ # @return [Array<String>]
202
277
  def required_elements
203
278
  if Bulkrax.fill_in_blank_source_identifiers
204
279
  ['title']
@@ -287,12 +362,14 @@ module Bulkrax
287
362
  end
288
363
 
289
364
  # Path for the import
365
+ # @return [String]
290
366
  def import_file_path
291
367
  @import_file_path ||= real_import_file_path
292
368
  end
293
369
 
294
370
  private
295
371
 
372
+ # @return [String]
296
373
  def real_import_file_path
297
374
  return importer_unzip_path if file? && zip?
298
375
  parser_fields['import_file_path']
@@ -180,7 +180,7 @@ module Bulkrax
180
180
  end
181
181
 
182
182
  def current_work_ids
183
- ActiveSupport::Deprication.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
183
+ ActiveSupport::Deprecation.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
184
184
  current_record_ids
185
185
  end
186
186
 
@@ -196,10 +196,13 @@ module Bulkrax
196
196
  @collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
197
197
  @file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
198
198
  when 'collection'
199
- @work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000).map(&:id)
199
+ @work_ids = ActiveFedora::SolrService.query(
200
+ "member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
201
+ ).map(&:id)
200
202
  # get the parent collection and child collections
201
203
  @collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
202
- @collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post, rows: 2_147_483_647).map(&:id)
204
+ @collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
205
+ rows: 2_147_483_647).map(&:id)
203
206
  find_child_file_sets(@work_ids)
204
207
  when 'worktype'
205
208
  @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
@@ -234,7 +237,7 @@ module Bulkrax
234
237
  instance_variable_set(instance_var, ActiveFedora::SolrService.post(
235
238
  extra_filters.to_s,
236
239
  fq: [
237
- %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
240
+ %(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
238
241
  "has_model_ssim:(#{models_to_search.join(' OR ')})"
239
242
  ],
240
243
  fl: 'id',
@@ -243,6 +246,10 @@ module Bulkrax
243
246
  end
244
247
  end
245
248
 
249
+ def solr_name(base_name)
250
+ Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
251
+ end
252
+
246
253
  def create_new_entries
247
254
  current_record_ids.each_with_index do |id, index|
248
255
  break if limit_reached?(limit, index)
@@ -440,7 +447,7 @@ module Bulkrax
440
447
  file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
441
448
  next if r[file_mapping].blank?
442
449
 
443
- r[file_mapping].split(/\s*[:;|]\s*/).map do |f|
450
+ r[file_mapping].split(Bulkrax.multi_value_element_split_on).map do |f|
444
451
  file = File.join(path_to_files, f.tr(' ', '_'))
445
452
  if File.exist?(file) # rubocop:disable Style/GuardClause
446
453
  file
@@ -468,7 +475,7 @@ module Bulkrax
468
475
  entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
469
476
  Bulkrax.fill_in_blank_source_identifiers.call(self, records.find_index(collection_hash))
470
477
  else
471
- collection_hash[:title].split(/\s*[;|]\s*/).first
478
+ collection_hash[:title].split(Bulkrax.multi_value_element_split_on).first
472
479
  end
473
480
 
474
481
  entry_uid
@@ -13,8 +13,7 @@ module Bulkrax
13
13
  def client
14
14
  @client ||= OAI::Client.new(importerexporter.parser_fields['base_url'],
15
15
  headers: headers,
16
- parser: 'libxml',
17
- metadata_prefix: importerexporter.parser_fields['metadata_prefix'])
16
+ parser: 'libxml')
18
17
  rescue StandardError
19
18
  raise OAIError
20
19
  end
@@ -32,6 +31,7 @@ module Bulkrax
32
31
  end
33
32
 
34
33
  def records(opts = {})
34
+ opts[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
35
35
  opts[:set] = collection_name unless collection_name == 'all'
36
36
 
37
37
  opts[:from] = importerexporter&.last_imported_at&.strftime("%Y-%m-%d") if importerexporter.last_imported_at && only_updates
@@ -12,6 +12,12 @@ module Bulkrax
12
12
  # @todo not yet supported
13
13
  def create_collections; end
14
14
 
15
+ # @todo not yet supported
16
+ def file_set_entry_class; end
17
+
18
+ # @todo not yet supported
19
+ def create_file_sets; end
20
+
15
21
  # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
16
22
  def works_total
17
23
  total
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
module Bulkrax
  # This service is rather destructive; it will break relationships between the works, file sets,
  # and collections that were imported via an importer. You probably don't want to run this on
  # your data, except in the case where you have been testing a Bulkrax::Importer, the parsers and
  # mappings. Then, you might have relationships that you want to remove.
  #
  # tl;dr - Caution this will break things!
  class RemoveRelationshipsForImporter
    # @api public
    #
    # Remove the relationships of the works and collections for all of the succeeded
    # Bulkrax::Entry records associated with the given Bulkrax::Importer.
    #
    # @param importer [Bulkrax::Importer]
    # @param with_progress_bar [Boolean] when true, attempt to report progress via the
    #   `ruby-progressbar` gem; otherwise progress is silently discarded.
    def self.break_relationships_for!(importer:, with_progress_bar: false)
      entries = importer.entries.select(&:succeeded?)
      progress_bar = build_progress_bar_for(with_progress_bar: with_progress_bar, entries: entries)
      new(progress_bar: progress_bar, entries: entries).break_relationships!
    end

    # @api private
    #
    # A null object that conforms to this class's use of a progress bar.
    module NullProgressBar
      def self.increment; end
    end

    # @api private
    #
    # Builds the progress reporter used by {#break_relationships!}.
    #
    # @param with_progress_bar [Boolean] false short-circuits to {NullProgressBar}
    # @param entries [#count] the collection whose size is the progress bar's total
    # @return [#increment] a ProgressBar when requested and loadable, {NullProgressBar} otherwise
    def self.build_progress_bar_for(with_progress_bar:, entries:)
      return NullProgressBar unless with_progress_bar

      begin
        # Loaded lazily so that the gem is only needed when a progress bar is requested.
        require 'ruby-progressbar'
        ::ProgressBar.create(total: entries.count)
      rescue LoadError
        Rails.logger.info("Using NullProgressBar because ProgressBar is not available due to a LoadError.")
        # Return the null object explicitly; otherwise the logger's return value would be handed
        # back to the caller, and it does not respond to #increment.
        NullProgressBar
      end
    end

    # @param entries [#each]
    # @param progress_bar [#increment]
    def initialize(entries:, progress_bar:)
      @progress_bar = progress_bar
      @entries = entries
    end

    attr_reader :entries, :progress_bar

    # Walks every entry, looks up its object via the entry's factory and removes that object's
    # relationships. FileSets are skipped. The progress bar is incremented once per entry,
    # including skipped ones.
    #
    # @return [Object] whatever `entries.each` returns
    def break_relationships!
      entries.each do |entry|
        progress_bar.increment

        obj = entry.factory.find
        # Nothing to do when the factory cannot locate an object for this entry.
        next if obj.nil?
        next if obj.is_a?(FileSet) # FileSets must be attached to a Work

        if obj.is_a?(Collection)
          remove_relationships_from_collection(obj)
        else
          remove_relationships_from_work(obj)
        end

        # `try` only applies the setter when the object responds to it.
        obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
        obj.save!
      end
    end

    # Detaches the given collection from its member works, its parent collections and its child
    # collections.
    #
    # @param collection [Collection]
    def remove_relationships_from_collection(collection)
      # Remove child work relationships
      collection.member_works.each do |work|
        change = work.member_of_collections.delete(collection)
        work.save! if change.present?
      end

      # Remove parent collection relationships
      collection.member_of_collections.each do |parent_col|
        Hyrax::Collections::NestedCollectionPersistenceService
          .remove_nested_relationship_for(parent: parent_col, child: collection)
      end

      # Remove child collection relationships
      collection.member_collections.each do |child_col|
        Hyrax::Collections::NestedCollectionPersistenceService
          .remove_nested_relationship_for(parent: collection, child: child_col)
      end
    end

    # Detaches the given work from its parent collections, its parent works and its child works.
    # The work itself is not saved here; {#break_relationships!} saves it afterwards.
    #
    # @param work [Object] a non-Collection, non-FileSet object found for an entry
    def remove_relationships_from_work(work)
      # Remove parent collection relationships
      work.member_of_collections = []

      # Remove parent work relationships
      work.member_of_works.each do |parent_work|
        parent_work.members.delete(work)
        parent_work.save!
      end

      # Remove child work relationships
      # NOTE(review): this deletes from `member_works` while iterating over it; confirm that the
      # association returns a fresh collection per call so that no child is skipped.
      work.member_works.each do |child_work|
        work.member_works.delete(child_work)
      end
    end
  end
end