bulkrax 5.1.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/bulkrax/importers_controller.rb +3 -4
  3. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  4. data/app/jobs/bulkrax/create_relationships_job.rb +3 -0
  5. data/app/jobs/bulkrax/import_work_job.rb +20 -7
  6. data/app/jobs/bulkrax/importer_job.rb +1 -1
  7. data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
  8. data/app/matchers/bulkrax/application_matcher.rb +1 -0
  9. data/app/models/bulkrax/csv_entry.rb +93 -24
  10. data/app/models/bulkrax/exporter.rb +3 -12
  11. data/app/models/bulkrax/importer.rb +1 -1
  12. data/app/models/bulkrax/pending_relationship.rb +1 -1
  13. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
  14. data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
  15. data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
  16. data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
  17. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
  18. data/app/parsers/bulkrax/application_parser.rb +14 -16
  19. data/app/parsers/bulkrax/bagit_parser.rb +5 -16
  20. data/app/parsers/bulkrax/csv_parser.rb +43 -111
  21. data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
  22. data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
  23. data/app/parsers/bulkrax/xml_parser.rb +9 -5
  24. data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
  25. data/app/views/bulkrax/entries/show.html.erb +1 -1
  26. data/app/views/bulkrax/exporters/_form.html.erb +32 -33
  27. data/app/views/bulkrax/exporters/index.html.erb +2 -2
  28. data/app/views/bulkrax/exporters/show.html.erb +3 -3
  29. data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
  30. data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
  31. data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
  32. data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
  33. data/app/views/bulkrax/importers/show.html.erb +18 -16
  34. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
  35. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
  36. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
  37. data/config/locales/bulkrax.en.yml +26 -0
  38. data/lib/bulkrax/entry_spec_helper.rb +17 -0
  39. data/lib/bulkrax/version.rb +1 -1
  40. data/lib/bulkrax.rb +119 -46
  41. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
  42. data/lib/tasks/reset.rake +1 -1
  43. metadata +7 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 490e0f170cc1128c00c467c3cc344db627d027a3b857d53dfa33b97805567d4b
4
- data.tar.gz: 7290801bacea707b7398e674a17acf56e7a770cfb3bea20958169588a4404175
3
+ metadata.gz: c9794c69e891f2397ac94398676f49db008e83b7a6df16e7a08f0ab393c148ca
4
+ data.tar.gz: fd2d48507add6bcbc7557f9240951d174c488e456579af65e20bb6cdc6f3c080
5
5
  SHA512:
6
- metadata.gz: a6f5486405e2d2eb7f6c0c49b17ed0926e55a701368e42c93db9b009a5f663682ec4141fe1dd58d0dde132fa747010ada7cd22187b81d60a7e8b6b23cbf2e24d
7
- data.tar.gz: d56a8780ef074d412ac7406d3f3ddb39b34b17bbe43c61a64bebe06f39952a1d62cbf2dbd01a7012bcd776686def03c5a5e6c556cc1384284c2dea8a89f3eec2
6
+ metadata.gz: 3b07a4178650201c48602c9bb9df29135a5cc51bc6954355732bd6ae5fa918c5fd4c88d187e305b7ee2877ca995818d7ec3a476e92ec173c7569c0a15ca229a2
7
+ data.tar.gz: 324393aeb341d82b23e6391cbe1ec2a64d3e55a61400330f1656f46b3bb22cd0c6e7335cfeaf1b3b12e6ca5faefea5405b96e1100b1df138dd4c5a87fd522d76
@@ -35,11 +35,10 @@ module Bulkrax
35
35
  elsif defined?(::Hyrax)
36
36
  add_importer_breadcrumbs
37
37
  add_breadcrumb @importer.name
38
-
39
- @work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
40
- @collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
41
- @file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
42
38
  end
39
+ @work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
40
+ @collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
41
+ @file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
43
42
  end
44
43
 
45
44
  # GET /importers/new
@@ -5,10 +5,8 @@ module Bulkrax
5
5
  def valid_create_params?
6
6
  check_admin_set
7
7
  check_user
8
- return true if valid_importer? &&
9
- valid_commit? &&
10
- valid_name? &&
11
- valid_parser_klass? &&
8
+ return true if valid_importer? && valid_commit? &&
9
+ valid_name? && valid_parser_klass? &&
12
10
  valid_parser_fields?
13
11
  end
14
12
 
@@ -19,6 +17,8 @@ module Bulkrax
19
17
  end
20
18
 
21
19
  def check_admin_set
20
+ return unless defined?(::Hyrax)
21
+
22
22
  if params[:importer][:admin_set_id].blank?
23
23
  params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
24
24
  else
@@ -16,6 +16,8 @@ module Bulkrax
16
16
  # to either an instance of a Work or an instance of a Collection.
17
17
  # NOTE: In the context of this job, "identifier" is used to generically refer
18
18
  # to either a record's ID or a Bulkrax::Entry's source_identifier.
19
+ # Please override with your own job for custom/non-hyrax applications
20
+ # set Bulkrax config variable :relationship_job to your custom class
19
21
  class CreateRelationshipsJob < ApplicationJob
20
22
  ##
21
23
  # @api public
@@ -112,6 +114,7 @@ module Bulkrax
112
114
  end
113
115
 
114
116
  def add_to_collection(child_record, parent_record)
117
+ parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
115
118
  child_record.member_of_collections << parent_record
116
119
  child_record.save!
117
120
  end
@@ -5,24 +5,37 @@ module Bulkrax
5
5
  queue_as :import
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
+ #
9
+ # @note Yes, we are calling {ImporterRun.find} each time. This is on purpose to prevent race
10
+ # conditions on the database update. If you do not re-find (or at least reload) the object
11
+ # on each increment, the count can get messed up. Let's say there are two jobs A and B and
12
+ # a counter set to 2.
13
+ #
14
+ # - A grabs the importer_run (line 10)
15
+ # - B grabs the importer_run (line 10)
16
+ # - A Finishes the build, does the increment (now the counter is 3)
17
+ # - B Finishes the build, does the increment (now the counter is 3 again) and thus a count
18
+ # is lost.
19
+ #
20
+ # @see https://codingdeliberately.com/activerecord-increment/
21
+ # @see https://github.com/samvera-labs/bulkrax/commit/5c2c795452e13a98c9217fdac81ae2f5aea031a0#r105848236
8
22
  def perform(entry_id, run_id, time_to_live = 3, *)
9
23
  entry = Entry.find(entry_id)
10
- importer_run = ImporterRun.find(run_id)
11
24
  entry.build
12
25
  if entry.status == "Complete"
13
- importer_run.increment!(:processed_records)
14
- importer_run.increment!(:processed_works)
26
+ ImporterRun.find(run_id).increment!(:processed_records)
27
+ ImporterRun.find(run_id).increment!(:processed_works)
15
28
  else
16
29
  # do not retry here because whatever parse error kept you from creating a work will likely
17
30
  # keep preventing you from doing so.
18
- importer_run.increment!(:failed_records)
19
- importer_run.increment!(:failed_works)
31
+ ImporterRun.find(run_id).increment!(:failed_records)
32
+ ImporterRun.find(run_id).increment!(:failed_works)
20
33
  end
21
34
  # Regardless of completion or not, we want to decrement the enqueued records.
22
- importer_run.decrement!(:enqueued_records) unless importer_run.enqueued_records <= 0
35
+ ImporterRun.find(run_id).decrement!(:enqueued_records) unless ImporterRun.find(run_id).enqueued_records <= 0
23
36
 
24
37
  entry.save!
25
- entry.importer.current_run = importer_run
38
+ entry.importer.current_run = ImporterRun.find(run_id)
26
39
  entry.importer.record_status
27
40
  rescue Bulkrax::CollectionsCreatedError => e
28
41
  Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
@@ -12,7 +12,7 @@ module Bulkrax
12
12
  import(importer, only_updates_since_last_import)
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
- rescue CSV::MalformedCSVError => e
15
+ rescue ::CSV::MalformedCSVError => e
16
16
  importer.set_status_info(e)
17
17
  end
18
18
 
@@ -9,7 +9,8 @@ module Bulkrax
9
9
  return reschedule(importer_id) unless pending_num.zero?
10
10
 
11
11
  importer.last_run.parents.each do |parent_id|
12
- CreateRelationshipsJob.perform_later(parent_identifier: parent_id, importer_run_id: importer.last_run.id)
12
+ Bulkrax.relationship_job_class.constantize.perform_later(parent_identifier: parent_id,
13
+ importer_run_id: importer.last_run.id)
13
14
  end
14
15
  end
15
16
 
@@ -102,6 +102,7 @@ module Bulkrax
102
102
 
103
103
  # Only add valid resource types
104
104
  def parse_resource_type(src)
105
+ ActiveSupport::Deprecation.warn('#parse_resource_type will be removed in Bulkrax v6.0.0')
105
106
  Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
106
107
  rescue KeyError
107
108
  nil
@@ -13,15 +13,49 @@ module Bulkrax
13
13
  data.headers.flatten.compact.uniq
14
14
  end
15
15
 
16
+ class_attribute(:csv_read_data_options, default: {})
17
+
16
18
  # there's a risk that this reads the whole file into memory and could cause a memory leak
17
19
  def self.read_data(path)
18
20
  raise StandardError, 'CSV path empty' if path.blank?
19
- CSV.read(path,
21
+ options = {
20
22
  headers: true,
21
23
  header_converters: ->(h) { h.to_sym },
22
- encoding: 'utf-8')
24
+ encoding: 'utf-8'
25
+ }.merge(csv_read_data_options)
26
+
27
+ results = CSV.read(path, **options)
28
+ csv_wrapper_class.new(results)
23
29
  end
24
30
 
31
+ # The purpose of this class is to reject empty lines. This causes lots of grief in importing.
32
+ # But why not use {CSV.read}'s `skip_lines` option? Because for some CSVs, it will never finish
33
+ # reading the file.
34
+ #
35
+ # There is a spec that demonstrates this approach works.
36
+ class CsvWrapper
37
+ include Enumerable
38
+ def initialize(original)
39
+ @original = original
40
+ end
41
+
42
+ delegate :headers, to: :@original
43
+
44
+ def each
45
+ @original.each do |row|
46
+ next if all_fields_are_empty_for(row: row)
47
+ yield(row)
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def all_fields_are_empty_for(row:)
54
+ row.to_hash.values.all?(&:blank?)
55
+ end
56
+ end
57
+ class_attribute :csv_wrapper_class, default: CsvWrapper
58
+
25
59
  def self.data_for_entry(data, _source_id, parser)
26
60
  # If a multi-line CSV data is passed, grab the first row
27
61
  data = data.first if data.is_a?(CSV::Table)
@@ -35,11 +69,7 @@ module Bulkrax
35
69
  end
36
70
 
37
71
  def build_metadata
38
- raise StandardError, 'Record not found' if record.nil?
39
- unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
40
- raise StandardError,
41
- "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
42
- end
72
+ validate_record
43
73
 
44
74
  self.parsed_metadata = {}
45
75
  add_identifier
@@ -56,6 +86,12 @@ module Bulkrax
56
86
  self.parsed_metadata
57
87
  end
58
88
 
89
+ def validate_record
90
+ raise StandardError, 'Record not found' if record.nil?
91
+ raise StandardError, "Missing required elements, missing element(s) are: "\
92
+ "#{importerexporter.parser.missing_elements(record).join(', ')}" unless importerexporter.parser.required_elements?(record)
93
+ end
94
+
59
95
  def add_identifier
60
96
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
61
97
  end
@@ -67,9 +103,10 @@ module Bulkrax
67
103
  end
68
104
 
69
105
  def add_metadata_for_model
70
- if factory_class == Collection
71
- add_collection_type_gid
72
- elsif factory_class == FileSet
106
+ if defined?(::Collection) && factory_class == ::Collection
107
+ add_collection_type_gid if defined?(::Hyrax)
108
+ # add any additional collection metadata methods here
109
+ elsif factory_class == Bulkrax.file_model_class
73
110
  validate_presence_of_filename!
74
111
  add_path_to_file
75
112
  validate_presence_of_parent!
@@ -106,7 +143,7 @@ module Bulkrax
106
143
  self.parsed_metadata = {}
107
144
 
108
145
  build_system_metadata
109
- build_files_metadata unless hyrax_record.is_a?(Collection)
146
+ build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
110
147
  build_relationship_metadata
111
148
  build_mapping_metadata
112
149
  self.save!
@@ -157,26 +194,48 @@ module Bulkrax
157
194
  end
158
195
  end
159
196
 
197
+ # The purpose of this helper module is to make easier the testing of the rather complex
198
+ # switching logic for determining the method we use for building the value.
199
+ module AttributeBuilderMethod
200
+ # @param key [Symbol]
201
+ # @param value [Hash<String, Object>]
202
+ # @param entry [Bulkrax::Entry]
203
+ #
204
+ # @return [NilClass] when we won't be processing this field
205
+ # @return [Symbol] (either :build_value or :build_object)
206
+ def self.for(key:, value:, entry:)
207
+ return if key == 'model'
208
+ return if key == 'file'
209
+ return if key == entry.related_parents_parsed_mapping
210
+ return if key == entry.related_children_parsed_mapping
211
+ return if value['excluded'] || value[:excluded]
212
+ return if Bulkrax.reserved_properties.include?(key) && !entry.field_supported?(key)
213
+
214
+ object_key = key if value.key?('object') || value.key?(:object)
215
+ return unless entry.hyrax_record.respond_to?(key.to_s) || object_key.present?
216
+
217
+ models_to_skip = Array.wrap(value['skip_object_for_model_names'] || value[:skip_object_for_model_names] || [])
218
+
219
+ return :build_value if models_to_skip.detect { |model| entry.factory_class.model_name.name == model }
220
+ return :build_object if object_key.present?
221
+
222
+ :build_value
223
+ end
224
+ end
225
+
160
226
  def build_mapping_metadata
161
227
  mapping = fetch_field_mapping
162
228
  mapping.each do |key, value|
163
- # these keys are handled by other methods
164
- next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
165
- next if value['excluded']
166
- next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
167
-
168
- object_key = key if value.key?('object')
169
- next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
229
+ method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
230
+ next unless method_name
170
231
 
171
- if object_key.present?
172
- build_object(value)
173
- else
174
- build_value(key, value)
175
- end
232
+ send(method_name, key, value)
176
233
  end
177
234
  end
178
235
 
179
- def build_object(value)
236
+ def build_object(_key, value)
237
+ return unless hyrax_record.respond_to?(value['object'])
238
+
180
239
  data = hyrax_record.send(value['object'])
181
240
  return if data.empty?
182
241
 
@@ -185,6 +244,8 @@ module Bulkrax
185
244
  end
186
245
 
187
246
  def build_value(key, value)
247
+ return unless hyrax_record.respond_to?(key.to_s)
248
+
188
249
  data = hyrax_record.send(key.to_s)
189
250
  if data.is_a?(ActiveTriples::Relation)
190
251
  if value['join']
@@ -217,6 +278,14 @@ module Bulkrax
217
278
  end
218
279
 
219
280
  def object_metadata(data)
281
+ # NOTE: What is `d` in this case:
282
+ #
283
+ # "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
284
+ #
285
+ # The above is a stringified version of a Ruby Array of Hashes. Using eval is a very bad idea as it
286
+ # will execute the value of `d` within the full Ruby interpreter context.
287
+ #
288
+ # TODO: Would it be possible to store this as a non-string? Maybe the actual Ruby Array and Hash?
220
289
  data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
221
290
 
222
291
  data.each_with_index do |obj, index|
@@ -18,18 +18,9 @@ module Bulkrax
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
21
- case self.export_from
22
- when 'collection'
23
- create_from_collection
24
- when 'importer'
25
- create_from_importer
26
- when 'worktype'
27
- create_from_worktype
28
- when 'all'
29
- create_from_all
30
- end
21
+ send("create_from_#{self.export_from}")
31
22
  rescue StandardError => e
32
- status_info(e)
23
+ set_status_info(e)
33
24
  end
34
25
 
35
26
  # #export_source accessors
@@ -139,7 +130,7 @@ module Bulkrax
139
130
  end
140
131
 
141
132
  def export_properties
142
- properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
133
+ properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
143
134
  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
144
135
  end
145
136
 
@@ -160,7 +160,7 @@ module Bulkrax
160
160
  types = types_array || DEFAULT_OBJECT_TYPES
161
161
  parser.create_objects(types)
162
162
  rescue StandardError => e
163
- status_info(e)
163
+ set_status_info(e)
164
164
  end
165
165
 
166
166
  # Prepend the base_url to ensure unique set identifiers
@@ -6,6 +6,6 @@ module Bulkrax
6
6
 
7
7
  # Ideally we wouldn't have a column named "order", as it is a reserved SQL term. However, if we
8
8
  # quote the column, all is well...for the application.
9
- scope :ordered, -> { order("#{quoted_table_name}.#{connection.quote_column_name('order')}") }
9
+ scope :ordered, -> { order(Arel.sql("#{quoted_table_name}.#{connection.quote_column_name('order')}")) }
10
10
  end
11
11
  end
@@ -42,7 +42,7 @@ module Bulkrax
42
42
  @available_work_types ||= if defined?(::Hyku)
43
43
  ::Site.instance.available_works.map(&:constantize)
44
44
  else
45
- ::Hyrax.config.curation_concerns
45
+ Bulkrax.curation_concerns
46
46
  end
47
47
  end
48
48
  end
@@ -1,4 +1,6 @@
1
1
  # frozen_string_literal: true
2
+ require 'marcel'
3
+
2
4
  module Bulkrax
3
5
  module ExportBehavior
4
6
  extend ActiveSupport::Concern
@@ -10,9 +12,9 @@ module Bulkrax
10
12
  rescue RSolr::Error::Http, CollectionsCreatedError => e
11
13
  raise e
12
14
  rescue StandardError => e
13
- status_info(e)
15
+ set_status_info(e)
14
16
  else
15
- status_info
17
+ set_status_info
16
18
  end
17
19
 
18
20
  def build_export_metadata
@@ -27,8 +29,8 @@ module Bulkrax
27
29
  def filename(file_set)
28
30
  return if file_set.original_file.blank?
29
31
  fn = file_set.original_file.file_name.first
30
- mime = Mime::Type.lookup(file_set.original_file.mime_type)
31
- ext_mime = MIME::Types.of(file_set.original_file.file_name).first
32
+ mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
33
+ ext_mime = ::Marcel::MimeType.for(file_set.original_file.file_name)
32
34
  if fn.include?(file_set.id) || importerexporter.metadata_only?
33
35
  filename = "#{fn}.#{mime.to_sym}"
34
36
  filename = fn if mime.to_s == ext_mime.to_s
@@ -147,6 +147,7 @@ module Bulkrax
147
147
  %W[
148
148
  file
149
149
  remote_files
150
+ rights_statement
150
151
  #{related_parents_parsed_mapping}
151
152
  #{related_children_parsed_mapping}
152
153
  ]
@@ -11,16 +11,16 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
- add_user_to_permission_templates! if self.class.to_s.include?("Collection")
14
+ add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
15
15
  parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
16
  child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
18
18
  rescue RSolr::Error::Http, CollectionsCreatedError => e
19
19
  raise e
20
20
  rescue StandardError => e
21
- status_info(e)
21
+ set_status_info(e)
22
22
  else
23
- status_info
23
+ set_status_info
24
24
  ensure
25
25
  self.save!
26
26
  end
@@ -93,6 +93,8 @@ module Bulkrax
93
93
  end
94
94
 
95
95
  def add_admin_set_id
96
+ return unless defined?(::Hyrax)
97
+
96
98
  self.parsed_metadata['admin_set_id'] = importerexporter.admin_set_id if self.parsed_metadata['admin_set_id'].blank?
97
99
  end
98
100
 
@@ -165,6 +167,7 @@ module Bulkrax
165
167
  # @param field [String] name of the controlled property
166
168
  # @return [Boolean] provided value is a present, active authority ID for the provided field
167
169
  def active_id_for_authority?(value, field)
170
+ return false unless defined?(::Hyrax)
168
171
  field_service = ('Hyrax::' + "#{field}_service".camelcase).constantize
169
172
  active_authority_ids = field_service.new.active_elements.map { |ae| ae['id'] }
170
173
 
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'marcel'
2
3
 
3
4
  module Bulkrax
4
5
  module ImporterExporterBehavior
@@ -50,7 +51,14 @@ module Bulkrax
50
51
 
51
52
  # Is this a zip file?
52
53
  def zip?
53
- parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
54
+ filename = parser_fields&.[]('import_file_path')
55
+ return false unless filename
56
+ return false unless File.file?(filename)
57
+ returning_value = false
58
+ File.open(filename) do |file|
59
+ returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
60
+ end
61
+ returning_value
54
62
  end
55
63
  end
56
64
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  require 'zip'
3
+ require 'marcel'
3
4
 
4
5
  module Bulkrax
5
6
  # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -10,10 +11,11 @@ module Bulkrax
10
11
  alias importer importerexporter
11
12
  alias exporter importerexporter
12
13
  delegate :only_updates, :limit, :current_run, :errors, :mapping,
13
- :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
14
- :key_without_numbers, :status, :status_info, :status_at,
15
- :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
16
- to: :importerexporter
14
+ :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
15
+ :key_without_numbers, :status, :set_status_info, :status_info, :status_at,
16
+ :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
17
+ :zip?, :file?,
18
+ to: :importerexporter
17
19
 
18
20
  # @todo Convert to `class_attribute :parser_fields, default: {}`
19
21
  def self.parser_fields
@@ -275,10 +277,16 @@ module Bulkrax
275
277
 
276
278
  # @return [Array<String>]
277
279
  def required_elements
280
+ matched_elements = ((importerexporter.mapping.keys || []) & (Bulkrax.required_elements || []))
281
+ unless matched_elements.count == Bulkrax.required_elements.count
282
+ missing_elements = Bulkrax.required_elements - matched_elements
283
+ error_alert = "Missing mapping for at least one required element, missing mappings are: #{missing_elements.join(', ')}"
284
+ raise StandardError, error_alert
285
+ end
278
286
  if Bulkrax.fill_in_blank_source_identifiers
279
- ['title']
287
+ Bulkrax.required_elements
280
288
  else
281
- ['title', source_identifier]
289
+ Bulkrax.required_elements + [source_identifier]
282
290
  end
283
291
  end
284
292
 
@@ -351,16 +359,6 @@ module Bulkrax
351
359
  end
352
360
  end
353
361
 
354
- # Is this a file?
355
- def file?
356
- parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
357
- end
358
-
359
- # Is this a zip file?
360
- def zip?
361
- parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
362
- end
363
-
364
362
  # Path for the import
365
363
  # @return [String]
366
364
  def import_file_path
@@ -11,7 +11,7 @@ module Bulkrax
11
11
  def valid_import?
12
12
  return true if import_fields.present?
13
13
  rescue => e
14
- status_info(e)
14
+ set_status_info(e)
15
15
  false
16
16
  end
17
17
 
@@ -51,7 +51,7 @@ module Bulkrax
51
51
  record_data = entry_class.data_for_entry(data_row, source_identifier, self)
52
52
  next record_data if importerexporter.metadata_only?
53
53
 
54
- record_data[:file] = bag.bag_files.join('|') if ::Hyrax.config.curation_concerns.include? record_data[:model]&.constantize
54
+ record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
55
55
  record_data
56
56
  end
57
57
  else
@@ -82,19 +82,7 @@ module Bulkrax
82
82
  end
83
83
  importer.record_status
84
84
  rescue StandardError => e
85
- status_info(e)
86
- end
87
-
88
- def total
89
- @total = importer.parser_fields['total'] || 0 if importer?
90
-
91
- @total = if exporter?
92
- limit.nil? || limit.zero? ? current_record_ids.count : limit
93
- end
94
-
95
- return @total || 0
96
- rescue StandardError
97
- @total = 0
85
+ set_status_info(e)
98
86
  end
99
87
 
100
88
  # export methods
@@ -144,7 +132,7 @@ module Bulkrax
144
132
  bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
145
133
  rescue => e
146
134
  entry.set_status_info(e)
147
- status_info(e)
135
+ set_status_info(e)
148
136
  end
149
137
  end
150
138
 
@@ -185,6 +173,7 @@ module Bulkrax
185
173
  File.join(path, id)
186
174
  end
187
175
 
176
+ # @todo(bjustice) - remove hyrax reference
188
177
  def write_triples(folder_count, e)
189
178
  sd = SolrDocument.find(e.identifier)
190
179
  return if sd.nil?