bulkrax 5.1.0 → 5.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/bulkrax/importers_controller.rb +3 -4
  3. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  4. data/app/jobs/bulkrax/create_relationships_job.rb +3 -0
  5. data/app/jobs/bulkrax/import_work_job.rb +20 -7
  6. data/app/jobs/bulkrax/importer_job.rb +1 -1
  7. data/app/jobs/bulkrax/schedule_relationships_job.rb +2 -1
  8. data/app/matchers/bulkrax/application_matcher.rb +1 -0
  9. data/app/models/bulkrax/csv_entry.rb +93 -24
  10. data/app/models/bulkrax/exporter.rb +3 -12
  11. data/app/models/bulkrax/importer.rb +1 -1
  12. data/app/models/bulkrax/pending_relationship.rb +1 -1
  13. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +1 -1
  14. data/app/models/concerns/bulkrax/export_behavior.rb +6 -4
  15. data/app/models/concerns/bulkrax/has_matchers.rb +1 -0
  16. data/app/models/concerns/bulkrax/import_behavior.rb +6 -3
  17. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +9 -1
  18. data/app/parsers/bulkrax/application_parser.rb +14 -16
  19. data/app/parsers/bulkrax/bagit_parser.rb +9 -19
  20. data/app/parsers/bulkrax/csv_parser.rb +43 -111
  21. data/app/parsers/bulkrax/oai_dc_parser.rb +4 -2
  22. data/app/parsers/bulkrax/parser_export_record_set.rb +281 -0
  23. data/app/parsers/bulkrax/xml_parser.rb +9 -5
  24. data/app/services/bulkrax/remove_relationships_for_importer.rb +4 -2
  25. data/app/views/bulkrax/entries/show.html.erb +1 -1
  26. data/app/views/bulkrax/exporters/_form.html.erb +32 -33
  27. data/app/views/bulkrax/exporters/index.html.erb +2 -2
  28. data/app/views/bulkrax/exporters/show.html.erb +3 -3
  29. data/app/views/bulkrax/importers/_bagit_fields.html.erb +13 -12
  30. data/app/views/bulkrax/importers/_csv_fields.html.erb +13 -12
  31. data/app/views/bulkrax/importers/_oai_fields.html.erb +12 -10
  32. data/app/views/bulkrax/importers/_xml_fields.html.erb +12 -11
  33. data/app/views/bulkrax/importers/show.html.erb +18 -16
  34. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +6 -6
  35. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +6 -6
  36. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +6 -6
  37. data/config/locales/bulkrax.en.yml +26 -0
  38. data/lib/bulkrax/entry_spec_helper.rb +17 -0
  39. data/lib/bulkrax/version.rb +1 -1
  40. data/lib/bulkrax.rb +119 -46
  41. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +1 -1
  42. data/lib/tasks/reset.rake +1 -1
  43. metadata +7 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 490e0f170cc1128c00c467c3cc344db627d027a3b857d53dfa33b97805567d4b
4
- data.tar.gz: 7290801bacea707b7398e674a17acf56e7a770cfb3bea20958169588a4404175
3
+ metadata.gz: 4a811bef32bb83948c7ea8fe9cbc3aead7e9fa3e09f88fe6d3ec45e4cdef7461
4
+ data.tar.gz: 253314c9d1a35505b50ad70e6fb0f9dec0f5331a55cdee6555cb137fff697e08
5
5
  SHA512:
6
- metadata.gz: a6f5486405e2d2eb7f6c0c49b17ed0926e55a701368e42c93db9b009a5f663682ec4141fe1dd58d0dde132fa747010ada7cd22187b81d60a7e8b6b23cbf2e24d
7
- data.tar.gz: d56a8780ef074d412ac7406d3f3ddb39b34b17bbe43c61a64bebe06f39952a1d62cbf2dbd01a7012bcd776686def03c5a5e6c556cc1384284c2dea8a89f3eec2
6
+ metadata.gz: 3c8281a0c12778d8db9a6e3e8dc39a9c591e2d81e9d668b9b52da400b65e48012f40b51b86aa75182fc26b7bf8d20e761af7a00b6c1c798df34dba35da7890fc
7
+ data.tar.gz: d71d16a09cfa1d9b0c3954bf2be9f2778bacdaa98e0a99d301c0b0762d8be04b0213f99463e05e6aab33321b64a9284551b32a1435e97569097e32c23e74f95d
@@ -35,11 +35,10 @@ module Bulkrax
35
35
  elsif defined?(::Hyrax)
36
36
  add_importer_breadcrumbs
37
37
  add_breadcrumb @importer.name
38
-
39
- @work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
40
- @collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
41
- @file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
42
38
  end
39
+ @work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30)
40
+ @collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30)
41
+ @file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30)
43
42
  end
44
43
 
45
44
  # GET /importers/new
@@ -5,10 +5,8 @@ module Bulkrax
5
5
  def valid_create_params?
6
6
  check_admin_set
7
7
  check_user
8
- return true if valid_importer? &&
9
- valid_commit? &&
10
- valid_name? &&
11
- valid_parser_klass? &&
8
+ return true if valid_importer? && valid_commit? &&
9
+ valid_name? && valid_parser_klass? &&
12
10
  valid_parser_fields?
13
11
  end
14
12
 
@@ -19,6 +17,8 @@ module Bulkrax
19
17
  end
20
18
 
21
19
  def check_admin_set
20
+ return unless defined?(::Hyrax)
21
+
22
22
  if params[:importer][:admin_set_id].blank?
23
23
  params[:importer][:admin_set_id] = AdminSet::DEFAULT_ID
24
24
  else
@@ -16,6 +16,8 @@ module Bulkrax
16
16
  # to either an instance of a Work or an instance of a Collection.
17
17
  # NOTE: In the context of this job, "identifier" is used to generically refer
18
18
  # to either a record's ID or a Bulkrax::Entry's source_identifier.
19
+ # Please override with your own job for custom/non-hyrax applications
20
+ # set Bulkrax config variable :relationship_job_class to the name of your custom class
19
21
  class CreateRelationshipsJob < ApplicationJob
20
22
  ##
21
23
  # @api public
@@ -112,6 +114,7 @@ module Bulkrax
112
114
  end
113
115
 
114
116
  def add_to_collection(child_record, parent_record)
117
+ parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
115
118
  child_record.member_of_collections << parent_record
116
119
  child_record.save!
117
120
  end
@@ -5,24 +5,37 @@ module Bulkrax
5
5
  queue_as :import
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
+ #
9
+ # @note Yes, we are calling {ImporterRun.find} each time. This is on purpose, to prevent race
10
+ # conditions on the database update. If you do not re-find (or at least reload) the object
11
+ # on each increment, the count can get messed up. Let's say there are two jobs A and B and
12
+ # a counter set to 2.
13
+ #
14
+ # - A grabs the importer_run (line 10)
15
+ # - B grabs the importer_run (line 10)
16
+ # - A Finishes the build, does the increment (now the counter is 3)
17
+ # - B Finishes the build, does the increment (now the counter is 3 again) and thus a count
18
+ # is lost.
19
+ #
20
+ # @see https://codingdeliberately.com/activerecord-increment/
21
+ # @see https://github.com/samvera-labs/bulkrax/commit/5c2c795452e13a98c9217fdac81ae2f5aea031a0#r105848236
8
22
  def perform(entry_id, run_id, time_to_live = 3, *)
9
23
  entry = Entry.find(entry_id)
10
- importer_run = ImporterRun.find(run_id)
11
24
  entry.build
12
25
  if entry.status == "Complete"
13
- importer_run.increment!(:processed_records)
14
- importer_run.increment!(:processed_works)
26
+ ImporterRun.find(run_id).increment!(:processed_records)
27
+ ImporterRun.find(run_id).increment!(:processed_works)
15
28
  else
16
29
  # do not retry here because whatever parse error kept you from creating a work will likely
17
30
  # keep preventing you from doing so.
18
- importer_run.increment!(:failed_records)
19
- importer_run.increment!(:failed_works)
31
+ ImporterRun.find(run_id).increment!(:failed_records)
32
+ ImporterRun.find(run_id).increment!(:failed_works)
20
33
  end
21
34
  # Regardless of completion or not, we want to decrement the enqueued records.
22
- importer_run.decrement!(:enqueued_records) unless importer_run.enqueued_records <= 0
35
+ ImporterRun.find(run_id).decrement!(:enqueued_records) unless ImporterRun.find(run_id).enqueued_records <= 0
23
36
 
24
37
  entry.save!
25
- entry.importer.current_run = importer_run
38
+ entry.importer.current_run = ImporterRun.find(run_id)
26
39
  entry.importer.record_status
27
40
  rescue Bulkrax::CollectionsCreatedError => e
28
41
  Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
@@ -12,7 +12,7 @@ module Bulkrax
12
12
  import(importer, only_updates_since_last_import)
13
13
  update_current_run_counters(importer)
14
14
  schedule(importer) if importer.schedulable?
15
- rescue CSV::MalformedCSVError => e
15
+ rescue ::CSV::MalformedCSVError => e
16
16
  importer.set_status_info(e)
17
17
  end
18
18
 
@@ -9,7 +9,8 @@ module Bulkrax
9
9
  return reschedule(importer_id) unless pending_num.zero?
10
10
 
11
11
  importer.last_run.parents.each do |parent_id|
12
- CreateRelationshipsJob.perform_later(parent_identifier: parent_id, importer_run_id: importer.last_run.id)
12
+ Bulkrax.relationship_job_class.constantize.perform_later(parent_identifier: parent_id,
13
+ importer_run_id: importer.last_run.id)
13
14
  end
14
15
  end
15
16
 
@@ -102,6 +102,7 @@ module Bulkrax
102
102
 
103
103
  # Only add valid resource types
104
104
  def parse_resource_type(src)
105
+ ActiveSupport::Deprecation.warn('#parse_resource_type will be removed in Bulkrax v6.0.0')
105
106
  Hyrax::ResourceTypesService.label(src.to_s.strip.titleize)
106
107
  rescue KeyError
107
108
  nil
@@ -13,15 +13,49 @@ module Bulkrax
13
13
  data.headers.flatten.compact.uniq
14
14
  end
15
15
 
16
+ class_attribute(:csv_read_data_options, default: {})
17
+
16
18
  # there's a risk that this reads the whole file into memory and could cause a memory leak
17
19
  def self.read_data(path)
18
20
  raise StandardError, 'CSV path empty' if path.blank?
19
- CSV.read(path,
21
+ options = {
20
22
  headers: true,
21
23
  header_converters: ->(h) { h.to_sym },
22
- encoding: 'utf-8')
24
+ encoding: 'utf-8'
25
+ }.merge(csv_read_data_options)
26
+
27
+ results = CSV.read(path, **options)
28
+ csv_wrapper_class.new(results)
23
29
  end
24
30
 
31
+ # The purpose of this class is to reject empty lines, which cause lots of grief in importing.
32
+ # But why not use {CSV.read}'s `skip_lines` option? Because for some CSVs, it will never finish
33
+ # reading the file.
34
+ #
35
+ # There is a spec that demonstrates this approach works.
36
+ class CsvWrapper
37
+ include Enumerable
38
+ def initialize(original)
39
+ @original = original
40
+ end
41
+
42
+ delegate :headers, to: :@original
43
+
44
+ def each
45
+ @original.each do |row|
46
+ next if all_fields_are_empty_for(row: row)
47
+ yield(row)
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def all_fields_are_empty_for(row:)
54
+ row.to_hash.values.all?(&:blank?)
55
+ end
56
+ end
57
+ class_attribute :csv_wrapper_class, default: CsvWrapper
58
+
25
59
  def self.data_for_entry(data, _source_id, parser)
26
60
  # If a multi-line CSV data is passed, grab the first row
27
61
  data = data.first if data.is_a?(CSV::Table)
@@ -35,11 +69,7 @@ module Bulkrax
35
69
  end
36
70
 
37
71
  def build_metadata
38
- raise StandardError, 'Record not found' if record.nil?
39
- unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
40
- raise StandardError,
41
- "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
42
- end
72
+ validate_record
43
73
 
44
74
  self.parsed_metadata = {}
45
75
  add_identifier
@@ -56,6 +86,12 @@ module Bulkrax
56
86
  self.parsed_metadata
57
87
  end
58
88
 
89
+ def validate_record
90
+ raise StandardError, 'Record not found' if record.nil?
91
+ raise StandardError, "Missing required elements, missing element(s) are: "\
92
+ "#{importerexporter.parser.missing_elements(record).join(', ')}" unless importerexporter.parser.required_elements?(record)
93
+ end
94
+
59
95
  def add_identifier
60
96
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
61
97
  end
@@ -67,9 +103,10 @@ module Bulkrax
67
103
  end
68
104
 
69
105
  def add_metadata_for_model
70
- if factory_class == Collection
71
- add_collection_type_gid
72
- elsif factory_class == FileSet
106
+ if defined?(::Collection) && factory_class == ::Collection
107
+ add_collection_type_gid if defined?(::Hyrax)
108
+ # add any additional collection metadata methods here
109
+ elsif factory_class == Bulkrax.file_model_class
73
110
  validate_presence_of_filename!
74
111
  add_path_to_file
75
112
  validate_presence_of_parent!
@@ -106,7 +143,7 @@ module Bulkrax
106
143
  self.parsed_metadata = {}
107
144
 
108
145
  build_system_metadata
109
- build_files_metadata unless hyrax_record.is_a?(Collection)
146
+ build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
110
147
  build_relationship_metadata
111
148
  build_mapping_metadata
112
149
  self.save!
@@ -157,26 +194,48 @@ module Bulkrax
157
194
  end
158
195
  end
159
196
 
197
+ # The purpose of this helper module is to make it easier to test the rather complex
198
+ # switching logic for determining the method we use for building the value.
199
+ module AttributeBuilderMethod
200
+ # @param key [Symbol]
201
+ # @param value [Hash<String, Object>]
202
+ # @param entry [Bulkrax::Entry]
203
+ #
204
+ # @return [NilClass] when we won't be processing this field
205
+ # @return [Symbol] (either :build_value or :build_object)
206
+ def self.for(key:, value:, entry:)
207
+ return if key == 'model'
208
+ return if key == 'file'
209
+ return if key == entry.related_parents_parsed_mapping
210
+ return if key == entry.related_children_parsed_mapping
211
+ return if value['excluded'] || value[:excluded]
212
+ return if Bulkrax.reserved_properties.include?(key) && !entry.field_supported?(key)
213
+
214
+ object_key = key if value.key?('object') || value.key?(:object)
215
+ return unless entry.hyrax_record.respond_to?(key.to_s) || object_key.present?
216
+
217
+ models_to_skip = Array.wrap(value['skip_object_for_model_names'] || value[:skip_object_for_model_names] || [])
218
+
219
+ return :build_value if models_to_skip.detect { |model| entry.factory_class.model_name.name == model }
220
+ return :build_object if object_key.present?
221
+
222
+ :build_value
223
+ end
224
+ end
225
+
160
226
  def build_mapping_metadata
161
227
  mapping = fetch_field_mapping
162
228
  mapping.each do |key, value|
163
- # these keys are handled by other methods
164
- next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
165
- next if value['excluded']
166
- next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
167
-
168
- object_key = key if value.key?('object')
169
- next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
229
+ method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
230
+ next unless method_name
170
231
 
171
- if object_key.present?
172
- build_object(value)
173
- else
174
- build_value(key, value)
175
- end
232
+ send(method_name, key, value)
176
233
  end
177
234
  end
178
235
 
179
- def build_object(value)
236
+ def build_object(_key, value)
237
+ return unless hyrax_record.respond_to?(value['object'])
238
+
180
239
  data = hyrax_record.send(value['object'])
181
240
  return if data.empty?
182
241
 
@@ -185,6 +244,8 @@ module Bulkrax
185
244
  end
186
245
 
187
246
  def build_value(key, value)
247
+ return unless hyrax_record.respond_to?(key.to_s)
248
+
188
249
  data = hyrax_record.send(key.to_s)
189
250
  if data.is_a?(ActiveTriples::Relation)
190
251
  if value['join']
@@ -217,6 +278,14 @@ module Bulkrax
217
278
  end
218
279
 
219
280
  def object_metadata(data)
281
+ # NOTE: What is `d` in this case:
282
+ #
283
+ # "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
284
+ #
285
+ # The above is a stringified version of a Ruby Array of Hashes. Using eval is a very bad idea as it
286
+ # will execute the value of `d` within the full Ruby interpreter context.
287
+ #
288
+ # TODO: Would it be possible to store this as a non-string? Maybe the actual Ruby Array and Hash?
220
289
  data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
221
290
 
222
291
  data.each_with_index do |obj, index|
@@ -18,18 +18,9 @@ module Bulkrax
18
18
 
19
19
  def export
20
20
  current_run && setup_export_path
21
- case self.export_from
22
- when 'collection'
23
- create_from_collection
24
- when 'importer'
25
- create_from_importer
26
- when 'worktype'
27
- create_from_worktype
28
- when 'all'
29
- create_from_all
30
- end
21
+ send("create_from_#{self.export_from}")
31
22
  rescue StandardError => e
32
- status_info(e)
23
+ set_status_info(e)
33
24
  end
34
25
 
35
26
  # #export_source accessors
@@ -139,7 +130,7 @@ module Bulkrax
139
130
  end
140
131
 
141
132
  def export_properties
142
- properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
133
+ properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
143
134
  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
144
135
  end
145
136
 
@@ -160,7 +160,7 @@ module Bulkrax
160
160
  types = types_array || DEFAULT_OBJECT_TYPES
161
161
  parser.create_objects(types)
162
162
  rescue StandardError => e
163
- status_info(e)
163
+ set_status_info(e)
164
164
  end
165
165
 
166
166
  # Prepend the base_url to ensure unique set identifiers
@@ -6,6 +6,6 @@ module Bulkrax
6
6
 
7
7
  # Ideally we wouldn't have a column named "order", as it is a reserved SQL term. However, if we
8
8
  # quote the column, all is well...for the application.
9
- scope :ordered, -> { order("#{quoted_table_name}.#{connection.quote_column_name('order')}") }
9
+ scope :ordered, -> { order(Arel.sql("#{quoted_table_name}.#{connection.quote_column_name('order')}")) }
10
10
  end
11
11
  end
@@ -42,7 +42,7 @@ module Bulkrax
42
42
  @available_work_types ||= if defined?(::Hyku)
43
43
  ::Site.instance.available_works.map(&:constantize)
44
44
  else
45
- ::Hyrax.config.curation_concerns
45
+ Bulkrax.curation_concerns
46
46
  end
47
47
  end
48
48
  end
@@ -1,4 +1,6 @@
1
1
  # frozen_string_literal: true
2
+ require 'marcel'
3
+
2
4
  module Bulkrax
3
5
  module ExportBehavior
4
6
  extend ActiveSupport::Concern
@@ -10,9 +12,9 @@ module Bulkrax
10
12
  rescue RSolr::Error::Http, CollectionsCreatedError => e
11
13
  raise e
12
14
  rescue StandardError => e
13
- status_info(e)
15
+ set_status_info(e)
14
16
  else
15
- status_info
17
+ set_status_info
16
18
  end
17
19
 
18
20
  def build_export_metadata
@@ -27,8 +29,8 @@ module Bulkrax
27
29
  def filename(file_set)
28
30
  return if file_set.original_file.blank?
29
31
  fn = file_set.original_file.file_name.first
30
- mime = Mime::Type.lookup(file_set.original_file.mime_type)
31
- ext_mime = MIME::Types.of(file_set.original_file.file_name).first
32
+ mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
33
+ ext_mime = ::Marcel::MimeType.for(file_set.original_file.file_name)
32
34
  if fn.include?(file_set.id) || importerexporter.metadata_only?
33
35
  filename = "#{fn}.#{mime.to_sym}"
34
36
  filename = fn if mime.to_s == ext_mime.to_s
@@ -147,6 +147,7 @@ module Bulkrax
147
147
  %W[
148
148
  file
149
149
  remote_files
150
+ rights_statement
150
151
  #{related_parents_parsed_mapping}
151
152
  #{related_children_parsed_mapping}
152
153
  ]
@@ -11,16 +11,16 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
- add_user_to_permission_templates! if self.class.to_s.include?("Collection")
14
+ add_user_to_permission_templates! if self.class.to_s.include?("Collection") && defined?(::Hyrax)
15
15
  parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
16
16
  child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
17
17
  end
18
18
  rescue RSolr::Error::Http, CollectionsCreatedError => e
19
19
  raise e
20
20
  rescue StandardError => e
21
- status_info(e)
21
+ set_status_info(e)
22
22
  else
23
- status_info
23
+ set_status_info
24
24
  ensure
25
25
  self.save!
26
26
  end
@@ -93,6 +93,8 @@ module Bulkrax
93
93
  end
94
94
 
95
95
  def add_admin_set_id
96
+ return unless defined?(::Hyrax)
97
+
96
98
  self.parsed_metadata['admin_set_id'] = importerexporter.admin_set_id if self.parsed_metadata['admin_set_id'].blank?
97
99
  end
98
100
 
@@ -165,6 +167,7 @@ module Bulkrax
165
167
  # @param field [String] name of the controlled property
166
168
  # @return [Boolean] provided value is a present, active authority ID for the provided field
167
169
  def active_id_for_authority?(value, field)
170
+ return false unless defined?(::Hyrax)
168
171
  field_service = ('Hyrax::' + "#{field}_service".camelcase).constantize
169
172
  active_authority_ids = field_service.new.active_elements.map { |ae| ae['id'] }
170
173
 
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'marcel'
2
3
 
3
4
  module Bulkrax
4
5
  module ImporterExporterBehavior
@@ -50,7 +51,14 @@ module Bulkrax
50
51
 
51
52
  # Is this a zip file?
52
53
  def zip?
53
- parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
54
+ filename = parser_fields&.[]('import_file_path')
55
+ return false unless filename
56
+ return false unless File.file?(filename)
57
+ returning_value = false
58
+ File.open(filename) do |file|
59
+ returning_value = ::Marcel::MimeType.for(file).include?('application/zip')
60
+ end
61
+ returning_value
54
62
  end
55
63
  end
56
64
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  require 'zip'
3
+ require 'marcel'
3
4
 
4
5
  module Bulkrax
5
6
  # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -10,10 +11,11 @@ module Bulkrax
10
11
  alias importer importerexporter
11
12
  alias exporter importerexporter
12
13
  delegate :only_updates, :limit, :current_run, :errors, :mapping,
13
- :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
14
- :key_without_numbers, :status, :status_info, :status_at,
15
- :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
16
- to: :importerexporter
14
+ :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
15
+ :key_without_numbers, :status, :set_status_info, :status_info, :status_at,
16
+ :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
17
+ :zip?, :file?,
18
+ to: :importerexporter
17
19
 
18
20
  # @todo Convert to `class_attribute :parser_fields, default: {}`
19
21
  def self.parser_fields
@@ -275,10 +277,16 @@ module Bulkrax
275
277
 
276
278
  # @return [Array<String>]
277
279
  def required_elements
280
+ matched_elements = ((importerexporter.mapping.keys || []) & (Bulkrax.required_elements || []))
281
+ unless matched_elements.count == Bulkrax.required_elements.count
282
+ missing_elements = Bulkrax.required_elements - matched_elements
283
+ error_alert = "Missing mapping for at least one required element, missing mappings are: #{missing_elements.join(', ')}"
284
+ raise StandardError, error_alert
285
+ end
278
286
  if Bulkrax.fill_in_blank_source_identifiers
279
- ['title']
287
+ Bulkrax.required_elements
280
288
  else
281
- ['title', source_identifier]
289
+ Bulkrax.required_elements + [source_identifier]
282
290
  end
283
291
  end
284
292
 
@@ -351,16 +359,6 @@ module Bulkrax
351
359
  end
352
360
  end
353
361
 
354
- # Is this a file?
355
- def file?
356
- parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
357
- end
358
-
359
- # Is this a zip file?
360
- def zip?
361
- parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
362
- end
363
-
364
362
  # Path for the import
365
363
  # @return [String]
366
364
  def import_file_path
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'bagit'
2
3
 
3
4
  module Bulkrax
4
5
  class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
@@ -11,7 +12,7 @@ module Bulkrax
11
12
  def valid_import?
12
13
  return true if import_fields.present?
13
14
  rescue => e
14
- status_info(e)
15
+ set_status_info(e)
15
16
  false
16
17
  end
17
18
 
@@ -51,7 +52,7 @@ module Bulkrax
51
52
  record_data = entry_class.data_for_entry(data_row, source_identifier, self)
52
53
  next record_data if importerexporter.metadata_only?
53
54
 
54
- record_data[:file] = bag.bag_files.join('|') if ::Hyrax.config.curation_concerns.include? record_data[:model]&.constantize
55
+ record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
55
56
  record_data
56
57
  end
57
58
  else
@@ -82,19 +83,7 @@ module Bulkrax
82
83
  end
83
84
  importer.record_status
84
85
  rescue StandardError => e
85
- status_info(e)
86
- end
87
-
88
- def total
89
- @total = importer.parser_fields['total'] || 0 if importer?
90
-
91
- @total = if exporter?
92
- limit.nil? || limit.zero? ? current_record_ids.count : limit
93
- end
94
-
95
- return @total || 0
96
- rescue StandardError
97
- @total = 0
86
+ set_status_info(e)
98
87
  end
99
88
 
100
89
  # export methods
@@ -106,9 +95,9 @@ module Bulkrax
106
95
 
107
96
  folder_count = 1
108
97
  records_in_folder = 0
109
- work_entries = importerexporter.entries.where(identifier: @work_ids)
110
- collection_entries = importerexporter.entries.where(identifier: @collection_ids)
111
- file_set_entries = importerexporter.entries.where(identifier: @file_set_ids)
98
+ work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
99
+ collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
100
+ file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
112
101
 
113
102
  work_entries[0..limit || total].each do |entry|
114
103
  record = ActiveFedora::Base.find(entry.identifier)
@@ -144,7 +133,7 @@ module Bulkrax
144
133
  bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
145
134
  rescue => e
146
135
  entry.set_status_info(e)
147
- status_info(e)
136
+ set_status_info(e)
148
137
  end
149
138
  end
150
139
 
@@ -185,6 +174,7 @@ module Bulkrax
185
174
  File.join(path, id)
186
175
  end
187
176
 
177
+ # @todo(bjustice) - remove hyrax reference
188
178
  def write_triples(folder_count, e)
189
179
  sd = SolrDocument.find(e.identifier)
190
180
  return if sd.nil?