bulkrax 6.0.1 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +7 -7
  3. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  4. data/app/assets/javascripts/bulkrax/datatables.js +139 -0
  5. data/app/assets/javascripts/bulkrax/exporters.js +4 -4
  6. data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
  7. data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
  8. data/app/controllers/bulkrax/entries_controller.rb +52 -3
  9. data/app/controllers/bulkrax/exporters_controller.rb +20 -8
  10. data/app/controllers/bulkrax/importers_controller.rb +31 -12
  11. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
  12. data/app/factories/bulkrax/object_factory.rb +135 -163
  13. data/app/factories/bulkrax/object_factory_interface.rb +491 -0
  14. data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
  15. data/app/helpers/bulkrax/application_helper.rb +7 -3
  16. data/app/helpers/bulkrax/importers_helper.rb +1 -1
  17. data/app/helpers/bulkrax/validation_helper.rb +4 -4
  18. data/app/jobs/bulkrax/create_relationships_job.rb +28 -17
  19. data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
  20. data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
  21. data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
  22. data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
  23. data/app/jobs/bulkrax/delete_job.rb +8 -3
  24. data/app/jobs/bulkrax/download_cloud_file_job.rb +17 -4
  25. data/app/jobs/bulkrax/import_collection_job.rb +1 -1
  26. data/app/jobs/bulkrax/import_file_set_job.rb +6 -3
  27. data/app/jobs/bulkrax/import_job.rb +7 -0
  28. data/app/jobs/bulkrax/import_work_job.rb +1 -1
  29. data/app/jobs/bulkrax/importer_job.rb +19 -3
  30. data/app/matchers/bulkrax/application_matcher.rb +0 -2
  31. data/app/models/bulkrax/csv_collection_entry.rb +1 -3
  32. data/app/models/bulkrax/csv_entry.rb +9 -7
  33. data/app/models/bulkrax/entry.rb +9 -11
  34. data/app/models/bulkrax/exporter.rb +11 -4
  35. data/app/models/bulkrax/importer.rb +49 -10
  36. data/app/models/bulkrax/oai_entry.rb +0 -3
  37. data/app/models/bulkrax/oai_set_entry.rb +1 -3
  38. data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
  39. data/app/models/bulkrax/rdf_entry.rb +70 -69
  40. data/app/models/bulkrax/status.rb +10 -1
  41. data/app/models/bulkrax/xml_entry.rb +0 -1
  42. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
  43. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  44. data/app/models/concerns/bulkrax/file_factory.rb +174 -118
  45. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -3
  46. data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
  47. data/app/models/concerns/bulkrax/import_behavior.rb +14 -33
  48. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
  49. data/app/models/concerns/bulkrax/status_info.rb +8 -0
  50. data/app/parsers/bulkrax/application_parser.rb +116 -21
  51. data/app/parsers/bulkrax/bagit_parser.rb +173 -195
  52. data/app/parsers/bulkrax/csv_parser.rb +15 -57
  53. data/app/parsers/bulkrax/oai_dc_parser.rb +44 -16
  54. data/app/parsers/bulkrax/parser_export_record_set.rb +20 -24
  55. data/app/parsers/bulkrax/xml_parser.rb +18 -23
  56. data/app/services/bulkrax/factory_class_finder.rb +92 -0
  57. data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
  58. data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
  59. data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
  60. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
  61. data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
  62. data/app/views/bulkrax/entries/show.html.erb +9 -8
  63. data/app/views/bulkrax/exporters/_form.html.erb +10 -10
  64. data/app/views/bulkrax/exporters/edit.html.erb +1 -1
  65. data/app/views/bulkrax/exporters/index.html.erb +13 -57
  66. data/app/views/bulkrax/exporters/new.html.erb +1 -1
  67. data/app/views/bulkrax/exporters/show.html.erb +6 -12
  68. data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
  69. data/app/views/bulkrax/importers/_csv_fields.html.erb +8 -2
  70. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
  71. data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
  72. data/app/views/bulkrax/importers/edit.html.erb +1 -1
  73. data/app/views/bulkrax/importers/index.html.erb +20 -64
  74. data/app/views/bulkrax/importers/new.html.erb +1 -1
  75. data/app/views/bulkrax/importers/show.html.erb +8 -14
  76. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
  77. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
  78. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
  79. data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
  80. data/config/locales/bulkrax.en.yml +7 -0
  81. data/config/routes.rb +8 -2
  82. data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
  83. data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
  84. data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
  85. data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
  86. data/lib/bulkrax/engine.rb +23 -0
  87. data/lib/bulkrax/version.rb +1 -1
  88. data/lib/bulkrax.rb +107 -19
  89. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
  90. data/lib/tasks/bulkrax_tasks.rake +13 -0
  91. data/lib/tasks/reset.rake +4 -4
  92. metadata +64 -8
  93. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
  94. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
  95. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
@@ -6,7 +6,7 @@ module Bulkrax
6
6
  class ImportFileSetJob < ApplicationJob
7
7
  include DynamicRecordLookup
8
8
 
9
- queue_as :import
9
+ queue_as Bulkrax.config.ingest_queue_name
10
10
 
11
11
  attr_reader :importer_run_id
12
12
 
@@ -63,8 +63,11 @@ module Bulkrax
63
63
  end
64
64
 
65
65
  def check_parent_is_a_work!(parent_identifier)
66
- error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
67
- raise ::StandardError, error_msg unless curation_concern?(parent_record)
66
+ case parent_record
67
+ when Bulkrax.collection_model_class, Bulkrax.file_model_class
68
+ error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
69
+ raise ::StandardError, error_msg
70
+ end
68
71
  end
69
72
 
70
73
  def find_parent_record(parent_identifier)
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class ImportJob < ApplicationJob
5
+ queue_as :import
6
+ end
7
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImportWorkJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
8
  #
@@ -2,10 +2,11 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImporterJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
9
+ return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
9
10
 
10
11
  importer.current_run
11
12
  unzip_imported_file(importer.parser)
@@ -16,6 +17,8 @@ module Bulkrax
16
17
  importer.set_status_info(e)
17
18
  end
18
19
 
20
+ private
21
+
19
22
  def import(importer, only_updates_since_last_import)
20
23
  importer.only_updates = only_updates_since_last_import || false
21
24
  return unless importer.valid_import?
@@ -36,8 +39,21 @@ module Bulkrax
36
39
  importer.current_run.save!
37
40
  end
38
41
 
39
- def schedule(importer)
40
- ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true)
42
+ def schedule(importer, wait_until = importer.next_import_at, message = nil)
43
+ Rails.logger.info message if message
44
+ ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
45
+ end
46
+
47
+ # checks that the sizes of the downloaded files match the sizes of the original files
48
+ def all_files_completed?(importer)
49
+ cloud_files = importer.parser_fields['cloud_file_paths']
50
+ original_files = importer.parser_fields['original_file_paths']
51
+ return true unless cloud_files.present? && original_files.present?
52
+
53
+ imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
54
+ original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
55
+
56
+ original_file_sizes == imported_file_sizes
41
57
  end
42
58
  end
43
59
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'language_list'
4
-
5
3
  module Bulkrax
6
4
  class ApplicationMatcher
7
5
  attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
@@ -2,9 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvCollectionEntry < CsvEntry
5
- def factory_class
6
- Collection
7
- end
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
8
6
 
9
7
  # Use identifier set by CsvParser#unique_collection_identifier, which falls back
10
8
  # on the Collection's first title if record[source_identifier] is not present
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
-
5
3
  module Bulkrax
6
4
  # TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
7
5
  # We do too much in these entry classes. We need to extract the common logic from the various
@@ -16,11 +14,12 @@ module Bulkrax
16
14
  class_attribute(:csv_read_data_options, default: {})
17
15
 
18
16
  # there's a risk that this reads the whole file into memory and could cause a memory leak
17
+ # we strip any special characters out of the headers. looking at you Excel
19
18
  def self.read_data(path)
20
19
  raise StandardError, 'CSV path empty' if path.blank?
21
20
  options = {
22
21
  headers: true,
23
- header_converters: ->(h) { h.to_s.strip.to_sym },
22
+ header_converters: ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym },
24
23
  encoding: 'utf-8'
25
24
  }.merge(csv_read_data_options)
26
25
 
@@ -105,7 +104,7 @@ module Bulkrax
105
104
  end
106
105
 
107
106
  def add_metadata_for_model
108
- if defined?(::Collection) && factory_class == ::Collection
107
+ if factory_class.present? && factory_class == Bulkrax.collection_model_class
109
108
  add_collection_type_gid if defined?(::Hyrax)
110
109
  # add any additional collection metadata methods here
111
110
  elsif factory_class == Bulkrax.file_model_class
@@ -145,7 +144,7 @@ module Bulkrax
145
144
  self.parsed_metadata = {}
146
145
 
147
146
  build_system_metadata
148
- build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
147
+ build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
149
148
  build_relationship_metadata
150
149
  build_mapping_metadata
151
150
  self.save!
@@ -157,9 +156,12 @@ module Bulkrax
157
156
  def build_system_metadata
158
157
  self.parsed_metadata['id'] = hyrax_record.id
159
158
  source_id = hyrax_record.send(work_identifier)
160
- source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
159
+ # Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
160
+ source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
161
+ source_id = Array.wrap(source_id).first
161
162
  self.parsed_metadata[source_identifier] = source_id
162
- self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
163
+ model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
164
+ self.parsed_metadata[key_for_export('model')] = model_name
163
165
  end
164
166
 
165
167
  def build_files_metadata
@@ -8,6 +8,8 @@ module Bulkrax
8
8
  class Entry < ApplicationRecord
9
9
  include Bulkrax::HasMatchers
10
10
  include Bulkrax::ImportBehavior
11
+ self.class_attribute :default_work_type, default: Bulkrax.default_work_type
12
+
11
13
  include Bulkrax::ExportBehavior
12
14
  include Bulkrax::StatusInfo
13
15
  include Bulkrax::HasLocalProcessing
@@ -101,22 +103,18 @@ module Bulkrax
101
103
  self.importerexporter_type == 'Bulkrax::Exporter'
102
104
  end
103
105
 
104
- def valid_system_id(model_class)
105
- return true if model_class.properties.keys.include?(work_identifier)
106
- raise(
107
- "#{model_class} does not implement the system_identifier_field: #{work_identifier}"
108
- )
109
- end
110
-
111
106
  def last_run
112
107
  self.importerexporter&.last_run
113
108
  end
114
109
 
115
110
  def find_collection(collection_identifier)
116
- return unless Collection.properties.keys.include?(work_identifier)
117
- Collection.where(
118
- work_identifier => collection_identifier
119
- ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
111
+ Bulkrax.object_factory.search_by_property(
112
+ klass: Bulkrax.collection_model_class,
113
+ value: collection_identifier,
114
+ search_field: work_identifier,
115
+ name_field: work_identifier,
116
+ verify_property: true
117
+ )
120
118
  end
121
119
  end
122
120
  end
@@ -23,6 +23,10 @@ module Bulkrax
23
23
  set_status_info(e)
24
24
  end
25
25
 
26
+ def remove_and_rerun
27
+ self.parser_fields['remove_and_rerun']
28
+ end
29
+
26
30
  # #export_source accessors
27
31
  # Used in form to prevent it from getting confused as to which value to populate #export_source with.
28
32
  # Also, used to display the correct selected value when rendering edit form.
@@ -102,9 +106,12 @@ module Bulkrax
102
106
  Importer.all.map { |i| [i.name, i.id] }
103
107
  end
104
108
 
105
- def current_run
109
+ def current_run(skip_counts: false)
110
+ @current_run ||= self.exporter_runs.create! if skip_counts
111
+ return @current_run if @current_run
112
+
106
113
  total = self.limit || parser.total
107
- @current_run ||= self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
114
+ @current_run = self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
108
115
  end
109
116
 
110
117
  def last_run
@@ -130,8 +137,8 @@ module Bulkrax
130
137
  end
131
138
 
132
139
  def export_properties
133
- properties = Bulkrax.curation_concerns.map { |work| work.properties.keys }.flatten.uniq.sort
134
- properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
140
+ # TODO: Does this work for Valkyrie?
141
+ Bulkrax.object_factory.export_properties
135
142
  end
136
143
 
137
144
  def metadata_only?
@@ -1,9 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'iso8601'
4
-
5
3
  module Bulkrax
6
- class Importer < ApplicationRecord
4
+ class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
7
5
  include Bulkrax::ImporterExporterBehavior
8
6
  include Bulkrax::StatusInfo
9
7
 
@@ -18,7 +16,7 @@ module Bulkrax
18
16
  validates :admin_set_id, presence: true if defined?(::Hyrax)
19
17
  validates :parser_klass, presence: true
20
18
 
21
- delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
19
+ delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
22
20
 
23
21
  attr_accessor :only_updates, :file_style, :file
24
22
  attr_writer :current_run
@@ -103,11 +101,12 @@ module Bulkrax
103
101
  frequency.to_seconds != 0
104
102
  end
105
103
 
106
- def current_run
104
+ def current_run(skip_counts: false)
107
105
  return @current_run if @current_run.present?
108
106
 
109
107
  @current_run = self.importer_runs.create!
110
108
  return @current_run if file? && zip?
109
+ return @current_run if skip_counts
111
110
 
112
111
  entry_counts = {
113
112
  total_work_entries: self.limit || parser.works_total,
@@ -123,6 +122,29 @@ module Bulkrax
123
122
  @last_run ||= self.importer_runs.last
124
123
  end
125
124
 
125
+ def failed_entries?
126
+ entries.failed.any?
127
+ end
128
+
129
+ def failed_statuses
130
+ @failed_statuses ||= Bulkrax::Status.latest_by_statusable
131
+ .includes(:statusable)
132
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Failed')
133
+ end
134
+
135
+ def failed_messages
136
+ failed_statuses.each_with_object({}) do |e, i|
137
+ i[e.error_message] ||= []
138
+ i[e.error_message] << e.id
139
+ end
140
+ end
141
+
142
+ def completed_statuses
143
+ @completed_statuses ||= Bulkrax::Status.latest_by_statusable
144
+ .includes(:statusable)
145
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Complete')
146
+ end
147
+
126
148
  def seen
127
149
  @seen ||= {}
128
150
  end
@@ -135,6 +157,18 @@ module Bulkrax
135
157
  self.parser_fields['update_files']
136
158
  end
137
159
 
160
+ def remove_and_rerun
161
+ self.parser_fields['remove_and_rerun']
162
+ end
163
+
164
+ def metadata_only?
165
+ parser.parser_fields['metadata_only'] == true
166
+ end
167
+
168
+ def existing_entries?
169
+ parser.parser_fields['file_style']&.match(/Existing Entries/)
170
+ end
171
+
138
172
  def import_works
139
173
  import_objects(['work'])
140
174
  end
@@ -157,11 +191,20 @@ module Bulkrax
157
191
  self.only_updates ||= false
158
192
  self.save if self.new_record? # Object needs to be saved for statuses
159
193
  types = types_array || DEFAULT_OBJECT_TYPES
160
- parser.create_objects(types)
194
+ existing_entries? ? parser.rebuild_entries(types) : parser.create_objects(types)
195
+ mark_unseen_as_skipped
161
196
  rescue StandardError => e
162
197
  set_status_info(e)
163
198
  end
164
199
 
200
+ # After an import any entries we did not touch are skipped.
201
+ # They are not really pending, complete for the last run, or failed
202
+ def mark_unseen_as_skipped
203
+ entries.where.not(identifier: seen.keys).find_each do |entry|
204
+ entry.set_status_info('Skipped')
205
+ end
206
+ end
207
+
165
208
  # Prepend the base_url to ensure unique set identifiers
166
209
  # @todo - move to parser, as this is OAI specific
167
210
  def unique_collection_identifier(id)
@@ -192,9 +235,5 @@ module Bulkrax
192
235
  rescue
193
236
  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
194
237
  end
195
-
196
- def metadata_only?
197
- parser.parser_fields['metadata_only'] == true
198
- end
199
238
  end
200
239
  end
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'erb'
4
- require 'ostruct'
5
-
6
3
  module Bulkrax
7
4
  class OaiEntry < Entry
8
5
  serialize :raw_metadata, Bulkrax::NormalizedJson
@@ -2,9 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class OaiSetEntry < OaiEntry
5
- def factory_class
6
- Collection
7
- end
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
8
6
 
9
7
  def build_metadata
10
8
  self.parsed_metadata = self.raw_metadata
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class RdfCollectionEntry < RdfEntry
5
+ self.default_work_type = Bulkrax.collection_model_class.to_s
5
6
  def record
6
7
  @record ||= self.raw_metadata
7
8
  end
@@ -11,9 +12,5 @@ module Bulkrax
11
12
  add_local
12
13
  return self.parsed_metadata
13
14
  end
14
-
15
- def factory_class
16
- Collection
17
- end
18
15
  end
19
16
  end
@@ -1,91 +1,92 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rdf'
4
- module Bulkrax
5
- class RdfEntry < Entry
6
- serialize :raw_metadata, Bulkrax::NormalizedJson
3
+ unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
4
+ module Bulkrax
5
+ class RdfEntry < Entry
6
+ serialize :raw_metadata, Bulkrax::NormalizedJson
7
7
 
8
- def self.read_data(path)
9
- RDF::Reader.open(path)
10
- end
8
+ def self.read_data(path)
9
+ RDF::Reader.open(path)
10
+ end
11
11
 
12
- def self.fields_from_data(data)
13
- data.predicates.map(&:to_s)
14
- end
12
+ def self.fields_from_data(data)
13
+ data.predicates.map(&:to_s)
14
+ end
15
15
 
16
- def self.data_for_entry(data, source_id, parser)
17
- reader = data
18
- format = reader.class.format.to_sym
19
- collections = []
20
- children = []
21
- delete = nil
22
- data = RDF::Writer.for(format).buffer do |writer|
23
- reader.each_statement do |statement|
24
- collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
- children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
- delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
- writer << statement
16
+ def self.data_for_entry(data, source_id, parser)
17
+ reader = data
18
+ format = reader.class.format.to_sym
19
+ collections = []
20
+ children = []
21
+ delete = nil
22
+ data = RDF::Writer.for(format).buffer do |writer|
23
+ reader.each_statement do |statement|
24
+ collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
25
+ children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
26
+ delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
27
+ writer << statement
28
+ end
28
29
  end
30
+ return {
31
+ source_id => reader.subjects.first.to_s,
32
+ delete: delete,
33
+ format: format,
34
+ data: data,
35
+ collection: collections,
36
+ children: children
37
+ }
29
38
  end
30
- return {
31
- source_id => reader.subjects.first.to_s,
32
- delete: delete,
33
- format: format,
34
- data: data,
35
- collection: collections,
36
- children: children
37
- }
38
- end
39
39
 
40
- def self.related_children_parsed_mapping
41
- return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
40
+ def self.related_children_parsed_mapping
41
+ return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
42
42
 
43
- rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
- return if rdf_related_children_field_mapping.blank?
43
+ rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
44
+ return if rdf_related_children_field_mapping.blank?
45
45
 
46
- @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
- end
46
+ @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
47
+ end
48
48
 
49
- def record
50
- @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
- end
49
+ def record
50
+ @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
51
+ end
52
52
 
53
- def build_metadata
54
- raise StandardError, 'Record not found' if record.nil?
55
- raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
53
+ def build_metadata
54
+ raise StandardError, 'Record not found' if record.nil?
55
+ raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
56
56
 
57
- self.parsed_metadata = {}
58
- self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
57
+ self.parsed_metadata = {}
58
+ self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
59
59
 
60
- record.each_statement do |statement|
61
- # Only process the subject for our record (in case other data is in the file)
62
- next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
- add_metadata(statement.predicate.to_s, statement.object.to_s)
64
- end
65
- add_visibility
66
- add_rights_statement
67
- add_admin_set_id
68
- add_collections
69
- add_local
70
- self.parsed_metadata['file'] = self.raw_metadata['file']
60
+ record.each_statement do |statement|
61
+ # Only process the subject for our record (in case other data is in the file)
62
+ next unless statement.subject.to_s == self.raw_metadata[source_identifier]
63
+ add_metadata(statement.predicate.to_s, statement.object.to_s)
64
+ end
65
+ add_visibility
66
+ add_rights_statement
67
+ add_admin_set_id
68
+ add_collections
69
+ add_local
70
+ self.parsed_metadata['file'] = self.raw_metadata['file']
71
71
 
72
- self.parsed_metadata
73
- end
72
+ self.parsed_metadata
73
+ end
74
74
 
75
- def collections_created?
76
- return true if self.raw_metadata['collection'].blank?
77
- self.raw_metadata['collection'].length == self.collection_ids.length
78
- end
75
+ def collections_created?
76
+ return true if self.raw_metadata['collection'].blank?
77
+ self.raw_metadata['collection'].length == self.collection_ids.length
78
+ end
79
79
 
80
- def find_collection_ids
81
- return self.collection_ids if collections_created?
82
- if self.raw_metadata['collection'].present?
83
- self.raw_metadata['collection'].each do |collection|
84
- c = find_collection(collection)
85
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
80
+ def find_collection_ids
81
+ return self.collection_ids if collections_created?
82
+ if self.raw_metadata['collection'].present?
83
+ self.raw_metadata['collection'].each do |collection|
84
+ c = find_collection(collection)
85
+ self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
86
+ end
86
87
  end
88
+ return self.collection_ids
87
89
  end
88
- return self.collection_ids
89
90
  end
90
91
  end
91
92
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class Status < ApplicationRecord
5
- belongs_to :statusable, polymorphic: true
5
+ belongs_to :statusable, polymorphic: true, denormalize: { fields: %i[status_message], if: :latest? }
6
6
  belongs_to :runnable, polymorphic: true
7
7
  serialize :error_backtrace, Array
8
8
 
@@ -21,5 +21,14 @@ module Bulkrax
21
21
  status_table.join(latest_status_query.as(latest_status_table.name.to_s), Arel::Nodes::InnerJoin)
22
22
  .on(status_table[:id].eq(latest_status_table[:latest_status_id]))
23
23
  end
24
+
25
+ def latest?
26
+ # TODO: remove if statement when we stop supporting Hyrax < 4
27
+ self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0')
28
+ self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id)
29
+ else
30
+ self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pick
31
+ end
32
+ end
24
33
  end
25
34
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
4
3
  module Bulkrax
5
4
  # Generic XML Entry
6
5
  class XmlEntry < Entry
@@ -18,9 +18,9 @@ module Bulkrax
18
18
  begin
19
19
  # the identifier parameter can be a :source_identifier or the id of an object
20
20
  record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
21
- record ||= ActiveFedora::Base.find(identifier)
21
+ record ||= Bulkrax.object_factory.find(identifier)
22
22
  # NameError is rescued in case ActiveFedora isn't installed
23
- rescue NameError, ActiveFedora::ObjectNotFoundError
23
+ rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
24
24
  record = nil
25
25
  end
26
26
 
@@ -28,22 +28,5 @@ module Bulkrax
28
28
  # also accounts for when the found entry isn't a part of this importer
29
29
  record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
30
30
  end
31
-
32
- # Check if the record is a Work
33
- def curation_concern?(record)
34
- available_work_types.include?(record.class)
35
- end
36
-
37
- private
38
-
39
- # @return [Array<Class>] list of work type classes
40
- def available_work_types
41
- # If running in a Hyku app, do not include disabled work types
42
- @available_work_types ||= if defined?(::Hyku)
43
- ::Site.instance.available_works.map(&:constantize)
44
- else
45
- Bulkrax.curation_concerns
46
- end
47
- end
48
31
  end
49
32
  end
@@ -1,5 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'marcel'
3
2
 
4
3
  module Bulkrax
5
4
  module ExportBehavior
@@ -22,11 +21,12 @@ module Bulkrax
22
21
  end
23
22
 
24
23
  def hyrax_record
25
- @hyrax_record ||= ActiveFedora::Base.find(self.identifier)
24
+ @hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
26
25
  end
27
26
 
28
27
  # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
29
28
  def filename(file_set)
29
+ # NOTE: Will this work with Valkyrie?
30
30
  return if file_set.original_file.blank?
31
31
  fn = file_set.original_file.file_name.first
32
32
  mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)