bulkrax 6.0.1 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +7 -7
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/datatables.js +139 -0
- data/app/assets/javascripts/bulkrax/exporters.js +4 -4
- data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
- data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
- data/app/controllers/bulkrax/entries_controller.rb +52 -3
- data/app/controllers/bulkrax/exporters_controller.rb +20 -8
- data/app/controllers/bulkrax/importers_controller.rb +31 -12
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/application_helper.rb +7 -3
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +28 -17
- data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
- data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
- data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
- data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
- data/app/jobs/bulkrax/delete_job.rb +8 -3
- data/app/jobs/bulkrax/download_cloud_file_job.rb +17 -4
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +6 -3
- data/app/jobs/bulkrax/import_job.rb +7 -0
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +19 -3
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -3
- data/app/models/bulkrax/csv_entry.rb +9 -7
- data/app/models/bulkrax/entry.rb +9 -11
- data/app/models/bulkrax/exporter.rb +11 -4
- data/app/models/bulkrax/importer.rb +49 -10
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -3
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/status.rb +10 -1
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +5 -3
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +14 -33
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/models/concerns/bulkrax/status_info.rb +8 -0
- data/app/parsers/bulkrax/application_parser.rb +116 -21
- data/app/parsers/bulkrax/bagit_parser.rb +173 -195
- data/app/parsers/bulkrax/csv_parser.rb +15 -57
- data/app/parsers/bulkrax/oai_dc_parser.rb +44 -16
- data/app/parsers/bulkrax/parser_export_record_set.rb +20 -24
- data/app/parsers/bulkrax/xml_parser.rb +18 -23
- data/app/services/bulkrax/factory_class_finder.rb +92 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/_form.html.erb +10 -10
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/index.html.erb +13 -57
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +6 -12
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +8 -2
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
- data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/index.html.erb +20 -64
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +8 -14
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
- data/config/locales/bulkrax.en.yml +7 -0
- data/config/routes.rb +8 -2
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
- data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +107 -19
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +13 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +64 -8
- data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
- data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
@@ -6,7 +6,7 @@ module Bulkrax
|
|
6
6
|
class ImportFileSetJob < ApplicationJob
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
|
-
queue_as
|
9
|
+
queue_as Bulkrax.config.ingest_queue_name
|
10
10
|
|
11
11
|
attr_reader :importer_run_id
|
12
12
|
|
@@ -63,8 +63,11 @@ module Bulkrax
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def check_parent_is_a_work!(parent_identifier)
|
66
|
-
|
67
|
-
|
66
|
+
case parent_record
|
67
|
+
when Bulkrax.collection_model_class, Bulkrax.file_model_class
|
68
|
+
error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not an valid/available work type)
|
69
|
+
raise ::StandardError, error_msg
|
70
|
+
end
|
68
71
|
end
|
69
72
|
|
70
73
|
def find_parent_record(parent_identifier)
|
@@ -2,10 +2,11 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class ImporterJob < ApplicationJob
|
5
|
-
queue_as
|
5
|
+
queue_as Bulkrax.config.ingest_queue_name
|
6
6
|
|
7
7
|
def perform(importer_id, only_updates_since_last_import = false)
|
8
8
|
importer = Importer.find(importer_id)
|
9
|
+
return schedule(importer, Time.zone.now + 3.minutes, 'Rescheduling: cloud files are not ready yet') unless all_files_completed?(importer)
|
9
10
|
|
10
11
|
importer.current_run
|
11
12
|
unzip_imported_file(importer.parser)
|
@@ -16,6 +17,8 @@ module Bulkrax
|
|
16
17
|
importer.set_status_info(e)
|
17
18
|
end
|
18
19
|
|
20
|
+
private
|
21
|
+
|
19
22
|
def import(importer, only_updates_since_last_import)
|
20
23
|
importer.only_updates = only_updates_since_last_import || false
|
21
24
|
return unless importer.valid_import?
|
@@ -36,8 +39,21 @@ module Bulkrax
|
|
36
39
|
importer.current_run.save!
|
37
40
|
end
|
38
41
|
|
39
|
-
def schedule(importer)
|
40
|
-
|
42
|
+
def schedule(importer, wait_until = importer.next_import_at, message = nil)
|
43
|
+
Rails.logger.info message if message
|
44
|
+
ImporterJob.set(wait_until: wait_until).perform_later(importer.id, true)
|
45
|
+
end
|
46
|
+
|
47
|
+
# checks that the sizes of the downloaded cloud files match the sizes of the original files
|
48
|
+
def all_files_completed?(importer)
|
49
|
+
cloud_files = importer.parser_fields['cloud_file_paths']
|
50
|
+
original_files = importer.parser_fields['original_file_paths']
|
51
|
+
return true unless cloud_files.present? && original_files.present?
|
52
|
+
|
53
|
+
imported_file_sizes = cloud_files.map { |_, v| v['file_size'].to_i }
|
54
|
+
original_file_sizes = original_files.map { |imported_file| File.size(imported_file) }
|
55
|
+
|
56
|
+
original_file_sizes == imported_file_sizes
|
41
57
|
end
|
42
58
|
end
|
43
59
|
end
|
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class CsvCollectionEntry < CsvEntry
|
5
|
-
|
6
|
-
Collection
|
7
|
-
end
|
5
|
+
self.default_work_type = Bulkrax.collection_model_class.to_s
|
8
6
|
|
9
7
|
# Use identifier set by CsvParser#unique_collection_identifier, which falls back
|
10
8
|
# on the Collection's first title if record[source_identifier] is not present
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'csv'
|
4
|
-
|
5
3
|
module Bulkrax
|
6
4
|
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
|
7
5
|
# We do too much in these entry classes. We need to extract the common logic from the various
|
@@ -16,11 +14,12 @@ module Bulkrax
|
|
16
14
|
class_attribute(:csv_read_data_options, default: {})
|
17
15
|
|
18
16
|
# there's a risk that this reads the whole file into memory and could cause a memory leak
|
17
|
+
# we strip any special characters out of the headers. looking at you Excel
|
19
18
|
def self.read_data(path)
|
20
19
|
raise StandardError, 'CSV path empty' if path.blank?
|
21
20
|
options = {
|
22
21
|
headers: true,
|
23
|
-
header_converters: ->(h) { h.to_s.strip.to_sym },
|
22
|
+
header_converters: ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym },
|
24
23
|
encoding: 'utf-8'
|
25
24
|
}.merge(csv_read_data_options)
|
26
25
|
|
@@ -105,7 +104,7 @@ module Bulkrax
|
|
105
104
|
end
|
106
105
|
|
107
106
|
def add_metadata_for_model
|
108
|
-
if
|
107
|
+
if factory_class.present? && factory_class == Bulkrax.collection_model_class
|
109
108
|
add_collection_type_gid if defined?(::Hyrax)
|
110
109
|
# add any additional collection metadata methods here
|
111
110
|
elsif factory_class == Bulkrax.file_model_class
|
@@ -145,7 +144,7 @@ module Bulkrax
|
|
145
144
|
self.parsed_metadata = {}
|
146
145
|
|
147
146
|
build_system_metadata
|
148
|
-
build_files_metadata if
|
147
|
+
build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
|
149
148
|
build_relationship_metadata
|
150
149
|
build_mapping_metadata
|
151
150
|
self.save!
|
@@ -157,9 +156,12 @@ module Bulkrax
|
|
157
156
|
def build_system_metadata
|
158
157
|
self.parsed_metadata['id'] = hyrax_record.id
|
159
158
|
source_id = hyrax_record.send(work_identifier)
|
160
|
-
|
159
|
+
# Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
|
160
|
+
source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
|
161
|
+
source_id = Array.wrap(source_id).first
|
161
162
|
self.parsed_metadata[source_identifier] = source_id
|
162
|
-
|
163
|
+
model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
|
164
|
+
self.parsed_metadata[key_for_export('model')] = model_name
|
163
165
|
end
|
164
166
|
|
165
167
|
def build_files_metadata
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -8,6 +8,8 @@ module Bulkrax
|
|
8
8
|
class Entry < ApplicationRecord
|
9
9
|
include Bulkrax::HasMatchers
|
10
10
|
include Bulkrax::ImportBehavior
|
11
|
+
self.class_attribute :default_work_type, default: Bulkrax.default_work_type
|
12
|
+
|
11
13
|
include Bulkrax::ExportBehavior
|
12
14
|
include Bulkrax::StatusInfo
|
13
15
|
include Bulkrax::HasLocalProcessing
|
@@ -101,22 +103,18 @@ module Bulkrax
|
|
101
103
|
self.importerexporter_type == 'Bulkrax::Exporter'
|
102
104
|
end
|
103
105
|
|
104
|
-
def valid_system_id(model_class)
|
105
|
-
return true if model_class.properties.keys.include?(work_identifier)
|
106
|
-
raise(
|
107
|
-
"#{model_class} does not implement the system_identifier_field: #{work_identifier}"
|
108
|
-
)
|
109
|
-
end
|
110
|
-
|
111
106
|
def last_run
|
112
107
|
self.importerexporter&.last_run
|
113
108
|
end
|
114
109
|
|
115
110
|
def find_collection(collection_identifier)
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
111
|
+
Bulkrax.object_factory.search_by_property(
|
112
|
+
klass: Bulkrax.collection_model_class,
|
113
|
+
value: collection_identifier,
|
114
|
+
search_field: work_identifier,
|
115
|
+
name_field: work_identifier,
|
116
|
+
verify_property: true
|
117
|
+
)
|
120
118
|
end
|
121
119
|
end
|
122
120
|
end
|
@@ -23,6 +23,10 @@ module Bulkrax
|
|
23
23
|
set_status_info(e)
|
24
24
|
end
|
25
25
|
|
26
|
+
def remove_and_rerun
|
27
|
+
self.parser_fields['remove_and_rerun']
|
28
|
+
end
|
29
|
+
|
26
30
|
# #export_source accessors
|
27
31
|
# Used in form to prevent it from getting confused as to which value to populate #export_source with.
|
28
32
|
# Also, used to display the correct selected value when rendering edit form.
|
@@ -102,9 +106,12 @@ module Bulkrax
|
|
102
106
|
Importer.all.map { |i| [i.name, i.id] }
|
103
107
|
end
|
104
108
|
|
105
|
-
def current_run
|
109
|
+
def current_run(skip_counts: false)
|
110
|
+
@current_run ||= self.exporter_runs.create! if skip_counts
|
111
|
+
return @current_run if @current_run
|
112
|
+
|
106
113
|
total = self.limit || parser.total
|
107
|
-
@current_run
|
114
|
+
@current_run = self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
|
108
115
|
end
|
109
116
|
|
110
117
|
def last_run
|
@@ -130,8 +137,8 @@ module Bulkrax
|
|
130
137
|
end
|
131
138
|
|
132
139
|
def export_properties
|
133
|
-
|
134
|
-
|
140
|
+
# TODO: Does this work for Valkyrie?
|
141
|
+
Bulkrax.object_factory.export_properties
|
135
142
|
end
|
136
143
|
|
137
144
|
def metadata_only?
|
@@ -1,9 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'iso8601'
|
4
|
-
|
5
3
|
module Bulkrax
|
6
|
-
class Importer < ApplicationRecord
|
4
|
+
class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
|
7
5
|
include Bulkrax::ImporterExporterBehavior
|
8
6
|
include Bulkrax::StatusInfo
|
9
7
|
|
@@ -18,7 +16,7 @@ module Bulkrax
|
|
18
16
|
validates :admin_set_id, presence: true if defined?(::Hyrax)
|
19
17
|
validates :parser_klass, presence: true
|
20
18
|
|
21
|
-
delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
19
|
+
delegate :create_parent_child_relationships, :valid_import?, :write_errored_entries_file, :visibility, to: :parser
|
22
20
|
|
23
21
|
attr_accessor :only_updates, :file_style, :file
|
24
22
|
attr_writer :current_run
|
@@ -103,11 +101,12 @@ module Bulkrax
|
|
103
101
|
frequency.to_seconds != 0
|
104
102
|
end
|
105
103
|
|
106
|
-
def current_run
|
104
|
+
def current_run(skip_counts: false)
|
107
105
|
return @current_run if @current_run.present?
|
108
106
|
|
109
107
|
@current_run = self.importer_runs.create!
|
110
108
|
return @current_run if file? && zip?
|
109
|
+
return @current_run if skip_counts
|
111
110
|
|
112
111
|
entry_counts = {
|
113
112
|
total_work_entries: self.limit || parser.works_total,
|
@@ -123,6 +122,29 @@ module Bulkrax
|
|
123
122
|
@last_run ||= self.importer_runs.last
|
124
123
|
end
|
125
124
|
|
125
|
+
def failed_entries?
|
126
|
+
entries.failed.any?
|
127
|
+
end
|
128
|
+
|
129
|
+
def failed_statuses
|
130
|
+
@failed_statuses ||= Bulkrax::Status.latest_by_statusable
|
131
|
+
.includes(:statusable)
|
132
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Failed')
|
133
|
+
end
|
134
|
+
|
135
|
+
def failed_messages
|
136
|
+
failed_statuses.each_with_object({}) do |e, i|
|
137
|
+
i[e.error_message] ||= []
|
138
|
+
i[e.error_message] << e.id
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def completed_statuses
|
143
|
+
@completed_statuses ||= Bulkrax::Status.latest_by_statusable
|
144
|
+
.includes(:statusable)
|
145
|
+
.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Complete')
|
146
|
+
end
|
147
|
+
|
126
148
|
def seen
|
127
149
|
@seen ||= {}
|
128
150
|
end
|
@@ -135,6 +157,18 @@ module Bulkrax
|
|
135
157
|
self.parser_fields['update_files']
|
136
158
|
end
|
137
159
|
|
160
|
+
def remove_and_rerun
|
161
|
+
self.parser_fields['remove_and_rerun']
|
162
|
+
end
|
163
|
+
|
164
|
+
def metadata_only?
|
165
|
+
parser.parser_fields['metadata_only'] == true
|
166
|
+
end
|
167
|
+
|
168
|
+
def existing_entries?
|
169
|
+
parser.parser_fields['file_style']&.match(/Existing Entries/)
|
170
|
+
end
|
171
|
+
|
138
172
|
def import_works
|
139
173
|
import_objects(['work'])
|
140
174
|
end
|
@@ -157,11 +191,20 @@ module Bulkrax
|
|
157
191
|
self.only_updates ||= false
|
158
192
|
self.save if self.new_record? # Object needs to be saved for statuses
|
159
193
|
types = types_array || DEFAULT_OBJECT_TYPES
|
160
|
-
parser.create_objects(types)
|
194
|
+
existing_entries? ? parser.rebuild_entries(types) : parser.create_objects(types)
|
195
|
+
mark_unseen_as_skipped
|
161
196
|
rescue StandardError => e
|
162
197
|
set_status_info(e)
|
163
198
|
end
|
164
199
|
|
200
|
+
# After an import, any entries we did not touch are marked as skipped.
|
201
|
+
# They are not really pending, complete for the last run, or failed
|
202
|
+
def mark_unseen_as_skipped
|
203
|
+
entries.where.not(identifier: seen.keys).find_each do |entry|
|
204
|
+
entry.set_status_info('Skipped')
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
165
208
|
# Prepend the base_url to ensure unique set identifiers
|
166
209
|
# @todo - move to parser, as this is OAI specific
|
167
210
|
def unique_collection_identifier(id)
|
@@ -192,9 +235,5 @@ module Bulkrax
|
|
192
235
|
rescue
|
193
236
|
"#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
|
194
237
|
end
|
195
|
-
|
196
|
-
def metadata_only?
|
197
|
-
parser.parser_fields['metadata_only'] == true
|
198
|
-
end
|
199
238
|
end
|
200
239
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class RdfCollectionEntry < RdfEntry
|
5
|
+
self.default_work_type = Bulkrax.collection_model_class.to_s
|
5
6
|
def record
|
6
7
|
@record ||= self.raw_metadata
|
7
8
|
end
|
@@ -11,9 +12,5 @@ module Bulkrax
|
|
11
12
|
add_local
|
12
13
|
return self.parsed_metadata
|
13
14
|
end
|
14
|
-
|
15
|
-
def factory_class
|
16
|
-
Collection
|
17
|
-
end
|
18
15
|
end
|
19
16
|
end
|
@@ -1,91 +1,92 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
module Bulkrax
|
5
|
-
|
6
|
-
|
3
|
+
unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
|
4
|
+
module Bulkrax
|
5
|
+
class RdfEntry < Entry
|
6
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
def self.read_data(path)
|
9
|
+
RDF::Reader.open(path)
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
def self.fields_from_data(data)
|
13
|
+
data.predicates.map(&:to_s)
|
14
|
+
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
17
|
+
reader = data
|
18
|
+
format = reader.class.format.to_sym
|
19
|
+
collections = []
|
20
|
+
children = []
|
21
|
+
delete = nil
|
22
|
+
data = RDF::Writer.for(format).buffer do |writer|
|
23
|
+
reader.each_statement do |statement|
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
25
|
+
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
26
|
+
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
27
|
+
writer << statement
|
28
|
+
end
|
28
29
|
end
|
30
|
+
return {
|
31
|
+
source_id => reader.subjects.first.to_s,
|
32
|
+
delete: delete,
|
33
|
+
format: format,
|
34
|
+
data: data,
|
35
|
+
collection: collections,
|
36
|
+
children: children
|
37
|
+
}
|
29
38
|
end
|
30
|
-
return {
|
31
|
-
source_id => reader.subjects.first.to_s,
|
32
|
-
delete: delete,
|
33
|
-
format: format,
|
34
|
-
data: data,
|
35
|
-
collection: collections,
|
36
|
-
children: children
|
37
|
-
}
|
38
|
-
end
|
39
39
|
|
40
|
-
|
41
|
-
|
40
|
+
def self.related_children_parsed_mapping
|
41
|
+
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
|
42
42
|
|
43
|
-
|
44
|
-
|
43
|
+
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
|
44
|
+
return if rdf_related_children_field_mapping.blank?
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
|
47
|
+
end
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
def record
|
50
|
+
@record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
|
51
|
+
end
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
def build_metadata
|
54
|
+
raise StandardError, 'Record not found' if record.nil?
|
55
|
+
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
56
56
|
|
57
|
-
|
58
|
-
|
57
|
+
self.parsed_metadata = {}
|
58
|
+
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
|
59
59
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
60
|
+
record.each_statement do |statement|
|
61
|
+
# Only process the subject for our record (in case other data is in the file)
|
62
|
+
next unless statement.subject.to_s == self.raw_metadata[source_identifier]
|
63
|
+
add_metadata(statement.predicate.to_s, statement.object.to_s)
|
64
|
+
end
|
65
|
+
add_visibility
|
66
|
+
add_rights_statement
|
67
|
+
add_admin_set_id
|
68
|
+
add_collections
|
69
|
+
add_local
|
70
|
+
self.parsed_metadata['file'] = self.raw_metadata['file']
|
71
71
|
|
72
|
-
|
73
|
-
|
72
|
+
self.parsed_metadata
|
73
|
+
end
|
74
74
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
75
|
+
def collections_created?
|
76
|
+
return true if self.raw_metadata['collection'].blank?
|
77
|
+
self.raw_metadata['collection'].length == self.collection_ids.length
|
78
|
+
end
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
80
|
+
def find_collection_ids
|
81
|
+
return self.collection_ids if collections_created?
|
82
|
+
if self.raw_metadata['collection'].present?
|
83
|
+
self.raw_metadata['collection'].each do |collection|
|
84
|
+
c = find_collection(collection)
|
85
|
+
self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
|
86
|
+
end
|
86
87
|
end
|
88
|
+
return self.collection_ids
|
87
89
|
end
|
88
|
-
return self.collection_ids
|
89
90
|
end
|
90
91
|
end
|
91
92
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Bulkrax
|
4
4
|
class Status < ApplicationRecord
|
5
|
-
belongs_to :statusable, polymorphic: true
|
5
|
+
belongs_to :statusable, polymorphic: true, denormalize: { fields: %i[status_message], if: :latest? }
|
6
6
|
belongs_to :runnable, polymorphic: true
|
7
7
|
serialize :error_backtrace, Array
|
8
8
|
|
@@ -21,5 +21,14 @@ module Bulkrax
|
|
21
21
|
status_table.join(latest_status_query.as(latest_status_table.name.to_s), Arel::Nodes::InnerJoin)
|
22
22
|
.on(status_table[:id].eq(latest_status_table[:latest_status_id]))
|
23
23
|
end
|
24
|
+
|
25
|
+
def latest?
|
26
|
+
# TODO: remove this if statement when we stop supporting Hyrax < 4
|
27
|
+
self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0')
|
28
|
+
self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id)
|
29
|
+
else
|
30
|
+
self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pick
|
31
|
+
end
|
32
|
+
end
|
24
33
|
end
|
25
34
|
end
|
@@ -18,9 +18,9 @@ module Bulkrax
|
|
18
18
|
begin
|
19
19
|
# the identifier parameter can be a :source_identifier or the id of an object
|
20
20
|
record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
|
21
|
-
record ||=
|
21
|
+
record ||= Bulkrax.object_factory.find(identifier)
|
22
22
|
# NameError for if ActiveFedora isn't installed
|
23
|
-
rescue NameError, ActiveFedora::ObjectNotFoundError
|
23
|
+
rescue NameError, ActiveFedora::ObjectNotFoundError, Bulkrax::ObjectFactoryInterface::ObjectNotFoundError
|
24
24
|
record = nil
|
25
25
|
end
|
26
26
|
|
@@ -28,22 +28,5 @@ module Bulkrax
|
|
28
28
|
# also accounts for when the found entry isn't a part of this importer
|
29
29
|
record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
|
30
30
|
end
|
31
|
-
|
32
|
-
# Check if the record is a Work
|
33
|
-
def curation_concern?(record)
|
34
|
-
available_work_types.include?(record.class)
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
# @return [Array<Class>] list of work type classes
|
40
|
-
def available_work_types
|
41
|
-
# If running in a Hyku app, do not include disabled work types
|
42
|
-
@available_work_types ||= if defined?(::Hyku)
|
43
|
-
::Site.instance.available_works.map(&:constantize)
|
44
|
-
else
|
45
|
-
Bulkrax.curation_concerns
|
46
|
-
end
|
47
|
-
end
|
48
31
|
end
|
49
32
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'marcel'
|
3
2
|
|
4
3
|
module Bulkrax
|
5
4
|
module ExportBehavior
|
@@ -22,11 +21,12 @@ module Bulkrax
|
|
22
21
|
end
|
23
22
|
|
24
23
|
def hyrax_record
|
25
|
-
@hyrax_record ||=
|
24
|
+
@hyrax_record ||= Bulkrax.object_factory.find(self.identifier)
|
26
25
|
end
|
27
26
|
|
28
27
|
# Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
|
29
28
|
def filename(file_set)
|
29
|
+
# NOTE: Will this work with Valkyrie?
|
30
30
|
return if file_set.original_file.blank?
|
31
31
|
fn = file_set.original_file.file_name.first
|
32
32
|
mime = ::Marcel::MimeType.for(file_set.original_file.mime_type)
|