bulkrax 4.3.0 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/exporters.js +12 -0
- data/app/controllers/bulkrax/entries_controller.rb +5 -0
- data/app/controllers/bulkrax/exporters_controller.rb +5 -0
- data/app/controllers/bulkrax/importers_controller.rb +9 -1
- data/app/factories/bulkrax/object_factory.rb +87 -11
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
- data/app/jobs/bulkrax/import_work_job.rb +23 -13
- data/app/matchers/bulkrax/application_matcher.rb +5 -3
- data/app/models/bulkrax/csv_entry.rb +20 -8
- data/app/models/bulkrax/entry.rb +2 -1
- data/app/models/bulkrax/importer.rb +20 -15
- data/app/models/bulkrax/oai_entry.rb +42 -9
- data/app/models/bulkrax/rdf_entry.rb +1 -1
- data/app/models/bulkrax/xml_entry.rb +54 -12
- data/app/models/concerns/bulkrax/file_factory.rb +9 -3
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -1
- data/app/models/concerns/bulkrax/import_behavior.rb +27 -19
- data/app/parsers/bulkrax/application_parser.rb +90 -13
- data/app/parsers/bulkrax/csv_parser.rb +13 -6
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/xml_parser.rb +6 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +107 -0
- data/app/views/bulkrax/exporters/_form.html.erb +3 -3
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +3 -3
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +9 -5
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +18 -7
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +1 -1
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +1 -1
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +1 -1
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +1 -1
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +1 -1
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +1 -1
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +1 -1
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +1 -1
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +85 -11
- data/lib/generators/bulkrax/install_generator.rb +20 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +13 -1
- data/lib/tasks/reset.rake +65 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49a1f0ccc806cf73a7872a634c8c2819dac62f98f0a9dc163e7ada8931d9b1fe
|
4
|
+
data.tar.gz: ecf5c8ad3e4864110665cfb5c6c5f6329e3b868414815bba163646db839400e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df394e4fbbc6ca0a71eb595c075c5181d24098b1ab889ff185da10991ec01c2eaf60239658564838b67df4c0a421306de923d2eab4bbeffafb31321acb9ba1ff
|
7
|
+
data.tar.gz: 35b691e96d0e59f83efadc35364283239f7f741ab4bf9d859756dacc8483e380090512cd10bf3b6ec4ce81936d00c88eb4d1ed96742c384e0f3082028ffdcf45
|
data/app/assets/javascripts/bulkrax/exporters.js
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
function hideUnhide(field) {
|
2
2
|
var allSources = $('body').find('.export-source-option')
|
3
|
+
removeRequired(allSources)
|
3
4
|
hide(allSources)
|
4
5
|
|
5
6
|
if (field.length > 0) {
|
6
7
|
var selectedSource = $('.' + field)
|
7
8
|
unhideSelected(selectedSource)
|
9
|
+
addRequired(selectedSource)
|
8
10
|
}
|
9
11
|
|
10
12
|
if (field === 'collection') {
|
@@ -12,6 +14,16 @@ function hideUnhide(field) {
|
|
12
14
|
}
|
13
15
|
};
|
14
16
|
|
17
|
+
function addRequired(selectedSource) {
|
18
|
+
selectedSource.addClass('required').attr('required', 'required');
|
19
|
+
selectedSource.parent().addClass('required');
|
20
|
+
}
|
21
|
+
|
22
|
+
function removeRequired(allSources) {
|
23
|
+
allSources.removeClass('required').removeAttr('required');
|
24
|
+
allSources.parent().removeClass('required').removeAttr('required')
|
25
|
+
};
|
26
|
+
|
15
27
|
// hide all export_source
|
16
28
|
function hide(allSources) {
|
17
29
|
allSources.addClass('hidden');
|
data/app/controllers/bulkrax/entries_controller.rb
CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
|
|
7
7
|
class EntriesController < ApplicationController
|
8
8
|
include Hyrax::ThemedLayoutController
|
9
9
|
before_action :authenticate_user!
|
10
|
+
before_action :check_permissions
|
10
11
|
with_themed_layout 'dashboard'
|
11
12
|
|
12
13
|
def show
|
@@ -40,5 +41,9 @@ module Bulkrax
|
|
40
41
|
add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
|
41
42
|
add_breadcrumb @entry.id
|
42
43
|
end
|
44
|
+
|
45
|
+
def check_permissions
|
46
|
+
raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
data/app/controllers/bulkrax/exporters_controller.rb
CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
|
|
7
7
|
include Hyrax::ThemedLayoutController
|
8
8
|
include Bulkrax::DownloadBehavior
|
9
9
|
before_action :authenticate_user!
|
10
|
+
before_action :check_permissions
|
10
11
|
before_action :set_exporter, only: [:show, :edit, :update, :destroy]
|
11
12
|
with_themed_layout 'dashboard'
|
12
13
|
|
@@ -131,5 +132,9 @@ module Bulkrax
|
|
131
132
|
def file_path
|
132
133
|
"#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
|
133
134
|
end
|
135
|
+
|
136
|
+
def check_permissions
|
137
|
+
raise CanCan::AccessDenied unless current_ability.can_export_works?
|
138
|
+
end
|
134
139
|
end
|
135
140
|
end
|
data/app/controllers/bulkrax/importers_controller.rb
CHANGED
@@ -14,6 +14,7 @@ module Bulkrax
|
|
14
14
|
protect_from_forgery unless: -> { api_request? }
|
15
15
|
before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
|
16
16
|
before_action :authenticate_user!, unless: -> { api_request? }
|
17
|
+
before_action :check_permissions
|
17
18
|
before_action :set_importer, only: [:show, :edit, :update, :destroy]
|
18
19
|
with_themed_layout 'dashboard'
|
19
20
|
|
@@ -76,6 +77,9 @@ module Bulkrax
|
|
76
77
|
@importer = Importer.new(importer_params)
|
77
78
|
field_mapping_params
|
78
79
|
@importer.validate_only = true if params[:commit] == 'Create and Validate'
|
80
|
+
# the following line is needed to handle updating remote files of a FileSet
|
81
|
+
# on a new import otherwise it only gets updated during the update path
|
82
|
+
@importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
|
79
83
|
if @importer.save
|
80
84
|
files_for_import(file, cloud_files)
|
81
85
|
if params[:commit] == 'Create and Import'
|
@@ -276,7 +280,7 @@ module Bulkrax
|
|
276
280
|
def setup_client(url)
|
277
281
|
return false if url.nil?
|
278
282
|
headers = { from: Bulkrax.server_name }
|
279
|
-
@client ||= OAI::Client.new(url, headers: headers, parser: 'libxml'
|
283
|
+
@client ||= OAI::Client.new(url, headers: headers, parser: 'libxml')
|
280
284
|
end
|
281
285
|
|
282
286
|
# Download methods
|
@@ -316,6 +320,10 @@ module Bulkrax
|
|
316
320
|
end
|
317
321
|
@importer.save
|
318
322
|
end
|
323
|
+
|
324
|
+
def check_permissions
|
325
|
+
raise CanCan::AccessDenied unless current_ability.can_import_works?
|
326
|
+
end
|
319
327
|
end
|
320
328
|
# rubocop:enable Metrics/ClassLength
|
321
329
|
end
|
data/app/factories/bulkrax/object_factory.rb
CHANGED
@@ -6,6 +6,27 @@ module Bulkrax
|
|
6
6
|
include Bulkrax::FileFactory
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
|
+
# @api private
|
10
|
+
#
|
11
|
+
# These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
|
12
|
+
# have in addition to their specific attributes.
|
13
|
+
#
|
14
|
+
# @return [Array<Symbol>]
|
15
|
+
# @see #permitted_attributes
|
16
|
+
class_attribute :base_permitted_attributes,
|
17
|
+
default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
|
18
|
+
|
19
|
+
# @return [Boolean]
|
20
|
+
#
|
21
|
+
# @example
|
22
|
+
# Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
|
23
|
+
#
|
24
|
+
# @see #transform_attributes
|
25
|
+
# @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
|
26
|
+
# @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
|
27
|
+
# concerning default behavior.
|
28
|
+
class_attribute :transformation_removes_blank_hash_values, default: false
|
29
|
+
|
9
30
|
define_model_callbacks :save, :create
|
10
31
|
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id
|
11
32
|
|
@@ -58,7 +79,7 @@ module Bulkrax
|
|
58
79
|
elsif klass == FileSet
|
59
80
|
update_file_set(attrs)
|
60
81
|
else
|
61
|
-
|
82
|
+
update_work(attrs)
|
62
83
|
end
|
63
84
|
end
|
64
85
|
object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
|
@@ -104,7 +125,7 @@ module Bulkrax
|
|
104
125
|
elsif klass == FileSet
|
105
126
|
create_file_set(attrs)
|
106
127
|
else
|
107
|
-
|
128
|
+
create_work(attrs)
|
108
129
|
end
|
109
130
|
end
|
110
131
|
end
|
@@ -139,6 +160,14 @@ module Bulkrax
|
|
139
160
|
Hyrax::CurationConcern.actor
|
140
161
|
end
|
141
162
|
|
163
|
+
def create_work(attrs)
|
164
|
+
work_actor.create(environment(attrs))
|
165
|
+
end
|
166
|
+
|
167
|
+
def update_work(attrs)
|
168
|
+
work_actor.update(environment(attrs))
|
169
|
+
end
|
170
|
+
|
142
171
|
def create_collection(attrs)
|
143
172
|
attrs = clean_attrs(attrs)
|
144
173
|
attrs = collection_type(attrs)
|
@@ -159,28 +188,55 @@ module Bulkrax
|
|
159
188
|
file_set_attrs = attrs.slice(*object.attributes.keys)
|
160
189
|
object.assign_attributes(file_set_attrs)
|
161
190
|
|
162
|
-
attrs['uploaded_files']
|
191
|
+
attrs['uploaded_files']&.each do |uploaded_file_id|
|
163
192
|
uploaded_file = ::Hyrax::UploadedFile.find(uploaded_file_id)
|
164
193
|
next if uploaded_file.file_set_uri.present?
|
165
194
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
actor.create_content(uploaded_file)
|
171
|
-
actor.attach_to_work(work)
|
195
|
+
create_file_set_actor(attrs, work, work_permissions, uploaded_file)
|
196
|
+
end
|
197
|
+
attrs['remote_files']&.each do |remote_file|
|
198
|
+
create_file_set_actor(attrs, work, work_permissions, nil, remote_file)
|
172
199
|
end
|
173
200
|
|
174
201
|
object.save!
|
175
202
|
end
|
176
203
|
|
204
|
+
def create_file_set_actor(attrs, work, work_permissions, uploaded_file, remote_file = nil)
|
205
|
+
actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
|
206
|
+
uploaded_file&.update(file_set_uri: actor.file_set.uri)
|
207
|
+
actor.file_set.permissions_attributes = work_permissions
|
208
|
+
actor.create_metadata(attrs)
|
209
|
+
actor.create_content(uploaded_file) if uploaded_file
|
210
|
+
actor.attach_to_work(work, attrs)
|
211
|
+
handle_remote_file(remote_file: remote_file, actor: actor, update: false) if remote_file
|
212
|
+
end
|
213
|
+
|
177
214
|
def update_file_set(attrs)
|
178
215
|
file_set_attrs = attrs.slice(*object.attributes.keys)
|
179
216
|
actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
|
180
|
-
|
217
|
+
attrs['remote_files']&.each do |remote_file|
|
218
|
+
handle_remote_file(remote_file: remote_file, actor: actor, update: true)
|
219
|
+
end
|
181
220
|
actor.update_metadata(file_set_attrs)
|
182
221
|
end
|
183
222
|
|
223
|
+
def handle_remote_file(remote_file:, actor:, update: false)
|
224
|
+
actor.file_set.label = remote_file['file_name']
|
225
|
+
actor.file_set.import_url = remote_file['url']
|
226
|
+
|
227
|
+
url = remote_file['url']
|
228
|
+
tmp_file = Tempfile.new(remote_file['file_name'].split('.').first)
|
229
|
+
tmp_file.binmode
|
230
|
+
|
231
|
+
URI.open(url) do |url_file|
|
232
|
+
tmp_file.write(url_file.read)
|
233
|
+
end
|
234
|
+
|
235
|
+
tmp_file.rewind
|
236
|
+
update == true ? actor.update_content(tmp_file) : actor.create_content(tmp_file, from_url: true)
|
237
|
+
tmp_file.close
|
238
|
+
end
|
239
|
+
|
184
240
|
def clean_attrs(attrs)
|
185
241
|
# avoid the "ArgumentError: Identifier must be a string of size > 0 in order to be treeified" error
|
186
242
|
# when setting object.attributes
|
@@ -200,12 +256,32 @@ module Bulkrax
|
|
200
256
|
def transform_attributes(update: false)
|
201
257
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
202
258
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
259
|
+
@transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
|
203
260
|
update ? @transform_attributes.except(:id) : @transform_attributes
|
204
261
|
end
|
205
262
|
|
206
263
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
207
264
|
def permitted_attributes
|
208
|
-
klass.properties.keys.map(&:to_sym) +
|
265
|
+
klass.properties.keys.map(&:to_sym) + base_permitted_attributes
|
266
|
+
end
|
267
|
+
|
268
|
+
# Return a copy of the given attributes, such that all values that are empty or an array of all
|
269
|
+
# empty values are fully emptied. (See implementation details)
|
270
|
+
#
|
271
|
+
# @param attributes [Hash]
|
272
|
+
# @return [Hash]
|
273
|
+
#
|
274
|
+
# @see https://github.com/emory-libraries/dlp-curate/issues/1973
|
275
|
+
def remove_blank_hash_values(attributes)
|
276
|
+
dupe = attributes.dup
|
277
|
+
dupe.each do |key, values|
|
278
|
+
if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
|
279
|
+
dupe[key] = []
|
280
|
+
elsif values.is_a?(String) && values.empty?
|
281
|
+
dupe[key] = nil
|
282
|
+
end
|
283
|
+
end
|
284
|
+
dupe
|
209
285
|
end
|
210
286
|
end
|
211
287
|
end
|
data/app/jobs/bulkrax/create_relationships_job.rb
CHANGED
@@ -81,7 +81,7 @@ module Bulkrax
|
|
81
81
|
# This is adding the reverse relationship, from the child to the parent
|
82
82
|
def collection_parent_work_child
|
83
83
|
child_work_ids = child_records[:works].map(&:id)
|
84
|
-
parent_record.reindex_extent
|
84
|
+
parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
85
85
|
|
86
86
|
parent_record.add_member_objects(child_work_ids)
|
87
87
|
ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations
|
data/app/jobs/bulkrax/import_work_job.rb
CHANGED
@@ -5,30 +5,40 @@ module Bulkrax
|
|
5
5
|
queue_as :import
|
6
6
|
|
7
7
|
# rubocop:disable Rails/SkipsModelValidations
|
8
|
-
def perform(*
|
9
|
-
entry = Entry.find(
|
8
|
+
def perform(entry_id, run_id, time_to_live = 3, *)
|
9
|
+
entry = Entry.find(entry_id)
|
10
|
+
importer_run = ImporterRun.find(run_id)
|
10
11
|
entry.build
|
11
12
|
if entry.status == "Complete"
|
12
|
-
|
13
|
-
|
14
|
-
ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
|
13
|
+
importer_run.increment!(:processed_records)
|
14
|
+
importer_run.increment!(:processed_works)
|
15
15
|
else
|
16
16
|
# do not retry here because whatever parse error kept you from creating a work will likely
|
17
17
|
# keep preventing you from doing so.
|
18
|
-
|
19
|
-
|
20
|
-
ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
|
18
|
+
importer_run.increment!(:failed_records)
|
19
|
+
importer_run.increment!(:failed_works)
|
21
20
|
end
|
21
|
+
# Regardless of completion or not, we want to decrement the enqueued records.
|
22
|
+
importer_run.decrement!(:enqueued_records) unless importer_run.enqueued_records <= 0
|
23
|
+
|
22
24
|
entry.save!
|
23
|
-
entry.importer.current_run =
|
25
|
+
entry.importer.current_run = importer_run
|
24
26
|
entry.importer.record_status
|
25
|
-
rescue Bulkrax::CollectionsCreatedError
|
26
|
-
|
27
|
+
rescue Bulkrax::CollectionsCreatedError => e
|
28
|
+
Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
|
29
|
+
# You get 3 attempts at the above perform before we have the import exception cascade into
|
30
|
+
# the Sidekiq retry ecosystem.
|
31
|
+
# rubocop:disable Style/IfUnlessModifier
|
32
|
+
if time_to_live <= 1
|
33
|
+
raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
|
34
|
+
end
|
35
|
+
# rubocop:enable Style/IfUnlessModifier
|
36
|
+
reschedule(entry_id, run_id, time_to_live)
|
27
37
|
end
|
28
38
|
# rubocop:enable Rails/SkipsModelValidations
|
29
39
|
|
30
|
-
def reschedule(entry_id, run_id)
|
31
|
-
ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
|
40
|
+
def reschedule(entry_id, run_id, time_to_live)
|
41
|
+
ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
|
32
42
|
end
|
33
43
|
end
|
34
44
|
end
|
data/app/matchers/bulkrax/application_matcher.rb
CHANGED
@@ -6,6 +6,10 @@ module Bulkrax
|
|
6
6
|
class ApplicationMatcher
|
7
7
|
attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
|
8
8
|
|
9
|
+
# New parse methods will need to be added here; you'll also want to define a corresponding
|
10
|
+
# "parse_#{field}" method.
|
11
|
+
class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
|
12
|
+
|
9
13
|
def initialize(args)
|
10
14
|
args.each do |k, v|
|
11
15
|
send("#{k}=", v)
|
@@ -30,7 +34,7 @@ module Bulkrax
|
|
30
34
|
|
31
35
|
def process_split
|
32
36
|
if self.split.is_a?(TrueClass)
|
33
|
-
@result = @result.split(
|
37
|
+
@result = @result.split(Bulkrax.multi_value_element_split_on)
|
34
38
|
elsif self.split
|
35
39
|
result = @result.split(Regexp.new(self.split))
|
36
40
|
@result = result.map(&:strip)
|
@@ -38,8 +42,6 @@ module Bulkrax
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def process_parse
|
41
|
-
# New parse methods will need to be added here
|
42
|
-
parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
|
43
45
|
# This accounts for prefixed matchers
|
44
46
|
parser = parsed_fields.find { |field| to&.include? field }
|
45
47
|
|
data/app/models/bulkrax/csv_entry.rb
CHANGED
@@ -7,7 +7,7 @@ module Bulkrax
|
|
7
7
|
# We do too much in these entry classes. We need to extract the common logic from the various
|
8
8
|
# entry models into a module that can be shared between them.
|
9
9
|
class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
|
10
|
-
serialize :raw_metadata,
|
10
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
11
11
|
|
12
12
|
def self.fields_from_data(data)
|
13
13
|
data.headers.flatten.compact.uniq
|
@@ -18,7 +18,7 @@ module Bulkrax
|
|
18
18
|
raise StandardError, 'CSV path empty' if path.blank?
|
19
19
|
CSV.read(path,
|
20
20
|
headers: true,
|
21
|
-
header_converters:
|
21
|
+
header_converters: ->(h) { h.to_sym },
|
22
22
|
encoding: 'utf-8')
|
23
23
|
end
|
24
24
|
|
@@ -36,10 +36,14 @@ module Bulkrax
|
|
36
36
|
|
37
37
|
def build_metadata
|
38
38
|
raise StandardError, 'Record not found' if record.nil?
|
39
|
-
|
39
|
+
unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
|
40
|
+
raise StandardError,
|
41
|
+
"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
|
42
|
+
end
|
40
43
|
|
41
44
|
self.parsed_metadata = {}
|
42
45
|
add_identifier
|
46
|
+
establish_factory_class
|
43
47
|
add_ingested_metadata
|
44
48
|
# TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
|
45
49
|
add_collections
|
@@ -56,6 +60,12 @@ module Bulkrax
|
|
56
60
|
self.parsed_metadata[work_identifier] = [record[source_identifier]]
|
57
61
|
end
|
58
62
|
|
63
|
+
def establish_factory_class
|
64
|
+
parser.model_field_mappings.each do |key|
|
65
|
+
add_metadata('model', record[key]) if record.key?(key)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
59
69
|
def add_metadata_for_model
|
60
70
|
if factory_class == Collection
|
61
71
|
add_collection_type_gid
|
@@ -81,7 +91,7 @@ module Bulkrax
|
|
81
91
|
def add_file
|
82
92
|
self.parsed_metadata['file'] ||= []
|
83
93
|
if record['file']&.is_a?(String)
|
84
|
-
self.parsed_metadata['file'] = record['file'].split(
|
94
|
+
self.parsed_metadata['file'] = record['file'].split(Bulkrax.multi_value_element_split_on)
|
85
95
|
elsif record['file'].is_a?(Array)
|
86
96
|
self.parsed_metadata['file'] = record['file']
|
87
97
|
end
|
@@ -107,7 +117,9 @@ module Bulkrax
|
|
107
117
|
# Metadata required by Bulkrax for round-tripping
|
108
118
|
def build_system_metadata
|
109
119
|
self.parsed_metadata['id'] = hyrax_record.id
|
110
|
-
|
120
|
+
source_id = hyrax_record.send(work_identifier)
|
121
|
+
source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
|
122
|
+
self.parsed_metadata[source_identifier] = source_id
|
111
123
|
self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
|
112
124
|
end
|
113
125
|
|
@@ -176,7 +188,7 @@ module Bulkrax
|
|
176
188
|
data = hyrax_record.send(key.to_s)
|
177
189
|
if data.is_a?(ActiveTriples::Relation)
|
178
190
|
if value['join']
|
179
|
-
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(
|
191
|
+
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(Bulkrax.multi_value_element_join_on).to_s
|
180
192
|
else
|
181
193
|
data.each_with_index do |d, i|
|
182
194
|
self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
|
@@ -236,7 +248,7 @@ module Bulkrax
|
|
236
248
|
|
237
249
|
def handle_join_on_export(key, values, join)
|
238
250
|
if join
|
239
|
-
parsed_metadata[key] = values.join(
|
251
|
+
parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
|
240
252
|
else
|
241
253
|
values.each_with_index do |value, i|
|
242
254
|
parsed_metadata["#{key}_#{i + 1}"] = value
|
@@ -260,7 +272,7 @@ module Bulkrax
|
|
260
272
|
return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?
|
261
273
|
|
262
274
|
identifiers = []
|
263
|
-
split_references = record[parent_field_mapping].split(
|
275
|
+
split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
|
264
276
|
split_references.each do |c_reference|
|
265
277
|
matching_collection_entries = importerexporter.entries.select do |e|
|
266
278
|
(e.raw_metadata&.[](source_identifier) == c_reference) &&
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,7 @@ module Bulkrax
|
|
4
4
|
# Custom error class for collections_created?
|
5
5
|
class CollectionsCreatedError < RuntimeError; end
|
6
6
|
class OAIError < RuntimeError; end
|
7
|
+
|
7
8
|
class Entry < ApplicationRecord
|
8
9
|
include Bulkrax::HasMatchers
|
9
10
|
include Bulkrax::ImportBehavior
|
@@ -15,7 +16,7 @@ module Bulkrax
|
|
15
16
|
alias importer importerexporter
|
16
17
|
alias exporter importerexporter
|
17
18
|
|
18
|
-
serialize :parsed_metadata,
|
19
|
+
serialize :parsed_metadata, Bulkrax::NormalizedJson
|
19
20
|
# Do not serialize raw_metadata as so we can support xml or other formats
|
20
21
|
serialize :collection_ids, Array
|
21
22
|
|
data/app/models/bulkrax/importer.rb
CHANGED
@@ -58,17 +58,26 @@ module Bulkrax
|
|
58
58
|
|
59
59
|
# If field_mapping is empty, setup a default based on the export_properties
|
60
60
|
def mapping
|
61
|
+
# rubocop:disable Style/IfUnlessModifier
|
61
62
|
@mapping ||= if self.field_mapping.blank? || self.field_mapping == [{}]
|
62
63
|
if parser.import_fields.present? || self.field_mapping == [{}]
|
63
|
-
|
64
|
-
parser.import_fields.reject(&:nil?).map do |m|
|
65
|
-
Bulkrax.default_field_mapping.call(m)
|
66
|
-
end.inject(:merge)
|
67
|
-
)
|
64
|
+
default_field_mapping
|
68
65
|
end
|
69
66
|
else
|
70
|
-
self.field_mapping
|
67
|
+
default_field_mapping.merge(self.field_mapping)
|
71
68
|
end
|
69
|
+
|
70
|
+
# rubocop:enable Style/IfUnlessModifier
|
71
|
+
end
|
72
|
+
|
73
|
+
def default_field_mapping
|
74
|
+
return self.field_mapping if parser.import_fields.nil?
|
75
|
+
|
76
|
+
ActiveSupport::HashWithIndifferentAccess.new(
|
77
|
+
parser.import_fields.reject(&:nil?).map do |m|
|
78
|
+
Bulkrax.default_field_mapping.call(m)
|
79
|
+
end.inject(:merge)
|
80
|
+
)
|
72
81
|
end
|
73
82
|
|
74
83
|
def parser_fields
|
@@ -143,17 +152,13 @@ module Bulkrax
|
|
143
152
|
import_objects(['relationship'])
|
144
153
|
end
|
145
154
|
|
155
|
+
DEFAULT_OBJECT_TYPES = %w[collection work file_set relationship].freeze
|
156
|
+
|
146
157
|
def import_objects(types_array = nil)
|
147
158
|
self.only_updates ||= false
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
else
|
152
|
-
types.each do |object_type|
|
153
|
-
self.save if self.new_record? # Object needs to be saved for statuses
|
154
|
-
parser.send("create_#{object_type.pluralize}")
|
155
|
-
end
|
156
|
-
end
|
159
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
160
|
+
types = types_array || DEFAULT_OBJECT_TYPES
|
161
|
+
parser.create_objects(types)
|
157
162
|
rescue StandardError => e
|
158
163
|
status_info(e)
|
159
164
|
end
|
data/app/models/bulkrax/oai_entry.rb
CHANGED
@@ -5,7 +5,7 @@ require 'ostruct'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class OaiEntry < Entry
|
8
|
-
serialize :raw_metadata,
|
8
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
9
9
|
|
10
10
|
delegate :record, to: :raw_record
|
11
11
|
|
@@ -28,13 +28,16 @@ module Bulkrax
|
|
28
28
|
def build_metadata
|
29
29
|
self.parsed_metadata = {}
|
30
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
31
|
+
self.raw_metadata = { xml: record.metadata.to_s }
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
# We need to establish the #factory_class before we proceed with the metadata. See
|
34
|
+
# https://github.com/samvera-labs/bulkrax/issues/702 for further details.
|
35
|
+
#
|
36
|
+
# tl;dr - if we don't have the right factory_class we might skip properties that are
|
37
|
+
# specifically assigned to the factory class
|
38
|
+
establish_factory_class
|
39
|
+
add_metadata_from_record
|
40
|
+
add_thumbnail_url
|
38
41
|
|
39
42
|
add_visibility
|
40
43
|
add_rights_statement
|
@@ -53,18 +56,48 @@ module Bulkrax
|
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
59
|
+
# To ensure we capture the correct parse data, we first need to establish the factory_class.
|
60
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/702
|
61
|
+
def establish_factory_class
|
62
|
+
model_field_names = parser.model_field_mappings
|
63
|
+
|
64
|
+
each_candidate_metadata_node do |node|
|
65
|
+
next unless model_field_names.include?(node.name)
|
66
|
+
add_metadata(node.name, node.content)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def add_metadata_from_record
|
71
|
+
each_candidate_metadata_node do |node|
|
72
|
+
add_metadata(node.name, node.content)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# A method that you could override to better handle the shape of the record's metadata.
|
77
|
+
# @yieldparam node [Object<#name, #content>]
|
78
|
+
def each_candidate_metadata_node
|
79
|
+
record.metadata.children.each do |child|
|
80
|
+
child.children.each do |node|
|
81
|
+
yield(node)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def add_thumbnail_url
|
87
|
+
add_metadata('thumbnail_url', thumbnail_url)
|
88
|
+
end
|
89
|
+
|
56
90
|
# Retrieve list of collections for the entry; add to collection_ids
|
57
91
|
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
92
|
# in this case, if 'All' is selected, records will not be added to a collection.
|
59
93
|
def find_collection_ids
|
60
94
|
return self.collection_ids if collections_created?
|
61
95
|
if sets.blank? || parser.collection_name != 'all'
|
62
|
-
# c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
|
63
96
|
collection = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
|
64
97
|
self.collection_ids << collection.id if collection.present? && !self.collection_ids.include?(collection.id)
|
65
98
|
else # All - collections should exist for all sets
|
66
99
|
sets.each do |set|
|
67
|
-
c =
|
100
|
+
c = find_collection(importerexporter.unique_collection_identifier(set.content))
|
68
101
|
self.collection_ids << c.id if c.present? && !self.collection_ids.include?(c.id)
|
69
102
|
end
|
70
103
|
end
|