bulkrax 4.3.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/exporters.js +12 -0
- data/app/controllers/bulkrax/entries_controller.rb +5 -0
- data/app/controllers/bulkrax/exporters_controller.rb +5 -0
- data/app/controllers/bulkrax/importers_controller.rb +9 -1
- data/app/factories/bulkrax/object_factory.rb +87 -11
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
- data/app/jobs/bulkrax/import_work_job.rb +23 -13
- data/app/matchers/bulkrax/application_matcher.rb +5 -3
- data/app/models/bulkrax/csv_entry.rb +20 -8
- data/app/models/bulkrax/entry.rb +2 -1
- data/app/models/bulkrax/importer.rb +20 -15
- data/app/models/bulkrax/oai_entry.rb +42 -9
- data/app/models/bulkrax/rdf_entry.rb +1 -1
- data/app/models/bulkrax/xml_entry.rb +54 -12
- data/app/models/concerns/bulkrax/file_factory.rb +9 -3
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -1
- data/app/models/concerns/bulkrax/import_behavior.rb +27 -19
- data/app/parsers/bulkrax/application_parser.rb +90 -13
- data/app/parsers/bulkrax/csv_parser.rb +13 -6
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/xml_parser.rb +6 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +107 -0
- data/app/views/bulkrax/exporters/_form.html.erb +3 -3
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +3 -3
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +9 -5
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +18 -7
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +1 -1
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +1 -1
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +1 -1
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +1 -1
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +1 -1
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +1 -1
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +1 -1
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +1 -1
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +85 -11
- data/lib/generators/bulkrax/install_generator.rb +20 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +13 -1
- data/lib/tasks/reset.rake +65 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 49a1f0ccc806cf73a7872a634c8c2819dac62f98f0a9dc163e7ada8931d9b1fe
+  data.tar.gz: ecf5c8ad3e4864110665cfb5c6c5f6329e3b868414815bba163646db839400e7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: df394e4fbbc6ca0a71eb595c075c5181d24098b1ab889ff185da10991ec01c2eaf60239658564838b67df4c0a421306de923d2eab4bbeffafb31321acb9ba1ff
+  data.tar.gz: 35b691e96d0e59f83efadc35364283239f7f741ab4bf9d859756dacc8483e380090512cd10bf3b6ec4ce81936d00c88eb4d1ed96742c384e0f3082028ffdcf45
data/app/assets/javascripts/bulkrax/exporters.js
CHANGED
@@ -1,10 +1,12 @@
 function hideUnhide(field) {
   var allSources = $('body').find('.export-source-option')
+  removeRequired(allSources)
   hide(allSources)

   if (field.length > 0) {
     var selectedSource = $('.' + field)
     unhideSelected(selectedSource)
+    addRequired(selectedSource)
   }

   if (field === 'collection') {
@@ -12,6 +14,16 @@ function hideUnhide(field) {
   }
 };

+function addRequired(selectedSource) {
+  selectedSource.addClass('required').attr('required', 'required');
+  selectedSource.parent().addClass('required');
+}
+
+function removeRequired(allSources) {
+  allSources.removeClass('required').removeAttr('required');
+  allSources.parent().removeClass('required').removeAttr('required')
+};
+
 // hide all export_source
 function hide(allSources) {
   allSources.addClass('hidden');
data/app/controllers/bulkrax/entries_controller.rb
CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
   class EntriesController < ApplicationController
     include Hyrax::ThemedLayoutController
     before_action :authenticate_user!
+    before_action :check_permissions
     with_themed_layout 'dashboard'

     def show
@@ -40,5 +41,9 @@ module Bulkrax
       add_breadcrumb @exporter.name, bulkrax.exporter_path(@exporter.id)
       add_breadcrumb @entry.id
     end
+
+    def check_permissions
+      raise CanCan::AccessDenied unless current_ability.can_import_works? || current_ability.can_export_works?
+    end
   end
 end
data/app/controllers/bulkrax/exporters_controller.rb
CHANGED
@@ -7,6 +7,7 @@ module Bulkrax
     include Hyrax::ThemedLayoutController
     include Bulkrax::DownloadBehavior
     before_action :authenticate_user!
+    before_action :check_permissions
     before_action :set_exporter, only: [:show, :edit, :update, :destroy]
     with_themed_layout 'dashboard'

@@ -131,5 +132,9 @@ module Bulkrax
     def file_path
       "#{@exporter.exporter_export_zip_path}/#{params['exporter']['exporter_export_zip_files']}"
     end
+
+    def check_permissions
+      raise CanCan::AccessDenied unless current_ability.can_export_works?
+    end
   end
 end
data/app/controllers/bulkrax/importers_controller.rb
CHANGED
@@ -14,6 +14,7 @@ module Bulkrax
     protect_from_forgery unless: -> { api_request? }
     before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete]
     before_action :authenticate_user!, unless: -> { api_request? }
+    before_action :check_permissions
     before_action :set_importer, only: [:show, :edit, :update, :destroy]
     with_themed_layout 'dashboard'

@@ -76,6 +77,9 @@ module Bulkrax
       @importer = Importer.new(importer_params)
       field_mapping_params
       @importer.validate_only = true if params[:commit] == 'Create and Validate'
+      # the following line is needed to handle updating remote files of a FileSet
+      # on a new import otherwise it only gets updated during the update path
+      @importer.parser_fields['update_files'] = true if params[:commit] == 'Create and Import'
       if @importer.save
         files_for_import(file, cloud_files)
         if params[:commit] == 'Create and Import'
@@ -276,7 +280,7 @@ module Bulkrax
     def setup_client(url)
       return false if url.nil?
       headers = { from: Bulkrax.server_name }
-      @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml'
+      @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml')
     end

     # Download methods
@@ -316,6 +320,10 @@ module Bulkrax
       end
       @importer.save
     end
+
+    def check_permissions
+      raise CanCan::AccessDenied unless current_ability.can_import_works?
+    end
   end
   # rubocop:enable Metrics/ClassLength
 end
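All three controllers now gate access with a check_permissions callback that defers to current_ability.can_import_works? and can_export_works?. As a rough sketch (the predicate names come from the diffs above; the ability body and the admin? check are assumptions, not Bulkrax or Hyrax code), a host application could scope Bulkrax to administrators like this:

  # app/abilities/ability.rb (hypothetical host-app override)
  class Ability
    include Hydra::Ability
    include Hyrax::Ability

    # Bulkrax's controllers call these predicates before every action.
    def can_import_works?
      current_user.admin? # assumption: only admins may run importers
    end

    def can_export_works?
      current_user.admin? # assumption: only admins may run exporters
    end
  end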
data/app/factories/bulkrax/object_factory.rb
CHANGED
@@ -6,6 +6,27 @@ module Bulkrax
     include Bulkrax::FileFactory
     include DynamicRecordLookup

+    # @api private
+    #
+    # These are the attributes that we assume all "work type" classes (e.g. the given :klass) will
+    # have in addition to their specific attributes.
+    #
+    # @return [Array<Symbol>]
+    # @see #permitted_attributes
+    class_attribute :base_permitted_attributes,
+                    default: %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id]
+
+    # @return [Boolean]
+    #
+    # @example
+    #   Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true
+    #
+    # @see #transform_attributes
+    # @see https://github.com/samvera-labs/bulkrax/pull/708 For discussion concerning this feature
+    # @see https://github.com/samvera-labs/bulkrax/wiki/Interacting-with-Metadata For documentation
+    #   concerning default behavior.
+    class_attribute :transformation_removes_blank_hash_values, default: false
+
     define_model_callbacks :save, :create
     attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id

@@ -58,7 +79,7 @@ module Bulkrax
         elsif klass == FileSet
           update_file_set(attrs)
         else
-
+          update_work(attrs)
         end
       end
       object.apply_depositor_metadata(@user) && object.save! if object.depositor.nil?
@@ -104,7 +125,7 @@ module Bulkrax
         elsif klass == FileSet
           create_file_set(attrs)
         else
-
+          create_work(attrs)
         end
       end
     end
@@ -139,6 +160,14 @@ module Bulkrax
       Hyrax::CurationConcern.actor
     end

+    def create_work(attrs)
+      work_actor.create(environment(attrs))
+    end
+
+    def update_work(attrs)
+      work_actor.update(environment(attrs))
+    end
+
     def create_collection(attrs)
       attrs = clean_attrs(attrs)
       attrs = collection_type(attrs)
@@ -159,28 +188,55 @@ module Bulkrax
       file_set_attrs = attrs.slice(*object.attributes.keys)
       object.assign_attributes(file_set_attrs)

-      attrs['uploaded_files']
+      attrs['uploaded_files']&.each do |uploaded_file_id|
         uploaded_file = ::Hyrax::UploadedFile.find(uploaded_file_id)
         next if uploaded_file.file_set_uri.present?

-
-
-
-
-        actor.create_content(uploaded_file)
-        actor.attach_to_work(work)
+        create_file_set_actor(attrs, work, work_permissions, uploaded_file)
+      end
+      attrs['remote_files']&.each do |remote_file|
+        create_file_set_actor(attrs, work, work_permissions, nil, remote_file)
       end

       object.save!
     end

+    def create_file_set_actor(attrs, work, work_permissions, uploaded_file, remote_file = nil)
+      actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
+      uploaded_file&.update(file_set_uri: actor.file_set.uri)
+      actor.file_set.permissions_attributes = work_permissions
+      actor.create_metadata(attrs)
+      actor.create_content(uploaded_file) if uploaded_file
+      actor.attach_to_work(work, attrs)
+      handle_remote_file(remote_file: remote_file, actor: actor, update: false) if remote_file
+    end
+
     def update_file_set(attrs)
       file_set_attrs = attrs.slice(*object.attributes.keys)
       actor = ::Hyrax::Actors::FileSetActor.new(object, @user)
-
+      attrs['remote_files']&.each do |remote_file|
+        handle_remote_file(remote_file: remote_file, actor: actor, update: true)
+      end
       actor.update_metadata(file_set_attrs)
     end

+    def handle_remote_file(remote_file:, actor:, update: false)
+      actor.file_set.label = remote_file['file_name']
+      actor.file_set.import_url = remote_file['url']
+
+      url = remote_file['url']
+      tmp_file = Tempfile.new(remote_file['file_name'].split('.').first)
+      tmp_file.binmode
+
+      URI.open(url) do |url_file|
+        tmp_file.write(url_file.read)
+      end
+
+      tmp_file.rewind
+      update == true ? actor.update_content(tmp_file) : actor.create_content(tmp_file, from_url: true)
+      tmp_file.close
+    end
+
     def clean_attrs(attrs)
       # avoid the "ArgumentError: Identifier must be a string of size > 0 in order to be treeified" error
       # when setting object.attributes
@@ -200,12 +256,32 @@ module Bulkrax
     def transform_attributes(update: false)
       @transform_attributes = attributes.slice(*permitted_attributes)
       @transform_attributes.merge!(file_attributes(update_files)) if with_files
+      @transform_attributes = remove_blank_hash_values(@transform_attributes) if transformation_removes_blank_hash_values?
       update ? @transform_attributes.except(:id) : @transform_attributes
     end

     # Regardless of what the Parser gives us, these are the properties we are prepared to accept.
     def permitted_attributes
-      klass.properties.keys.map(&:to_sym) +
+      klass.properties.keys.map(&:to_sym) + base_permitted_attributes
+    end
+
+    # Return a copy of the given attributes, such that all values that are empty or an array of all
+    # empty values are fully emptied. (See implementation details)
+    #
+    # @param attributes [Hash]
+    # @return [Hash]
+    #
+    # @see https://github.com/emory-libraries/dlp-curate/issues/1973
+    def remove_blank_hash_values(attributes)
+      dupe = attributes.dup
+      dupe.each do |key, values|
+        if values.is_a?(Array) && values.all? { |value| value.is_a?(String) && value.empty? }
+          dupe[key] = []
+        elsif values.is_a?(String) && values.empty?
+          dupe[key] = nil
+        end
+      end
+      dupe
     end
   end
 end
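The two new class_attributes are intended to be set by the host application, typically from an initializer. A minimal sketch (the attribute names and the @example come from the diff above; the initializer placement, the extra attribute, and the sample hash are illustrative assumptions):

  # config/initializers/bulkrax.rb (sketch)
  Bulkrax::ObjectFactory.transformation_removes_blank_hash_values = true

  # base_permitted_attributes can likewise be extended if every work type in the
  # application accepts an additional attribute (hypothetical example):
  Bulkrax::ObjectFactory.base_permitted_attributes += [:rights_notes]

  # With the flag enabled, transform_attributes runs remove_blank_hash_values, so
  #   { 'title' => ['A Title'], 'creator' => ['', ''], 'description' => '' }
  # becomes
  #   { 'title' => ['A Title'], 'creator' => [], 'description' => nil }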
data/app/jobs/bulkrax/create_relationships_job.rb
CHANGED
@@ -81,7 +81,7 @@ module Bulkrax
     # This is adding the reverse relationship, from the child to the parent
     def collection_parent_work_child
       child_work_ids = child_records[:works].map(&:id)
-      parent_record.reindex_extent
+      parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)

       parent_record.add_member_objects(child_work_ids)
       ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations
data/app/jobs/bulkrax/import_work_job.rb
CHANGED
@@ -5,30 +5,40 @@ module Bulkrax
     queue_as :import

     # rubocop:disable Rails/SkipsModelValidations
-    def perform(*
-      entry = Entry.find(
+    def perform(entry_id, run_id, time_to_live = 3, *)
+      entry = Entry.find(entry_id)
+      importer_run = ImporterRun.find(run_id)
       entry.build
       if entry.status == "Complete"
-
-
-        ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
+        importer_run.increment!(:processed_records)
+        importer_run.increment!(:processed_works)
       else
         # do not retry here because whatever parse error kept you from creating a work will likely
         # keep preventing you from doing so.
-
-
-        ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
+        importer_run.increment!(:failed_records)
+        importer_run.increment!(:failed_works)
       end
+      # Regardless of completion or not, we want to decrement the enqueued records.
+      importer_run.decrement!(:enqueued_records) unless importer_run.enqueued_records <= 0
+
       entry.save!
-      entry.importer.current_run =
+      entry.importer.current_run = importer_run
       entry.importer.record_status
-    rescue Bulkrax::CollectionsCreatedError
-
+    rescue Bulkrax::CollectionsCreatedError => e
+      Rails.logger.warn("#{self.class} entry_id: #{entry_id}, run_id: #{run_id} encountered #{e.class}: #{e.message}")
+      # You get 3 attempts at the above perform before we have the import exception cascade into
+      # the Sidekiq retry ecosystem.
+      # rubocop:disable Style/IfUnlessModifier
+      if time_to_live <= 1
+        raise "Exhauted reschedule limit for #{self.class} entry_id: #{entry_id}, run_id: #{run_id}. Attemping retries"
+      end
+      # rubocop:enable Style/IfUnlessModifier
+      reschedule(entry_id, run_id, time_to_live)
     end
     # rubocop:enable Rails/SkipsModelValidations

-    def reschedule(entry_id, run_id)
-      ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id)
+    def reschedule(entry_id, run_id, time_to_live)
+      ImportWorkJob.set(wait: 1.minute).perform_later(entry_id, run_id, time_to_live - 1)
     end
   end
 end
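The job's signature changed from a bare splat to explicit arguments, so callers now pass the entry id, the importer run id, and an optional time_to_live (defaulting to 3). A sketch of the resulting behaviour, using the standard ActiveJob API shown in the diff:

  # Enqueue as before; time_to_live can be omitted.
  Bulkrax::ImportWorkJob.perform_later(entry.id, importer_run.id)

  # If entry.build raises Bulkrax::CollectionsCreatedError, the job logs a warning and
  # re-enqueues itself with time_to_live - 1 after a one-minute wait; once time_to_live
  # reaches 1 it raises instead, handing the failure to the queue adapter's retry logic.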
data/app/matchers/bulkrax/application_matcher.rb
CHANGED
@@ -6,6 +6,10 @@ module Bulkrax
   class ApplicationMatcher
     attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type

+    # New parse methods will need to be added here; you'll also want to define a corresponding
+    # "parse_#{field}" method.
+    class_attribute :parsed_fields, instance_writer: false, default: ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
+
     def initialize(args)
       args.each do |k, v|
         send("#{k}=", v)
@@ -30,7 +34,7 @@ module Bulkrax

     def process_split
       if self.split.is_a?(TrueClass)
-        @result = @result.split(
+        @result = @result.split(Bulkrax.multi_value_element_split_on)
       elsif self.split
         result = @result.split(Regexp.new(self.split))
         @result = result.map(&:strip)
@@ -38,8 +42,6 @@ module Bulkrax
     end

     def process_parse
-      # New parse methods will need to be added here
-      parsed_fields = ['remote_files', 'language', 'subject', 'types', 'model', 'resource_type', 'format_original']
       # This accounts for prefixed matchers
       parser = parsed_fields.find { |field| to&.include? field }

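Because parsed_fields is now a class_attribute (with instance_writer: false), an application can register additional parsed fields without overriding process_parse. A sketch, where the field name and the parse method body are hypothetical; per the comment in the diff, each registered field needs a matching parse_#{field} method:

  # config/initializers/bulkrax.rb (sketch)
  Bulkrax::ApplicationMatcher.parsed_fields += ['rights_statement']

  Bulkrax::ApplicationMatcher.class_eval do
    # Hypothetical parser; existing parse_* methods receive the raw source value.
    def parse_rights_statement(src)
      src.to_s.strip
    end
  end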
data/app/models/bulkrax/csv_entry.rb
CHANGED
@@ -7,7 +7,7 @@ module Bulkrax
   # We do too much in these entry classes. We need to extract the common logic from the various
   # entry models into a module that can be shared between them.
   class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
-    serialize :raw_metadata,
+    serialize :raw_metadata, Bulkrax::NormalizedJson

     def self.fields_from_data(data)
       data.headers.flatten.compact.uniq
@@ -18,7 +18,7 @@ module Bulkrax
       raise StandardError, 'CSV path empty' if path.blank?
       CSV.read(path,
                headers: true,
-               header_converters:
+               header_converters: ->(h) { h.to_sym },
               encoding: 'utf-8')
     end

@@ -36,10 +36,14 @@ module Bulkrax

     def build_metadata
       raise StandardError, 'Record not found' if record.nil?
-
+      unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
+        raise StandardError,
+              "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
+      end

       self.parsed_metadata = {}
       add_identifier
+      establish_factory_class
       add_ingested_metadata
       # TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
       add_collections
@@ -56,6 +60,12 @@ module Bulkrax
       self.parsed_metadata[work_identifier] = [record[source_identifier]]
     end

+    def establish_factory_class
+      parser.model_field_mappings.each do |key|
+        add_metadata('model', record[key]) if record.key?(key)
+      end
+    end
+
     def add_metadata_for_model
       if factory_class == Collection
         add_collection_type_gid
@@ -81,7 +91,7 @@ module Bulkrax
     def add_file
       self.parsed_metadata['file'] ||= []
       if record['file']&.is_a?(String)
-        self.parsed_metadata['file'] = record['file'].split(
+        self.parsed_metadata['file'] = record['file'].split(Bulkrax.multi_value_element_split_on)
       elsif record['file'].is_a?(Array)
         self.parsed_metadata['file'] = record['file']
       end
@@ -107,7 +117,9 @@ module Bulkrax
     # Metadata required by Bulkrax for round-tripping
     def build_system_metadata
       self.parsed_metadata['id'] = hyrax_record.id
-
+      source_id = hyrax_record.send(work_identifier)
+      source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
+      self.parsed_metadata[source_identifier] = source_id
       self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
     end

@@ -176,7 +188,7 @@ module Bulkrax
       data = hyrax_record.send(key.to_s)
       if data.is_a?(ActiveTriples::Relation)
         if value['join']
-          self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(
+          self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(Bulkrax.multi_value_element_join_on).to_s
         else
           data.each_with_index do |d, i|
             self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
@@ -236,7 +248,7 @@ module Bulkrax

     def handle_join_on_export(key, values, join)
       if join
-        parsed_metadata[key] = values.join(
+        parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
       else
         values.each_with_index do |value, i|
           parsed_metadata["#{key}_#{i + 1}"] = value
@@ -260,7 +272,7 @@ module Bulkrax
       return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?

       identifiers = []
-      split_references = record[parent_field_mapping].split(
+      split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
       split_references.each do |c_reference|
         matching_collection_entries = importerexporter.entries.select do |e|
           (e.raw_metadata&.[](source_identifier) == c_reference) &&
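Several hard-coded split and join characters are replaced by the new Bulkrax.multi_value_element_split_on and Bulkrax.multi_value_element_join_on settings (also used by ApplicationMatcher above). A sketch of tuning them from the generated initializer; the values shown are illustrative, not the shipped defaults:

  # config/initializers/bulkrax.rb (sketch)
  Bulkrax.setup do |config|
    config.multi_value_element_split_on = /\s*[;|]\s*/  # splitting incoming multi-value cells
    config.multi_value_element_join_on = ' | '          # joining values on export
  end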
data/app/models/bulkrax/entry.rb
CHANGED
@@ -4,6 +4,7 @@ module Bulkrax
   # Custom error class for collections_created?
   class CollectionsCreatedError < RuntimeError; end
   class OAIError < RuntimeError; end
+
   class Entry < ApplicationRecord
     include Bulkrax::HasMatchers
     include Bulkrax::ImportBehavior
@@ -15,7 +16,7 @@ module Bulkrax
     alias importer importerexporter
     alias exporter importerexporter

-    serialize :parsed_metadata,
+    serialize :parsed_metadata, Bulkrax::NormalizedJson
     # Do not serialize raw_metadata as so we can support xml or other formats
     serialize :collection_ids, Array

data/app/models/bulkrax/importer.rb
CHANGED
@@ -58,17 +58,26 @@

     # If field_mapping is empty, setup a default based on the export_properties
     def mapping
+      # rubocop:disable Style/IfUnlessModifier
       @mapping ||= if self.field_mapping.blank? || self.field_mapping == [{}]
                      if parser.import_fields.present? || self.field_mapping == [{}]
-
-                       parser.import_fields.reject(&:nil?).map do |m|
-                         Bulkrax.default_field_mapping.call(m)
-                       end.inject(:merge)
-                       )
+                       default_field_mapping
                      end
                    else
-                     self.field_mapping
+                     default_field_mapping.merge(self.field_mapping)
                    end
+
+      # rubocop:enable Style/IfUnlessModifier
+    end
+
+    def default_field_mapping
+      return self.field_mapping if parser.import_fields.nil?
+
+      ActiveSupport::HashWithIndifferentAccess.new(
+        parser.import_fields.reject(&:nil?).map do |m|
+          Bulkrax.default_field_mapping.call(m)
+        end.inject(:merge)
+      )
     end

     def parser_fields
@@ -143,17 +152,13 @@
       import_objects(['relationship'])
     end

+    DEFAULT_OBJECT_TYPES = %w[collection work file_set relationship].freeze
+
     def import_objects(types_array = nil)
       self.only_updates ||= false
-
-
-
-      else
-        types.each do |object_type|
-          self.save if self.new_record? # Object needs to be saved for statuses
-          parser.send("create_#{object_type.pluralize}")
-        end
-      end
+      self.save if self.new_record? # Object needs to be saved for statuses
+      types = types_array || DEFAULT_OBJECT_TYPES
+      parser.create_objects(types)
     rescue StandardError => e
       status_info(e)
     end
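The practical effect of the mapping change is that a partial field_mapping no longer replaces the generated defaults; it is merged over them, so unmapped import fields keep their Bulkrax.default_field_mapping entries. Roughly (the hash shape follows ApplicationMatcher's accessors; the values are illustrative):

  importer.field_mapping
  # => { 'title' => { 'from' => ['dc_title'], 'split' => '\|' } }

  importer.mapping
  # => a HashWithIndifferentAccess with a default entry for every parser.import_field,
  #    where only 'title' is overridden by the custom mapping above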
data/app/models/bulkrax/oai_entry.rb
CHANGED
@@ -5,7 +5,7 @@ require 'ostruct'

 module Bulkrax
   class OaiEntry < Entry
-    serialize :raw_metadata,
+    serialize :raw_metadata, Bulkrax::NormalizedJson

     delegate :record, to: :raw_record

@@ -28,13 +28,16 @@ module Bulkrax
     def build_metadata
       self.parsed_metadata = {}
       self.parsed_metadata[work_identifier] = [record.header.identifier]
+      self.raw_metadata = { xml: record.metadata.to_s }

-
-
-
-
-
-
+      # We need to establish the #factory_class before we proceed with the metadata. See
+      # https://github.com/samvera-labs/bulkrax/issues/702 for further details.
+      #
+      # tl;dr - if we don't have the right factory_class we might skip properties that are
+      # specifically assigned to the factory class
+      establish_factory_class
+      add_metadata_from_record
+      add_thumbnail_url

       add_visibility
       add_rights_statement
@@ -53,18 +56,48 @@ module Bulkrax
       end
     end

+    # To ensure we capture the correct parse data, we first need to establish the factory_class.
+    # @see https://github.com/samvera-labs/bulkrax/issues/702
+    def establish_factory_class
+      model_field_names = parser.model_field_mappings
+
+      each_candidate_metadata_node do |node|
+        next unless model_field_names.include?(node.name)
+        add_metadata(node.name, node.content)
+      end
+    end
+
+    def add_metadata_from_record
+      each_candidate_metadata_node do |node|
+        add_metadata(node.name, node.content)
+      end
+    end
+
+    # A method that you could override to better handle the shape of the record's metadata.
+    # @yieldparam node [Object<#name, #content>]
+    def each_candidate_metadata_node
+      record.metadata.children.each do |child|
+        child.children.each do |node|
+          yield(node)
+        end
+      end
+    end
+
+    def add_thumbnail_url
+      add_metadata('thumbnail_url', thumbnail_url)
+    end
+
     # Retrieve list of collections for the entry; add to collection_ids
     # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
     # in this case, if 'All' is selected, records will not be added to a collection.
     def find_collection_ids
       return self.collection_ids if collections_created?
       if sets.blank? || parser.collection_name != 'all'
-        # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
         collection = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
         self.collection_ids << collection.id if collection.present? && !self.collection_ids.include?(collection.id)
       else # All - collections should exist for all sets
         sets.each do |set|
-          c =
+          c = find_collection(importerexporter.unique_collection_identifier(set.content))
           self.collection_ids << c.id if c.present? && !self.collection_ids.include?(c.id)
         end
       end
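each_candidate_metadata_node is explicitly offered as an override point for OAI feeds whose metadata is shaped differently. A hypothetical subclass (the class name and the deeper nesting are assumptions, not part of the gem):

  class DeeplyNestedOaiEntry < Bulkrax::OaiEntry
    # Yield nodes that sit one level deeper than the default implementation expects.
    def each_candidate_metadata_node
      record.metadata.children.each do |wrapper|
        wrapper.children.each do |group|
          group.children.each { |node| yield(node) }
        end
      end
    end
  end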
|