bulkrax 2.3.0 → 3.0.0.beta3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/factories/bulkrax/object_factory.rb +13 -46
- data/app/jobs/bulkrax/create_relationships_job.rb +58 -66
- data/app/jobs/bulkrax/importer_job.rb +1 -3
- data/app/jobs/bulkrax/schedule_relationships_job.rb +21 -0
- data/app/models/bulkrax/csv_entry.rb +42 -35
- data/app/models/bulkrax/entry.rb +3 -7
- data/app/models/bulkrax/importer.rb +20 -11
- data/app/models/bulkrax/importer_run.rb +2 -0
- data/app/models/bulkrax/oai_entry.rb +0 -4
- data/app/models/bulkrax/pending_relationship.rb +7 -0
- data/app/models/bulkrax/rdf_entry.rb +2 -10
- data/app/models/bulkrax/xml_entry.rb +1 -5
- data/app/models/concerns/bulkrax/export_behavior.rb +3 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +0 -10
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -12
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +2 -2
- data/app/parsers/bulkrax/application_parser.rb +8 -11
- data/app/parsers/bulkrax/bagit_parser.rb +3 -7
- data/app/parsers/bulkrax/csv_parser.rb +36 -77
- data/app/parsers/bulkrax/oai_dc_parser.rb +1 -1
- data/app/parsers/bulkrax/xml_parser.rb +3 -3
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +11 -0
- data/db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb +5 -0
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +5 -0
- data/db/migrate/20220330165510_remove_array_true_from_importer_run_parents_column.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +0 -13
- metadata +10 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 144e5a05fdf0d22faa759b2f8529ec971af9001f10c8bbe74d705d954b85e048
|
4
|
+
data.tar.gz: 569cdb00a7f533e5b0e234079d5e2ae0996f7c5230675fec158ce62c06a82987
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 961ef2fcaccad86d47563de89538446a3757b54cd1abbaabe81e2479b902088ea62a06181fa22819e37df52cd6a32ce4a6c52892cd57ef89983b09a8bea403ef
|
7
|
+
data.tar.gz: 432704c4491eef71eb3a3bc4a294d13b9fa4e08b616c031a71510579919cdde96fa06d697f096016858725f6abf6d4a77414b2533f7aaeaa863e9031725f7ce5
|
@@ -7,20 +7,15 @@ module Bulkrax
|
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
9
|
define_model_callbacks :save, :create
|
10
|
-
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :
|
10
|
+
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping
|
11
11
|
|
12
12
|
# rubocop:disable Metrics/ParameterLists
|
13
|
-
def initialize(attributes:, source_identifier_value:, work_identifier:,
|
14
|
-
ActiveSupport::Deprecation.warn(
|
15
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
16
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
17
|
-
)
|
13
|
+
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
|
18
14
|
@attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
|
19
15
|
@replace_files = replace_files
|
20
16
|
@update_files = update_files
|
21
17
|
@user = user || User.batch_user
|
22
18
|
@work_identifier = work_identifier
|
23
|
-
@collection_field_mapping = collection_field_mapping
|
24
19
|
@related_parents_parsed_mapping = related_parents_parsed_mapping
|
25
20
|
@source_identifier_value = source_identifier_value
|
26
21
|
@klass = klass || Bulkrax.default_work_type.constantize
|
@@ -55,7 +50,7 @@ module Bulkrax
|
|
55
50
|
def update
|
56
51
|
raise "Object doesn't exist" unless object
|
57
52
|
destroy_existing_files if @replace_files && ![Collection, FileSet].include?(klass)
|
58
|
-
attrs =
|
53
|
+
attrs = transform_attributes(update: true)
|
59
54
|
run_callbacks :save do
|
60
55
|
if klass == Collection
|
61
56
|
update_collection(attrs)
|
@@ -97,7 +92,7 @@ module Bulkrax
|
|
97
92
|
# https://github.com/projecthydra/active_fedora/issues/874
|
98
93
|
# 2+ years later, still open!
|
99
94
|
def create
|
100
|
-
attrs =
|
95
|
+
attrs = transform_attributes
|
101
96
|
@object = klass.new
|
102
97
|
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent)
|
103
98
|
run_callbacks :save do
|
@@ -142,25 +137,15 @@ module Bulkrax
|
|
142
137
|
end
|
143
138
|
|
144
139
|
def create_collection(attrs)
|
145
|
-
ActiveSupport::Deprecation.warn(
|
146
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
147
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
148
|
-
)
|
149
140
|
attrs = collection_type(attrs)
|
150
|
-
persist_collection_memberships(parent:
|
151
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
141
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
152
142
|
object.attributes = attrs
|
153
143
|
object.apply_depositor_metadata(@user)
|
154
144
|
object.save!
|
155
145
|
end
|
156
146
|
|
157
147
|
def update_collection(attrs)
|
158
|
-
|
159
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
160
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
161
|
-
)
|
162
|
-
persist_collection_memberships(parent: object, child: find_collection(attributes[:child_collection_id])) if attributes[:child_collection_id].present?
|
163
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
148
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
164
149
|
object.attributes = attrs
|
165
150
|
object.save!
|
166
151
|
end
|
@@ -197,6 +182,10 @@ module Bulkrax
|
|
197
182
|
# Add child to parent's #member_collections
|
198
183
|
# Add parent to child's #member_of_collections
|
199
184
|
def persist_collection_memberships(parent:, child:)
|
185
|
+
parent.reject!(&:blank?) if parent.respond_to?(:reject!)
|
186
|
+
child.reject!(&:blank?) if child.respond_to?(:reject!)
|
187
|
+
return if parent.blank? || child.blank?
|
188
|
+
|
200
189
|
::Hyrax::Collections::NestedCollectionPersistenceService.persist_nested_collection_for(parent: parent, child: child)
|
201
190
|
end
|
202
191
|
|
@@ -205,7 +194,7 @@ module Bulkrax
|
|
205
194
|
when Hash
|
206
195
|
Collection.find(id[:id])
|
207
196
|
when String
|
208
|
-
Collection.find(id)
|
197
|
+
Collection.find(id) if id.present?
|
209
198
|
when Array
|
210
199
|
id.map { |i| find_collection(i) }
|
211
200
|
else
|
@@ -219,34 +208,12 @@ module Bulkrax
|
|
219
208
|
attrs
|
220
209
|
end
|
221
210
|
|
222
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
223
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
224
|
-
def create_attributes
|
225
|
-
return transform_attributes if klass == Collection
|
226
|
-
ActiveSupport::Deprecation.warn(
|
227
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
228
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
229
|
-
)
|
230
|
-
transform_attributes.except(:collections, :collection, collection_field_mapping)
|
231
|
-
end
|
232
|
-
|
233
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
234
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
235
|
-
def attribute_update
|
236
|
-
return transform_attributes.except(:id) if klass == Collection
|
237
|
-
ActiveSupport::Deprecation.warn(
|
238
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
239
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
240
|
-
)
|
241
|
-
transform_attributes.except(:id, :collections, :collection, collection_field_mapping)
|
242
|
-
end
|
243
|
-
|
244
211
|
# Override if we need to map the attributes from the parser in
|
245
212
|
# a way that is compatible with how the factory needs them.
|
246
|
-
def transform_attributes
|
213
|
+
def transform_attributes(update: false)
|
247
214
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
248
215
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
249
|
-
@transform_attributes
|
216
|
+
update ? @transform_attributes.except(:id) : @transform_attributes
|
250
217
|
end
|
251
218
|
|
252
219
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
@@ -21,11 +21,9 @@ module Bulkrax
|
|
21
21
|
|
22
22
|
queue_as :import
|
23
23
|
|
24
|
-
attr_accessor :
|
24
|
+
attr_accessor :child_records, :parent_record, :parent_entry, :importer_run_id
|
25
25
|
|
26
|
-
# @param
|
27
|
-
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
28
|
-
# @param child_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
26
|
+
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
29
27
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
30
28
|
#
|
31
29
|
# The entry_identifier is used to lookup the @base_entry for the job (a.k.a. the entry the job was called from).
|
@@ -33,123 +31,117 @@ module Bulkrax
|
|
33
31
|
# Whether the @base_entry is the parent or the child in the relationship is determined by the presence of a
|
34
32
|
# parent_identifier or child_identifier param. For example, if a parent_identifier is passed, we know @base_entry
|
35
33
|
# is the child in the relationship, and vice versa if a child_identifier is passed.
|
36
|
-
def perform(
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
34
|
+
def perform(parent_identifier:, importer_run_id:)
|
35
|
+
pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
|
36
|
+
rel.bulkrax_importer_run_id == importer_run_id && rel.parent_id == parent_identifier
|
37
|
+
end.sort_by(&:order)
|
38
|
+
|
39
|
+
@importer_run_id = importer_run_id
|
40
|
+
@parent_record = find_record(parent_identifier)
|
41
|
+
@child_records = { works: [], collections: [] }
|
42
|
+
pending_relationships.each do |rel|
|
43
|
+
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
|
44
|
+
child_record = find_record(rel.child_id)
|
45
|
+
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
|
47
46
|
end
|
48
47
|
|
49
|
-
if
|
48
|
+
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
|
50
49
|
reschedule(
|
51
|
-
entry_identifier: entry_identifier,
|
52
50
|
parent_identifier: parent_identifier,
|
53
|
-
|
54
|
-
importer_run: importer_run
|
51
|
+
importer_run_id: importer_run_id
|
55
52
|
)
|
56
53
|
return false # stop current job from continuing to run after rescheduling
|
57
54
|
end
|
58
55
|
|
59
|
-
|
56
|
+
@parent_entry = Bulkrax::Entry.where(identifier: parent_identifier,
|
57
|
+
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
|
58
|
+
importerexporter_type: "Bulkrax::Importer").first
|
59
|
+
create_relationships
|
60
|
+
pending_relationships.each(&:destroy)
|
60
61
|
rescue ::StandardError => e
|
61
|
-
|
62
|
-
|
62
|
+
parent_entry.status_info(e)
|
63
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
63
64
|
end
|
64
65
|
|
65
66
|
private
|
66
67
|
|
67
|
-
def
|
68
|
-
if parent_record.is_a?(::Collection)
|
69
|
-
|
70
|
-
|
71
|
-
collection_parent_work_child
|
72
|
-
elsif curation_concern?(parent_record) && child_record.is_a?(::Collection)
|
73
|
-
raise ::StandardError, 'a Collection may not be assigned as a child of a Work'
|
68
|
+
def create_relationships
|
69
|
+
if parent_record.is_a?(::Collection)
|
70
|
+
collection_parent_work_child unless child_records[:works].empty?
|
71
|
+
collection_parent_collection_child unless child_records[:collections].empty?
|
74
72
|
else
|
75
|
-
work_parent_work_child
|
73
|
+
work_parent_work_child unless child_records[:works].empty?
|
74
|
+
raise ::StandardError, 'a Collection may not be assigned as a child of a Work' if child_records[:collections].present?
|
76
75
|
end
|
77
76
|
end
|
78
77
|
|
79
78
|
def user
|
80
|
-
@user ||=
|
79
|
+
@user ||= Bulkrax::ImporterRun.find(importer_run_id).importer.user
|
81
80
|
end
|
82
81
|
|
83
82
|
# Work-Collection membership is added to the child as member_of_collection_ids
|
84
83
|
# This is adding the reverse relationship, from the child to the parent
|
85
84
|
def collection_parent_work_child
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
# TODO: add counters for :processed_parents and :failed_parents
|
101
|
-
importer_run.increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
85
|
+
child_records[:works].each do |child_record|
|
86
|
+
attrs = { id: child_record.id, member_of_collections_attributes: { 0 => { id: parent_record.id } } }
|
87
|
+
ObjectFactory.new(
|
88
|
+
attributes: attrs,
|
89
|
+
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
90
|
+
work_identifier: parent_entry.parser.work_identifier,
|
91
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
92
|
+
replace_files: false,
|
93
|
+
user: user,
|
94
|
+
klass: child_record.class
|
95
|
+
).run
|
96
|
+
# TODO: add counters for :processed_parents and :failed_parents
|
97
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
98
|
+
end
|
102
99
|
end
|
103
100
|
|
104
101
|
# Collection-Collection membership is added to the as member_ids
|
105
102
|
def collection_parent_collection_child
|
106
|
-
|
107
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
108
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
109
|
-
)
|
103
|
+
child_record = child_records[:collections].first
|
110
104
|
attrs = { id: parent_record.id, child_collection_id: child_record.id }
|
111
105
|
ObjectFactory.new(
|
112
106
|
attributes: attrs,
|
113
107
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
114
|
-
work_identifier:
|
115
|
-
|
108
|
+
work_identifier: parent_entry.parser.work_identifier,
|
109
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
116
110
|
replace_files: false,
|
117
111
|
user: user,
|
118
112
|
klass: parent_record.class
|
119
113
|
).run
|
120
114
|
# TODO: add counters for :processed_parents and :failed_parents
|
121
|
-
|
115
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
122
116
|
end
|
123
117
|
|
124
118
|
# Work-Work membership is added to the parent as member_ids
|
125
119
|
def work_parent_work_child
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
120
|
+
records_hash = {}
|
121
|
+
child_records[:works].each_with_index do |child_record, i|
|
122
|
+
records_hash[i] = { id: child_record.id }
|
123
|
+
end
|
130
124
|
attrs = {
|
131
125
|
id: parent_record.id,
|
132
|
-
work_members_attributes:
|
126
|
+
work_members_attributes: records_hash
|
133
127
|
}
|
134
128
|
ObjectFactory.new(
|
135
129
|
attributes: attrs,
|
136
130
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
137
|
-
work_identifier:
|
138
|
-
|
131
|
+
work_identifier: parent_entry.parser.work_identifier,
|
132
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
139
133
|
replace_files: false,
|
140
134
|
user: user,
|
141
135
|
klass: parent_record.class
|
142
136
|
).run
|
143
137
|
# TODO: add counters for :processed_parents and :failed_parents
|
144
|
-
|
138
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
145
139
|
end
|
146
140
|
|
147
|
-
def reschedule(
|
141
|
+
def reschedule(parent_identifier:, importer_run_id:)
|
148
142
|
CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
|
149
|
-
entry_identifier: entry_identifier,
|
150
143
|
parent_identifier: parent_identifier,
|
151
|
-
|
152
|
-
importer_run: importer_run
|
144
|
+
importer_run_id: importer_run_id
|
153
145
|
)
|
154
146
|
end
|
155
147
|
end
|
@@ -18,9 +18,7 @@ module Bulkrax
|
|
18
18
|
importer.only_updates = only_updates_since_last_import || false
|
19
19
|
return unless importer.valid_import?
|
20
20
|
|
21
|
-
importer.
|
22
|
-
importer.import_works
|
23
|
-
importer.import_file_sets
|
21
|
+
importer.import_objects
|
24
22
|
end
|
25
23
|
|
26
24
|
def unzip_imported_file(parser)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class ScheduleRelationshipsJob < ApplicationJob
|
5
|
+
def perform(importer_id:)
|
6
|
+
importer = Importer.find(importer_id)
|
7
|
+
pending_num = importer.entries.left_outer_joins(:latest_status)
|
8
|
+
.where('bulkrax_statuses.status_message IS NULL ').count
|
9
|
+
return reschedule(importer_id) unless pending_num.zero?
|
10
|
+
|
11
|
+
importer.last_run.parents.each do |parent_id|
|
12
|
+
CreateRelationshipsJob.perform_later(parent_identifier: parent_id, importer_run_id: importer.last_run.id)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def reschedule(importer_id)
|
17
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importer_id)
|
18
|
+
false
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -22,18 +22,15 @@ module Bulkrax
|
|
22
22
|
encoding: 'utf-8')
|
23
23
|
end
|
24
24
|
|
25
|
-
def self.data_for_entry(data, _source_id)
|
26
|
-
ActiveSupport::Deprecation.warn(
|
27
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
28
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
29
|
-
)
|
25
|
+
def self.data_for_entry(data, _source_id, parser)
|
30
26
|
# If a multi-line CSV data is passed, grab the first row
|
31
27
|
data = data.first if data.is_a?(CSV::Table)
|
32
28
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
33
29
|
raw_data = data.to_h
|
34
30
|
raw_data[:model] = data[:model] if data[:model].present?
|
35
31
|
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
|
36
|
-
|
32
|
+
# TODO: change to :parents
|
33
|
+
raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
|
37
34
|
return raw_data
|
38
35
|
end
|
39
36
|
|
@@ -47,7 +44,6 @@ module Bulkrax
|
|
47
44
|
add_visibility
|
48
45
|
add_metadata_for_model
|
49
46
|
add_rights_statement
|
50
|
-
add_collections
|
51
47
|
add_local
|
52
48
|
|
53
49
|
self.parsed_metadata
|
@@ -70,15 +66,9 @@ module Bulkrax
|
|
70
66
|
end
|
71
67
|
|
72
68
|
def add_ingested_metadata
|
73
|
-
ActiveSupport::Deprecation.warn(
|
74
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
75
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
76
|
-
)
|
77
69
|
# we do not want to sort the values in the record before adding the metadata.
|
78
70
|
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
79
71
|
record.each do |key, value|
|
80
|
-
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
81
|
-
|
82
72
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
83
73
|
add_metadata(key_without_numbers(key), value, index)
|
84
74
|
end
|
@@ -104,28 +94,40 @@ module Bulkrax
|
|
104
94
|
self.parsed_metadata['id'] = hyrax_record.id
|
105
95
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
106
96
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
97
|
+
build_relationship_metadata
|
107
98
|
build_mapping_metadata
|
99
|
+
build_files unless hyrax_record.is_a?(Collection)
|
100
|
+
self.parsed_metadata
|
101
|
+
end
|
108
102
|
|
109
|
-
|
110
|
-
#
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
103
|
+
def build_relationship_metadata
|
104
|
+
# Includes all relationship methods for all exportable record types (works, Collections, FileSets)
|
105
|
+
relationship_methods = {
|
106
|
+
related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
|
107
|
+
related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
|
108
|
+
}
|
109
|
+
|
110
|
+
relationship_methods.each do |relationship_key, methods|
|
111
|
+
next if relationship_key.blank?
|
112
|
+
|
113
|
+
values = []
|
114
|
+
methods.each do |m|
|
115
|
+
values << hyrax_record.public_send(m) if hyrax_record.respond_to?(m)
|
118
116
|
end
|
119
|
-
|
117
|
+
values = values.flatten.uniq
|
118
|
+
next if values.blank?
|
120
119
|
|
121
|
-
|
122
|
-
|
120
|
+
handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
|
121
|
+
end
|
123
122
|
end
|
124
123
|
|
125
124
|
def build_mapping_metadata
|
126
125
|
mapping.each do |key, value|
|
127
126
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
128
127
|
next if key == "model"
|
128
|
+
# relationships handled by #build_relationship_metadata
|
129
|
+
next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
130
|
+
next if key == 'file' # handled by #build_files
|
129
131
|
next if value['excluded']
|
130
132
|
|
131
133
|
object_key = key if value.key?('object')
|
@@ -151,7 +153,7 @@ module Bulkrax
|
|
151
153
|
data = hyrax_record.send(key.to_s)
|
152
154
|
if data.is_a?(ActiveTriples::Relation)
|
153
155
|
if value['join']
|
154
|
-
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('
|
156
|
+
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(' | ').to_s # TODO: make split char dynamic
|
155
157
|
else
|
156
158
|
data.each_with_index do |d, i|
|
157
159
|
self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
|
@@ -200,12 +202,21 @@ module Bulkrax
|
|
200
202
|
end
|
201
203
|
|
202
204
|
def build_files
|
203
|
-
|
204
|
-
|
205
|
+
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
206
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
207
|
+
|
208
|
+
filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
209
|
+
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
210
|
+
end
|
211
|
+
|
212
|
+
def handle_join_on_export(key, values, join)
|
213
|
+
if join
|
214
|
+
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
205
215
|
else
|
206
|
-
|
207
|
-
|
216
|
+
values.each_with_index do |value, i|
|
217
|
+
parsed_metadata["#{key}_#{i + 1}"] = value
|
208
218
|
end
|
219
|
+
parsed_metadata.delete(key)
|
209
220
|
end
|
210
221
|
end
|
211
222
|
|
@@ -228,13 +239,9 @@ module Bulkrax
|
|
228
239
|
end
|
229
240
|
|
230
241
|
def possible_collection_ids
|
231
|
-
ActiveSupport::Deprecation.warn(
|
232
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
233
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
234
|
-
)
|
235
242
|
return @possible_collection_ids if @possible_collection_ids.present?
|
236
243
|
|
237
|
-
collection_field_mapping = self.class.
|
244
|
+
collection_field_mapping = self.class.parent_field(parser)
|
238
245
|
return [] unless collection_field_mapping.present? && record[collection_field_mapping].present?
|
239
246
|
|
240
247
|
identifiers = []
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
# @param data - the data from the metadata file
|
59
59
|
# @param path - the path to the metadata file (used by some entries to get the file_paths for import)
|
60
60
|
# @return Hash containing the data (the entry build_metadata method will know what to expect in the hash)
|
61
|
-
def self.data_for_entry(_data, _source_id)
|
61
|
+
def self.data_for_entry(_data, _source_id, _parser)
|
62
62
|
raise StandardError, 'Not Implemented'
|
63
63
|
end
|
64
64
|
|
@@ -70,12 +70,8 @@ module Bulkrax
|
|
70
70
|
parser&.work_identifier&.to_s || 'source'
|
71
71
|
end
|
72
72
|
|
73
|
-
def self.
|
74
|
-
|
75
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
76
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
77
|
-
)
|
78
|
-
Bulkrax.collection_field_mapping[self.to_s]
|
73
|
+
def self.parent_field(parser)
|
74
|
+
parser.related_parents_parsed_mapping
|
79
75
|
end
|
80
76
|
|
81
77
|
def build
|
@@ -125,23 +125,32 @@ module Bulkrax
|
|
125
125
|
end
|
126
126
|
|
127
127
|
def import_works
|
128
|
-
|
129
|
-
self.only_updates ||= false
|
130
|
-
parser.create_works
|
131
|
-
rescue StandardError => e
|
132
|
-
status_info(e)
|
128
|
+
import_objects(['work'])
|
133
129
|
end
|
134
130
|
|
135
131
|
def import_collections
|
136
|
-
|
137
|
-
parser.create_collections
|
138
|
-
rescue StandardError => e
|
139
|
-
status_info(e)
|
132
|
+
import_objects(['collection'])
|
140
133
|
end
|
141
134
|
|
142
135
|
def import_file_sets
|
143
|
-
|
144
|
-
|
136
|
+
import_objects(['file_set'])
|
137
|
+
end
|
138
|
+
|
139
|
+
def import_relationships
|
140
|
+
import_objects(['relationship'])
|
141
|
+
end
|
142
|
+
|
143
|
+
def import_objects(types_array = nil)
|
144
|
+
self.only_updates ||= false
|
145
|
+
types = types_array || %w[work collection file_set relationship]
|
146
|
+
if parser.class == Bulkrax::CsvParser
|
147
|
+
parser.create_objects(types)
|
148
|
+
else
|
149
|
+
types.each do |object_type|
|
150
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
151
|
+
parser.send("create_#{object_type.pluralize}")
|
152
|
+
end
|
153
|
+
end
|
145
154
|
rescue StandardError => e
|
146
155
|
status_info(e)
|
147
156
|
end
|
@@ -26,10 +26,6 @@ module Bulkrax
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def build_metadata
|
29
|
-
ActiveSupport::Deprecation.warn(
|
30
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
31
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
32
|
-
)
|
33
29
|
self.parsed_metadata = {}
|
34
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
35
31
|
|
@@ -13,11 +13,7 @@ module Bulkrax
|
|
13
13
|
data.predicates.map(&:to_s)
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.data_for_entry(data, source_id)
|
17
|
-
ActiveSupport::Deprecation.warn(
|
18
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
19
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
20
|
-
)
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
21
17
|
reader = data
|
22
18
|
format = reader.class.format.to_sym
|
23
19
|
collections = []
|
@@ -25,7 +21,7 @@ module Bulkrax
|
|
25
21
|
delete = nil
|
26
22
|
data = RDF::Writer.for(format).buffer do |writer|
|
27
23
|
reader.each_statement do |statement|
|
28
|
-
collections << statement.object.to_s if
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
29
25
|
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
30
26
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
31
27
|
writer << statement
|
@@ -55,10 +51,6 @@ module Bulkrax
|
|
55
51
|
end
|
56
52
|
|
57
53
|
def build_metadata
|
58
|
-
ActiveSupport::Deprecation.warn(
|
59
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
60
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
61
|
-
)
|
62
54
|
raise StandardError, 'Record not found' if record.nil?
|
63
55
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
64
56
|
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
14
14
|
Nokogiri::XML(open(path)).remove_namespaces!
|
15
15
|
end
|
16
16
|
|
17
|
-
def self.data_for_entry(data, source_id)
|
17
|
+
def self.data_for_entry(data, source_id, _parser)
|
18
18
|
collections = []
|
19
19
|
children = []
|
20
20
|
xpath_for_source_id = ".//*[name()='#{source_id}']"
|
@@ -39,10 +39,6 @@ module Bulkrax
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def build_metadata
|
42
|
-
ActiveSupport::Deprecation.warn(
|
43
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
44
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
45
|
-
)
|
46
42
|
raise StandardError, 'Record not found' if record.nil?
|
47
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
48
44
|
self.parsed_metadata = {}
|
@@ -26,7 +26,9 @@ module Bulkrax
|
|
26
26
|
|
27
27
|
def write_files
|
28
28
|
return if hyrax_record.is_a?(Collection)
|
29
|
-
|
29
|
+
|
30
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
31
|
+
file_sets.each do |fs|
|
30
32
|
path = File.join(exporter_export_path, 'files')
|
31
33
|
FileUtils.mkdir_p(path)
|
32
34
|
file = filename(fs)
|
@@ -129,10 +129,6 @@ module Bulkrax
|
|
129
129
|
end
|
130
130
|
|
131
131
|
def supported_bulkrax_fields
|
132
|
-
ActiveSupport::Deprecation.warn(
|
133
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
134
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
135
|
-
)
|
136
132
|
@supported_bulkrax_fields ||=
|
137
133
|
%W[
|
138
134
|
id
|
@@ -141,22 +137,16 @@ module Bulkrax
|
|
141
137
|
model
|
142
138
|
visibility
|
143
139
|
delete
|
144
|
-
#{parser.collection_field_mapping}
|
145
140
|
#{related_parents_parsed_mapping}
|
146
141
|
#{related_children_parsed_mapping}
|
147
142
|
]
|
148
143
|
end
|
149
144
|
|
150
145
|
def multiple?(field)
|
151
|
-
ActiveSupport::Deprecation.warn(
|
152
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
153
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
154
|
-
)
|
155
146
|
@multiple_bulkrax_fields ||=
|
156
147
|
%W[
|
157
148
|
file
|
158
149
|
remote_files
|
159
|
-
#{parser.collection_field_mapping}
|
160
150
|
#{related_parents_parsed_mapping}
|
161
151
|
#{related_children_parsed_mapping}
|
162
152
|
]
|
@@ -28,7 +28,8 @@ module Bulkrax
|
|
28
28
|
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
29
29
|
next if parent_identifier.blank?
|
30
30
|
|
31
|
-
|
31
|
+
add_parent_to_import_run(parent_identifier, importerexporter.last_run)
|
32
|
+
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
@@ -36,10 +37,16 @@ module Bulkrax
|
|
36
37
|
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
37
38
|
next if child_identifier.blank?
|
38
39
|
|
39
|
-
|
40
|
+
add_parent_to_import_run(self.identifier, importerexporter.last_run)
|
41
|
+
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
40
42
|
end
|
41
43
|
end
|
42
44
|
|
45
|
+
def add_parent_to_import_run(parent_id, run)
|
46
|
+
run.parents << parent_id
|
47
|
+
run.save
|
48
|
+
end
|
49
|
+
|
43
50
|
def find_collection_ids
|
44
51
|
self.collection_ids
|
45
52
|
end
|
@@ -77,10 +84,6 @@ module Bulkrax
|
|
77
84
|
def add_collections
|
78
85
|
return if find_collection_ids.blank?
|
79
86
|
|
80
|
-
ActiveSupport::Deprecation.warn(
|
81
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
82
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
83
|
-
)
|
84
87
|
self.parsed_metadata['member_of_collections_attributes'] = {}
|
85
88
|
find_collection_ids.each_with_index do |c, i|
|
86
89
|
self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
|
@@ -88,15 +91,10 @@ module Bulkrax
|
|
88
91
|
end
|
89
92
|
|
90
93
|
def factory
|
91
|
-
ActiveSupport::Deprecation.warn(
|
92
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
93
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
94
|
-
)
|
95
94
|
@factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
|
96
95
|
source_identifier_value: identifier,
|
97
96
|
work_identifier: parser.work_identifier,
|
98
|
-
|
99
|
-
related_parents_parsed_mapping: related_parents_parsed_mapping,
|
97
|
+
related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
|
100
98
|
replace_files: replace_files,
|
101
99
|
user: user,
|
102
100
|
klass: factory_class,
|
@@ -20,14 +20,14 @@ module Bulkrax
|
|
20
20
|
(last_imported_at || Time.current) + frequency.to_seconds if schedulable? && last_imported_at.present?
|
21
21
|
end
|
22
22
|
|
23
|
-
def increment_counters(index, collection: false, file_set: false)
|
23
|
+
def increment_counters(index, collection: false, file_set: false, work: false)
|
24
24
|
# Only set the totals if they were not set on initialization
|
25
25
|
importer_run = ImporterRun.find(current_run.id) # make sure fresh
|
26
26
|
if collection
|
27
27
|
importer_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
28
28
|
elsif file_set
|
29
29
|
importer_run.total_file_set_entries = index + 1 unless parser.file_sets_total.positive?
|
30
|
-
else
|
30
|
+
elsif work
|
31
31
|
# TODO: differentiate between work and collection counts for exporters
|
32
32
|
importer_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
33
33
|
end
|
@@ -56,7 +56,7 @@ module Bulkrax
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def related_parents_parsed_mapping
|
59
|
-
@related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
|
59
|
+
@related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents')
|
60
60
|
end
|
61
61
|
|
62
62
|
def related_children_raw_mapping
|
@@ -64,29 +64,22 @@ module Bulkrax
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def related_children_parsed_mapping
|
67
|
-
@related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
|
67
|
+
@related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children')
|
68
68
|
end
|
69
69
|
|
70
70
|
def get_field_mapping_hash_for(key)
|
71
71
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
72
72
|
|
73
|
+
mapping = importerexporter.field_mapping == [{}] ? {} : importerexporter.field_mapping
|
73
74
|
instance_variable_set(
|
74
75
|
"@#{key}_hash",
|
75
|
-
|
76
|
+
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
76
77
|
)
|
77
78
|
raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
|
78
79
|
|
79
80
|
instance_variable_get("@#{key}_hash")
|
80
81
|
end
|
81
82
|
|
82
|
-
def collection_field_mapping
|
83
|
-
ActiveSupport::Deprecation.warn(
|
84
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
85
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
86
|
-
)
|
87
|
-
Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
|
88
|
-
end
|
89
|
-
|
90
83
|
def model_field_mappings
|
91
84
|
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
92
85
|
model_mappings |= ['model']
|
@@ -118,6 +111,10 @@ module Bulkrax
|
|
118
111
|
raise StandardError, 'must be defined' if importer?
|
119
112
|
end
|
120
113
|
|
114
|
+
def create_relationships
|
115
|
+
raise StandardError, 'must be defined' if importer?
|
116
|
+
end
|
117
|
+
|
121
118
|
# Optional, define if using browse everything for file upload
|
122
119
|
def retrieve_cloud_files(files); end
|
123
120
|
|
@@ -39,7 +39,7 @@ module Bulkrax
|
|
39
39
|
path = metadata_path(bag)
|
40
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
41
41
|
data = entry_class.read_data(path)
|
42
|
-
data = entry_class.data_for_entry(data, source_identifier)
|
42
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
43
43
|
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
44
44
|
data
|
45
45
|
end
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
else
|
76
76
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
77
77
|
end
|
78
|
-
increment_counters(index)
|
78
|
+
increment_counters(index, work: true)
|
79
79
|
end
|
80
80
|
importer.record_status
|
81
81
|
rescue StandardError => e
|
@@ -83,11 +83,7 @@ module Bulkrax
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def collections
|
86
|
-
ActiveSupport::Deprecation.warn(
|
87
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
88
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
89
|
-
)
|
90
|
-
records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
|
86
|
+
records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
|
91
87
|
end
|
92
88
|
|
93
89
|
def collections_total
|
@@ -14,18 +14,13 @@ module Bulkrax
|
|
14
14
|
csv_data = entry_class.read_data(file_for_import)
|
15
15
|
importer.parser_fields['total'] = csv_data.count
|
16
16
|
importer.save
|
17
|
-
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
|
17
|
+
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
18
18
|
end
|
19
19
|
|
20
20
|
def collections
|
21
|
-
ActiveSupport::Deprecation.warn(
|
22
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
23
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
24
|
-
)
|
25
21
|
# retrieve a list of unique collections
|
26
22
|
records.map do |r|
|
27
23
|
collections = []
|
28
|
-
r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title, from_collection_field_mapping: true } } if r[collection_field_mapping].present?
|
29
24
|
model_field_mappings.each do |model_mapping|
|
30
25
|
collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
|
31
26
|
end
|
@@ -85,89 +80,52 @@ module Bulkrax
|
|
85
80
|
end
|
86
81
|
|
87
82
|
def create_collections
|
88
|
-
|
89
|
-
next if collection.blank?
|
90
|
-
break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
|
91
|
-
|
92
|
-
## BEGIN
|
93
|
-
# Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
|
94
|
-
# TODO: Remove once collection_field_mapping is removed
|
95
|
-
metadata = add_required_collection_metadata(collection)
|
96
|
-
collection_hash = metadata.presence || collection
|
97
|
-
## END
|
98
|
-
|
99
|
-
new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
|
100
|
-
increment_counters(index, collection: true)
|
101
|
-
# TODO: add support for :delete option
|
102
|
-
if collection.key?(:from_collection_field_mapping)
|
103
|
-
ActiveSupport::Deprecation.warn(
|
104
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
105
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
106
|
-
)
|
107
|
-
# When importing collections using the deprecated collection_field_mapping, the collection MUST be created
|
108
|
-
# before the work, so we use #perform_now to make sure that happens. The downside is, if a collection fails
|
109
|
-
# to import, it will stop the rest of the collections from importing successfully.
|
110
|
-
# TODO: Remove once collection_field_mapping is removed
|
111
|
-
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
112
|
-
else
|
113
|
-
ImportCollectionJob.perform_later(new_entry.id, current_run.id)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
importer.record_status
|
117
|
-
rescue StandardError => e
|
118
|
-
status_info(e)
|
83
|
+
create_objects(['collection'])
|
119
84
|
end
|
120
85
|
|
121
86
|
def create_works
|
122
|
-
|
123
|
-
next unless record_has_source_identifier(work, records.find_index(work))
|
124
|
-
break if limit_reached?(limit, records.find_index(work))
|
125
|
-
|
126
|
-
seen[work[source_identifier]] = true
|
127
|
-
new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
|
128
|
-
if work[:delete].present?
|
129
|
-
DeleteWorkJob.send(perform_method, new_entry, current_run)
|
130
|
-
else
|
131
|
-
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
132
|
-
end
|
133
|
-
increment_counters(index)
|
134
|
-
end
|
135
|
-
importer.record_status
|
136
|
-
rescue StandardError => e
|
137
|
-
status_info(e)
|
87
|
+
create_objects(['work'])
|
138
88
|
end
|
139
89
|
|
140
90
|
def create_file_sets
|
141
|
-
|
142
|
-
|
143
|
-
|
91
|
+
create_objects(['file_set'])
|
92
|
+
end
|
93
|
+
|
94
|
+
def create_relationships
|
95
|
+
create_objects(['relationship'])
|
96
|
+
end
|
97
|
+
|
98
|
+
def create_objects(types_array = nil)
|
99
|
+
(types_array || %w[work collection file_set relationship]).each do |type|
|
100
|
+
if type.eql?('relationship')
|
101
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
102
|
+
next
|
103
|
+
end
|
104
|
+
send(type.pluralize).each_with_index do |current_record, index|
|
105
|
+
next unless record_has_source_identifier(current_record, records.find_index(current_record))
|
106
|
+
break if limit_reached?(limit, records.find_index(current_record))
|
144
107
|
|
145
|
-
|
146
|
-
|
147
|
-
|
108
|
+
seen[current_record[source_identifier]] = true
|
109
|
+
create_entry_and_job(current_record, type)
|
110
|
+
increment_counters(index, "#{type}": true)
|
111
|
+
end
|
112
|
+
importer.record_status
|
148
113
|
end
|
149
|
-
importer.record_status
|
150
114
|
rescue StandardError => e
|
151
115
|
status_info(e)
|
152
116
|
end
|
153
117
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
title: raw_collection_data[:title],
|
166
|
-
work_identifier => uci,
|
167
|
-
source_identifier => uci,
|
168
|
-
visibility: 'open',
|
169
|
-
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
170
|
-
}
|
118
|
+
def create_entry_and_job(current_record, type)
|
119
|
+
new_entry = find_or_create_entry(send("#{type}_entry_class"),
|
120
|
+
current_record[source_identifier],
|
121
|
+
'Bulkrax::Importer',
|
122
|
+
current_record.to_h)
|
123
|
+
if current_record[:delete].present?
|
124
|
+
# TODO: create a "Delete" job for file_sets and collections
|
125
|
+
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
126
|
+
else
|
127
|
+
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
|
128
|
+
end
|
171
129
|
end
|
172
130
|
|
173
131
|
def write_partial_import_file(file)
|
@@ -274,6 +232,7 @@ module Bulkrax
|
|
274
232
|
def entry_class
|
275
233
|
CsvEntry
|
276
234
|
end
|
235
|
+
alias work_entry_class entry_class
|
277
236
|
|
278
237
|
def collection_entry_class
|
279
238
|
CsvCollectionEntry
|
@@ -40,14 +40,14 @@ module Bulkrax
|
|
40
40
|
metadata_paths.map do |md|
|
41
41
|
# Retrieve all records
|
42
42
|
elements = entry_class.read_data(md).xpath("//#{record_element}")
|
43
|
-
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier) }
|
43
|
+
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
|
44
44
|
end
|
45
45
|
# Flatten because we may have multiple records per array
|
46
46
|
r.compact.flatten
|
47
47
|
elsif parser_fields['import_type'] == 'single'
|
48
48
|
metadata_paths.map do |md|
|
49
49
|
data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
|
50
|
-
entry_class.data_for_entry(data, source_identifier)
|
50
|
+
entry_class.data_for_entry(data, source_identifier, self)
|
51
51
|
end.compact # No need to flatten because we take only the first record
|
52
52
|
end
|
53
53
|
end
|
@@ -94,7 +94,7 @@ module Bulkrax
|
|
94
94
|
else
|
95
95
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
96
96
|
end
|
97
|
-
increment_counters(index)
|
97
|
+
increment_counters(index, work: true)
|
98
98
|
end
|
99
99
|
importer.record_status
|
100
100
|
rescue StandardError => e
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class CreateBulkraxPendingRelationships < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
create_table :bulkrax_pending_relationships do |t|
|
4
|
+
t.belongs_to :bulkrax_importer_run, foreign_key: true, null: false
|
5
|
+
t.string :parent_id, null: false
|
6
|
+
t.string :child_id, null: false
|
7
|
+
|
8
|
+
t.timestamps
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -5,11 +5,9 @@ require 'active_support/all'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class << self
|
8
|
-
# TODO: remove collection_field_mapping when releasing v2
|
9
8
|
mattr_accessor :parsers,
|
10
9
|
:default_work_type,
|
11
10
|
:default_field_mapping,
|
12
|
-
:collection_field_mapping,
|
13
11
|
:fill_in_blank_source_identifiers,
|
14
12
|
:related_children_field_mapping,
|
15
13
|
:related_parents_field_mapping,
|
@@ -35,17 +33,6 @@ module Bulkrax
|
|
35
33
|
self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
|
36
34
|
self.server_name = 'bulkrax@example.com'
|
37
35
|
|
38
|
-
# NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
|
39
|
-
# Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
|
40
|
-
# TODO: remove collection_field_mapping when releasing v2
|
41
|
-
# Field_mapping for establishing a collection relationship (FROM work TO collection)
|
42
|
-
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
43
|
-
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
44
|
-
# The default value for CSV is collection
|
45
|
-
self.collection_field_mapping = {
|
46
|
-
'Bulkrax::CsvEntry' => 'collection'
|
47
|
-
}
|
48
|
-
|
49
36
|
# Hash of Generic field_mappings for use in the view
|
50
37
|
# There must be one field_mappings hash per view partial
|
51
38
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0
|
4
|
+
version: 3.0.0.beta3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-03-
|
11
|
+
date: 2022-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -268,6 +268,7 @@ files:
|
|
268
268
|
- app/jobs/bulkrax/import_file_set_job.rb
|
269
269
|
- app/jobs/bulkrax/import_work_job.rb
|
270
270
|
- app/jobs/bulkrax/importer_job.rb
|
271
|
+
- app/jobs/bulkrax/schedule_relationships_job.rb
|
271
272
|
- app/mailers/bulkrax/application_mailer.rb
|
272
273
|
- app/matchers/bulkrax/application_matcher.rb
|
273
274
|
- app/matchers/bulkrax/bagit_matcher.rb
|
@@ -287,6 +288,7 @@ files:
|
|
287
288
|
- app/models/bulkrax/oai_entry.rb
|
288
289
|
- app/models/bulkrax/oai_qualified_dc_entry.rb
|
289
290
|
- app/models/bulkrax/oai_set_entry.rb
|
291
|
+
- app/models/bulkrax/pending_relationship.rb
|
290
292
|
- app/models/bulkrax/rdf_collection_entry.rb
|
291
293
|
- app/models/bulkrax/rdf_entry.rb
|
292
294
|
- app/models/bulkrax/status.rb
|
@@ -363,6 +365,10 @@ files:
|
|
363
365
|
- db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
|
364
366
|
- db/migrate/20220118001339_add_import_attempts_to_entries.rb
|
365
367
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
368
|
+
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
369
|
+
- db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb
|
370
|
+
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
371
|
+
- db/migrate/20220330165510_remove_array_true_from_importer_run_parents_column.rb
|
366
372
|
- lib/bulkrax.rb
|
367
373
|
- lib/bulkrax/engine.rb
|
368
374
|
- lib/bulkrax/version.rb
|
@@ -389,9 +395,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
389
395
|
version: '0'
|
390
396
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
391
397
|
requirements:
|
392
|
-
- - ">="
|
398
|
+
- - ">"
|
393
399
|
- !ruby/object:Gem::Version
|
394
|
-
version: '0'
|
400
|
+
version: 1.3.1
|
395
401
|
requirements: []
|
396
402
|
rubygems_version: 3.1.4
|
397
403
|
signing_key:
|