bulkrax 2.2.4 → 3.0.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/factories/bulkrax/object_factory.rb +13 -46
- data/app/jobs/bulkrax/create_relationships_job.rb +58 -66
- data/app/jobs/bulkrax/importer_job.rb +1 -3
- data/app/jobs/bulkrax/schedule_relationships_job.rb +21 -0
- data/app/models/bulkrax/csv_entry.rb +58 -39
- data/app/models/bulkrax/entry.rb +3 -7
- data/app/models/bulkrax/importer.rb +20 -11
- data/app/models/bulkrax/oai_entry.rb +0 -4
- data/app/models/bulkrax/pending_relationship.rb +7 -0
- data/app/models/bulkrax/rdf_entry.rb +2 -10
- data/app/models/bulkrax/xml_entry.rb +1 -5
- data/app/models/concerns/bulkrax/export_behavior.rb +3 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +0 -10
- data/app/models/concerns/bulkrax/import_behavior.rb +11 -12
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +2 -2
- data/app/parsers/bulkrax/application_parser.rb +8 -11
- data/app/parsers/bulkrax/bagit_parser.rb +3 -7
- data/app/parsers/bulkrax/csv_parser.rb +44 -67
- data/app/parsers/bulkrax/oai_dc_parser.rb +1 -1
- data/app/parsers/bulkrax/xml_parser.rb +3 -3
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +11 -0
- data/db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb +5 -0
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +0 -13
- metadata +9 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0c05782edcfff4b5c460c3cc3bd8b3fbec355299a87c977b0400b2b555105de8
|
|
4
|
+
data.tar.gz: dd0316f5502afcea91ac0fa84f0ea6bcb1600aff9b867d5460e934aeafdaa9c2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fc72772040076f6e527adc335cb4dd6802c199f3b0e94ea81fab7d341d9354973fe76c08b9873e90d7244dd527617c60f0486e141736a589df2afd8e051bcd37
|
|
7
|
+
data.tar.gz: 3c39be004fd6196d8692643a567a73398a4cb4105a1174535f73cbf7b515b8ca0d6a2bbc8cdb05c012dca814f5bc5b57ab5bfc40a6191ec73fcd40c96eb3e630
|
|
@@ -7,20 +7,15 @@ module Bulkrax
|
|
|
7
7
|
include DynamicRecordLookup
|
|
8
8
|
|
|
9
9
|
define_model_callbacks :save, :create
|
|
10
|
-
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :
|
|
10
|
+
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping
|
|
11
11
|
|
|
12
12
|
# rubocop:disable Metrics/ParameterLists
|
|
13
|
-
def initialize(attributes:, source_identifier_value:, work_identifier:,
|
|
14
|
-
ActiveSupport::Deprecation.warn(
|
|
15
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
16
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
17
|
-
)
|
|
13
|
+
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
|
|
18
14
|
@attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
|
|
19
15
|
@replace_files = replace_files
|
|
20
16
|
@update_files = update_files
|
|
21
17
|
@user = user || User.batch_user
|
|
22
18
|
@work_identifier = work_identifier
|
|
23
|
-
@collection_field_mapping = collection_field_mapping
|
|
24
19
|
@related_parents_parsed_mapping = related_parents_parsed_mapping
|
|
25
20
|
@source_identifier_value = source_identifier_value
|
|
26
21
|
@klass = klass || Bulkrax.default_work_type.constantize
|
|
@@ -55,7 +50,7 @@ module Bulkrax
|
|
|
55
50
|
def update
|
|
56
51
|
raise "Object doesn't exist" unless object
|
|
57
52
|
destroy_existing_files if @replace_files && ![Collection, FileSet].include?(klass)
|
|
58
|
-
attrs =
|
|
53
|
+
attrs = transform_attributes(update: true)
|
|
59
54
|
run_callbacks :save do
|
|
60
55
|
if klass == Collection
|
|
61
56
|
update_collection(attrs)
|
|
@@ -97,7 +92,7 @@ module Bulkrax
|
|
|
97
92
|
# https://github.com/projecthydra/active_fedora/issues/874
|
|
98
93
|
# 2+ years later, still open!
|
|
99
94
|
def create
|
|
100
|
-
attrs =
|
|
95
|
+
attrs = transform_attributes
|
|
101
96
|
@object = klass.new
|
|
102
97
|
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent)
|
|
103
98
|
run_callbacks :save do
|
|
@@ -142,25 +137,15 @@ module Bulkrax
|
|
|
142
137
|
end
|
|
143
138
|
|
|
144
139
|
def create_collection(attrs)
|
|
145
|
-
ActiveSupport::Deprecation.warn(
|
|
146
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
147
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
148
|
-
)
|
|
149
140
|
attrs = collection_type(attrs)
|
|
150
|
-
persist_collection_memberships(parent:
|
|
151
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
|
141
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
|
152
142
|
object.attributes = attrs
|
|
153
143
|
object.apply_depositor_metadata(@user)
|
|
154
144
|
object.save!
|
|
155
145
|
end
|
|
156
146
|
|
|
157
147
|
def update_collection(attrs)
|
|
158
|
-
|
|
159
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
160
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
161
|
-
)
|
|
162
|
-
persist_collection_memberships(parent: object, child: find_collection(attributes[:child_collection_id])) if attributes[:child_collection_id].present?
|
|
163
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
|
148
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
|
164
149
|
object.attributes = attrs
|
|
165
150
|
object.save!
|
|
166
151
|
end
|
|
@@ -197,6 +182,10 @@ module Bulkrax
|
|
|
197
182
|
# Add child to parent's #member_collections
|
|
198
183
|
# Add parent to child's #member_of_collections
|
|
199
184
|
def persist_collection_memberships(parent:, child:)
|
|
185
|
+
parent.reject!(&:blank?) if parent.respond_to?(:reject!)
|
|
186
|
+
child.reject!(&:blank?) if child.respond_to?(:reject!)
|
|
187
|
+
return if parent.blank? || child.blank?
|
|
188
|
+
|
|
200
189
|
::Hyrax::Collections::NestedCollectionPersistenceService.persist_nested_collection_for(parent: parent, child: child)
|
|
201
190
|
end
|
|
202
191
|
|
|
@@ -205,7 +194,7 @@ module Bulkrax
|
|
|
205
194
|
when Hash
|
|
206
195
|
Collection.find(id[:id])
|
|
207
196
|
when String
|
|
208
|
-
Collection.find(id)
|
|
197
|
+
Collection.find(id) if id.present?
|
|
209
198
|
when Array
|
|
210
199
|
id.map { |i| find_collection(i) }
|
|
211
200
|
else
|
|
@@ -219,34 +208,12 @@ module Bulkrax
|
|
|
219
208
|
attrs
|
|
220
209
|
end
|
|
221
210
|
|
|
222
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
|
223
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
|
224
|
-
def create_attributes
|
|
225
|
-
return transform_attributes if klass == Collection
|
|
226
|
-
ActiveSupport::Deprecation.warn(
|
|
227
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
228
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
229
|
-
)
|
|
230
|
-
transform_attributes.except(:collections, :collection, collection_field_mapping)
|
|
231
|
-
end
|
|
232
|
-
|
|
233
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
|
234
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
|
235
|
-
def attribute_update
|
|
236
|
-
return transform_attributes.except(:id) if klass == Collection
|
|
237
|
-
ActiveSupport::Deprecation.warn(
|
|
238
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
239
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
240
|
-
)
|
|
241
|
-
transform_attributes.except(:id, :collections, :collection, collection_field_mapping)
|
|
242
|
-
end
|
|
243
|
-
|
|
244
211
|
# Override if we need to map the attributes from the parser in
|
|
245
212
|
# a way that is compatible with how the factory needs them.
|
|
246
|
-
def transform_attributes
|
|
213
|
+
def transform_attributes(update: false)
|
|
247
214
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
|
248
215
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
|
249
|
-
@transform_attributes
|
|
216
|
+
update ? @transform_attributes.except(:id) : @transform_attributes
|
|
250
217
|
end
|
|
251
218
|
|
|
252
219
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
|
@@ -21,11 +21,9 @@ module Bulkrax
|
|
|
21
21
|
|
|
22
22
|
queue_as :import
|
|
23
23
|
|
|
24
|
-
attr_accessor :
|
|
24
|
+
attr_accessor :child_records, :parent_record, :parent_entry, :importer_run_id
|
|
25
25
|
|
|
26
|
-
# @param
|
|
27
|
-
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
|
28
|
-
# @param child_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
|
26
|
+
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
|
29
27
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
|
30
28
|
#
|
|
31
29
|
# The entry_identifier is used to lookup the @base_entry for the job (a.k.a. the entry the job was called from).
|
|
@@ -33,123 +31,117 @@ module Bulkrax
|
|
|
33
31
|
# Whether the @base_entry is the parent or the child in the relationship is determined by the presence of a
|
|
34
32
|
# parent_identifier or child_identifier param. For example, if a parent_identifier is passed, we know @base_entry
|
|
35
33
|
# is the child in the relationship, and vice versa if a child_identifier is passed.
|
|
36
|
-
def perform(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
34
|
+
def perform(parent_identifier:, importer_run_id:)
|
|
35
|
+
pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
|
|
36
|
+
rel.bulkrax_importer_run_id == importer_run_id && rel.parent_id == parent_identifier
|
|
37
|
+
end.sort_by(&:order)
|
|
38
|
+
|
|
39
|
+
@importer_run_id = importer_run_id
|
|
40
|
+
@parent_record = find_record(parent_identifier)
|
|
41
|
+
@child_records = { works: [], collections: [] }
|
|
42
|
+
pending_relationships.each do |rel|
|
|
43
|
+
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
|
|
44
|
+
child_record = find_record(rel.child_id)
|
|
45
|
+
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
|
|
47
46
|
end
|
|
48
47
|
|
|
49
|
-
if
|
|
48
|
+
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
|
|
50
49
|
reschedule(
|
|
51
|
-
entry_identifier: entry_identifier,
|
|
52
50
|
parent_identifier: parent_identifier,
|
|
53
|
-
|
|
54
|
-
importer_run: importer_run
|
|
51
|
+
importer_run_id: importer_run_id
|
|
55
52
|
)
|
|
56
53
|
return false # stop current job from continuing to run after rescheduling
|
|
57
54
|
end
|
|
58
55
|
|
|
59
|
-
|
|
56
|
+
@parent_entry = Bulkrax::Entry.where(identifier: parent_identifier,
|
|
57
|
+
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
|
|
58
|
+
importerexporter_type: "Bulkrax::Importer").first
|
|
59
|
+
create_relationships
|
|
60
|
+
pending_relationships.each(&:destroy)
|
|
60
61
|
rescue ::StandardError => e
|
|
61
|
-
|
|
62
|
-
|
|
62
|
+
parent_entry.status_info(e)
|
|
63
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
|
63
64
|
end
|
|
64
65
|
|
|
65
66
|
private
|
|
66
67
|
|
|
67
|
-
def
|
|
68
|
-
if parent_record.is_a?(::Collection)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
collection_parent_work_child
|
|
72
|
-
elsif curation_concern?(parent_record) && child_record.is_a?(::Collection)
|
|
73
|
-
raise ::StandardError, 'a Collection may not be assigned as a child of a Work'
|
|
68
|
+
def create_relationships
|
|
69
|
+
if parent_record.is_a?(::Collection)
|
|
70
|
+
collection_parent_work_child unless child_records[:works].empty?
|
|
71
|
+
collection_parent_collection_child unless child_records[:collections].empty?
|
|
74
72
|
else
|
|
75
|
-
work_parent_work_child
|
|
73
|
+
work_parent_work_child unless child_records[:works].empty?
|
|
74
|
+
raise ::StandardError, 'a Collection may not be assigned as a child of a Work' if child_records[:collections].present?
|
|
76
75
|
end
|
|
77
76
|
end
|
|
78
77
|
|
|
79
78
|
def user
|
|
80
|
-
@user ||=
|
|
79
|
+
@user ||= Bulkrax::ImporterRun.find(importer_run_id).importer.user
|
|
81
80
|
end
|
|
82
81
|
|
|
83
82
|
# Work-Collection membership is added to the child as member_of_collection_ids
|
|
84
83
|
# This is adding the reverse relationship, from the child to the parent
|
|
85
84
|
def collection_parent_work_child
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# TODO: add counters for :processed_parents and :failed_parents
|
|
101
|
-
importer_run.increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
|
85
|
+
child_records[:works].each do |child_record|
|
|
86
|
+
attrs = { id: child_record.id, member_of_collections_attributes: { 0 => { id: parent_record.id } } }
|
|
87
|
+
ObjectFactory.new(
|
|
88
|
+
attributes: attrs,
|
|
89
|
+
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
|
90
|
+
work_identifier: parent_entry.parser.work_identifier,
|
|
91
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
|
92
|
+
replace_files: false,
|
|
93
|
+
user: user,
|
|
94
|
+
klass: child_record.class
|
|
95
|
+
).run
|
|
96
|
+
# TODO: add counters for :processed_parents and :failed_parents
|
|
97
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
|
98
|
+
end
|
|
102
99
|
end
|
|
103
100
|
|
|
104
101
|
# Collection-Collection membership is added to the as member_ids
|
|
105
102
|
def collection_parent_collection_child
|
|
106
|
-
|
|
107
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
108
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
109
|
-
)
|
|
103
|
+
child_record = child_records[:collections].first
|
|
110
104
|
attrs = { id: parent_record.id, child_collection_id: child_record.id }
|
|
111
105
|
ObjectFactory.new(
|
|
112
106
|
attributes: attrs,
|
|
113
107
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
|
114
|
-
work_identifier:
|
|
115
|
-
|
|
108
|
+
work_identifier: parent_entry.parser.work_identifier,
|
|
109
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
|
116
110
|
replace_files: false,
|
|
117
111
|
user: user,
|
|
118
112
|
klass: parent_record.class
|
|
119
113
|
).run
|
|
120
114
|
# TODO: add counters for :processed_parents and :failed_parents
|
|
121
|
-
|
|
115
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
|
122
116
|
end
|
|
123
117
|
|
|
124
118
|
# Work-Work membership is added to the parent as member_ids
|
|
125
119
|
def work_parent_work_child
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
120
|
+
records_hash = {}
|
|
121
|
+
child_records[:works].each_with_index do |child_record, i|
|
|
122
|
+
records_hash[i] = { id: child_record.id }
|
|
123
|
+
end
|
|
130
124
|
attrs = {
|
|
131
125
|
id: parent_record.id,
|
|
132
|
-
work_members_attributes:
|
|
126
|
+
work_members_attributes: records_hash
|
|
133
127
|
}
|
|
134
128
|
ObjectFactory.new(
|
|
135
129
|
attributes: attrs,
|
|
136
130
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
|
137
|
-
work_identifier:
|
|
138
|
-
|
|
131
|
+
work_identifier: parent_entry.parser.work_identifier,
|
|
132
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
|
139
133
|
replace_files: false,
|
|
140
134
|
user: user,
|
|
141
135
|
klass: parent_record.class
|
|
142
136
|
).run
|
|
143
137
|
# TODO: add counters for :processed_parents and :failed_parents
|
|
144
|
-
|
|
138
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
|
145
139
|
end
|
|
146
140
|
|
|
147
|
-
def reschedule(
|
|
141
|
+
def reschedule(parent_identifier:, importer_run_id:)
|
|
148
142
|
CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
|
|
149
|
-
entry_identifier: entry_identifier,
|
|
150
143
|
parent_identifier: parent_identifier,
|
|
151
|
-
|
|
152
|
-
importer_run: importer_run
|
|
144
|
+
importer_run_id: importer_run_id
|
|
153
145
|
)
|
|
154
146
|
end
|
|
155
147
|
end
|
|
@@ -18,9 +18,7 @@ module Bulkrax
|
|
|
18
18
|
importer.only_updates = only_updates_since_last_import || false
|
|
19
19
|
return unless importer.valid_import?
|
|
20
20
|
|
|
21
|
-
importer.
|
|
22
|
-
importer.import_works
|
|
23
|
-
importer.import_file_sets
|
|
21
|
+
importer.import_objects
|
|
24
22
|
end
|
|
25
23
|
|
|
26
24
|
def unzip_imported_file(parser)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bulkrax
|
|
4
|
+
class ScheduleRelationshipsJob < ApplicationJob
|
|
5
|
+
def perform(importer_id:)
|
|
6
|
+
importer = Importer.find(importer_id)
|
|
7
|
+
pending_num = importer.entries.left_outer_joins(:latest_status)
|
|
8
|
+
.where('bulkrax_statuses.status_message IS NULL ').count
|
|
9
|
+
return reschedule(importer_id) unless pending_num.zero?
|
|
10
|
+
|
|
11
|
+
importer.last_run.parents.each do |parent_id|
|
|
12
|
+
CreateRelationshipsJob.perform_later(parent_identifier: parent_id, importer_run_id: importer.last_run.id)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def reschedule(importer_id)
|
|
17
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importer_id)
|
|
18
|
+
false
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
require 'csv'
|
|
4
4
|
|
|
5
5
|
module Bulkrax
|
|
6
|
-
class
|
|
6
|
+
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
|
|
7
|
+
# We do too much in these entry classes. We need to extract the common logic from the various
|
|
8
|
+
# entry models into a module that can be shared between them.
|
|
9
|
+
class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
|
|
7
10
|
serialize :raw_metadata, JSON
|
|
8
11
|
|
|
9
12
|
def self.fields_from_data(data)
|
|
@@ -19,18 +22,15 @@ module Bulkrax
|
|
|
19
22
|
encoding: 'utf-8')
|
|
20
23
|
end
|
|
21
24
|
|
|
22
|
-
def self.data_for_entry(data, _source_id)
|
|
23
|
-
ActiveSupport::Deprecation.warn(
|
|
24
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
25
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
26
|
-
)
|
|
25
|
+
def self.data_for_entry(data, _source_id, parser)
|
|
27
26
|
# If a multi-line CSV data is passed, grab the first row
|
|
28
27
|
data = data.first if data.is_a?(CSV::Table)
|
|
29
28
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
|
30
29
|
raw_data = data.to_h
|
|
31
30
|
raw_data[:model] = data[:model] if data[:model].present?
|
|
32
31
|
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
|
|
33
|
-
|
|
32
|
+
# TODO: change to :parents
|
|
33
|
+
raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
|
|
34
34
|
return raw_data
|
|
35
35
|
end
|
|
36
36
|
|
|
@@ -44,7 +44,6 @@ module Bulkrax
|
|
|
44
44
|
add_visibility
|
|
45
45
|
add_metadata_for_model
|
|
46
46
|
add_rights_statement
|
|
47
|
-
add_collections
|
|
48
47
|
add_local
|
|
49
48
|
|
|
50
49
|
self.parsed_metadata
|
|
@@ -67,15 +66,9 @@ module Bulkrax
|
|
|
67
66
|
end
|
|
68
67
|
|
|
69
68
|
def add_ingested_metadata
|
|
70
|
-
ActiveSupport::Deprecation.warn(
|
|
71
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
72
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
73
|
-
)
|
|
74
69
|
# we do not want to sort the values in the record before adding the metadata.
|
|
75
70
|
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
|
76
71
|
record.each do |key, value|
|
|
77
|
-
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
|
78
|
-
|
|
79
72
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
|
80
73
|
add_metadata(key_without_numbers(key), value, index)
|
|
81
74
|
end
|
|
@@ -101,28 +94,40 @@ module Bulkrax
|
|
|
101
94
|
self.parsed_metadata['id'] = hyrax_record.id
|
|
102
95
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
|
103
96
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
|
97
|
+
build_relationship_metadata
|
|
104
98
|
build_mapping_metadata
|
|
99
|
+
build_files unless hyrax_record.is_a?(Collection)
|
|
100
|
+
self.parsed_metadata
|
|
101
|
+
end
|
|
105
102
|
|
|
106
|
-
|
|
107
|
-
#
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
103
|
+
def build_relationship_metadata
|
|
104
|
+
# Includes all relationship methods for all exportable record types (works, Collections, FileSets)
|
|
105
|
+
relationship_methods = {
|
|
106
|
+
related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
|
|
107
|
+
related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
relationship_methods.each do |relationship_key, methods|
|
|
111
|
+
next if relationship_key.blank?
|
|
112
|
+
|
|
113
|
+
values = []
|
|
114
|
+
methods.each do |m|
|
|
115
|
+
values << hyrax_record.public_send(m) if hyrax_record.respond_to?(m)
|
|
115
116
|
end
|
|
116
|
-
|
|
117
|
+
values = values.flatten.uniq
|
|
118
|
+
next if values.blank?
|
|
117
119
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
+
handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
|
|
121
|
+
end
|
|
120
122
|
end
|
|
121
123
|
|
|
122
124
|
def build_mapping_metadata
|
|
123
125
|
mapping.each do |key, value|
|
|
124
126
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
|
125
127
|
next if key == "model"
|
|
128
|
+
# relationships handled by #build_relationship_metadata
|
|
129
|
+
next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
|
130
|
+
next if key == 'file' # handled by #build_files
|
|
126
131
|
next if value['excluded']
|
|
127
132
|
|
|
128
133
|
object_key = key if value.key?('object')
|
|
@@ -148,7 +153,7 @@ module Bulkrax
|
|
|
148
153
|
data = hyrax_record.send(key.to_s)
|
|
149
154
|
if data.is_a?(ActiveTriples::Relation)
|
|
150
155
|
if value['join']
|
|
151
|
-
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('
|
|
156
|
+
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(' | ').to_s # TODO: make split char dynamic
|
|
152
157
|
else
|
|
153
158
|
data.each_with_index do |d, i|
|
|
154
159
|
self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
|
|
@@ -197,12 +202,21 @@ module Bulkrax
|
|
|
197
202
|
end
|
|
198
203
|
|
|
199
204
|
def build_files
|
|
200
|
-
|
|
201
|
-
|
|
205
|
+
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
|
206
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
|
207
|
+
|
|
208
|
+
filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
|
209
|
+
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def handle_join_on_export(key, values, join)
|
|
213
|
+
if join
|
|
214
|
+
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
|
202
215
|
else
|
|
203
|
-
|
|
204
|
-
|
|
216
|
+
values.each_with_index do |value, i|
|
|
217
|
+
parsed_metadata["#{key}_#{i + 1}"] = value
|
|
205
218
|
end
|
|
219
|
+
parsed_metadata.delete(key)
|
|
206
220
|
end
|
|
207
221
|
end
|
|
208
222
|
|
|
@@ -225,14 +239,19 @@ module Bulkrax
|
|
|
225
239
|
end
|
|
226
240
|
|
|
227
241
|
def possible_collection_ids
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
242
|
+
return @possible_collection_ids if @possible_collection_ids.present?
|
|
243
|
+
|
|
244
|
+
collection_field_mapping = self.class.parent_field(parser)
|
|
245
|
+
return [] unless collection_field_mapping.present? && record[collection_field_mapping].present?
|
|
246
|
+
|
|
247
|
+
identifiers = []
|
|
248
|
+
split_titles = record[collection_field_mapping].split(/\s*[;|]\s*/)
|
|
249
|
+
split_titles.each do |c_title|
|
|
250
|
+
matching_collection_entries = importerexporter.entries.select { |e| e.raw_metadata['title'] == c_title }
|
|
251
|
+
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
|
|
252
|
+
identifiers << matching_collection_entries.first&.identifier
|
|
253
|
+
end
|
|
254
|
+
@possible_collection_ids = identifiers.compact.presence || []
|
|
236
255
|
end
|
|
237
256
|
|
|
238
257
|
def collections_created?
|
data/app/models/bulkrax/entry.rb
CHANGED
|
@@ -58,7 +58,7 @@ module Bulkrax
|
|
|
58
58
|
# @param data - the data from the metadata file
|
|
59
59
|
# @param path - the path to the metadata file (used by some entries to get the file_paths for import)
|
|
60
60
|
# @return Hash containing the data (the entry build_metadata method will know what to expect in the hash)
|
|
61
|
-
def self.data_for_entry(_data, _source_id)
|
|
61
|
+
def self.data_for_entry(_data, _source_id, _parser)
|
|
62
62
|
raise StandardError, 'Not Implemented'
|
|
63
63
|
end
|
|
64
64
|
|
|
@@ -70,12 +70,8 @@ module Bulkrax
|
|
|
70
70
|
parser&.work_identifier&.to_s || 'source'
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
def self.
|
|
74
|
-
|
|
75
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
76
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
77
|
-
)
|
|
78
|
-
Bulkrax.collection_field_mapping[self.to_s]
|
|
73
|
+
def self.parent_field(parser)
|
|
74
|
+
parser.related_parents_parsed_mapping
|
|
79
75
|
end
|
|
80
76
|
|
|
81
77
|
def build
|
|
@@ -125,23 +125,32 @@ module Bulkrax
|
|
|
125
125
|
end
|
|
126
126
|
|
|
127
127
|
def import_works
|
|
128
|
-
|
|
129
|
-
self.only_updates ||= false
|
|
130
|
-
parser.create_works
|
|
131
|
-
rescue StandardError => e
|
|
132
|
-
status_info(e)
|
|
128
|
+
import_objects(['work'])
|
|
133
129
|
end
|
|
134
130
|
|
|
135
131
|
def import_collections
|
|
136
|
-
|
|
137
|
-
parser.create_collections
|
|
138
|
-
rescue StandardError => e
|
|
139
|
-
status_info(e)
|
|
132
|
+
import_objects(['collection'])
|
|
140
133
|
end
|
|
141
134
|
|
|
142
135
|
def import_file_sets
|
|
143
|
-
|
|
144
|
-
|
|
136
|
+
import_objects(['file_set'])
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def import_relationships
|
|
140
|
+
import_objects(['relationship'])
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def import_objects(types_array = nil)
|
|
144
|
+
self.only_updates ||= false
|
|
145
|
+
types = types_array || %w[work collection file_set relationship]
|
|
146
|
+
if parser.class == Bulkrax::CsvParser
|
|
147
|
+
parser.create_objects(types)
|
|
148
|
+
else
|
|
149
|
+
types.each do |object_type|
|
|
150
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
|
151
|
+
parser.send("create_#{object_type.pluralize}")
|
|
152
|
+
end
|
|
153
|
+
end
|
|
145
154
|
rescue StandardError => e
|
|
146
155
|
status_info(e)
|
|
147
156
|
end
|
|
@@ -26,10 +26,6 @@ module Bulkrax
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
def build_metadata
|
|
29
|
-
ActiveSupport::Deprecation.warn(
|
|
30
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
31
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
32
|
-
)
|
|
33
29
|
self.parsed_metadata = {}
|
|
34
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
|
35
31
|
|
|
@@ -13,11 +13,7 @@ module Bulkrax
|
|
|
13
13
|
data.predicates.map(&:to_s)
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def self.data_for_entry(data, source_id)
|
|
17
|
-
ActiveSupport::Deprecation.warn(
|
|
18
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
19
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
20
|
-
)
|
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
|
21
17
|
reader = data
|
|
22
18
|
format = reader.class.format.to_sym
|
|
23
19
|
collections = []
|
|
@@ -25,7 +21,7 @@ module Bulkrax
|
|
|
25
21
|
delete = nil
|
|
26
22
|
data = RDF::Writer.for(format).buffer do |writer|
|
|
27
23
|
reader.each_statement do |statement|
|
|
28
|
-
collections << statement.object.to_s if
|
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
|
29
25
|
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
|
30
26
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
|
31
27
|
writer << statement
|
|
@@ -55,10 +51,6 @@ module Bulkrax
|
|
|
55
51
|
end
|
|
56
52
|
|
|
57
53
|
def build_metadata
|
|
58
|
-
ActiveSupport::Deprecation.warn(
|
|
59
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
60
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
61
|
-
)
|
|
62
54
|
raise StandardError, 'Record not found' if record.nil?
|
|
63
55
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
|
64
56
|
|
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
|
14
14
|
Nokogiri::XML(open(path)).remove_namespaces!
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
def self.data_for_entry(data, source_id)
|
|
17
|
+
def self.data_for_entry(data, source_id, _parser)
|
|
18
18
|
collections = []
|
|
19
19
|
children = []
|
|
20
20
|
xpath_for_source_id = ".//*[name()='#{source_id}']"
|
|
@@ -39,10 +39,6 @@ module Bulkrax
|
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
def build_metadata
|
|
42
|
-
ActiveSupport::Deprecation.warn(
|
|
43
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
44
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
45
|
-
)
|
|
46
42
|
raise StandardError, 'Record not found' if record.nil?
|
|
47
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
|
48
44
|
self.parsed_metadata = {}
|
|
@@ -26,7 +26,9 @@ module Bulkrax
|
|
|
26
26
|
|
|
27
27
|
def write_files
|
|
28
28
|
return if hyrax_record.is_a?(Collection)
|
|
29
|
-
|
|
29
|
+
|
|
30
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
|
31
|
+
file_sets.each do |fs|
|
|
30
32
|
path = File.join(exporter_export_path, 'files')
|
|
31
33
|
FileUtils.mkdir_p(path)
|
|
32
34
|
file = filename(fs)
|
|
@@ -129,10 +129,6 @@ module Bulkrax
|
|
|
129
129
|
end
|
|
130
130
|
|
|
131
131
|
def supported_bulkrax_fields
|
|
132
|
-
ActiveSupport::Deprecation.warn(
|
|
133
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
134
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
135
|
-
)
|
|
136
132
|
@supported_bulkrax_fields ||=
|
|
137
133
|
%W[
|
|
138
134
|
id
|
|
@@ -141,22 +137,16 @@ module Bulkrax
|
|
|
141
137
|
model
|
|
142
138
|
visibility
|
|
143
139
|
delete
|
|
144
|
-
#{parser.collection_field_mapping}
|
|
145
140
|
#{related_parents_parsed_mapping}
|
|
146
141
|
#{related_children_parsed_mapping}
|
|
147
142
|
]
|
|
148
143
|
end
|
|
149
144
|
|
|
150
145
|
def multiple?(field)
|
|
151
|
-
ActiveSupport::Deprecation.warn(
|
|
152
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
153
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
154
|
-
)
|
|
155
146
|
@multiple_bulkrax_fields ||=
|
|
156
147
|
%W[
|
|
157
148
|
file
|
|
158
149
|
remote_files
|
|
159
|
-
#{parser.collection_field_mapping}
|
|
160
150
|
#{related_parents_parsed_mapping}
|
|
161
151
|
#{related_children_parsed_mapping}
|
|
162
152
|
]
|
|
@@ -28,7 +28,8 @@ module Bulkrax
|
|
|
28
28
|
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
|
29
29
|
next if parent_identifier.blank?
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
add_parent_to_import_run(parent_identifier, importerexporter.last_run)
|
|
32
|
+
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
|
32
33
|
end
|
|
33
34
|
end
|
|
34
35
|
|
|
@@ -36,10 +37,17 @@ module Bulkrax
|
|
|
36
37
|
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
|
37
38
|
next if child_identifier.blank?
|
|
38
39
|
|
|
39
|
-
|
|
40
|
+
add_parent_to_import_run(self.identifier, importerexporter.last_run)
|
|
41
|
+
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
|
40
42
|
end
|
|
41
43
|
end
|
|
42
44
|
|
|
45
|
+
def add_parent_to_import_run(parent_id, run)
|
|
46
|
+
run.parents = [] if run.parents.nil?
|
|
47
|
+
run.parents << parent_id
|
|
48
|
+
run.save
|
|
49
|
+
end
|
|
50
|
+
|
|
43
51
|
def find_collection_ids
|
|
44
52
|
self.collection_ids
|
|
45
53
|
end
|
|
@@ -77,10 +85,6 @@ module Bulkrax
|
|
|
77
85
|
def add_collections
|
|
78
86
|
return if find_collection_ids.blank?
|
|
79
87
|
|
|
80
|
-
ActiveSupport::Deprecation.warn(
|
|
81
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
82
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
83
|
-
)
|
|
84
88
|
self.parsed_metadata['member_of_collections_attributes'] = {}
|
|
85
89
|
find_collection_ids.each_with_index do |c, i|
|
|
86
90
|
self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
|
|
@@ -88,15 +92,10 @@ module Bulkrax
|
|
|
88
92
|
end
|
|
89
93
|
|
|
90
94
|
def factory
|
|
91
|
-
ActiveSupport::Deprecation.warn(
|
|
92
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
93
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
94
|
-
)
|
|
95
95
|
@factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
|
|
96
96
|
source_identifier_value: identifier,
|
|
97
97
|
work_identifier: parser.work_identifier,
|
|
98
|
-
|
|
99
|
-
related_parents_parsed_mapping: related_parents_parsed_mapping,
|
|
98
|
+
related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
|
|
100
99
|
replace_files: replace_files,
|
|
101
100
|
user: user,
|
|
102
101
|
klass: factory_class,
|
|
@@ -20,14 +20,14 @@ module Bulkrax
|
|
|
20
20
|
(last_imported_at || Time.current) + frequency.to_seconds if schedulable? && last_imported_at.present?
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
def increment_counters(index, collection: false, file_set: false)
|
|
23
|
+
def increment_counters(index, collection: false, file_set: false, work: false)
|
|
24
24
|
# Only set the totals if they were not set on initialization
|
|
25
25
|
importer_run = ImporterRun.find(current_run.id) # make sure fresh
|
|
26
26
|
if collection
|
|
27
27
|
importer_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
|
28
28
|
elsif file_set
|
|
29
29
|
importer_run.total_file_set_entries = index + 1 unless parser.file_sets_total.positive?
|
|
30
|
-
else
|
|
30
|
+
elsif work
|
|
31
31
|
# TODO: differentiate between work and collection counts for exporters
|
|
32
32
|
importer_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
|
33
33
|
end
|
|
@@ -56,7 +56,7 @@ module Bulkrax
|
|
|
56
56
|
end
|
|
57
57
|
|
|
58
58
|
def related_parents_parsed_mapping
|
|
59
|
-
@related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
|
|
59
|
+
@related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents')
|
|
60
60
|
end
|
|
61
61
|
|
|
62
62
|
def related_children_raw_mapping
|
|
@@ -64,29 +64,22 @@ module Bulkrax
|
|
|
64
64
|
end
|
|
65
65
|
|
|
66
66
|
def related_children_parsed_mapping
|
|
67
|
-
@related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
|
|
67
|
+
@related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children')
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
def get_field_mapping_hash_for(key)
|
|
71
71
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
|
72
72
|
|
|
73
|
+
mapping = importerexporter.field_mapping == [{}] ? {} : importerexporter.field_mapping
|
|
73
74
|
instance_variable_set(
|
|
74
75
|
"@#{key}_hash",
|
|
75
|
-
|
|
76
|
+
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
|
76
77
|
)
|
|
77
78
|
raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
|
|
78
79
|
|
|
79
80
|
instance_variable_get("@#{key}_hash")
|
|
80
81
|
end
|
|
81
82
|
|
|
82
|
-
def collection_field_mapping
|
|
83
|
-
ActiveSupport::Deprecation.warn(
|
|
84
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
85
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
86
|
-
)
|
|
87
|
-
Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
|
|
88
|
-
end
|
|
89
|
-
|
|
90
83
|
def model_field_mappings
|
|
91
84
|
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
|
92
85
|
model_mappings |= ['model']
|
|
@@ -118,6 +111,10 @@ module Bulkrax
|
|
|
118
111
|
raise StandardError, 'must be defined' if importer?
|
|
119
112
|
end
|
|
120
113
|
|
|
114
|
+
def create_relationships
|
|
115
|
+
raise StandardError, 'must be defined' if importer?
|
|
116
|
+
end
|
|
117
|
+
|
|
121
118
|
# Optional, define if using browse everything for file upload
|
|
122
119
|
def retrieve_cloud_files(files); end
|
|
123
120
|
|
|
@@ -39,7 +39,7 @@ module Bulkrax
|
|
|
39
39
|
path = metadata_path(bag)
|
|
40
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
|
41
41
|
data = entry_class.read_data(path)
|
|
42
|
-
data = entry_class.data_for_entry(data, source_identifier)
|
|
42
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
|
43
43
|
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
|
44
44
|
data
|
|
45
45
|
end
|
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
|
75
75
|
else
|
|
76
76
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
|
77
77
|
end
|
|
78
|
-
increment_counters(index)
|
|
78
|
+
increment_counters(index, work: true)
|
|
79
79
|
end
|
|
80
80
|
importer.record_status
|
|
81
81
|
rescue StandardError => e
|
|
@@ -83,11 +83,7 @@ module Bulkrax
|
|
|
83
83
|
end
|
|
84
84
|
|
|
85
85
|
def collections
|
|
86
|
-
ActiveSupport::Deprecation.warn(
|
|
87
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
88
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
89
|
-
)
|
|
90
|
-
records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
|
|
86
|
+
records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
|
|
91
87
|
end
|
|
92
88
|
|
|
93
89
|
def collections_total
|
|
@@ -14,18 +14,13 @@ module Bulkrax
|
|
|
14
14
|
csv_data = entry_class.read_data(file_for_import)
|
|
15
15
|
importer.parser_fields['total'] = csv_data.count
|
|
16
16
|
importer.save
|
|
17
|
-
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
|
|
17
|
+
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def collections
|
|
21
|
-
ActiveSupport::Deprecation.warn(
|
|
22
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
23
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
24
|
-
)
|
|
25
21
|
# retrieve a list of unique collections
|
|
26
22
|
records.map do |r|
|
|
27
23
|
collections = []
|
|
28
|
-
r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title, from_collection_field_mapping: true } } if r[collection_field_mapping].present?
|
|
29
24
|
model_field_mappings.each do |model_mapping|
|
|
30
25
|
collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
|
|
31
26
|
end
|
|
@@ -85,73 +80,54 @@ module Bulkrax
|
|
|
85
80
|
end
|
|
86
81
|
|
|
87
82
|
def create_collections
|
|
88
|
-
collections.each_with_index do |collection, index|
|
|
89
|
-
next if collection.blank?
|
|
90
|
-
break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
|
|
91
|
-
ActiveSupport::Deprecation.warn(
|
|
92
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
|
93
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
## BEGIN
|
|
97
|
-
# Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
|
|
98
|
-
# TODO: Remove once collection_field_mapping is removed
|
|
99
|
-
metadata = if collection.delete(:from_collection_field_mapping)
|
|
100
|
-
uci = unique_collection_identifier(collection)
|
|
101
|
-
{
|
|
102
|
-
title: collection[:title],
|
|
103
|
-
work_identifier => uci,
|
|
104
|
-
source_identifier => uci,
|
|
105
|
-
visibility: 'open',
|
|
106
|
-
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
|
107
|
-
}
|
|
108
|
-
end
|
|
109
|
-
collection_hash = metadata.presence || collection
|
|
110
|
-
## END
|
|
111
|
-
|
|
112
|
-
new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
|
|
113
|
-
increment_counters(index, collection: true)
|
|
114
|
-
# TODO: add support for :delete option
|
|
115
|
-
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
|
116
|
-
end
|
|
117
|
-
importer.record_status
|
|
118
|
-
rescue StandardError => e
|
|
119
|
-
status_info(e)
|
|
83
|
+
create_objects(['collection'])
|
|
120
84
|
end
|
|
121
85
|
|
|
122
86
|
def create_works
|
|
123
|
-
works.each_with_index do |work, index|
|
|
124
|
-
next unless record_has_source_identifier(work, records.find_index(work))
|
|
125
|
-
break if limit_reached?(limit, records.find_index(work))
|
|
126
|
-
|
|
127
|
-
seen[work[source_identifier]] = true
|
|
128
|
-
new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
|
|
129
|
-
if work[:delete].present?
|
|
130
|
-
DeleteWorkJob.send(perform_method, new_entry, current_run)
|
|
131
|
-
else
|
|
132
|
-
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
|
133
|
-
end
|
|
134
|
-
increment_counters(index)
|
|
135
|
-
end
|
|
136
|
-
importer.record_status
|
|
137
|
-
rescue StandardError => e
|
|
138
|
-
status_info(e)
|
|
87
|
+
create_objects(['work'])
|
|
139
88
|
end
|
|
140
89
|
|
|
141
90
|
def create_file_sets
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
break if limit_reached?(limit, records.find_index(file_set))
|
|
91
|
+
create_objects(['file_set'])
|
|
92
|
+
end
|
|
145
93
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
94
|
+
def create_relationships
|
|
95
|
+
create_objects(['relationship'])
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def create_objects(types_array = nil)
|
|
99
|
+
(types_array || %w[work collection file_set relationship]).each do |type|
|
|
100
|
+
if type.eql?('relationship')
|
|
101
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
|
102
|
+
next
|
|
103
|
+
end
|
|
104
|
+
send(type.pluralize).each_with_index do |current_record, index|
|
|
105
|
+
next unless record_has_source_identifier(current_record, records.find_index(current_record))
|
|
106
|
+
break if limit_reached?(limit, records.find_index(current_record))
|
|
107
|
+
|
|
108
|
+
seen[current_record[source_identifier]] = true
|
|
109
|
+
create_entry_and_job(current_record, type)
|
|
110
|
+
increment_counters(index, "#{type}": true)
|
|
111
|
+
end
|
|
112
|
+
importer.record_status
|
|
149
113
|
end
|
|
150
|
-
importer.record_status
|
|
151
114
|
rescue StandardError => e
|
|
152
115
|
status_info(e)
|
|
153
116
|
end
|
|
154
117
|
|
|
118
|
+
def create_entry_and_job(current_record, type)
|
|
119
|
+
new_entry = find_or_create_entry(send("#{type}_entry_class"),
|
|
120
|
+
current_record[source_identifier],
|
|
121
|
+
'Bulkrax::Importer',
|
|
122
|
+
current_record.to_h)
|
|
123
|
+
if current_record[:delete].present?
|
|
124
|
+
# TODO: create a "Delete" job for file_sets and collections
|
|
125
|
+
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
|
126
|
+
else
|
|
127
|
+
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
155
131
|
def write_partial_import_file(file)
|
|
156
132
|
import_filename = import_file_path.split('/').last
|
|
157
133
|
partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
|
|
@@ -188,13 +164,13 @@ module Bulkrax
|
|
|
188
164
|
|
|
189
165
|
case importerexporter.export_from
|
|
190
166
|
when 'all'
|
|
191
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
|
192
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
|
193
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
|
167
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
|
168
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
|
169
|
+
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
|
194
170
|
when 'collection'
|
|
195
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
|
|
171
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
|
196
172
|
when 'worktype'
|
|
197
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
|
|
173
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
|
198
174
|
when 'importer'
|
|
199
175
|
set_ids_for_exporting_from_importer
|
|
200
176
|
end
|
|
@@ -214,7 +190,7 @@ module Bulkrax
|
|
|
214
190
|
extra_filters = extra_filters.presence || '*:*'
|
|
215
191
|
|
|
216
192
|
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
|
217
|
-
instance_variable_set(instance_var, ActiveFedora::SolrService.
|
|
193
|
+
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
|
218
194
|
extra_filters.to_s,
|
|
219
195
|
fq: [
|
|
220
196
|
"#{work_identifier}_sim:(#{complete_entry_identifiers.join(' OR ')})",
|
|
@@ -256,6 +232,7 @@ module Bulkrax
|
|
|
256
232
|
def entry_class
|
|
257
233
|
CsvEntry
|
|
258
234
|
end
|
|
235
|
+
alias work_entry_class entry_class
|
|
259
236
|
|
|
260
237
|
def collection_entry_class
|
|
261
238
|
CsvCollectionEntry
|
|
@@ -40,14 +40,14 @@ module Bulkrax
|
|
|
40
40
|
metadata_paths.map do |md|
|
|
41
41
|
# Retrieve all records
|
|
42
42
|
elements = entry_class.read_data(md).xpath("//#{record_element}")
|
|
43
|
-
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier) }
|
|
43
|
+
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
|
|
44
44
|
end
|
|
45
45
|
# Flatten because we may have multiple records per array
|
|
46
46
|
r.compact.flatten
|
|
47
47
|
elsif parser_fields['import_type'] == 'single'
|
|
48
48
|
metadata_paths.map do |md|
|
|
49
49
|
data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
|
|
50
|
-
entry_class.data_for_entry(data, source_identifier)
|
|
50
|
+
entry_class.data_for_entry(data, source_identifier, self)
|
|
51
51
|
end.compact # No need to flatten because we take only the first record
|
|
52
52
|
end
|
|
53
53
|
end
|
|
@@ -94,7 +94,7 @@ module Bulkrax
|
|
|
94
94
|
else
|
|
95
95
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
|
96
96
|
end
|
|
97
|
-
increment_counters(index)
|
|
97
|
+
increment_counters(index, work: true)
|
|
98
98
|
end
|
|
99
99
|
importer.record_status
|
|
100
100
|
rescue StandardError => e
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
class CreateBulkraxPendingRelationships < ActiveRecord::Migration[5.2]
|
|
2
|
+
def change
|
|
3
|
+
create_table :bulkrax_pending_relationships do |t|
|
|
4
|
+
t.belongs_to :bulkrax_importer_run, foreign_key: true, null: false
|
|
5
|
+
t.string :parent_id, null: false
|
|
6
|
+
t.string :child_id, null: false
|
|
7
|
+
|
|
8
|
+
t.timestamps
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
|
@@ -5,11 +5,9 @@ require 'active_support/all'
|
|
|
5
5
|
|
|
6
6
|
module Bulkrax
|
|
7
7
|
class << self
|
|
8
|
-
# TODO: remove collection_field_mapping when releasing v2
|
|
9
8
|
mattr_accessor :parsers,
|
|
10
9
|
:default_work_type,
|
|
11
10
|
:default_field_mapping,
|
|
12
|
-
:collection_field_mapping,
|
|
13
11
|
:fill_in_blank_source_identifiers,
|
|
14
12
|
:related_children_field_mapping,
|
|
15
13
|
:related_parents_field_mapping,
|
|
@@ -35,17 +33,6 @@ module Bulkrax
|
|
|
35
33
|
self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
|
|
36
34
|
self.server_name = 'bulkrax@example.com'
|
|
37
35
|
|
|
38
|
-
# NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
|
|
39
|
-
# Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
|
|
40
|
-
# TODO: remove collection_field_mapping when releasing v2
|
|
41
|
-
# Field_mapping for establishing a collection relationship (FROM work TO collection)
|
|
42
|
-
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
|
43
|
-
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
|
44
|
-
# The default value for CSV is collection
|
|
45
|
-
self.collection_field_mapping = {
|
|
46
|
-
'Bulkrax::CsvEntry' => 'collection'
|
|
47
|
-
}
|
|
48
|
-
|
|
49
36
|
# Hash of Generic field_mappings for use in the view
|
|
50
37
|
# There must be one field_mappings hash per view parial
|
|
51
38
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bulkrax
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.2.4
|
|
4
|
+
version: 3.0.0.beta2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Rob Kaufman
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-
|
|
11
|
+
date: 2022-03-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rails
|
|
@@ -268,6 +268,7 @@ files:
|
|
|
268
268
|
- app/jobs/bulkrax/import_file_set_job.rb
|
|
269
269
|
- app/jobs/bulkrax/import_work_job.rb
|
|
270
270
|
- app/jobs/bulkrax/importer_job.rb
|
|
271
|
+
- app/jobs/bulkrax/schedule_relationships_job.rb
|
|
271
272
|
- app/mailers/bulkrax/application_mailer.rb
|
|
272
273
|
- app/matchers/bulkrax/application_matcher.rb
|
|
273
274
|
- app/matchers/bulkrax/bagit_matcher.rb
|
|
@@ -287,6 +288,7 @@ files:
|
|
|
287
288
|
- app/models/bulkrax/oai_entry.rb
|
|
288
289
|
- app/models/bulkrax/oai_qualified_dc_entry.rb
|
|
289
290
|
- app/models/bulkrax/oai_set_entry.rb
|
|
291
|
+
- app/models/bulkrax/pending_relationship.rb
|
|
290
292
|
- app/models/bulkrax/rdf_collection_entry.rb
|
|
291
293
|
- app/models/bulkrax/rdf_entry.rb
|
|
292
294
|
- app/models/bulkrax/status.rb
|
|
@@ -363,6 +365,9 @@ files:
|
|
|
363
365
|
- db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
|
|
364
366
|
- db/migrate/20220118001339_add_import_attempts_to_entries.rb
|
|
365
367
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
|
368
|
+
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
|
369
|
+
- db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb
|
|
370
|
+
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
|
366
371
|
- lib/bulkrax.rb
|
|
367
372
|
- lib/bulkrax/engine.rb
|
|
368
373
|
- lib/bulkrax/version.rb
|
|
@@ -389,9 +394,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
389
394
|
version: '0'
|
|
390
395
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
391
396
|
requirements:
|
|
392
|
-
- - ">="
|
|
397
|
+
- - ">"
|
|
393
398
|
- !ruby/object:Gem::Version
|
|
394
|
-
version: '0'
|
|
399
|
+
version: 1.3.1
|
|
395
400
|
requirements: []
|
|
396
401
|
rubygems_version: 3.1.4
|
|
397
402
|
signing_key:
|