bulkrax 2.3.0 → 3.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/factories/bulkrax/object_factory.rb +8 -45
- data/app/jobs/bulkrax/create_relationships_job.rb +58 -66
- data/app/jobs/bulkrax/importer_job.rb +1 -3
- data/app/jobs/bulkrax/schedule_relationships_job.rb +21 -0
- data/app/models/bulkrax/csv_entry.rb +41 -34
- data/app/models/bulkrax/entry.rb +3 -7
- data/app/models/bulkrax/importer.rb +20 -11
- data/app/models/bulkrax/oai_entry.rb +0 -4
- data/app/models/bulkrax/pending_relationship.rb +7 -0
- data/app/models/bulkrax/rdf_entry.rb +2 -10
- data/app/models/bulkrax/xml_entry.rb +1 -5
- data/app/models/concerns/bulkrax/export_behavior.rb +3 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +0 -10
- data/app/models/concerns/bulkrax/import_behavior.rb +11 -12
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +2 -2
- data/app/parsers/bulkrax/application_parser.rb +8 -11
- data/app/parsers/bulkrax/bagit_parser.rb +3 -7
- data/app/parsers/bulkrax/csv_parser.rb +36 -77
- data/app/parsers/bulkrax/oai_dc_parser.rb +1 -1
- data/app/parsers/bulkrax/xml_parser.rb +3 -3
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +11 -0
- data/db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb +5 -0
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +0 -13
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a983b1932af9c9355daf4d0d5ff4eb628c109bad9f41103041548e97ec0d4abe
|
4
|
+
data.tar.gz: aae00491b670a9a329e93948818503eed2dd762ee768e9bed9b8d2ecf39dffc5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 124984e2a0e6b360aefb72fb3e9bbbb31af8ab268c7190d48c51c37487c89e75a010c4b7df3b9a17190852df12a478bd4d8663b3fe4127b90e681240f53821cc
|
7
|
+
data.tar.gz: ddb4d13c37e561f4bc8ebc88b0ef0c17f7aadd2e89627ec602a39e3aec0f892c79fb9ca0739ad28b73e045d30bf4f8143dd6522be9bba62fb1bb2871e00aa5ac
|
@@ -7,20 +7,15 @@ module Bulkrax
|
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
9
|
define_model_callbacks :save, :create
|
10
|
-
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :
|
10
|
+
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping
|
11
11
|
|
12
12
|
# rubocop:disable Metrics/ParameterLists
|
13
|
-
def initialize(attributes:, source_identifier_value:, work_identifier:,
|
14
|
-
ActiveSupport::Deprecation.warn(
|
15
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
16
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
17
|
-
)
|
13
|
+
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
|
18
14
|
@attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
|
19
15
|
@replace_files = replace_files
|
20
16
|
@update_files = update_files
|
21
17
|
@user = user || User.batch_user
|
22
18
|
@work_identifier = work_identifier
|
23
|
-
@collection_field_mapping = collection_field_mapping
|
24
19
|
@related_parents_parsed_mapping = related_parents_parsed_mapping
|
25
20
|
@source_identifier_value = source_identifier_value
|
26
21
|
@klass = klass || Bulkrax.default_work_type.constantize
|
@@ -55,7 +50,7 @@ module Bulkrax
|
|
55
50
|
def update
|
56
51
|
raise "Object doesn't exist" unless object
|
57
52
|
destroy_existing_files if @replace_files && ![Collection, FileSet].include?(klass)
|
58
|
-
attrs =
|
53
|
+
attrs = transform_attributes(update: true)
|
59
54
|
run_callbacks :save do
|
60
55
|
if klass == Collection
|
61
56
|
update_collection(attrs)
|
@@ -97,7 +92,7 @@ module Bulkrax
|
|
97
92
|
# https://github.com/projecthydra/active_fedora/issues/874
|
98
93
|
# 2+ years later, still open!
|
99
94
|
def create
|
100
|
-
attrs =
|
95
|
+
attrs = transform_attributes
|
101
96
|
@object = klass.new
|
102
97
|
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent)
|
103
98
|
run_callbacks :save do
|
@@ -142,25 +137,15 @@ module Bulkrax
|
|
142
137
|
end
|
143
138
|
|
144
139
|
def create_collection(attrs)
|
145
|
-
ActiveSupport::Deprecation.warn(
|
146
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
147
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
148
|
-
)
|
149
140
|
attrs = collection_type(attrs)
|
150
|
-
persist_collection_memberships(parent:
|
151
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
141
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
152
142
|
object.attributes = attrs
|
153
143
|
object.apply_depositor_metadata(@user)
|
154
144
|
object.save!
|
155
145
|
end
|
156
146
|
|
157
147
|
def update_collection(attrs)
|
158
|
-
|
159
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
160
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
161
|
-
)
|
162
|
-
persist_collection_memberships(parent: object, child: find_collection(attributes[:child_collection_id])) if attributes[:child_collection_id].present?
|
163
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
148
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
164
149
|
object.attributes = attrs
|
165
150
|
object.save!
|
166
151
|
end
|
@@ -219,34 +204,12 @@ module Bulkrax
|
|
219
204
|
attrs
|
220
205
|
end
|
221
206
|
|
222
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
223
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
224
|
-
def create_attributes
|
225
|
-
return transform_attributes if klass == Collection
|
226
|
-
ActiveSupport::Deprecation.warn(
|
227
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
228
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
229
|
-
)
|
230
|
-
transform_attributes.except(:collections, :collection, collection_field_mapping)
|
231
|
-
end
|
232
|
-
|
233
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
234
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
235
|
-
def attribute_update
|
236
|
-
return transform_attributes.except(:id) if klass == Collection
|
237
|
-
ActiveSupport::Deprecation.warn(
|
238
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
239
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
240
|
-
)
|
241
|
-
transform_attributes.except(:id, :collections, :collection, collection_field_mapping)
|
242
|
-
end
|
243
|
-
|
244
207
|
# Override if we need to map the attributes from the parser in
|
245
208
|
# a way that is compatible with how the factory needs them.
|
246
|
-
def transform_attributes
|
209
|
+
def transform_attributes(update: false)
|
247
210
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
248
211
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
249
|
-
@transform_attributes
|
212
|
+
update ? @transform_attributes.except(:id) : @transform_attributes
|
250
213
|
end
|
251
214
|
|
252
215
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
@@ -21,11 +21,9 @@ module Bulkrax
|
|
21
21
|
|
22
22
|
queue_as :import
|
23
23
|
|
24
|
-
attr_accessor :
|
24
|
+
attr_accessor :child_records, :parent_record, :parent_entry, :importer_run_id
|
25
25
|
|
26
|
-
# @param
|
27
|
-
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
28
|
-
# @param child_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
26
|
+
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
29
27
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
30
28
|
#
|
31
29
|
# The entry_identifier is used to lookup the @base_entry for the job (a.k.a. the entry the job was called from).
|
@@ -33,123 +31,117 @@ module Bulkrax
|
|
33
31
|
# Whether the @base_entry is the parent or the child in the relationship is determined by the presence of a
|
34
32
|
# parent_identifier or child_identifier param. For example, if a parent_identifier is passed, we know @base_entry
|
35
33
|
# is the child in the relationship, and vice versa if a child_identifier is passed.
|
36
|
-
def perform(
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
34
|
+
def perform(parent_identifier:, importer_run_id:)
|
35
|
+
pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
|
36
|
+
rel.bulkrax_importer_run_id == importer_run_id && rel.parent_id == parent_identifier
|
37
|
+
end.sort_by(&:order)
|
38
|
+
|
39
|
+
@importer_run_id = importer_run_id
|
40
|
+
@parent_record = find_record(parent_identifier)
|
41
|
+
@child_records = { works: [], collections: [] }
|
42
|
+
pending_relationships.each do |rel|
|
43
|
+
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
|
44
|
+
child_record = find_record(rel.child_id)
|
45
|
+
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
|
47
46
|
end
|
48
47
|
|
49
|
-
if
|
48
|
+
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
|
50
49
|
reschedule(
|
51
|
-
entry_identifier: entry_identifier,
|
52
50
|
parent_identifier: parent_identifier,
|
53
|
-
|
54
|
-
importer_run: importer_run
|
51
|
+
importer_run_id: importer_run_id
|
55
52
|
)
|
56
53
|
return false # stop current job from continuing to run after rescheduling
|
57
54
|
end
|
58
55
|
|
59
|
-
|
56
|
+
@parent_entry = Bulkrax::Entry.where(identifier: parent_identifier,
|
57
|
+
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
|
58
|
+
importerexporter_type: "Bulkrax::Importer").first
|
59
|
+
create_relationships
|
60
|
+
pending_relationships.each(&:destroy)
|
60
61
|
rescue ::StandardError => e
|
61
|
-
|
62
|
-
|
62
|
+
parent_entry.status_info(e)
|
63
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
63
64
|
end
|
64
65
|
|
65
66
|
private
|
66
67
|
|
67
|
-
def
|
68
|
-
if parent_record.is_a?(::Collection)
|
69
|
-
|
70
|
-
|
71
|
-
collection_parent_work_child
|
72
|
-
elsif curation_concern?(parent_record) && child_record.is_a?(::Collection)
|
73
|
-
raise ::StandardError, 'a Collection may not be assigned as a child of a Work'
|
68
|
+
def create_relationships
|
69
|
+
if parent_record.is_a?(::Collection)
|
70
|
+
collection_parent_work_child unless child_records[:works].empty?
|
71
|
+
collection_parent_collection_child unless child_records[:collections].empty?
|
74
72
|
else
|
75
|
-
work_parent_work_child
|
73
|
+
work_parent_work_child unless child_records[:works].empty?
|
74
|
+
raise ::StandardError, 'a Collection may not be assigned as a child of a Work' if child_records[:collections].present?
|
76
75
|
end
|
77
76
|
end
|
78
77
|
|
79
78
|
def user
|
80
|
-
@user ||=
|
79
|
+
@user ||= Bulkrax::ImporterRun.find(importer_run_id).importer.user
|
81
80
|
end
|
82
81
|
|
83
82
|
# Work-Collection membership is added to the child as member_of_collection_ids
|
84
83
|
# This is adding the reverse relationship, from the child to the parent
|
85
84
|
def collection_parent_work_child
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
# TODO: add counters for :processed_parents and :failed_parents
|
101
|
-
importer_run.increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
85
|
+
child_records[:works].each do |child_record|
|
86
|
+
attrs = { id: child_record.id, member_of_collections_attributes: { 0 => { id: parent_record.id } } }
|
87
|
+
ObjectFactory.new(
|
88
|
+
attributes: attrs,
|
89
|
+
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
90
|
+
work_identifier: parent_entry.parser.work_identifier,
|
91
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
92
|
+
replace_files: false,
|
93
|
+
user: user,
|
94
|
+
klass: child_record.class
|
95
|
+
).run
|
96
|
+
# TODO: add counters for :processed_parents and :failed_parents
|
97
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
98
|
+
end
|
102
99
|
end
|
103
100
|
|
104
101
|
# Collection-Collection membership is added to the as member_ids
|
105
102
|
def collection_parent_collection_child
|
106
|
-
|
107
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
108
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
109
|
-
)
|
103
|
+
child_record = child_records[:collections].first
|
110
104
|
attrs = { id: parent_record.id, child_collection_id: child_record.id }
|
111
105
|
ObjectFactory.new(
|
112
106
|
attributes: attrs,
|
113
107
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
114
|
-
work_identifier:
|
115
|
-
|
108
|
+
work_identifier: parent_entry.parser.work_identifier,
|
109
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
116
110
|
replace_files: false,
|
117
111
|
user: user,
|
118
112
|
klass: parent_record.class
|
119
113
|
).run
|
120
114
|
# TODO: add counters for :processed_parents and :failed_parents
|
121
|
-
|
115
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
122
116
|
end
|
123
117
|
|
124
118
|
# Work-Work membership is added to the parent as member_ids
|
125
119
|
def work_parent_work_child
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
120
|
+
records_hash = {}
|
121
|
+
child_records[:works].each_with_index do |child_record, i|
|
122
|
+
records_hash[i] = { id: child_record.id }
|
123
|
+
end
|
130
124
|
attrs = {
|
131
125
|
id: parent_record.id,
|
132
|
-
work_members_attributes:
|
126
|
+
work_members_attributes: records_hash
|
133
127
|
}
|
134
128
|
ObjectFactory.new(
|
135
129
|
attributes: attrs,
|
136
130
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
137
|
-
work_identifier:
|
138
|
-
|
131
|
+
work_identifier: parent_entry.parser.work_identifier,
|
132
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
139
133
|
replace_files: false,
|
140
134
|
user: user,
|
141
135
|
klass: parent_record.class
|
142
136
|
).run
|
143
137
|
# TODO: add counters for :processed_parents and :failed_parents
|
144
|
-
|
138
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
145
139
|
end
|
146
140
|
|
147
|
-
def reschedule(
|
141
|
+
def reschedule(parent_identifier:, importer_run_id:)
|
148
142
|
CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
|
149
|
-
entry_identifier: entry_identifier,
|
150
143
|
parent_identifier: parent_identifier,
|
151
|
-
|
152
|
-
importer_run: importer_run
|
144
|
+
importer_run_id: importer_run_id
|
153
145
|
)
|
154
146
|
end
|
155
147
|
end
|
@@ -18,9 +18,7 @@ module Bulkrax
|
|
18
18
|
importer.only_updates = only_updates_since_last_import || false
|
19
19
|
return unless importer.valid_import?
|
20
20
|
|
21
|
-
importer.
|
22
|
-
importer.import_works
|
23
|
-
importer.import_file_sets
|
21
|
+
importer.import_objects
|
24
22
|
end
|
25
23
|
|
26
24
|
def unzip_imported_file(parser)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class ScheduleRelationshipsJob < ApplicationJob
|
5
|
+
def perform(importer_id:)
|
6
|
+
importer = Importer.find(importer_id)
|
7
|
+
pending_num = importer.entries.left_outer_joins(:latest_status)
|
8
|
+
.where('bulkrax_statuses.status_message IS NULL ').count
|
9
|
+
return reschedule(importer_id) unless pending_num.zero?
|
10
|
+
|
11
|
+
importer.last_run.parents.each do |parent_id|
|
12
|
+
CreateRelationshipsJob.perform_later(parent_identifier: parent_id, importer_run_id: importer.last_run.id)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def reschedule(importer_id)
|
17
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importer_id)
|
18
|
+
false
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -22,18 +22,15 @@ module Bulkrax
|
|
22
22
|
encoding: 'utf-8')
|
23
23
|
end
|
24
24
|
|
25
|
-
def self.data_for_entry(data, _source_id)
|
26
|
-
ActiveSupport::Deprecation.warn(
|
27
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
28
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
29
|
-
)
|
25
|
+
def self.data_for_entry(data, _source_id, parser)
|
30
26
|
# If a multi-line CSV data is passed, grab the first row
|
31
27
|
data = data.first if data.is_a?(CSV::Table)
|
32
28
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
33
29
|
raw_data = data.to_h
|
34
30
|
raw_data[:model] = data[:model] if data[:model].present?
|
35
31
|
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
|
36
|
-
|
32
|
+
# TODO: change to :parents
|
33
|
+
raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
|
37
34
|
return raw_data
|
38
35
|
end
|
39
36
|
|
@@ -47,7 +44,6 @@ module Bulkrax
|
|
47
44
|
add_visibility
|
48
45
|
add_metadata_for_model
|
49
46
|
add_rights_statement
|
50
|
-
add_collections
|
51
47
|
add_local
|
52
48
|
|
53
49
|
self.parsed_metadata
|
@@ -70,15 +66,9 @@ module Bulkrax
|
|
70
66
|
end
|
71
67
|
|
72
68
|
def add_ingested_metadata
|
73
|
-
ActiveSupport::Deprecation.warn(
|
74
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
75
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
76
|
-
)
|
77
69
|
# we do not want to sort the values in the record before adding the metadata.
|
78
70
|
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
79
71
|
record.each do |key, value|
|
80
|
-
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
81
|
-
|
82
72
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
83
73
|
add_metadata(key_without_numbers(key), value, index)
|
84
74
|
end
|
@@ -104,28 +94,40 @@ module Bulkrax
|
|
104
94
|
self.parsed_metadata['id'] = hyrax_record.id
|
105
95
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
106
96
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
97
|
+
build_relationship_metadata
|
107
98
|
build_mapping_metadata
|
99
|
+
build_files unless hyrax_record.is_a?(Collection)
|
100
|
+
self.parsed_metadata
|
101
|
+
end
|
108
102
|
|
109
|
-
|
110
|
-
#
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
103
|
+
def build_relationship_metadata
|
104
|
+
# Includes all relationship methods for all exportable record types (works, Collections, FileSets)
|
105
|
+
relationship_methods = {
|
106
|
+
related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
|
107
|
+
related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
|
108
|
+
}
|
109
|
+
|
110
|
+
relationship_methods.each do |relationship_key, methods|
|
111
|
+
next if relationship_key.blank?
|
112
|
+
|
113
|
+
values = []
|
114
|
+
methods.each do |m|
|
115
|
+
values << hyrax_record.public_send(m) if hyrax_record.respond_to?(m)
|
118
116
|
end
|
119
|
-
|
117
|
+
values = values.flatten.uniq
|
118
|
+
next if values.blank?
|
120
119
|
|
121
|
-
|
122
|
-
|
120
|
+
handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
|
121
|
+
end
|
123
122
|
end
|
124
123
|
|
125
124
|
def build_mapping_metadata
|
126
125
|
mapping.each do |key, value|
|
127
126
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
128
127
|
next if key == "model"
|
128
|
+
# relationships handled by #build_relationship_metadata
|
129
|
+
next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
130
|
+
next if key == 'file' # handled by #build_files
|
129
131
|
next if value['excluded']
|
130
132
|
|
131
133
|
object_key = key if value.key?('object')
|
@@ -200,12 +202,21 @@ module Bulkrax
|
|
200
202
|
end
|
201
203
|
|
202
204
|
def build_files
|
203
|
-
|
204
|
-
|
205
|
+
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
206
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
207
|
+
|
208
|
+
filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
209
|
+
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
210
|
+
end
|
211
|
+
|
212
|
+
def handle_join_on_export(key, values, join)
|
213
|
+
if join
|
214
|
+
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
205
215
|
else
|
206
|
-
|
207
|
-
|
216
|
+
values.each_with_index do |value, i|
|
217
|
+
parsed_metadata["#{key}_#{i + 1}"] = value
|
208
218
|
end
|
219
|
+
parsed_metadata.delete(key)
|
209
220
|
end
|
210
221
|
end
|
211
222
|
|
@@ -228,13 +239,9 @@ module Bulkrax
|
|
228
239
|
end
|
229
240
|
|
230
241
|
def possible_collection_ids
|
231
|
-
ActiveSupport::Deprecation.warn(
|
232
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
233
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
234
|
-
)
|
235
242
|
return @possible_collection_ids if @possible_collection_ids.present?
|
236
243
|
|
237
|
-
collection_field_mapping = self.class.
|
244
|
+
collection_field_mapping = self.class.parent_field(parser)
|
238
245
|
return [] unless collection_field_mapping.present? && record[collection_field_mapping].present?
|
239
246
|
|
240
247
|
identifiers = []
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
# @param data - the data from the metadata file
|
59
59
|
# @param path - the path to the metadata file (used by some entries to get the file_paths for import)
|
60
60
|
# @return Hash containing the data (the entry build_metadata method will know what to expect in the hash)
|
61
|
-
def self.data_for_entry(_data, _source_id)
|
61
|
+
def self.data_for_entry(_data, _source_id, _parser)
|
62
62
|
raise StandardError, 'Not Implemented'
|
63
63
|
end
|
64
64
|
|
@@ -70,12 +70,8 @@ module Bulkrax
|
|
70
70
|
parser&.work_identifier&.to_s || 'source'
|
71
71
|
end
|
72
72
|
|
73
|
-
def self.
|
74
|
-
|
75
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
76
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
77
|
-
)
|
78
|
-
Bulkrax.collection_field_mapping[self.to_s]
|
73
|
+
def self.parent_field(parser)
|
74
|
+
parser.related_parents_parsed_mapping
|
79
75
|
end
|
80
76
|
|
81
77
|
def build
|
@@ -125,23 +125,32 @@ module Bulkrax
|
|
125
125
|
end
|
126
126
|
|
127
127
|
def import_works
|
128
|
-
|
129
|
-
self.only_updates ||= false
|
130
|
-
parser.create_works
|
131
|
-
rescue StandardError => e
|
132
|
-
status_info(e)
|
128
|
+
import_objects(['work'])
|
133
129
|
end
|
134
130
|
|
135
131
|
def import_collections
|
136
|
-
|
137
|
-
parser.create_collections
|
138
|
-
rescue StandardError => e
|
139
|
-
status_info(e)
|
132
|
+
import_objects(['collection'])
|
140
133
|
end
|
141
134
|
|
142
135
|
def import_file_sets
|
143
|
-
|
144
|
-
|
136
|
+
import_objects(['file_set'])
|
137
|
+
end
|
138
|
+
|
139
|
+
def import_relationships
|
140
|
+
import_objects(['relationship'])
|
141
|
+
end
|
142
|
+
|
143
|
+
def import_objects(types_array = nil)
|
144
|
+
self.only_updates ||= false
|
145
|
+
types = types_array || %w[work collection file_set relationship]
|
146
|
+
if parser.class == Bulkrax::CsvParser
|
147
|
+
parser.create_objects(types)
|
148
|
+
else
|
149
|
+
types.each do |object_type|
|
150
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
151
|
+
parser.send("create_#{object_type.pluralize}")
|
152
|
+
end
|
153
|
+
end
|
145
154
|
rescue StandardError => e
|
146
155
|
status_info(e)
|
147
156
|
end
|
@@ -26,10 +26,6 @@ module Bulkrax
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def build_metadata
|
29
|
-
ActiveSupport::Deprecation.warn(
|
30
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
31
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
32
|
-
)
|
33
29
|
self.parsed_metadata = {}
|
34
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
35
31
|
|
@@ -13,11 +13,7 @@ module Bulkrax
|
|
13
13
|
data.predicates.map(&:to_s)
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.data_for_entry(data, source_id)
|
17
|
-
ActiveSupport::Deprecation.warn(
|
18
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
19
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
20
|
-
)
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
21
17
|
reader = data
|
22
18
|
format = reader.class.format.to_sym
|
23
19
|
collections = []
|
@@ -25,7 +21,7 @@ module Bulkrax
|
|
25
21
|
delete = nil
|
26
22
|
data = RDF::Writer.for(format).buffer do |writer|
|
27
23
|
reader.each_statement do |statement|
|
28
|
-
collections << statement.object.to_s if
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
29
25
|
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
30
26
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
31
27
|
writer << statement
|
@@ -55,10 +51,6 @@ module Bulkrax
|
|
55
51
|
end
|
56
52
|
|
57
53
|
def build_metadata
|
58
|
-
ActiveSupport::Deprecation.warn(
|
59
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
60
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
61
|
-
)
|
62
54
|
raise StandardError, 'Record not found' if record.nil?
|
63
55
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
64
56
|
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
14
14
|
Nokogiri::XML(open(path)).remove_namespaces!
|
15
15
|
end
|
16
16
|
|
17
|
-
def self.data_for_entry(data, source_id)
|
17
|
+
def self.data_for_entry(data, source_id, _parser)
|
18
18
|
collections = []
|
19
19
|
children = []
|
20
20
|
xpath_for_source_id = ".//*[name()='#{source_id}']"
|
@@ -39,10 +39,6 @@ module Bulkrax
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def build_metadata
|
42
|
-
ActiveSupport::Deprecation.warn(
|
43
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
44
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
45
|
-
)
|
46
42
|
raise StandardError, 'Record not found' if record.nil?
|
47
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
48
44
|
self.parsed_metadata = {}
|
@@ -26,7 +26,9 @@ module Bulkrax
|
|
26
26
|
|
27
27
|
def write_files
|
28
28
|
return if hyrax_record.is_a?(Collection)
|
29
|
-
|
29
|
+
|
30
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
31
|
+
file_sets.each do |fs|
|
30
32
|
path = File.join(exporter_export_path, 'files')
|
31
33
|
FileUtils.mkdir_p(path)
|
32
34
|
file = filename(fs)
|
@@ -129,10 +129,6 @@ module Bulkrax
|
|
129
129
|
end
|
130
130
|
|
131
131
|
def supported_bulkrax_fields
|
132
|
-
ActiveSupport::Deprecation.warn(
|
133
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
134
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
135
|
-
)
|
136
132
|
@supported_bulkrax_fields ||=
|
137
133
|
%W[
|
138
134
|
id
|
@@ -141,22 +137,16 @@ module Bulkrax
|
|
141
137
|
model
|
142
138
|
visibility
|
143
139
|
delete
|
144
|
-
#{parser.collection_field_mapping}
|
145
140
|
#{related_parents_parsed_mapping}
|
146
141
|
#{related_children_parsed_mapping}
|
147
142
|
]
|
148
143
|
end
|
149
144
|
|
150
145
|
def multiple?(field)
|
151
|
-
ActiveSupport::Deprecation.warn(
|
152
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
153
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
154
|
-
)
|
155
146
|
@multiple_bulkrax_fields ||=
|
156
147
|
%W[
|
157
148
|
file
|
158
149
|
remote_files
|
159
|
-
#{parser.collection_field_mapping}
|
160
150
|
#{related_parents_parsed_mapping}
|
161
151
|
#{related_children_parsed_mapping}
|
162
152
|
]
|
@@ -28,7 +28,8 @@ module Bulkrax
|
|
28
28
|
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
29
29
|
next if parent_identifier.blank?
|
30
30
|
|
31
|
-
|
31
|
+
add_parent_to_import_run(parent_identifier, importerexporter.last_run)
|
32
|
+
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
@@ -36,10 +37,17 @@ module Bulkrax
|
|
36
37
|
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
37
38
|
next if child_identifier.blank?
|
38
39
|
|
39
|
-
|
40
|
+
add_parent_to_import_run(self.identifier, importerexporter.last_run)
|
41
|
+
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
40
42
|
end
|
41
43
|
end
|
42
44
|
|
45
|
+
def add_parent_to_import_run(parent_id, run)
|
46
|
+
run.parents = [] if run.parents.nil?
|
47
|
+
run.parents << parent_id
|
48
|
+
run.save
|
49
|
+
end
|
50
|
+
|
43
51
|
def find_collection_ids
|
44
52
|
self.collection_ids
|
45
53
|
end
|
@@ -77,10 +85,6 @@ module Bulkrax
|
|
77
85
|
def add_collections
|
78
86
|
return if find_collection_ids.blank?
|
79
87
|
|
80
|
-
ActiveSupport::Deprecation.warn(
|
81
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
82
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
83
|
-
)
|
84
88
|
self.parsed_metadata['member_of_collections_attributes'] = {}
|
85
89
|
find_collection_ids.each_with_index do |c, i|
|
86
90
|
self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
|
@@ -88,15 +92,10 @@ module Bulkrax
|
|
88
92
|
end
|
89
93
|
|
90
94
|
def factory
|
91
|
-
ActiveSupport::Deprecation.warn(
|
92
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
93
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
94
|
-
)
|
95
95
|
@factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
|
96
96
|
source_identifier_value: identifier,
|
97
97
|
work_identifier: parser.work_identifier,
|
98
|
-
|
99
|
-
related_parents_parsed_mapping: related_parents_parsed_mapping,
|
98
|
+
related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
|
100
99
|
replace_files: replace_files,
|
101
100
|
user: user,
|
102
101
|
klass: factory_class,
|
@@ -20,14 +20,14 @@ module Bulkrax
|
|
20
20
|
(last_imported_at || Time.current) + frequency.to_seconds if schedulable? && last_imported_at.present?
|
21
21
|
end
|
22
22
|
|
23
|
-
def increment_counters(index, collection: false, file_set: false)
|
23
|
+
def increment_counters(index, collection: false, file_set: false, work: false)
|
24
24
|
# Only set the totals if they were not set on initialization
|
25
25
|
importer_run = ImporterRun.find(current_run.id) # make sure fresh
|
26
26
|
if collection
|
27
27
|
importer_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
28
28
|
elsif file_set
|
29
29
|
importer_run.total_file_set_entries = index + 1 unless parser.file_sets_total.positive?
|
30
|
-
|
30
|
+
elsif work
|
31
31
|
# TODO: differentiate between work and collection counts for exporters
|
32
32
|
importer_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
33
33
|
end
|
@@ -56,7 +56,7 @@ module Bulkrax
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def related_parents_parsed_mapping
|
59
|
-
@related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
|
59
|
+
@related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents')
|
60
60
|
end
|
61
61
|
|
62
62
|
def related_children_raw_mapping
|
@@ -64,29 +64,22 @@ module Bulkrax
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def related_children_parsed_mapping
|
67
|
-
@related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
|
67
|
+
@related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children')
|
68
68
|
end
|
69
69
|
|
70
70
|
def get_field_mapping_hash_for(key)
|
71
71
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
72
72
|
|
73
|
+
mapping = importerexporter.field_mapping == [{}] ? {} : importerexporter.field_mapping
|
73
74
|
instance_variable_set(
|
74
75
|
"@#{key}_hash",
|
75
|
-
|
76
|
+
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
76
77
|
)
|
77
78
|
raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
|
78
79
|
|
79
80
|
instance_variable_get("@#{key}_hash")
|
80
81
|
end
|
81
82
|
|
82
|
-
def collection_field_mapping
|
83
|
-
ActiveSupport::Deprecation.warn(
|
84
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
85
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
86
|
-
)
|
87
|
-
Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
|
88
|
-
end
|
89
|
-
|
90
83
|
def model_field_mappings
|
91
84
|
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
92
85
|
model_mappings |= ['model']
|
@@ -118,6 +111,10 @@ module Bulkrax
|
|
118
111
|
raise StandardError, 'must be defined' if importer?
|
119
112
|
end
|
120
113
|
|
114
|
+
def create_relationships
|
115
|
+
raise StandardError, 'must be defined' if importer?
|
116
|
+
end
|
117
|
+
|
121
118
|
# Optional, define if using browse everything for file upload
|
122
119
|
def retrieve_cloud_files(files); end
|
123
120
|
|
@@ -39,7 +39,7 @@ module Bulkrax
|
|
39
39
|
path = metadata_path(bag)
|
40
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
41
41
|
data = entry_class.read_data(path)
|
42
|
-
data = entry_class.data_for_entry(data, source_identifier)
|
42
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
43
43
|
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
44
44
|
data
|
45
45
|
end
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
else
|
76
76
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
77
77
|
end
|
78
|
-
increment_counters(index)
|
78
|
+
increment_counters(index, work: true)
|
79
79
|
end
|
80
80
|
importer.record_status
|
81
81
|
rescue StandardError => e
|
@@ -83,11 +83,7 @@ module Bulkrax
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def collections
|
86
|
-
|
87
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
88
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
89
|
-
)
|
90
|
-
records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
|
86
|
+
records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
|
91
87
|
end
|
92
88
|
|
93
89
|
def collections_total
|
@@ -14,18 +14,13 @@ module Bulkrax
|
|
14
14
|
csv_data = entry_class.read_data(file_for_import)
|
15
15
|
importer.parser_fields['total'] = csv_data.count
|
16
16
|
importer.save
|
17
|
-
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
|
17
|
+
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
18
18
|
end
|
19
19
|
|
20
20
|
def collections
|
21
|
-
ActiveSupport::Deprecation.warn(
|
22
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
23
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
24
|
-
)
|
25
21
|
# retrieve a list of unique collections
|
26
22
|
records.map do |r|
|
27
23
|
collections = []
|
28
|
-
r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title, from_collection_field_mapping: true } } if r[collection_field_mapping].present?
|
29
24
|
model_field_mappings.each do |model_mapping|
|
30
25
|
collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
|
31
26
|
end
|
@@ -85,89 +80,52 @@ module Bulkrax
|
|
85
80
|
end
|
86
81
|
|
87
82
|
def create_collections
|
88
|
-
|
89
|
-
next if collection.blank?
|
90
|
-
break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
|
91
|
-
|
92
|
-
## BEGIN
|
93
|
-
# Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
|
94
|
-
# TODO: Remove once collection_field_mapping is removed
|
95
|
-
metadata = add_required_collection_metadata(collection)
|
96
|
-
collection_hash = metadata.presence || collection
|
97
|
-
## END
|
98
|
-
|
99
|
-
new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
|
100
|
-
increment_counters(index, collection: true)
|
101
|
-
# TODO: add support for :delete option
|
102
|
-
if collection.key?(:from_collection_field_mapping)
|
103
|
-
ActiveSupport::Deprecation.warn(
|
104
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
105
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
106
|
-
)
|
107
|
-
# When importing collections using the deprecated collection_field_mapping, the collection MUST be created
|
108
|
-
# before the work, so we use #perform_now to make sure that happens. The downside is, if a collection fails
|
109
|
-
# to import, it will stop the rest of the collections from importing successfully.
|
110
|
-
# TODO: Remove once collection_field_mapping is removed
|
111
|
-
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
112
|
-
else
|
113
|
-
ImportCollectionJob.perform_later(new_entry.id, current_run.id)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
importer.record_status
|
117
|
-
rescue StandardError => e
|
118
|
-
status_info(e)
|
83
|
+
create_objects(['collection'])
|
119
84
|
end
|
120
85
|
|
121
86
|
def create_works
|
122
|
-
|
123
|
-
next unless record_has_source_identifier(work, records.find_index(work))
|
124
|
-
break if limit_reached?(limit, records.find_index(work))
|
125
|
-
|
126
|
-
seen[work[source_identifier]] = true
|
127
|
-
new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
|
128
|
-
if work[:delete].present?
|
129
|
-
DeleteWorkJob.send(perform_method, new_entry, current_run)
|
130
|
-
else
|
131
|
-
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
132
|
-
end
|
133
|
-
increment_counters(index)
|
134
|
-
end
|
135
|
-
importer.record_status
|
136
|
-
rescue StandardError => e
|
137
|
-
status_info(e)
|
87
|
+
create_objects(['work'])
|
138
88
|
end
|
139
89
|
|
140
90
|
def create_file_sets
|
141
|
-
|
142
|
-
|
143
|
-
|
91
|
+
create_objects(['file_set'])
|
92
|
+
end
|
93
|
+
|
94
|
+
def create_relationships
|
95
|
+
create_objects(['relationship'])
|
96
|
+
end
|
97
|
+
|
98
|
+
def create_objects(types_array = nil)
|
99
|
+
(types_array || %w[work collection file_set relationship]).each do |type|
|
100
|
+
if type.eql?('relationship')
|
101
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
102
|
+
next
|
103
|
+
end
|
104
|
+
send(type.pluralize).each_with_index do |current_record, index|
|
105
|
+
next unless record_has_source_identifier(current_record, records.find_index(current_record))
|
106
|
+
break if limit_reached?(limit, records.find_index(current_record))
|
144
107
|
|
145
|
-
|
146
|
-
|
147
|
-
|
108
|
+
seen[current_record[source_identifier]] = true
|
109
|
+
create_entry_and_job(current_record, type)
|
110
|
+
increment_counters(index, "#{type}": true)
|
111
|
+
end
|
112
|
+
importer.record_status
|
148
113
|
end
|
149
|
-
importer.record_status
|
150
114
|
rescue StandardError => e
|
151
115
|
status_info(e)
|
152
116
|
end
|
153
117
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
title: raw_collection_data[:title],
|
166
|
-
work_identifier => uci,
|
167
|
-
source_identifier => uci,
|
168
|
-
visibility: 'open',
|
169
|
-
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
170
|
-
}
|
118
|
+
def create_entry_and_job(current_record, type)
|
119
|
+
new_entry = find_or_create_entry(send("#{type}_entry_class"),
|
120
|
+
current_record[source_identifier],
|
121
|
+
'Bulkrax::Importer',
|
122
|
+
current_record.to_h)
|
123
|
+
if current_record[:delete].present?
|
124
|
+
# TODO: create a "Delete" job for file_sets and collections
|
125
|
+
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
126
|
+
else
|
127
|
+
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
|
128
|
+
end
|
171
129
|
end
|
172
130
|
|
173
131
|
def write_partial_import_file(file)
|
@@ -274,6 +232,7 @@ module Bulkrax
|
|
274
232
|
def entry_class
|
275
233
|
CsvEntry
|
276
234
|
end
|
235
|
+
alias work_entry_class entry_class
|
277
236
|
|
278
237
|
def collection_entry_class
|
279
238
|
CsvCollectionEntry
|
@@ -40,14 +40,14 @@ module Bulkrax
|
|
40
40
|
metadata_paths.map do |md|
|
41
41
|
# Retrieve all records
|
42
42
|
elements = entry_class.read_data(md).xpath("//#{record_element}")
|
43
|
-
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier) }
|
43
|
+
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
|
44
44
|
end
|
45
45
|
# Flatten because we may have multiple records per array
|
46
46
|
r.compact.flatten
|
47
47
|
elsif parser_fields['import_type'] == 'single'
|
48
48
|
metadata_paths.map do |md|
|
49
49
|
data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
|
50
|
-
entry_class.data_for_entry(data, source_identifier)
|
50
|
+
entry_class.data_for_entry(data, source_identifier, self)
|
51
51
|
end.compact # No need to flatten because we take only the first record
|
52
52
|
end
|
53
53
|
end
|
@@ -94,7 +94,7 @@ module Bulkrax
|
|
94
94
|
else
|
95
95
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
96
96
|
end
|
97
|
-
increment_counters(index)
|
97
|
+
increment_counters(index, work: true)
|
98
98
|
end
|
99
99
|
importer.record_status
|
100
100
|
rescue StandardError => e
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class CreateBulkraxPendingRelationships < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
create_table :bulkrax_pending_relationships do |t|
|
4
|
+
t.belongs_to :bulkrax_importer_run, foreign_key: true, null: false
|
5
|
+
t.string :parent_id, null: false
|
6
|
+
t.string :child_id, null: false
|
7
|
+
|
8
|
+
t.timestamps
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -5,11 +5,9 @@ require 'active_support/all'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class << self
|
8
|
-
# TODO: remove collection_field_mapping when releasing v2
|
9
8
|
mattr_accessor :parsers,
|
10
9
|
:default_work_type,
|
11
10
|
:default_field_mapping,
|
12
|
-
:collection_field_mapping,
|
13
11
|
:fill_in_blank_source_identifiers,
|
14
12
|
:related_children_field_mapping,
|
15
13
|
:related_parents_field_mapping,
|
@@ -35,17 +33,6 @@ module Bulkrax
|
|
35
33
|
self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
|
36
34
|
self.server_name = 'bulkrax@example.com'
|
37
35
|
|
38
|
-
# NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
|
39
|
-
# Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
|
40
|
-
# TODO: remove collection_field_mapping when releasing v2
|
41
|
-
# Field_mapping for establishing a collection relationship (FROM work TO collection)
|
42
|
-
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
43
|
-
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
44
|
-
# The default value for CSV is collection
|
45
|
-
self.collection_field_mapping = {
|
46
|
-
'Bulkrax::CsvEntry' => 'collection'
|
47
|
-
}
|
48
|
-
|
49
36
|
# Hash of Generic field_mappings for use in the view
|
50
37
|
# There must be one field_mappings hash per view parial
|
51
38
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-03-
|
11
|
+
date: 2022-03-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -268,6 +268,7 @@ files:
|
|
268
268
|
- app/jobs/bulkrax/import_file_set_job.rb
|
269
269
|
- app/jobs/bulkrax/import_work_job.rb
|
270
270
|
- app/jobs/bulkrax/importer_job.rb
|
271
|
+
- app/jobs/bulkrax/schedule_relationships_job.rb
|
271
272
|
- app/mailers/bulkrax/application_mailer.rb
|
272
273
|
- app/matchers/bulkrax/application_matcher.rb
|
273
274
|
- app/matchers/bulkrax/bagit_matcher.rb
|
@@ -287,6 +288,7 @@ files:
|
|
287
288
|
- app/models/bulkrax/oai_entry.rb
|
288
289
|
- app/models/bulkrax/oai_qualified_dc_entry.rb
|
289
290
|
- app/models/bulkrax/oai_set_entry.rb
|
291
|
+
- app/models/bulkrax/pending_relationship.rb
|
290
292
|
- app/models/bulkrax/rdf_collection_entry.rb
|
291
293
|
- app/models/bulkrax/rdf_entry.rb
|
292
294
|
- app/models/bulkrax/status.rb
|
@@ -363,6 +365,9 @@ files:
|
|
363
365
|
- db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
|
364
366
|
- db/migrate/20220118001339_add_import_attempts_to_entries.rb
|
365
367
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
368
|
+
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
369
|
+
- db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb
|
370
|
+
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
366
371
|
- lib/bulkrax.rb
|
367
372
|
- lib/bulkrax/engine.rb
|
368
373
|
- lib/bulkrax/version.rb
|
@@ -389,9 +394,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
389
394
|
version: '0'
|
390
395
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
391
396
|
requirements:
|
392
|
-
- - "
|
397
|
+
- - ">"
|
393
398
|
- !ruby/object:Gem::Version
|
394
|
-
version:
|
399
|
+
version: 1.3.1
|
395
400
|
requirements: []
|
396
401
|
rubygems_version: 3.1.4
|
397
402
|
signing_key:
|