bulkrax 2.2.3 → 3.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/factories/bulkrax/object_factory.rb +8 -45
- data/app/jobs/bulkrax/create_relationships_job.rb +58 -66
- data/app/jobs/bulkrax/importer_job.rb +1 -3
- data/app/jobs/bulkrax/schedule_relationships_job.rb +21 -0
- data/app/models/bulkrax/csv_entry.rb +58 -39
- data/app/models/bulkrax/entry.rb +3 -7
- data/app/models/bulkrax/importer.rb +20 -11
- data/app/models/bulkrax/oai_entry.rb +0 -4
- data/app/models/bulkrax/pending_relationship.rb +7 -0
- data/app/models/bulkrax/rdf_entry.rb +2 -10
- data/app/models/bulkrax/xml_entry.rb +1 -5
- data/app/models/concerns/bulkrax/export_behavior.rb +3 -1
- data/app/models/concerns/bulkrax/has_matchers.rb +1 -10
- data/app/models/concerns/bulkrax/import_behavior.rb +11 -12
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +2 -2
- data/app/parsers/bulkrax/application_parser.rb +8 -11
- data/app/parsers/bulkrax/bagit_parser.rb +3 -7
- data/app/parsers/bulkrax/csv_parser.rb +44 -67
- data/app/parsers/bulkrax/oai_dc_parser.rb +1 -1
- data/app/parsers/bulkrax/xml_parser.rb +3 -3
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +11 -0
- data/db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb +5 -0
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +5 -0
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +0 -13
- metadata +13 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a983b1932af9c9355daf4d0d5ff4eb628c109bad9f41103041548e97ec0d4abe
|
4
|
+
data.tar.gz: aae00491b670a9a329e93948818503eed2dd762ee768e9bed9b8d2ecf39dffc5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 124984e2a0e6b360aefb72fb3e9bbbb31af8ab268c7190d48c51c37487c89e75a010c4b7df3b9a17190852df12a478bd4d8663b3fe4127b90e681240f53821cc
|
7
|
+
data.tar.gz: ddb4d13c37e561f4bc8ebc88b0ef0c17f7aadd2e89627ec602a39e3aec0f892c79fb9ca0739ad28b73e045d30bf4f8143dd6522be9bba62fb1bb2871e00aa5ac
|
@@ -7,20 +7,15 @@ module Bulkrax
|
|
7
7
|
include DynamicRecordLookup
|
8
8
|
|
9
9
|
define_model_callbacks :save, :create
|
10
|
-
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :
|
10
|
+
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping
|
11
11
|
|
12
12
|
# rubocop:disable Metrics/ParameterLists
|
13
|
-
def initialize(attributes:, source_identifier_value:, work_identifier:,
|
14
|
-
ActiveSupport::Deprecation.warn(
|
15
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
16
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
17
|
-
)
|
13
|
+
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
|
18
14
|
@attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
|
19
15
|
@replace_files = replace_files
|
20
16
|
@update_files = update_files
|
21
17
|
@user = user || User.batch_user
|
22
18
|
@work_identifier = work_identifier
|
23
|
-
@collection_field_mapping = collection_field_mapping
|
24
19
|
@related_parents_parsed_mapping = related_parents_parsed_mapping
|
25
20
|
@source_identifier_value = source_identifier_value
|
26
21
|
@klass = klass || Bulkrax.default_work_type.constantize
|
@@ -55,7 +50,7 @@ module Bulkrax
|
|
55
50
|
def update
|
56
51
|
raise "Object doesn't exist" unless object
|
57
52
|
destroy_existing_files if @replace_files && ![Collection, FileSet].include?(klass)
|
58
|
-
attrs =
|
53
|
+
attrs = transform_attributes(update: true)
|
59
54
|
run_callbacks :save do
|
60
55
|
if klass == Collection
|
61
56
|
update_collection(attrs)
|
@@ -97,7 +92,7 @@ module Bulkrax
|
|
97
92
|
# https://github.com/projecthydra/active_fedora/issues/874
|
98
93
|
# 2+ years later, still open!
|
99
94
|
def create
|
100
|
-
attrs =
|
95
|
+
attrs = transform_attributes
|
101
96
|
@object = klass.new
|
102
97
|
object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent)
|
103
98
|
run_callbacks :save do
|
@@ -142,25 +137,15 @@ module Bulkrax
|
|
142
137
|
end
|
143
138
|
|
144
139
|
def create_collection(attrs)
|
145
|
-
ActiveSupport::Deprecation.warn(
|
146
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
147
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
148
|
-
)
|
149
140
|
attrs = collection_type(attrs)
|
150
|
-
persist_collection_memberships(parent:
|
151
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
141
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
152
142
|
object.attributes = attrs
|
153
143
|
object.apply_depositor_metadata(@user)
|
154
144
|
object.save!
|
155
145
|
end
|
156
146
|
|
157
147
|
def update_collection(attrs)
|
158
|
-
|
159
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
160
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
161
|
-
)
|
162
|
-
persist_collection_memberships(parent: object, child: find_collection(attributes[:child_collection_id])) if attributes[:child_collection_id].present?
|
163
|
-
persist_collection_memberships(parent: find_collection(attributes[collection_field_mapping]), child: object) if attributes[collection_field_mapping].present?
|
148
|
+
persist_collection_memberships(parent: find_collection(attributes[related_parents_parsed_mapping]), child: object) if attributes[related_parents_parsed_mapping].present?
|
164
149
|
object.attributes = attrs
|
165
150
|
object.save!
|
166
151
|
end
|
@@ -219,34 +204,12 @@ module Bulkrax
|
|
219
204
|
attrs
|
220
205
|
end
|
221
206
|
|
222
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
223
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
224
|
-
def create_attributes
|
225
|
-
return transform_attributes if klass == Collection
|
226
|
-
ActiveSupport::Deprecation.warn(
|
227
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
228
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
229
|
-
)
|
230
|
-
transform_attributes.except(:collections, :collection, collection_field_mapping)
|
231
|
-
end
|
232
|
-
|
233
|
-
# Strip out the :collection key, and add the member_of_collection_ids,
|
234
|
-
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
|
235
|
-
def attribute_update
|
236
|
-
return transform_attributes.except(:id) if klass == Collection
|
237
|
-
ActiveSupport::Deprecation.warn(
|
238
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
239
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
240
|
-
)
|
241
|
-
transform_attributes.except(:id, :collections, :collection, collection_field_mapping)
|
242
|
-
end
|
243
|
-
|
244
207
|
# Override if we need to map the attributes from the parser in
|
245
208
|
# a way that is compatible with how the factory needs them.
|
246
|
-
def transform_attributes
|
209
|
+
def transform_attributes(update: false)
|
247
210
|
@transform_attributes = attributes.slice(*permitted_attributes)
|
248
211
|
@transform_attributes.merge!(file_attributes(update_files)) if with_files
|
249
|
-
@transform_attributes
|
212
|
+
update ? @transform_attributes.except(:id) : @transform_attributes
|
250
213
|
end
|
251
214
|
|
252
215
|
# Regardless of what the Parser gives us, these are the properties we are prepared to accept.
|
@@ -21,11 +21,9 @@ module Bulkrax
|
|
21
21
|
|
22
22
|
queue_as :import
|
23
23
|
|
24
|
-
attr_accessor :
|
24
|
+
attr_accessor :child_records, :parent_record, :parent_entry, :importer_run_id
|
25
25
|
|
26
|
-
# @param
|
27
|
-
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
28
|
-
# @param child_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
|
26
|
+
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
29
27
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
30
28
|
#
|
31
29
|
# The entry_identifier is used to lookup the @base_entry for the job (a.k.a. the entry the job was called from).
|
@@ -33,123 +31,117 @@ module Bulkrax
|
|
33
31
|
# Whether the @base_entry is the parent or the child in the relationship is determined by the presence of a
|
34
32
|
# parent_identifier or child_identifier param. For example, if a parent_identifier is passed, we know @base_entry
|
35
33
|
# is the child in the relationship, and vice versa if a child_identifier is passed.
|
36
|
-
def perform(
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
34
|
+
def perform(parent_identifier:, importer_run_id:)
|
35
|
+
pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel|
|
36
|
+
rel.bulkrax_importer_run_id == importer_run_id && rel.parent_id == parent_identifier
|
37
|
+
end.sort_by(&:order)
|
38
|
+
|
39
|
+
@importer_run_id = importer_run_id
|
40
|
+
@parent_record = find_record(parent_identifier)
|
41
|
+
@child_records = { works: [], collections: [] }
|
42
|
+
pending_relationships.each do |rel|
|
43
|
+
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
|
44
|
+
child_record = find_record(rel.child_id)
|
45
|
+
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
|
47
46
|
end
|
48
47
|
|
49
|
-
if
|
48
|
+
if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
|
50
49
|
reschedule(
|
51
|
-
entry_identifier: entry_identifier,
|
52
50
|
parent_identifier: parent_identifier,
|
53
|
-
|
54
|
-
importer_run: importer_run
|
51
|
+
importer_run_id: importer_run_id
|
55
52
|
)
|
56
53
|
return false # stop current job from continuing to run after rescheduling
|
57
54
|
end
|
58
55
|
|
59
|
-
|
56
|
+
@parent_entry = Bulkrax::Entry.where(identifier: parent_identifier,
|
57
|
+
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
|
58
|
+
importerexporter_type: "Bulkrax::Importer").first
|
59
|
+
create_relationships
|
60
|
+
pending_relationships.each(&:destroy)
|
60
61
|
rescue ::StandardError => e
|
61
|
-
|
62
|
-
|
62
|
+
parent_entry.status_info(e)
|
63
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
63
64
|
end
|
64
65
|
|
65
66
|
private
|
66
67
|
|
67
|
-
def
|
68
|
-
if parent_record.is_a?(::Collection)
|
69
|
-
|
70
|
-
|
71
|
-
collection_parent_work_child
|
72
|
-
elsif curation_concern?(parent_record) && child_record.is_a?(::Collection)
|
73
|
-
raise ::StandardError, 'a Collection may not be assigned as a child of a Work'
|
68
|
+
def create_relationships
|
69
|
+
if parent_record.is_a?(::Collection)
|
70
|
+
collection_parent_work_child unless child_records[:works].empty?
|
71
|
+
collection_parent_collection_child unless child_records[:collections].empty?
|
74
72
|
else
|
75
|
-
work_parent_work_child
|
73
|
+
work_parent_work_child unless child_records[:works].empty?
|
74
|
+
raise ::StandardError, 'a Collection may not be assigned as a child of a Work' if child_records[:collections].present?
|
76
75
|
end
|
77
76
|
end
|
78
77
|
|
79
78
|
def user
|
80
|
-
@user ||=
|
79
|
+
@user ||= Bulkrax::ImporterRun.find(importer_run_id).importer.user
|
81
80
|
end
|
82
81
|
|
83
82
|
# Work-Collection membership is added to the child as member_of_collection_ids
|
84
83
|
# This is adding the reverse relationship, from the child to the parent
|
85
84
|
def collection_parent_work_child
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
# TODO: add counters for :processed_parents and :failed_parents
|
101
|
-
importer_run.increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
85
|
+
child_records[:works].each do |child_record|
|
86
|
+
attrs = { id: child_record.id, member_of_collections_attributes: { 0 => { id: parent_record.id } } }
|
87
|
+
ObjectFactory.new(
|
88
|
+
attributes: attrs,
|
89
|
+
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
90
|
+
work_identifier: parent_entry.parser.work_identifier,
|
91
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
92
|
+
replace_files: false,
|
93
|
+
user: user,
|
94
|
+
klass: child_record.class
|
95
|
+
).run
|
96
|
+
# TODO: add counters for :processed_parents and :failed_parents
|
97
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
98
|
+
end
|
102
99
|
end
|
103
100
|
|
104
101
|
# Collection-Collection membership is added to the parent as member_ids
|
105
102
|
def collection_parent_collection_child
|
106
|
-
|
107
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
108
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
109
|
-
)
|
103
|
+
child_record = child_records[:collections].first
|
110
104
|
attrs = { id: parent_record.id, child_collection_id: child_record.id }
|
111
105
|
ObjectFactory.new(
|
112
106
|
attributes: attrs,
|
113
107
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
114
|
-
work_identifier:
|
115
|
-
|
108
|
+
work_identifier: parent_entry.parser.work_identifier,
|
109
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
116
110
|
replace_files: false,
|
117
111
|
user: user,
|
118
112
|
klass: parent_record.class
|
119
113
|
).run
|
120
114
|
# TODO: add counters for :processed_parents and :failed_parents
|
121
|
-
|
115
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
122
116
|
end
|
123
117
|
|
124
118
|
# Work-Work membership is added to the parent as member_ids
|
125
119
|
def work_parent_work_child
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
120
|
+
records_hash = {}
|
121
|
+
child_records[:works].each_with_index do |child_record, i|
|
122
|
+
records_hash[i] = { id: child_record.id }
|
123
|
+
end
|
130
124
|
attrs = {
|
131
125
|
id: parent_record.id,
|
132
|
-
work_members_attributes:
|
126
|
+
work_members_attributes: records_hash
|
133
127
|
}
|
134
128
|
ObjectFactory.new(
|
135
129
|
attributes: attrs,
|
136
130
|
source_identifier_value: nil, # sending the :id in the attrs means the factory doesn't need a :source_identifier_value
|
137
|
-
work_identifier:
|
138
|
-
|
131
|
+
work_identifier: parent_entry.parser.work_identifier,
|
132
|
+
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
|
139
133
|
replace_files: false,
|
140
134
|
user: user,
|
141
135
|
klass: parent_record.class
|
142
136
|
).run
|
143
137
|
# TODO: add counters for :processed_parents and :failed_parents
|
144
|
-
|
138
|
+
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
|
145
139
|
end
|
146
140
|
|
147
|
-
def reschedule(
|
141
|
+
def reschedule(parent_identifier:, importer_run_id:)
|
148
142
|
CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
|
149
|
-
entry_identifier: entry_identifier,
|
150
143
|
parent_identifier: parent_identifier,
|
151
|
-
|
152
|
-
importer_run: importer_run
|
144
|
+
importer_run_id: importer_run_id
|
153
145
|
)
|
154
146
|
end
|
155
147
|
end
|
@@ -18,9 +18,7 @@ module Bulkrax
|
|
18
18
|
importer.only_updates = only_updates_since_last_import || false
|
19
19
|
return unless importer.valid_import?
|
20
20
|
|
21
|
-
importer.
|
22
|
-
importer.import_works
|
23
|
-
importer.import_file_sets
|
21
|
+
importer.import_objects
|
24
22
|
end
|
25
23
|
|
26
24
|
def unzip_imported_file(parser)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class ScheduleRelationshipsJob < ApplicationJob
|
5
|
+
def perform(importer_id:)
|
6
|
+
importer = Importer.find(importer_id)
|
7
|
+
pending_num = importer.entries.left_outer_joins(:latest_status)
|
8
|
+
.where('bulkrax_statuses.status_message IS NULL ').count
|
9
|
+
return reschedule(importer_id) unless pending_num.zero?
|
10
|
+
|
11
|
+
importer.last_run.parents.each do |parent_id|
|
12
|
+
CreateRelationshipsJob.perform_later(parent_identifier: parent_id, importer_run_id: importer.last_run.id)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def reschedule(importer_id)
|
17
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importer_id)
|
18
|
+
false
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -3,7 +3,10 @@
|
|
3
3
|
require 'csv'
|
4
4
|
|
5
5
|
module Bulkrax
|
6
|
-
class
|
6
|
+
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
|
7
|
+
# We do too much in these entry classes. We need to extract the common logic from the various
|
8
|
+
# entry models into a module that can be shared between them.
|
9
|
+
class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
|
7
10
|
serialize :raw_metadata, JSON
|
8
11
|
|
9
12
|
def self.fields_from_data(data)
|
@@ -19,18 +22,15 @@ module Bulkrax
|
|
19
22
|
encoding: 'utf-8')
|
20
23
|
end
|
21
24
|
|
22
|
-
def self.data_for_entry(data, _source_id)
|
23
|
-
ActiveSupport::Deprecation.warn(
|
24
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
25
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
26
|
-
)
|
25
|
+
def self.data_for_entry(data, _source_id, parser)
|
27
26
|
# If a multi-line CSV data is passed, grab the first row
|
28
27
|
data = data.first if data.is_a?(CSV::Table)
|
29
28
|
# model has to be separated so that it doesn't get mistranslated by to_h
|
30
29
|
raw_data = data.to_h
|
31
30
|
raw_data[:model] = data[:model] if data[:model].present?
|
32
31
|
# If the parent field mapping is not 'parents', add 'parents' - the parser needs it
|
33
|
-
|
32
|
+
# TODO: change to :parents
|
33
|
+
raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
|
34
34
|
return raw_data
|
35
35
|
end
|
36
36
|
|
@@ -40,11 +40,10 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
self.parsed_metadata = {}
|
42
42
|
add_identifier
|
43
|
-
add_visibility
|
44
43
|
add_ingested_metadata
|
44
|
+
add_visibility
|
45
45
|
add_metadata_for_model
|
46
46
|
add_rights_statement
|
47
|
-
add_collections
|
48
47
|
add_local
|
49
48
|
|
50
49
|
self.parsed_metadata
|
@@ -67,15 +66,9 @@ module Bulkrax
|
|
67
66
|
end
|
68
67
|
|
69
68
|
def add_ingested_metadata
|
70
|
-
ActiveSupport::Deprecation.warn(
|
71
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
72
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
73
|
-
)
|
74
69
|
# we do not want to sort the values in the record before adding the metadata.
|
75
70
|
# if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
|
76
71
|
record.each do |key, value|
|
77
|
-
next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
|
78
|
-
|
79
72
|
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
|
80
73
|
add_metadata(key_without_numbers(key), value, index)
|
81
74
|
end
|
@@ -101,28 +94,40 @@ module Bulkrax
|
|
101
94
|
self.parsed_metadata['id'] = hyrax_record.id
|
102
95
|
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
|
103
96
|
self.parsed_metadata['model'] = hyrax_record.has_model.first
|
97
|
+
build_relationship_metadata
|
104
98
|
build_mapping_metadata
|
99
|
+
build_files unless hyrax_record.is_a?(Collection)
|
100
|
+
self.parsed_metadata
|
101
|
+
end
|
105
102
|
|
106
|
-
|
107
|
-
#
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
103
|
+
def build_relationship_metadata
|
104
|
+
# Includes all relationship methods for all exportable record types (works, Collections, FileSets)
|
105
|
+
relationship_methods = {
|
106
|
+
related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
|
107
|
+
related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
|
108
|
+
}
|
109
|
+
|
110
|
+
relationship_methods.each do |relationship_key, methods|
|
111
|
+
next if relationship_key.blank?
|
112
|
+
|
113
|
+
values = []
|
114
|
+
methods.each do |m|
|
115
|
+
values << hyrax_record.public_send(m) if hyrax_record.respond_to?(m)
|
115
116
|
end
|
116
|
-
|
117
|
+
values = values.flatten.uniq
|
118
|
+
next if values.blank?
|
117
119
|
|
118
|
-
|
119
|
-
|
120
|
+
handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
|
121
|
+
end
|
120
122
|
end
|
121
123
|
|
122
124
|
def build_mapping_metadata
|
123
125
|
mapping.each do |key, value|
|
124
126
|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
|
125
127
|
next if key == "model"
|
128
|
+
# relationships handled by #build_relationship_metadata
|
129
|
+
next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
|
130
|
+
next if key == 'file' # handled by #build_files
|
126
131
|
next if value['excluded']
|
127
132
|
|
128
133
|
object_key = key if value.key?('object')
|
@@ -197,12 +202,21 @@ module Bulkrax
|
|
197
202
|
end
|
198
203
|
|
199
204
|
def build_files
|
200
|
-
|
201
|
-
|
205
|
+
file_mapping = mapping['file']&.[]('from')&.first || 'file'
|
206
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
207
|
+
|
208
|
+
filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
|
209
|
+
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
|
210
|
+
end
|
211
|
+
|
212
|
+
def handle_join_on_export(key, values, join)
|
213
|
+
if join
|
214
|
+
parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
|
202
215
|
else
|
203
|
-
|
204
|
-
|
216
|
+
values.each_with_index do |value, i|
|
217
|
+
parsed_metadata["#{key}_#{i + 1}"] = value
|
205
218
|
end
|
219
|
+
parsed_metadata.delete(key)
|
206
220
|
end
|
207
221
|
end
|
208
222
|
|
@@ -225,14 +239,19 @@ module Bulkrax
|
|
225
239
|
end
|
226
240
|
|
227
241
|
def possible_collection_ids
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
242
|
+
return @possible_collection_ids if @possible_collection_ids.present?
|
243
|
+
|
244
|
+
collection_field_mapping = self.class.parent_field(parser)
|
245
|
+
return [] unless collection_field_mapping.present? && record[collection_field_mapping].present?
|
246
|
+
|
247
|
+
identifiers = []
|
248
|
+
split_titles = record[collection_field_mapping].split(/\s*[;|]\s*/)
|
249
|
+
split_titles.each do |c_title|
|
250
|
+
matching_collection_entries = importerexporter.entries.select { |e| e.raw_metadata['title'] == c_title }
|
251
|
+
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
|
252
|
+
identifiers << matching_collection_entries.first&.identifier
|
253
|
+
end
|
254
|
+
@possible_collection_ids = identifiers.compact.presence || []
|
236
255
|
end
|
237
256
|
|
238
257
|
def collections_created?
|
data/app/models/bulkrax/entry.rb
CHANGED
@@ -58,7 +58,7 @@ module Bulkrax
|
|
58
58
|
# @param data - the data from the metadata file
|
59
59
|
# @param path - the path to the metadata file (used by some entries to get the file_paths for import)
|
60
60
|
# @return Hash containing the data (the entry build_metadata method will know what to expect in the hash)
|
61
|
-
def self.data_for_entry(_data, _source_id)
|
61
|
+
def self.data_for_entry(_data, _source_id, _parser)
|
62
62
|
raise StandardError, 'Not Implemented'
|
63
63
|
end
|
64
64
|
|
@@ -70,12 +70,8 @@ module Bulkrax
|
|
70
70
|
parser&.work_identifier&.to_s || 'source'
|
71
71
|
end
|
72
72
|
|
73
|
-
def self.
|
74
|
-
|
75
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
76
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
77
|
-
)
|
78
|
-
Bulkrax.collection_field_mapping[self.to_s]
|
73
|
+
def self.parent_field(parser)
|
74
|
+
parser.related_parents_parsed_mapping
|
79
75
|
end
|
80
76
|
|
81
77
|
def build
|
@@ -125,23 +125,32 @@ module Bulkrax
|
|
125
125
|
end
|
126
126
|
|
127
127
|
def import_works
|
128
|
-
|
129
|
-
self.only_updates ||= false
|
130
|
-
parser.create_works
|
131
|
-
rescue StandardError => e
|
132
|
-
status_info(e)
|
128
|
+
import_objects(['work'])
|
133
129
|
end
|
134
130
|
|
135
131
|
def import_collections
|
136
|
-
|
137
|
-
parser.create_collections
|
138
|
-
rescue StandardError => e
|
139
|
-
status_info(e)
|
132
|
+
import_objects(['collection'])
|
140
133
|
end
|
141
134
|
|
142
135
|
def import_file_sets
|
143
|
-
|
144
|
-
|
136
|
+
import_objects(['file_set'])
|
137
|
+
end
|
138
|
+
|
139
|
+
def import_relationships
|
140
|
+
import_objects(['relationship'])
|
141
|
+
end
|
142
|
+
|
143
|
+
def import_objects(types_array = nil)
|
144
|
+
self.only_updates ||= false
|
145
|
+
types = types_array || %w[work collection file_set relationship]
|
146
|
+
if parser.class == Bulkrax::CsvParser
|
147
|
+
parser.create_objects(types)
|
148
|
+
else
|
149
|
+
types.each do |object_type|
|
150
|
+
self.save if self.new_record? # Object needs to be saved for statuses
|
151
|
+
parser.send("create_#{object_type.pluralize}")
|
152
|
+
end
|
153
|
+
end
|
145
154
|
rescue StandardError => e
|
146
155
|
status_info(e)
|
147
156
|
end
|
@@ -26,10 +26,6 @@ module Bulkrax
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def build_metadata
|
29
|
-
ActiveSupport::Deprecation.warn(
|
30
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
31
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
32
|
-
)
|
33
29
|
self.parsed_metadata = {}
|
34
30
|
self.parsed_metadata[work_identifier] = [record.header.identifier]
|
35
31
|
|
@@ -13,11 +13,7 @@ module Bulkrax
|
|
13
13
|
data.predicates.map(&:to_s)
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.data_for_entry(data, source_id)
|
17
|
-
ActiveSupport::Deprecation.warn(
|
18
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
19
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
20
|
-
)
|
16
|
+
def self.data_for_entry(data, source_id, parser)
|
21
17
|
reader = data
|
22
18
|
format = reader.class.format.to_sym
|
23
19
|
collections = []
|
@@ -25,7 +21,7 @@ module Bulkrax
|
|
25
21
|
delete = nil
|
26
22
|
data = RDF::Writer.for(format).buffer do |writer|
|
27
23
|
reader.each_statement do |statement|
|
28
|
-
collections << statement.object.to_s if
|
24
|
+
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
|
29
25
|
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
|
30
26
|
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
|
31
27
|
writer << statement
|
@@ -55,10 +51,6 @@ module Bulkrax
|
|
55
51
|
end
|
56
52
|
|
57
53
|
def build_metadata
|
58
|
-
ActiveSupport::Deprecation.warn(
|
59
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
60
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
61
|
-
)
|
62
54
|
raise StandardError, 'Record not found' if record.nil?
|
63
55
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
64
56
|
|
@@ -14,7 +14,7 @@ module Bulkrax
|
|
14
14
|
Nokogiri::XML(open(path)).remove_namespaces!
|
15
15
|
end
|
16
16
|
|
17
|
-
def self.data_for_entry(data, source_id)
|
17
|
+
def self.data_for_entry(data, source_id, _parser)
|
18
18
|
collections = []
|
19
19
|
children = []
|
20
20
|
xpath_for_source_id = ".//*[name()='#{source_id}']"
|
@@ -39,10 +39,6 @@ module Bulkrax
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def build_metadata
|
42
|
-
ActiveSupport::Deprecation.warn(
|
43
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
44
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
45
|
-
)
|
46
42
|
raise StandardError, 'Record not found' if record.nil?
|
47
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
48
44
|
self.parsed_metadata = {}
|
@@ -26,7 +26,9 @@ module Bulkrax
|
|
26
26
|
|
27
27
|
def write_files
|
28
28
|
return if hyrax_record.is_a?(Collection)
|
29
|
-
|
29
|
+
|
30
|
+
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
|
31
|
+
file_sets.each do |fs|
|
30
32
|
path = File.join(exporter_export_path, 'files')
|
31
33
|
FileUtils.mkdir_p(path)
|
32
34
|
file = filename(fs)
|
@@ -129,33 +129,24 @@ module Bulkrax
|
|
129
129
|
end
|
130
130
|
|
131
131
|
def supported_bulkrax_fields
|
132
|
-
ActiveSupport::Deprecation.warn(
|
133
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
134
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
135
|
-
)
|
136
132
|
@supported_bulkrax_fields ||=
|
137
133
|
%W[
|
138
134
|
id
|
139
135
|
file
|
140
136
|
remote_files
|
141
137
|
model
|
138
|
+
visibility
|
142
139
|
delete
|
143
|
-
#{parser.collection_field_mapping}
|
144
140
|
#{related_parents_parsed_mapping}
|
145
141
|
#{related_children_parsed_mapping}
|
146
142
|
]
|
147
143
|
end
|
148
144
|
|
149
145
|
def multiple?(field)
|
150
|
-
ActiveSupport::Deprecation.warn(
|
151
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
152
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
153
|
-
)
|
154
146
|
@multiple_bulkrax_fields ||=
|
155
147
|
%W[
|
156
148
|
file
|
157
149
|
remote_files
|
158
|
-
#{parser.collection_field_mapping}
|
159
150
|
#{related_parents_parsed_mapping}
|
160
151
|
#{related_children_parsed_mapping}
|
161
152
|
]
|
@@ -28,7 +28,8 @@ module Bulkrax
|
|
28
28
|
self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
|
29
29
|
next if parent_identifier.blank?
|
30
30
|
|
31
|
-
|
31
|
+
add_parent_to_import_run(parent_identifier, importerexporter.last_run)
|
32
|
+
PendingRelationship.create!(child_id: self.identifier, parent_id: parent_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
@@ -36,10 +37,17 @@ module Bulkrax
|
|
36
37
|
self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
|
37
38
|
next if child_identifier.blank?
|
38
39
|
|
39
|
-
|
40
|
+
add_parent_to_import_run(self.identifier, importerexporter.last_run)
|
41
|
+
PendingRelationship.create!(parent_id: self.identifier, child_id: child_identifier, bulkrax_importer_run_id: importerexporter.last_run.id, order: self.id)
|
40
42
|
end
|
41
43
|
end
|
42
44
|
|
45
|
+
def add_parent_to_import_run(parent_id, run)
|
46
|
+
run.parents = [] if run.parents.nil?
|
47
|
+
run.parents << parent_id
|
48
|
+
run.save
|
49
|
+
end
|
50
|
+
|
43
51
|
def find_collection_ids
|
44
52
|
self.collection_ids
|
45
53
|
end
|
@@ -77,10 +85,6 @@ module Bulkrax
|
|
77
85
|
def add_collections
|
78
86
|
return if find_collection_ids.blank?
|
79
87
|
|
80
|
-
ActiveSupport::Deprecation.warn(
|
81
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
82
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
83
|
-
)
|
84
88
|
self.parsed_metadata['member_of_collections_attributes'] = {}
|
85
89
|
find_collection_ids.each_with_index do |c, i|
|
86
90
|
self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
|
@@ -88,15 +92,10 @@ module Bulkrax
|
|
88
92
|
end
|
89
93
|
|
90
94
|
def factory
|
91
|
-
ActiveSupport::Deprecation.warn(
|
92
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
93
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
94
|
-
)
|
95
95
|
@factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
|
96
96
|
source_identifier_value: identifier,
|
97
97
|
work_identifier: parser.work_identifier,
|
98
|
-
|
99
|
-
related_parents_parsed_mapping: related_parents_parsed_mapping,
|
98
|
+
related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
|
100
99
|
replace_files: replace_files,
|
101
100
|
user: user,
|
102
101
|
klass: factory_class,
|
@@ -20,14 +20,14 @@ module Bulkrax
|
|
20
20
|
(last_imported_at || Time.current) + frequency.to_seconds if schedulable? && last_imported_at.present?
|
21
21
|
end
|
22
22
|
|
23
|
-
def increment_counters(index, collection: false, file_set: false)
|
23
|
+
def increment_counters(index, collection: false, file_set: false, work: false)
|
24
24
|
# Only set the totals if they were not set on initialization
|
25
25
|
importer_run = ImporterRun.find(current_run.id) # make sure fresh
|
26
26
|
if collection
|
27
27
|
importer_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
|
28
28
|
elsif file_set
|
29
29
|
importer_run.total_file_set_entries = index + 1 unless parser.file_sets_total.positive?
|
30
|
-
|
30
|
+
elsif work
|
31
31
|
# TODO: differentiate between work and collection counts for exporters
|
32
32
|
importer_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
|
33
33
|
end
|
@@ -56,7 +56,7 @@ module Bulkrax
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def related_parents_parsed_mapping
|
59
|
-
@related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
|
59
|
+
@related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents')
|
60
60
|
end
|
61
61
|
|
62
62
|
def related_children_raw_mapping
|
@@ -64,29 +64,22 @@ module Bulkrax
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def related_children_parsed_mapping
|
67
|
-
@related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
|
67
|
+
@related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children')
|
68
68
|
end
|
69
69
|
|
70
70
|
def get_field_mapping_hash_for(key)
|
71
71
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
72
72
|
|
73
|
+
mapping = importerexporter.field_mapping == [{}] ? {} : importerexporter.field_mapping
|
73
74
|
instance_variable_set(
|
74
75
|
"@#{key}_hash",
|
75
|
-
|
76
|
+
mapping&.with_indifferent_access&.select { |_, h| h.key?(key) }
|
76
77
|
)
|
77
78
|
raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
|
78
79
|
|
79
80
|
instance_variable_get("@#{key}_hash")
|
80
81
|
end
|
81
82
|
|
82
|
-
def collection_field_mapping
|
83
|
-
ActiveSupport::Deprecation.warn(
|
84
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
85
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
86
|
-
)
|
87
|
-
Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
|
88
|
-
end
|
89
|
-
|
90
83
|
def model_field_mappings
|
91
84
|
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
92
85
|
model_mappings |= ['model']
|
@@ -118,6 +111,10 @@ module Bulkrax
|
|
118
111
|
raise StandardError, 'must be defined' if importer?
|
119
112
|
end
|
120
113
|
|
114
|
+
def create_relationships
|
115
|
+
raise StandardError, 'must be defined' if importer?
|
116
|
+
end
|
117
|
+
|
121
118
|
# Optional, define if using browse everything for file upload
|
122
119
|
def retrieve_cloud_files(files); end
|
123
120
|
|
@@ -39,7 +39,7 @@ module Bulkrax
|
|
39
39
|
path = metadata_path(bag)
|
40
40
|
raise StandardError, 'No metadata files were found' if path.blank?
|
41
41
|
data = entry_class.read_data(path)
|
42
|
-
data = entry_class.data_for_entry(data, source_identifier)
|
42
|
+
data = entry_class.data_for_entry(data, source_identifier, self)
|
43
43
|
data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
|
44
44
|
data
|
45
45
|
end
|
@@ -75,7 +75,7 @@ module Bulkrax
|
|
75
75
|
else
|
76
76
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
77
77
|
end
|
78
|
-
increment_counters(index)
|
78
|
+
increment_counters(index, work: true)
|
79
79
|
end
|
80
80
|
importer.record_status
|
81
81
|
rescue StandardError => e
|
@@ -83,11 +83,7 @@ module Bulkrax
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def collections
|
86
|
-
|
87
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
88
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
89
|
-
)
|
90
|
-
records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
|
86
|
+
records.map { |r| r[related_parents_parsed_mapping].split(/\s*[;|]\s*/) if r[related_parents_parsed_mapping].present? }.flatten.compact.uniq
|
91
87
|
end
|
92
88
|
|
93
89
|
def collections_total
|
@@ -14,18 +14,13 @@ module Bulkrax
|
|
14
14
|
csv_data = entry_class.read_data(file_for_import)
|
15
15
|
importer.parser_fields['total'] = csv_data.count
|
16
16
|
importer.save
|
17
|
-
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
|
17
|
+
@records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
|
18
18
|
end
|
19
19
|
|
20
20
|
def collections
|
21
|
-
ActiveSupport::Deprecation.warn(
|
22
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
23
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
24
|
-
)
|
25
21
|
# retrieve a list of unique collections
|
26
22
|
records.map do |r|
|
27
23
|
collections = []
|
28
|
-
r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title, from_collection_field_mapping: true } } if r[collection_field_mapping].present?
|
29
24
|
model_field_mappings.each do |model_mapping|
|
30
25
|
collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
|
31
26
|
end
|
@@ -85,73 +80,54 @@ module Bulkrax
|
|
85
80
|
end
|
86
81
|
|
87
82
|
def create_collections
|
88
|
-
|
89
|
-
next if collection.blank?
|
90
|
-
break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
|
91
|
-
ActiveSupport::Deprecation.warn(
|
92
|
-
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
|
93
|
-
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
|
94
|
-
)
|
95
|
-
|
96
|
-
## BEGIN
|
97
|
-
# Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
|
98
|
-
# TODO: Remove once collection_field_mapping is removed
|
99
|
-
metadata = if collection.delete(:from_collection_field_mapping)
|
100
|
-
uci = unique_collection_identifier(collection)
|
101
|
-
{
|
102
|
-
title: collection[:title],
|
103
|
-
work_identifier => uci,
|
104
|
-
source_identifier => uci,
|
105
|
-
visibility: 'open',
|
106
|
-
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
|
107
|
-
}
|
108
|
-
end
|
109
|
-
collection_hash = metadata.presence || collection
|
110
|
-
## END
|
111
|
-
|
112
|
-
new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
|
113
|
-
increment_counters(index, collection: true)
|
114
|
-
# TODO: add support for :delete option
|
115
|
-
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
|
116
|
-
end
|
117
|
-
importer.record_status
|
118
|
-
rescue StandardError => e
|
119
|
-
status_info(e)
|
83
|
+
create_objects(['collection'])
|
120
84
|
end
|
121
85
|
|
122
86
|
def create_works
|
123
|
-
|
124
|
-
next unless record_has_source_identifier(work, records.find_index(work))
|
125
|
-
break if limit_reached?(limit, records.find_index(work))
|
126
|
-
|
127
|
-
seen[work[source_identifier]] = true
|
128
|
-
new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
|
129
|
-
if work[:delete].present?
|
130
|
-
DeleteWorkJob.send(perform_method, new_entry, current_run)
|
131
|
-
else
|
132
|
-
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
133
|
-
end
|
134
|
-
increment_counters(index)
|
135
|
-
end
|
136
|
-
importer.record_status
|
137
|
-
rescue StandardError => e
|
138
|
-
status_info(e)
|
87
|
+
create_objects(['work'])
|
139
88
|
end
|
140
89
|
|
141
90
|
def create_file_sets
|
142
|
-
|
143
|
-
|
144
|
-
break if limit_reached?(limit, records.find_index(file_set))
|
91
|
+
create_objects(['file_set'])
|
92
|
+
end
|
145
93
|
|
146
|
-
|
147
|
-
|
148
|
-
|
94
|
+
def create_relationships
|
95
|
+
create_objects(['relationship'])
|
96
|
+
end
|
97
|
+
|
98
|
+
def create_objects(types_array = nil)
|
99
|
+
(types_array || %w[work collection file_set relationship]).each do |type|
|
100
|
+
if type.eql?('relationship')
|
101
|
+
ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
|
102
|
+
next
|
103
|
+
end
|
104
|
+
send(type.pluralize).each_with_index do |current_record, index|
|
105
|
+
next unless record_has_source_identifier(current_record, records.find_index(current_record))
|
106
|
+
break if limit_reached?(limit, records.find_index(current_record))
|
107
|
+
|
108
|
+
seen[current_record[source_identifier]] = true
|
109
|
+
create_entry_and_job(current_record, type)
|
110
|
+
increment_counters(index, "#{type}": true)
|
111
|
+
end
|
112
|
+
importer.record_status
|
149
113
|
end
|
150
|
-
importer.record_status
|
151
114
|
rescue StandardError => e
|
152
115
|
status_info(e)
|
153
116
|
end
|
154
117
|
|
118
|
+
def create_entry_and_job(current_record, type)
|
119
|
+
new_entry = find_or_create_entry(send("#{type}_entry_class"),
|
120
|
+
current_record[source_identifier],
|
121
|
+
'Bulkrax::Importer',
|
122
|
+
current_record.to_h)
|
123
|
+
if current_record[:delete].present?
|
124
|
+
# TODO: create a "Delete" job for file_sets and collections
|
125
|
+
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
126
|
+
else
|
127
|
+
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
155
131
|
def write_partial_import_file(file)
|
156
132
|
import_filename = import_file_path.split('/').last
|
157
133
|
partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
|
@@ -188,13 +164,13 @@ module Bulkrax
|
|
188
164
|
|
189
165
|
case importerexporter.export_from
|
190
166
|
when 'all'
|
191
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
192
|
-
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
193
|
-
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", rows: 2_147_483_647).map(&:id)
|
167
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
168
|
+
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
169
|
+
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
194
170
|
when 'collection'
|
195
|
-
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
|
171
|
+
@work_ids = ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
196
172
|
when 'worktype'
|
197
|
-
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
|
173
|
+
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
198
174
|
when 'importer'
|
199
175
|
set_ids_for_exporting_from_importer
|
200
176
|
end
|
@@ -214,7 +190,7 @@ module Bulkrax
|
|
214
190
|
extra_filters = extra_filters.presence || '*:*'
|
215
191
|
|
216
192
|
{ :@work_ids => ::Hyrax.config.curation_concerns, :@collection_ids => [::Collection], :@file_set_ids => [::FileSet] }.each do |instance_var, models_to_search|
|
217
|
-
instance_variable_set(instance_var, ActiveFedora::SolrService.
|
193
|
+
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
218
194
|
extra_filters.to_s,
|
219
195
|
fq: [
|
220
196
|
"#{work_identifier}_sim:(#{complete_entry_identifiers.join(' OR ')})",
|
@@ -256,6 +232,7 @@ module Bulkrax
|
|
256
232
|
def entry_class
|
257
233
|
CsvEntry
|
258
234
|
end
|
235
|
+
alias work_entry_class entry_class
|
259
236
|
|
260
237
|
def collection_entry_class
|
261
238
|
CsvCollectionEntry
|
@@ -40,14 +40,14 @@ module Bulkrax
|
|
40
40
|
metadata_paths.map do |md|
|
41
41
|
# Retrieve all records
|
42
42
|
elements = entry_class.read_data(md).xpath("//#{record_element}")
|
43
|
-
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier) }
|
43
|
+
r += elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
|
44
44
|
end
|
45
45
|
# Flatten because we may have multiple records per array
|
46
46
|
r.compact.flatten
|
47
47
|
elsif parser_fields['import_type'] == 'single'
|
48
48
|
metadata_paths.map do |md|
|
49
49
|
data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
|
50
|
-
entry_class.data_for_entry(data, source_identifier)
|
50
|
+
entry_class.data_for_entry(data, source_identifier, self)
|
51
51
|
end.compact # No need to flatten because we take only the first record
|
52
52
|
end
|
53
53
|
end
|
@@ -94,7 +94,7 @@ module Bulkrax
|
|
94
94
|
else
|
95
95
|
ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
|
96
96
|
end
|
97
|
-
increment_counters(index)
|
97
|
+
increment_counters(index, work: true)
|
98
98
|
end
|
99
99
|
importer.record_status
|
100
100
|
rescue StandardError => e
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class CreateBulkraxPendingRelationships < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
create_table :bulkrax_pending_relationships do |t|
|
4
|
+
t.belongs_to :bulkrax_importer_run, foreign_key: true, null: false
|
5
|
+
t.string :parent_id, null: false
|
6
|
+
t.string :child_id, null: false
|
7
|
+
|
8
|
+
t.timestamps
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -5,11 +5,9 @@ require 'active_support/all'
|
|
5
5
|
|
6
6
|
module Bulkrax
|
7
7
|
class << self
|
8
|
-
# TODO: remove collection_field_mapping when releasing v2
|
9
8
|
mattr_accessor :parsers,
|
10
9
|
:default_work_type,
|
11
10
|
:default_field_mapping,
|
12
|
-
:collection_field_mapping,
|
13
11
|
:fill_in_blank_source_identifiers,
|
14
12
|
:related_children_field_mapping,
|
15
13
|
:related_parents_field_mapping,
|
@@ -35,17 +33,6 @@ module Bulkrax
|
|
35
33
|
self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
|
36
34
|
self.server_name = 'bulkrax@example.com'
|
37
35
|
|
38
|
-
# NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
|
39
|
-
# Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
|
40
|
-
# TODO: remove collection_field_mapping when releasing v2
|
41
|
-
# Field_mapping for establishing a collection relationship (FROM work TO collection)
|
42
|
-
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
|
43
|
-
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
|
44
|
-
# The default value for CSV is collection
|
45
|
-
self.collection_field_mapping = {
|
46
|
-
'Bulkrax::CsvEntry' => 'collection'
|
47
|
-
}
|
48
|
-
|
49
36
|
# Hash of Generic field_mappings for use in the view
|
50
37
|
# There must be one field_mappings hash per view parial
|
51
38
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-03-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -268,6 +268,7 @@ files:
|
|
268
268
|
- app/jobs/bulkrax/import_file_set_job.rb
|
269
269
|
- app/jobs/bulkrax/import_work_job.rb
|
270
270
|
- app/jobs/bulkrax/importer_job.rb
|
271
|
+
- app/jobs/bulkrax/schedule_relationships_job.rb
|
271
272
|
- app/mailers/bulkrax/application_mailer.rb
|
272
273
|
- app/matchers/bulkrax/application_matcher.rb
|
273
274
|
- app/matchers/bulkrax/bagit_matcher.rb
|
@@ -287,6 +288,7 @@ files:
|
|
287
288
|
- app/models/bulkrax/oai_entry.rb
|
288
289
|
- app/models/bulkrax/oai_qualified_dc_entry.rb
|
289
290
|
- app/models/bulkrax/oai_set_entry.rb
|
291
|
+
- app/models/bulkrax/pending_relationship.rb
|
290
292
|
- app/models/bulkrax/rdf_collection_entry.rb
|
291
293
|
- app/models/bulkrax/rdf_entry.rb
|
292
294
|
- app/models/bulkrax/status.rb
|
@@ -363,6 +365,9 @@ files:
|
|
363
365
|
- db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
|
364
366
|
- db/migrate/20220118001339_add_import_attempts_to_entries.rb
|
365
367
|
- db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
|
368
|
+
- db/migrate/20220301001839_create_bulkrax_pending_relationships.rb
|
369
|
+
- db/migrate/20220301020307_add_parents_to_bulkrax_importer_runs.rb
|
370
|
+
- db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb
|
366
371
|
- lib/bulkrax.rb
|
367
372
|
- lib/bulkrax/engine.rb
|
368
373
|
- lib/bulkrax/version.rb
|
@@ -378,7 +383,7 @@ homepage: https://github.com/samvera-labs/bulkrax
|
|
378
383
|
licenses:
|
379
384
|
- Apache-2.0
|
380
385
|
metadata: {}
|
381
|
-
post_install_message:
|
386
|
+
post_install_message:
|
382
387
|
rdoc_options: []
|
383
388
|
require_paths:
|
384
389
|
- lib
|
@@ -389,12 +394,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
389
394
|
version: '0'
|
390
395
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
391
396
|
requirements:
|
392
|
-
- - "
|
397
|
+
- - ">"
|
393
398
|
- !ruby/object:Gem::Version
|
394
|
-
version:
|
399
|
+
version: 1.3.1
|
395
400
|
requirements: []
|
396
|
-
rubygems_version: 3.1.
|
397
|
-
signing_key:
|
401
|
+
rubygems_version: 3.1.4
|
402
|
+
signing_key:
|
398
403
|
specification_version: 4
|
399
404
|
summary: Import and export tool for Hyrax and Hyku
|
400
405
|
test_files: []
|