bulkrax 5.4.1 → 5.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +207 -201
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32206384d2a056f0a43d9ddc6612b62871da36a1354d8a0488b0e14d307ab3e8
|
4
|
+
data.tar.gz: fc6355c17ea6e9c3a8657643516565f36c2f658c0ce3d3c0c1d59a9291a29499
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b23cf6f323860a0d2fb0691198616496ecce513325cd40e10392c36037ee85661d6b5819a3e8bd594cd9d33afe4745037490fd8f3d9f9bdee98ad1e7296efa6
|
7
|
+
data.tar.gz: 4830b8cd860f22201a5d1abd52b8eca8521d997fbfbe687394ad41fa046c9e36686e5ebc44703e8dce83c18a52e744dc7b962b5ab9ee6400f36b3cb528b04a37
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
include DynamicRecordLookup
|
42
42
|
|
43
|
-
queue_as
|
43
|
+
queue_as Bulkrax.config.ingest_queue_name
|
44
44
|
|
45
45
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
46
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "bulkrax/version"
|
4
|
-
require "bulkrax/engine"
|
5
|
-
require 'active_support/all'
|
3
|
+
require "bulkrax/version"
|
4
|
+
require "bulkrax/engine"
|
5
|
+
require 'active_support/all'
|
6
6
|
|
7
|
-
# rubocop:disable Metrics/ModuleLength
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
8
8
|
module Bulkrax
|
9
|
-
|
10
|
-
|
9
|
+
extend self # rubocop:disable Style/ModuleFunction
|
10
|
+
extend Forwardable
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
##
|
13
|
+
# @api public
|
14
14
|
class Configuration
|
15
15
|
attr_accessor :api_definition,
|
16
16
|
:curation_concerns,
|
@@ -35,6 +35,15 @@ module Bulkrax
|
|
35
35
|
:reserved_properties,
|
36
36
|
:server_name
|
37
37
|
|
38
|
+
attr_writer :ingest_queue_name
|
39
|
+
##
|
40
|
+
# @return [String, Proc]
|
41
|
+
def ingest_queue_name
|
42
|
+
return @ingest_queue_name if @ingest_queue_name.present?
|
43
|
+
return Hyrax.config.ingest_queue_name if defined?(Hyrax)
|
44
|
+
:import
|
45
|
+
end
|
46
|
+
|
38
47
|
attr_writer :use_locking
|
39
48
|
|
40
49
|
def use_locking
|
@@ -45,61 +54,61 @@ module Bulkrax
|
|
45
54
|
alias use_locking? use_locking
|
46
55
|
end
|
47
56
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
57
|
+
def config
|
58
|
+
@config ||= Configuration.new
|
59
|
+
yield @config if block_given?
|
60
|
+
@config
|
61
|
+
end
|
62
|
+
alias setup config
|
54
63
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
64
|
+
def_delegators :@config,
|
65
|
+
:api_definition,
|
66
|
+
:api_definition=,
|
67
|
+
:curation_concerns,
|
68
|
+
:curation_concerns=,
|
69
|
+
:default_field_mapping,
|
70
|
+
:default_field_mapping=,
|
71
|
+
:default_work_type,
|
72
|
+
:default_work_type=,
|
73
|
+
:export_path,
|
74
|
+
:export_path=,
|
75
|
+
:field_mappings,
|
76
|
+
:field_mappings=,
|
77
|
+
:file_model_class,
|
78
|
+
:file_model_class=,
|
79
|
+
:fill_in_blank_source_identifiers,
|
80
|
+
:fill_in_blank_source_identifiers=,
|
81
|
+
:generated_metadata_mapping,
|
82
|
+
:generated_metadata_mapping=,
|
83
|
+
:import_path,
|
84
|
+
:import_path=,
|
85
|
+
:multi_value_element_join_on,
|
86
|
+
:multi_value_element_join_on=,
|
87
|
+
:multi_value_element_split_on,
|
88
|
+
:multi_value_element_split_on=,
|
89
|
+
:object_factory,
|
90
|
+
:object_factory=,
|
91
|
+
:parsers,
|
92
|
+
:parsers=,
|
93
|
+
:qa_controlled_properties,
|
94
|
+
:qa_controlled_properties=,
|
95
|
+
:related_children_field_mapping,
|
96
|
+
:related_children_field_mapping=,
|
97
|
+
:related_parents_field_mapping,
|
98
|
+
:related_parents_field_mapping=,
|
99
|
+
:relationship_job_class,
|
100
|
+
:relationship_job_class=,
|
101
|
+
:removed_image_path,
|
102
|
+
:removed_image_path=,
|
103
|
+
:required_elements,
|
104
|
+
:required_elements=,
|
105
|
+
:reserved_properties,
|
106
|
+
:reserved_properties=,
|
107
|
+
:server_name,
|
108
|
+
:server_name=,
|
109
|
+
:use_locking,
|
110
|
+
:use_locking=,
|
111
|
+
:use_locking?
|
103
112
|
|
104
113
|
config do |conf|
|
105
114
|
conf.parsers = [
|
@@ -138,152 +147,149 @@ module Bulkrax
|
|
138
147
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
139
148
|
# Override at application level to change
|
140
149
|
conf.field_mappings = {
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
150
|
+
"Bulkrax::OaiDcParser" => {
|
151
|
+
"contributor" => { from: ["contributor"] },
|
152
|
+
# no appropriate mapping for coverage (based_near needs id)
|
153
|
+
# ""=>{:from=>["coverage"]},
|
154
|
+
"creator" => { from: ["creator"] },
|
155
|
+
"date_created" => { from: ["date"] },
|
156
|
+
"description" => { from: ["description"] },
|
157
|
+
# no appropriate mapping for format
|
158
|
+
# ""=>{:from=>["format"]},
|
159
|
+
"identifier" => { from: ["identifier"] },
|
160
|
+
"language" => { from: ["language"], parsed: true },
|
161
|
+
"publisher" => { from: ["publisher"] },
|
162
|
+
"related_url" => { from: ["relation"] },
|
163
|
+
"rights_statement" => { from: ["rights"] },
|
164
|
+
"source" => { from: ["source"] },
|
165
|
+
"subject" => { from: ["subject"], parsed: true },
|
166
|
+
"title" => { from: ["title"] },
|
167
|
+
"resource_type" => { from: ["type"], parsed: true },
|
168
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
160
169
|
},
|
161
|
-
|
162
|
-
|
163
|
-
|
170
|
+
"Bulkrax::OaiQualifiedDcParser" => {
|
171
|
+
"abstract" => { from: ["abstract"] },
|
172
|
+
"alternative_title" => { from: ["alternative"] },
|
164
173
|
"bibliographic_citation" => { from: ["bibliographicCitation"] },
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
# 'Bulkrax::BagitParser' => { "source" => { from: ["source"], source_identifier: true } },
|
187
|
-
# 'Bulkrax::XmlParser' => { "source" => { from: ["source"], source_identifier: true } }
|
188
|
-
}
|
174
|
+
"contributor" => { from: ["contributor"] },
|
175
|
+
"creator" => { from: ["creator"] },
|
176
|
+
"date_created" => { from: ["created"] },
|
177
|
+
"description" => { from: ["description"] },
|
178
|
+
"language" => { from: ["language"] },
|
179
|
+
"license" => { from: ["license"] },
|
180
|
+
"publisher" => { from: ["publisher"] },
|
181
|
+
"related_url" => { from: ["relation"] },
|
182
|
+
"rights_holder" => { from: ["rightsHolder"] },
|
183
|
+
"rights_statement" => { from: ["rights"] },
|
184
|
+
"source" => { from: ["source"] },
|
185
|
+
"subject" => { from: ["subject"], parsed: true },
|
186
|
+
"title" => { from: ["title"] },
|
187
|
+
"resource_type" => { from: ["type"], parsed: true },
|
188
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
189
|
+
},
|
190
|
+
# When empty, a default_field_mapping will be generated
|
191
|
+
"Bulkrax::CsvParser" => {},
|
192
|
+
'Bulkrax::BagitParser' => {},
|
193
|
+
'Bulkrax::XmlParser' => {}
|
194
|
+
}
|
189
195
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
196
|
+
# Lambda to set the default field mapping
|
197
|
+
conf.default_field_mapping = lambda do |field|
|
198
|
+
return if field.blank?
|
199
|
+
{
|
200
|
+
field.to_s =>
|
201
|
+
{
|
202
|
+
from: [field.to_s],
|
203
|
+
split: false,
|
204
|
+
parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"),
|
205
|
+
if: nil,
|
206
|
+
excluded: false
|
207
|
+
}
|
208
|
+
}
|
209
|
+
end
|
204
210
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
211
|
+
# Properties that should not be used in imports. They are reserved for use by Hyrax.
|
212
|
+
conf.reserved_properties = %w[
|
213
|
+
create_date
|
214
|
+
modified_date
|
215
|
+
date_modified
|
216
|
+
date_uploaded
|
217
|
+
depositor
|
218
|
+
arkivo_checksum
|
219
|
+
has_model
|
220
|
+
head
|
221
|
+
label
|
222
|
+
import_url
|
223
|
+
on_behalf_of
|
224
|
+
proxy_depositor
|
225
|
+
owner
|
226
|
+
state
|
227
|
+
tail
|
228
|
+
original_url
|
229
|
+
relative_path
|
230
|
+
]
|
225
231
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
232
|
+
# List of Questioning Authority properties that are controlled via YAML files in
|
233
|
+
# the config/authorities/ directory. For example, the :rights_statement property
|
234
|
+
# is controlled by the active terms in config/authorities/rights_statements.yml
|
235
|
+
conf.qa_controlled_properties = %w[rights_statement license]
|
236
|
+
end
|
231
237
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
238
|
+
def api_definition
|
239
|
+
@api_definition ||= ActiveSupport::HashWithIndifferentAccess.new(
|
240
|
+
YAML.safe_load(
|
241
|
+
ERB.new(
|
242
|
+
File.read(Rails.root.join('config', 'bulkrax_api.yml'))
|
243
|
+
).result
|
244
|
+
)
|
245
|
+
)
|
246
|
+
end
|
241
247
|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
248
|
+
DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | '
|
249
|
+
# Specify the delimiter for joining an attribute's multi-value array into a string.
|
250
|
+
#
|
251
|
+
# @note the specific delimiter should likely be present in the multi_value_element_split_on
|
252
|
+
# expression.
|
253
|
+
def multi_value_element_join_on
|
254
|
+
@multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
255
|
+
end
|
250
256
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze
|
258
|
+
# @return [Regexp] the regular expression to use to "split" an attribute's values. If set to
|
259
|
+
# `true`, use the DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON.
|
260
|
+
#
|
261
|
+
# @note The "true" value is to preserve backwards compatibility.
|
262
|
+
# @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
257
263
|
def multi_value_element_split_on
|
258
|
-
|
259
|
-
|
264
|
+
if @multi_value_element_join_on.is_a?(TrueClass)
|
265
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
260
266
|
else
|
261
|
-
|
267
|
+
@multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
262
268
|
end
|
263
269
|
end
|
264
270
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
271
|
+
# Responsible for stripping hidden characters from the given string.
|
272
|
+
#
|
273
|
+
# @param value [#to_s]
|
274
|
+
# @return [String] with hidden characters removed
|
275
|
+
#
|
276
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
277
|
+
def normalize_string(value)
|
278
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
279
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
280
|
+
end
|
275
281
|
|
276
|
-
|
277
|
-
|
282
|
+
def fallback_user_for_importer_exporter_processing
|
283
|
+
return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user)
|
278
284
|
|
279
|
-
|
280
|
-
|
285
|
+
raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing"
|
286
|
+
end
|
281
287
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
288
|
+
# This class conforms to the ActiveRecord `serialize` interface. Its job is to ensure that we
|
289
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
290
|
+
#
|
291
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
292
|
+
class NormalizedJson
|
287
293
|
def self.normalize_keys(hash)
|
288
294
|
return hash unless hash.respond_to?(:each_pair)
|
289
295
|
returning_value = {}
|
@@ -295,18 +301,18 @@ module Bulkrax
|
|
295
301
|
|
296
302
|
# When we write the serialized data to the database, we "dump" the value into that database
|
297
303
|
# column.
|
298
|
-
|
299
|
-
|
300
|
-
|
304
|
+
def self.dump(value)
|
305
|
+
JSON.dump(normalize_keys(value))
|
306
|
+
end
|
301
307
|
|
302
308
|
# When we load the serialized data from the database, we pass the database's value into "load"
|
303
309
|
# function.
|
304
310
|
#
|
305
311
|
# rubocop:disable Security/JSONLoad
|
306
|
-
|
307
|
-
|
308
|
-
|
312
|
+
def self.load(string)
|
313
|
+
normalize_keys(JSON.load(string))
|
314
|
+
end
|
309
315
|
# rubocop:enable Security/JSONLoad
|
310
|
-
|
316
|
+
end
|
311
317
|
end
|
312
|
-
# rubocop:disable Metrics/ModuleLength
|
318
|
+
# rubocop:disable Metrics/ModuleLength
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.4.1
|
4
|
+
version: 5.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|