bulkrax 5.5.0 → 5.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +207 -198
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d047869e323f1c300329fa46b3cb0bcc4e0f3975729d44c66554d79f00f41657
|
4
|
+
data.tar.gz: 0d80b53d7b490bfe39bf8148f926cd2aa9e73f22f1e5a5726f225c11f72b6856
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e03981be6998e66998c33484afb06aead0e18984bc7a75320aef9078cfec8ce419203b3d90b93cad3193bb7f9187ad3a8906291690d791dac66f07a42144ee2
|
7
|
+
data.tar.gz: 4a66439b1b28aa601ef36425b440a29b8796589d4fe2c74f759e066768564cc3656cb1de290e1df89fb45d28ec6926e0d3a67936c990619057028f7701822177
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
include DynamicRecordLookup
|
42
42
|
|
43
|
-
queue_as
|
43
|
+
queue_as Bulkrax.config.ingest_queue_name
|
44
44
|
|
45
45
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
46
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "bulkrax/version"
|
4
|
-
require "bulkrax/engine"
|
5
|
-
require 'active_support/all'
|
3
|
+
require "bulkrax/version"
|
4
|
+
require "bulkrax/engine"
|
5
|
+
require 'active_support/all'
|
6
6
|
|
7
|
-
# rubocop:disable Metrics/ModuleLength
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
8
8
|
module Bulkrax
|
9
|
-
|
10
|
-
|
9
|
+
extend self # rubocop:disable Style/ModuleFunction
|
10
|
+
extend Forwardable
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
##
|
13
|
+
# @api public
|
14
14
|
class Configuration
|
15
15
|
attr_accessor :api_definition,
|
16
16
|
:curation_concerns,
|
@@ -35,6 +35,15 @@ module Bulkrax
|
|
35
35
|
:reserved_properties,
|
36
36
|
:server_name
|
37
37
|
|
38
|
+
attr_writer :ingest_queue_name
|
39
|
+
##
|
40
|
+
# @return [String, Proc]
|
41
|
+
def ingest_queue_name
|
42
|
+
return @ingest_queue_name if @ingest_queue_name.present?
|
43
|
+
return Hyrax.config.ingest_queue_name if defined?(Hyrax)
|
44
|
+
:import
|
45
|
+
end
|
46
|
+
|
38
47
|
attr_writer :use_locking
|
39
48
|
|
40
49
|
def use_locking
|
@@ -45,61 +54,61 @@ module Bulkrax
|
|
45
54
|
alias use_locking? use_locking
|
46
55
|
end
|
47
56
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
57
|
+
def config
|
58
|
+
@config ||= Configuration.new
|
59
|
+
yield @config if block_given?
|
60
|
+
@config
|
61
|
+
end
|
62
|
+
alias setup config
|
54
63
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
64
|
+
def_delegators :@config,
|
65
|
+
:api_definition,
|
66
|
+
:api_definition=,
|
67
|
+
:curation_concerns,
|
68
|
+
:curation_concerns=,
|
69
|
+
:default_field_mapping,
|
70
|
+
:default_field_mapping=,
|
71
|
+
:default_work_type,
|
72
|
+
:default_work_type=,
|
73
|
+
:export_path,
|
74
|
+
:export_path=,
|
75
|
+
:field_mappings,
|
76
|
+
:field_mappings=,
|
77
|
+
:file_model_class,
|
78
|
+
:file_model_class=,
|
79
|
+
:fill_in_blank_source_identifiers,
|
80
|
+
:fill_in_blank_source_identifiers=,
|
81
|
+
:generated_metadata_mapping,
|
82
|
+
:generated_metadata_mapping=,
|
83
|
+
:import_path,
|
84
|
+
:import_path=,
|
85
|
+
:multi_value_element_join_on,
|
86
|
+
:multi_value_element_join_on=,
|
87
|
+
:multi_value_element_split_on,
|
88
|
+
:multi_value_element_split_on=,
|
89
|
+
:object_factory,
|
90
|
+
:object_factory=,
|
91
|
+
:parsers,
|
92
|
+
:parsers=,
|
93
|
+
:qa_controlled_properties,
|
94
|
+
:qa_controlled_properties=,
|
95
|
+
:related_children_field_mapping,
|
96
|
+
:related_children_field_mapping=,
|
97
|
+
:related_parents_field_mapping,
|
98
|
+
:related_parents_field_mapping=,
|
99
|
+
:relationship_job_class,
|
100
|
+
:relationship_job_class=,
|
101
|
+
:removed_image_path,
|
102
|
+
:removed_image_path=,
|
103
|
+
:required_elements,
|
104
|
+
:required_elements=,
|
105
|
+
:reserved_properties,
|
106
|
+
:reserved_properties=,
|
107
|
+
:server_name,
|
108
|
+
:server_name=,
|
109
|
+
:use_locking,
|
110
|
+
:use_locking=,
|
111
|
+
:use_locking?
|
103
112
|
|
104
113
|
config do |conf|
|
105
114
|
conf.parsers = [
|
@@ -138,149 +147,149 @@ module Bulkrax
|
|
138
147
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
139
148
|
# Override at application level to change
|
140
149
|
conf.field_mappings = {
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
150
|
+
"Bulkrax::OaiDcParser" => {
|
151
|
+
"contributor" => { from: ["contributor"] },
|
152
|
+
# no appropriate mapping for coverage (based_near needs id)
|
153
|
+
# ""=>{:from=>["coverage"]},
|
154
|
+
"creator" => { from: ["creator"] },
|
155
|
+
"date_created" => { from: ["date"] },
|
156
|
+
"description" => { from: ["description"] },
|
157
|
+
# no appropriate mapping for format
|
158
|
+
# ""=>{:from=>["format"]},
|
159
|
+
"identifier" => { from: ["identifier"] },
|
160
|
+
"language" => { from: ["language"], parsed: true },
|
161
|
+
"publisher" => { from: ["publisher"] },
|
162
|
+
"related_url" => { from: ["relation"] },
|
163
|
+
"rights_statement" => { from: ["rights"] },
|
164
|
+
"source" => { from: ["source"] },
|
165
|
+
"subject" => { from: ["subject"], parsed: true },
|
166
|
+
"title" => { from: ["title"] },
|
167
|
+
"resource_type" => { from: ["type"], parsed: true },
|
168
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
160
169
|
},
|
161
|
-
|
162
|
-
|
163
|
-
|
170
|
+
"Bulkrax::OaiQualifiedDcParser" => {
|
171
|
+
"abstract" => { from: ["abstract"] },
|
172
|
+
"alternative_title" => { from: ["alternative"] },
|
164
173
|
"bibliographic_citation" => { from: ["bibliographicCitation"] },
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
174
|
+
"contributor" => { from: ["contributor"] },
|
175
|
+
"creator" => { from: ["creator"] },
|
176
|
+
"date_created" => { from: ["created"] },
|
177
|
+
"description" => { from: ["description"] },
|
178
|
+
"language" => { from: ["language"] },
|
179
|
+
"license" => { from: ["license"] },
|
180
|
+
"publisher" => { from: ["publisher"] },
|
181
|
+
"related_url" => { from: ["relation"] },
|
182
|
+
"rights_holder" => { from: ["rightsHolder"] },
|
183
|
+
"rights_statement" => { from: ["rights"] },
|
184
|
+
"source" => { from: ["source"] },
|
185
|
+
"subject" => { from: ["subject"], parsed: true },
|
186
|
+
"title" => { from: ["title"] },
|
187
|
+
"resource_type" => { from: ["type"], parsed: true },
|
188
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
189
|
+
},
|
190
|
+
# When empty, a default_field_mapping will be generated
|
191
|
+
"Bulkrax::CsvParser" => {},
|
192
|
+
'Bulkrax::BagitParser' => {},
|
193
|
+
'Bulkrax::XmlParser' => {}
|
194
|
+
}
|
186
195
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
196
|
+
# Lambda to set the default field mapping
|
197
|
+
conf.default_field_mapping = lambda do |field|
|
198
|
+
return if field.blank?
|
199
|
+
{
|
200
|
+
field.to_s =>
|
201
|
+
{
|
202
|
+
from: [field.to_s],
|
203
|
+
split: false,
|
204
|
+
parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"),
|
205
|
+
if: nil,
|
206
|
+
excluded: false
|
207
|
+
}
|
208
|
+
}
|
209
|
+
end
|
201
210
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
211
|
+
# Properties that should not be used in imports. They are reserved for use by Hyrax.
|
212
|
+
conf.reserved_properties = %w[
|
213
|
+
create_date
|
214
|
+
modified_date
|
215
|
+
date_modified
|
216
|
+
date_uploaded
|
217
|
+
depositor
|
218
|
+
arkivo_checksum
|
219
|
+
has_model
|
220
|
+
head
|
221
|
+
label
|
222
|
+
import_url
|
223
|
+
on_behalf_of
|
224
|
+
proxy_depositor
|
225
|
+
owner
|
226
|
+
state
|
227
|
+
tail
|
228
|
+
original_url
|
229
|
+
relative_path
|
230
|
+
]
|
222
231
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
232
|
+
# List of Questioning Authority properties that are controlled via YAML files in
|
233
|
+
# the config/authorities/ directory. For example, the :rights_statement property
|
234
|
+
# is controlled by the active terms in config/authorities/rights_statements.yml
|
235
|
+
conf.qa_controlled_properties = %w[rights_statement license]
|
236
|
+
end
|
228
237
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
+
def api_definition
|
239
|
+
@api_definition ||= ActiveSupport::HashWithIndifferentAccess.new(
|
240
|
+
YAML.safe_load(
|
241
|
+
ERB.new(
|
242
|
+
File.read(Rails.root.join('config', 'bulkrax_api.yml'))
|
243
|
+
).result
|
244
|
+
)
|
245
|
+
)
|
246
|
+
end
|
238
247
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
248
|
+
DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | '
|
249
|
+
# Specify the delimiter for joining an attribute's multi-value array into a string.
|
250
|
+
#
|
251
|
+
# @note the specific delimiter should likely be present in the multi_value_element_split_on
|
252
|
+
# expression.
|
253
|
+
def multi_value_element_join_on
|
254
|
+
@multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
255
|
+
end
|
247
256
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
257
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze
|
258
|
+
# @return [Regexp] the regular expression to use to "split" an attribute's values. If set to
|
259
|
+
# `true` use the DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON.
|
260
|
+
#
|
261
|
+
# @note The "true" value is to preserve backwards compatibility.
|
262
|
+
# @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
254
263
|
def multi_value_element_split_on
|
255
|
-
|
256
|
-
|
264
|
+
if @multi_value_element_join_on.is_a?(TrueClass)
|
265
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
257
266
|
else
|
258
|
-
|
267
|
+
@multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
259
268
|
end
|
260
269
|
end
|
261
270
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
271
|
+
# Responsible for stripping hidden characters from the given string.
|
272
|
+
#
|
273
|
+
# @param value [#to_s]
|
274
|
+
# @return [String] with hidden characters removed
|
275
|
+
#
|
276
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
277
|
+
def normalize_string(value)
|
278
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
279
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
280
|
+
end
|
272
281
|
|
273
|
-
|
274
|
-
|
282
|
+
def fallback_user_for_importer_exporter_processing
|
283
|
+
return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user)
|
275
284
|
|
276
|
-
|
277
|
-
|
285
|
+
raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing"
|
286
|
+
end
|
278
287
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
288
|
+
# This class conforms to the ActiveRecord `serialize` coder interface. Its job is to ensure that we
|
289
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
290
|
+
#
|
291
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
292
|
+
class NormalizedJson
|
284
293
|
def self.normalize_keys(hash)
|
285
294
|
return hash unless hash.respond_to?(:each_pair)
|
286
295
|
returning_value = {}
|
@@ -292,18 +301,18 @@ module Bulkrax
|
|
292
301
|
|
293
302
|
# When we write the serialized data to the database, we "dump" the value into that database
|
294
303
|
# column.
|
295
|
-
|
296
|
-
|
297
|
-
|
304
|
+
def self.dump(value)
|
305
|
+
JSON.dump(normalize_keys(value))
|
306
|
+
end
|
298
307
|
|
299
308
|
# When we load the serialized data from the database, we pass the database's value into "load"
|
300
309
|
# function.
|
301
310
|
#
|
302
311
|
# rubocop:disable Security/JSONLoad
|
303
|
-
|
304
|
-
|
305
|
-
|
312
|
+
def self.load(string)
|
313
|
+
normalize_keys(JSON.load(string))
|
314
|
+
end
|
306
315
|
# rubocop:enable Security/JSONLoad
|
307
|
-
|
316
|
+
end
|
308
317
|
end
|
309
|
-
# rubocop:disable Metrics/ModuleLength
|
318
|
+
# rubocop:disable Metrics/ModuleLength
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.5.0
|
4
|
+
version: 5.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|