bulkrax 5.5.0 → 5.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/delete_job.rb +1 -1
- data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
- data/app/jobs/bulkrax/import_collection_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
- data/app/jobs/bulkrax/import_work_job.rb +1 -1
- data/app/jobs/bulkrax/importer_job.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +207 -198
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d047869e323f1c300329fa46b3cb0bcc4e0f3975729d44c66554d79f00f41657
|
4
|
+
data.tar.gz: 0d80b53d7b490bfe39bf8148f926cd2aa9e73f22f1e5a5726f225c11f72b6856
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e03981be6998e66998c33484afb06aead0e18984bc7a75320aef9078cfec8ce419203b3d90b93cad3193bb7f9187ad3a8906291690d791dac66f07a42144ee2
|
7
|
+
data.tar.gz: 4a66439b1b28aa601ef36425b440a29b8796589d4fe2c74f759e066768564cc3656cb1de290e1df89fb45d28ec6926e0d3a67936c990619057028f7701822177
|
@@ -40,7 +40,7 @@ module Bulkrax
|
|
40
40
|
|
41
41
|
include DynamicRecordLookup
|
42
42
|
|
43
|
-
queue_as
|
43
|
+
queue_as Bulkrax.config.ingest_queue_name
|
44
44
|
|
45
45
|
# @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
|
46
46
|
# @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
|
data/lib/bulkrax/version.rb
CHANGED
data/lib/bulkrax.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "bulkrax/version"
|
4
|
-
require "bulkrax/engine"
|
5
|
-
require 'active_support/all'
|
3
|
+
require "bulkrax/version"
|
4
|
+
require "bulkrax/engine"
|
5
|
+
require 'active_support/all'
|
6
6
|
|
7
|
-
# rubocop:disable Metrics/ModuleLength
|
7
|
+
# rubocop:disable Metrics/ModuleLength
|
8
8
|
module Bulkrax
|
9
|
-
|
10
|
-
|
9
|
+
extend self # rubocop:disable Style/ModuleFunction
|
10
|
+
extend Forwardable
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
##
|
13
|
+
# @api public
|
14
14
|
class Configuration
|
15
15
|
attr_accessor :api_definition,
|
16
16
|
:curation_concerns,
|
@@ -35,6 +35,15 @@ module Bulkrax
|
|
35
35
|
:reserved_properties,
|
36
36
|
:server_name
|
37
37
|
|
38
|
+
attr_writer :ingest_queue_name
|
39
|
+
##
|
40
|
+
# @return [String, Proc]
|
41
|
+
def ingest_queue_name
|
42
|
+
return @ingest_queue_name if @ingest_queue_name.present?
|
43
|
+
return Hyrax.config.ingest_queue_name if defined?(Hyrax)
|
44
|
+
:import
|
45
|
+
end
|
46
|
+
|
38
47
|
attr_writer :use_locking
|
39
48
|
|
40
49
|
def use_locking
|
@@ -45,61 +54,61 @@ module Bulkrax
|
|
45
54
|
alias use_locking? use_locking
|
46
55
|
end
|
47
56
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
57
|
+
def config
|
58
|
+
@config ||= Configuration.new
|
59
|
+
yield @config if block_given?
|
60
|
+
@config
|
61
|
+
end
|
62
|
+
alias setup config
|
54
63
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
64
|
+
def_delegators :@config,
|
65
|
+
:api_definition,
|
66
|
+
:api_definition=,
|
67
|
+
:curation_concerns,
|
68
|
+
:curation_concerns=,
|
69
|
+
:default_field_mapping,
|
70
|
+
:default_field_mapping=,
|
71
|
+
:default_work_type,
|
72
|
+
:default_work_type=,
|
73
|
+
:export_path,
|
74
|
+
:export_path=,
|
75
|
+
:field_mappings,
|
76
|
+
:field_mappings=,
|
77
|
+
:file_model_class,
|
78
|
+
:file_model_class=,
|
79
|
+
:fill_in_blank_source_identifiers,
|
80
|
+
:fill_in_blank_source_identifiers=,
|
81
|
+
:generated_metadata_mapping,
|
82
|
+
:generated_metadata_mapping=,
|
83
|
+
:import_path,
|
84
|
+
:import_path=,
|
85
|
+
:multi_value_element_join_on,
|
86
|
+
:multi_value_element_join_on=,
|
87
|
+
:multi_value_element_split_on,
|
88
|
+
:multi_value_element_split_on=,
|
89
|
+
:object_factory,
|
90
|
+
:object_factory=,
|
91
|
+
:parsers,
|
92
|
+
:parsers=,
|
93
|
+
:qa_controlled_properties,
|
94
|
+
:qa_controlled_properties=,
|
95
|
+
:related_children_field_mapping,
|
96
|
+
:related_children_field_mapping=,
|
97
|
+
:related_parents_field_mapping,
|
98
|
+
:related_parents_field_mapping=,
|
99
|
+
:relationship_job_class,
|
100
|
+
:relationship_job_class=,
|
101
|
+
:removed_image_path,
|
102
|
+
:removed_image_path=,
|
103
|
+
:required_elements,
|
104
|
+
:required_elements=,
|
105
|
+
:reserved_properties,
|
106
|
+
:reserved_properties=,
|
107
|
+
:server_name,
|
108
|
+
:server_name=,
|
109
|
+
:use_locking,
|
110
|
+
:use_locking=,
|
111
|
+
:use_locking?
|
103
112
|
|
104
113
|
config do |conf|
|
105
114
|
conf.parsers = [
|
@@ -138,149 +147,149 @@ module Bulkrax
|
|
138
147
|
# Based on Hyrax CoreMetadata && BasicMetadata
|
139
148
|
# Override at application level to change
|
140
149
|
conf.field_mappings = {
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
150
|
+
"Bulkrax::OaiDcParser" => {
|
151
|
+
"contributor" => { from: ["contributor"] },
|
152
|
+
# no appropriate mapping for coverage (based_near needs id)
|
153
|
+
# ""=>{:from=>["coverage"]},
|
154
|
+
"creator" => { from: ["creator"] },
|
155
|
+
"date_created" => { from: ["date"] },
|
156
|
+
"description" => { from: ["description"] },
|
157
|
+
# no appropriate mapping for format
|
158
|
+
# ""=>{:from=>["format"]},
|
159
|
+
"identifier" => { from: ["identifier"] },
|
160
|
+
"language" => { from: ["language"], parsed: true },
|
161
|
+
"publisher" => { from: ["publisher"] },
|
162
|
+
"related_url" => { from: ["relation"] },
|
163
|
+
"rights_statement" => { from: ["rights"] },
|
164
|
+
"source" => { from: ["source"] },
|
165
|
+
"subject" => { from: ["subject"], parsed: true },
|
166
|
+
"title" => { from: ["title"] },
|
167
|
+
"resource_type" => { from: ["type"], parsed: true },
|
168
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
160
169
|
},
|
161
|
-
|
162
|
-
|
163
|
-
|
170
|
+
"Bulkrax::OaiQualifiedDcParser" => {
|
171
|
+
"abstract" => { from: ["abstract"] },
|
172
|
+
"alternative_title" => { from: ["alternative"] },
|
164
173
|
"bibliographic_citation" => { from: ["bibliographicCitation"] },
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
174
|
+
"contributor" => { from: ["contributor"] },
|
175
|
+
"creator" => { from: ["creator"] },
|
176
|
+
"date_created" => { from: ["created"] },
|
177
|
+
"description" => { from: ["description"] },
|
178
|
+
"language" => { from: ["language"] },
|
179
|
+
"license" => { from: ["license"] },
|
180
|
+
"publisher" => { from: ["publisher"] },
|
181
|
+
"related_url" => { from: ["relation"] },
|
182
|
+
"rights_holder" => { from: ["rightsHolder"] },
|
183
|
+
"rights_statement" => { from: ["rights"] },
|
184
|
+
"source" => { from: ["source"] },
|
185
|
+
"subject" => { from: ["subject"], parsed: true },
|
186
|
+
"title" => { from: ["title"] },
|
187
|
+
"resource_type" => { from: ["type"], parsed: true },
|
188
|
+
"remote_files" => { from: ["thumbnail_url"], parsed: true }
|
189
|
+
},
|
190
|
+
# When empty, a default_field_mapping will be generated
|
191
|
+
"Bulkrax::CsvParser" => {},
|
192
|
+
'Bulkrax::BagitParser' => {},
|
193
|
+
'Bulkrax::XmlParser' => {}
|
194
|
+
}
|
186
195
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
196
|
+
# Lambda to set the default field mapping
|
197
|
+
conf.default_field_mapping = lambda do |field|
|
198
|
+
return if field.blank?
|
199
|
+
{
|
200
|
+
field.to_s =>
|
201
|
+
{
|
202
|
+
from: [field.to_s],
|
203
|
+
split: false,
|
204
|
+
parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"),
|
205
|
+
if: nil,
|
206
|
+
excluded: false
|
207
|
+
}
|
208
|
+
}
|
209
|
+
end
|
201
210
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
211
|
+
# Properties that should not be used in imports. They are reserved for use by Hyrax.
|
212
|
+
conf.reserved_properties = %w[
|
213
|
+
create_date
|
214
|
+
modified_date
|
215
|
+
date_modified
|
216
|
+
date_uploaded
|
217
|
+
depositor
|
218
|
+
arkivo_checksum
|
219
|
+
has_model
|
220
|
+
head
|
221
|
+
label
|
222
|
+
import_url
|
223
|
+
on_behalf_of
|
224
|
+
proxy_depositor
|
225
|
+
owner
|
226
|
+
state
|
227
|
+
tail
|
228
|
+
original_url
|
229
|
+
relative_path
|
230
|
+
]
|
222
231
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
232
|
+
# List of Questioning Authority properties that are controlled via YAML files in
|
233
|
+
# the config/authorities/ directory. For example, the :rights_statement property
|
234
|
+
# is controlled by the active terms in config/authorities/rights_statements.yml
|
235
|
+
conf.qa_controlled_properties = %w[rights_statement license]
|
236
|
+
end
|
228
237
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
+
def api_definition
|
239
|
+
@api_definition ||= ActiveSupport::HashWithIndifferentAccess.new(
|
240
|
+
YAML.safe_load(
|
241
|
+
ERB.new(
|
242
|
+
File.read(Rails.root.join('config', 'bulkrax_api.yml'))
|
243
|
+
).result
|
244
|
+
)
|
245
|
+
)
|
246
|
+
end
|
238
247
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
248
|
+
DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | '
|
249
|
+
# Specify the delimiter for joining an attribute's multi-value array into a string.
|
250
|
+
#
|
251
|
+
# @note the specific delimiter should likely be present in the multi_value_element_split_on
|
252
|
+
# expression.
|
253
|
+
def multi_value_element_join_on
|
254
|
+
@multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
255
|
+
end
|
247
256
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
257
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze
|
258
|
+
# @return [Regexp] the regular expression to use to "split" an attribute's values. If set to
|
259
|
+
# `true` use the DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON.
|
260
|
+
#
|
261
|
+
# @note The "true" value is to preserve backwards compatibility.
|
262
|
+
# @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON
|
254
263
|
def multi_value_element_split_on
|
255
|
-
|
256
|
-
|
264
|
+
if @multi_value_element_join_on.is_a?(TrueClass)
|
265
|
+
DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
257
266
|
else
|
258
|
-
|
267
|
+
@multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON
|
259
268
|
end
|
260
269
|
end
|
261
270
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
271
|
+
# Responsible for stripping hidden characters from the given string.
|
272
|
+
#
|
273
|
+
# @param value [#to_s]
|
274
|
+
# @return [String] with hidden characters removed
|
275
|
+
#
|
276
|
+
# @see https://github.com/samvera-labs/bulkrax/issues/688
|
277
|
+
def normalize_string(value)
|
278
|
+
# Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark)
|
279
|
+
value.to_s.delete("\xEF\xBB\xBF")
|
280
|
+
end
|
272
281
|
|
273
|
-
|
274
|
-
|
282
|
+
def fallback_user_for_importer_exporter_processing
|
283
|
+
return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user)
|
275
284
|
|
276
|
-
|
277
|
-
|
285
|
+
raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing"
|
286
|
+
end
|
278
287
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
288
|
+
# This class conforms to the Active::Support.serialize interface. Its job is to ensure that we
|
289
|
+
# don't have keys with the tricksy Byte Order Mark character.
|
290
|
+
#
|
291
|
+
# @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize
|
292
|
+
class NormalizedJson
|
284
293
|
def self.normalize_keys(hash)
|
285
294
|
return hash unless hash.respond_to?(:each_pair)
|
286
295
|
returning_value = {}
|
@@ -292,18 +301,18 @@ module Bulkrax
|
|
292
301
|
|
293
302
|
# When we write the serialized data to the database, we "dump" the value into that database
|
294
303
|
# column.
|
295
|
-
|
296
|
-
|
297
|
-
|
304
|
+
def self.dump(value)
|
305
|
+
JSON.dump(normalize_keys(value))
|
306
|
+
end
|
298
307
|
|
299
308
|
# When we load the serialized data from the database, we pass the database's value into "load"
|
300
309
|
# function.
|
301
310
|
#
|
302
311
|
# rubocop:disable Security/JSONLoad
|
303
|
-
|
304
|
-
|
305
|
-
|
312
|
+
def self.load(string)
|
313
|
+
normalize_keys(JSON.load(string))
|
314
|
+
end
|
306
315
|
# rubocop:enable Security/JSONLoad
|
307
|
-
|
316
|
+
end
|
308
317
|
end
|
309
|
-
# rubocop:disable Metrics/ModuleLength
|
318
|
+
# rubocop:disable Metrics/ModuleLength
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bulkrax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.5.
|
4
|
+
version: 5.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|