darlingtonia 3.0.5 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -2
- data/CHANGELOG.md +8 -1
- data/README.md +9 -1
- data/lib/darlingtonia/hyrax_record_importer.rb +56 -2
- data/lib/darlingtonia/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee7ce1d03349bd1c624e7ad3bd3c12e92db474f77559444f15a36a34f444a8aa
|
4
|
+
data.tar.gz: bbfcdc4b7b0cd0ac1b9cfc83e30cb03d19d626e8664a05a7be762d7ea3e721ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc8ca923806a61c2b99c2c8bd9d97942464a539afb637c3f4206ce02faf397d9bd1e893f65c89153816eeee7bb5c55a0e9a25f59820462bbf6665606504cb3c0
|
7
|
+
data.tar.gz: 067ffa15cfccfb7f045e33f015f1e7dd3f07526214153e953aab1427723f9436a03e12ab1e927306c05004178f9e62260990d3ba639fd95ca761209a5e918163
|
data/.rubocop.yml
CHANGED
@@ -21,13 +21,16 @@ Metrics/BlockLength:
|
|
21
21
|
- 'spec/**/*'
|
22
22
|
- 'lib/darlingtonia/spec/**/*'
|
23
23
|
|
24
|
+
Metrics/ClassLength:
|
25
|
+
Enabled: false
|
26
|
+
|
24
27
|
Metrics/CyclomaticComplexity:
|
25
28
|
Exclude:
|
26
29
|
- lib/darlingtonia/hyrax_basic_metadata_mapper.rb
|
30
|
+
- lib/darlingtonia/hyrax_record_importer.rb
|
27
31
|
|
28
32
|
Metrics/LineLength:
|
29
|
-
|
30
|
-
- 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
|
33
|
+
Enabled: false
|
31
34
|
|
32
35
|
Metrics/MethodLength:
|
33
36
|
Exclude:
|
@@ -35,6 +38,10 @@ Metrics/MethodLength:
|
|
35
38
|
- 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
|
36
39
|
- lib/darlingtonia/hyrax_record_importer.rb
|
37
40
|
|
41
|
+
Metrics/PerceivedComplexity:
|
42
|
+
Exclude:
|
43
|
+
- lib/darlingtonia/hyrax_record_importer.rb
|
44
|
+
|
38
45
|
Naming/AccessorMethodName:
|
39
46
|
Exclude:
|
40
47
|
- lib/darlingtonia/hyrax_record_importer.rb
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,13 @@
|
|
1
|
+
3.1.0 - Tue Feb 26, 2019
|
2
|
+
|
3
|
+
New Feature: `HyraxRecordImporter` now accepts a `deduplication_field` in the
|
4
|
+
attributes hash it receives when it is created. If a `deduplication_field`
|
5
|
+
is provided, the system will look for existing works with that field and matching
|
6
|
+
value and will update the record instead of creating a new record.
|
7
|
+
|
1
8
|
3.0.5 - Tue Feb 26, 2019
|
2
9
|
|
3
|
-
When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
|
10
|
+
When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
|
4
11
|
If user_key isn't found, fall back to querying by User.id for backward compatibility.
|
5
12
|
|
6
13
|
3.0.4 - Fri Feb 22, 2019
|
data/README.md
CHANGED
@@ -35,8 +35,16 @@ class MyImporter
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def import
|
38
|
+
attrs = {
|
39
|
+
collection_id: collection_id, # pass a collection id to the record importer and all records will be added to that collection
|
40
|
+
depositor_id: depositor_id, # pass a Hyrax user_key here and that Hyrax user will own all objects created during this import
|
41
|
+
deduplication_field: 'identifier' # pass a field with a persistent identifier (e.g., ARK) and it will check to see if a record with that identifier already
|
42
|
+
} # exists, update its metadata if so, and only if it doesn't find a record with that identifier will it make a new object.
|
43
|
+
|
38
44
|
file = File.open(@csv_file)
|
39
|
-
|
45
|
+
parser = Darlingtonia::CsvParser.new(file: file)
|
46
|
+
record_importer = Darlingtonia::HyraxRecordImporter.new(attributes: attrs)
|
47
|
+
Darlingtonia::Importer.new(parser: parser, record_importer: record_importer).import
|
40
48
|
file.close # unless a block is passed to File.open, the file must be explicitly closed
|
41
49
|
end
|
42
50
|
end
|
@@ -17,6 +17,11 @@ module Darlingtonia
|
|
17
17
|
# @return [String] an id number associated with the process that kicked off this import run
|
18
18
|
attr_accessor :batch_id
|
19
19
|
|
20
|
+
# @!attribute [rw] deduplication_field
|
21
|
+
# @return [String] if this is set, look for records with a match in this field
|
22
|
+
# and update the metadata instead of creating a new record. This will NOT re-import file attachments.
|
23
|
+
attr_accessor :deduplication_field
|
24
|
+
|
20
25
|
# @!attribute [rw] success_count
|
21
26
|
# @return [Integer] the number of records this importer has successfully created or updated
|
22
27
|
attr_accessor :success_count
|
@@ -30,15 +35,21 @@ module Darlingtonia
|
|
30
35
|
# the CSV/mapper. These are useful for logging
|
31
36
|
# and tracking the output of an import job for
|
32
37
|
# a given collection, user, or batch.
|
38
|
+
# If a deduplication_field is provided, the system will
|
39
|
+
# look for existing works with that field and matching
|
40
|
+
# value and will update the record instead of creating a new record.
|
33
41
|
# @example
|
34
42
|
# attributes: { collection_id: '123',
|
35
43
|
# depositor_id: '456',
|
36
|
-
# batch_id: '789'
|
44
|
+
# batch_id: '789',
|
45
|
+
# deduplication_field: 'legacy_id'
|
46
|
+
# }
|
37
47
|
def initialize(error_stream: Darlingtonia.config.default_error_stream,
|
38
48
|
info_stream: Darlingtonia.config.default_info_stream,
|
39
49
|
attributes: {})
|
40
50
|
self.collection_id = attributes[:collection_id]
|
41
51
|
self.batch_id = attributes[:batch_id]
|
52
|
+
self.deduplication_field = attributes[:deduplication_field]
|
42
53
|
set_depositor(attributes[:depositor_id])
|
43
54
|
@success_count = 0
|
44
55
|
@failure_count = 0
|
@@ -55,12 +66,27 @@ module Darlingtonia
|
|
55
66
|
self.depositor = user
|
56
67
|
end
|
57
68
|
|
69
|
+
##
|
70
|
+
# @param record [ImportRecord]
|
71
|
+
# @return [ActiveFedora::Base, nil] the single matching record, or nil when no deduplication lookup applies or nothing matches
|
72
|
+
# Search for any existing records that match on the deduplication_field
|
73
|
+
def find_existing_record(record)
|
74
|
+
return unless deduplication_field
|
75
|
+
return unless record.respond_to?(deduplication_field)
|
76
|
+
return if record.mapper.send(deduplication_field).empty?
|
77
|
+
existing_records = import_type.where("#{deduplication_field}": record.mapper.send(deduplication_field).to_s)
|
78
|
+
raise "More than one record matches deduplication_field #{deduplication_field} with value #{record.mapper.send(deduplication_field)}" if existing_records.count > 1
|
79
|
+
existing_records&.first
|
80
|
+
end
|
81
|
+
|
58
82
|
##
|
59
83
|
# @param record [ImportRecord]
|
60
84
|
#
|
61
85
|
# @return [void]
|
62
86
|
def import(record:)
|
63
|
-
|
87
|
+
existing_record = find_existing_record(record)
|
88
|
+
create_for(record: record) unless existing_record
|
89
|
+
update_for(existing_record: existing_record, update_record: record) if existing_record
|
64
90
|
rescue Faraday::ConnectionFailed, Ldp::HttpError => e
|
65
91
|
error_stream << e
|
66
92
|
rescue RuntimeError => e
|
@@ -153,6 +179,34 @@ module Darlingtonia
|
|
153
179
|
|
154
180
|
private
|
155
181
|
|
182
|
+
# Update an existing object using the Hyrax actor stack
|
183
|
+
# We assume the object was updated as expected if the actor stack returns true.
|
184
|
+
def update_for(existing_record:, update_record:)
|
185
|
+
info_stream << "event: record_update_started, batch_id: #{batch_id}, collection_id: #{collection_id}, #{deduplication_field}: #{update_record.respond_to?(deduplication_field) ? update_record.send(deduplication_field) : update_record}"
|
186
|
+
additional_attrs = {
|
187
|
+
depositor: @depositor.user_key
|
188
|
+
}
|
189
|
+
attrs = update_record.attributes.merge(additional_attrs)
|
190
|
+
attrs = attrs.merge(member_of_collections_attributes: { '0' => { id: collection_id } }) if collection_id
|
191
|
+
# Ensure nothing is passed in the files field,
|
192
|
+
# since this is reserved for Hyrax and is where uploaded_files will be attached
|
193
|
+
attrs.delete(:files)
|
194
|
+
based_near = attrs.delete(:based_near)
|
195
|
+
attrs = attrs.merge(based_near_attributes: based_near_attributes(based_near)) unless based_near.nil? || based_near.empty?
|
196
|
+
actor_env = Hyrax::Actors::Environment.new(existing_record,
|
197
|
+
::Ability.new(@depositor),
|
198
|
+
attrs)
|
199
|
+
if Hyrax::CurationConcern.actor.update(actor_env)
|
200
|
+
info_stream << "event: record_updated, batch_id: #{batch_id}, record_id: #{existing_record.id}, collection_id: #{collection_id}, #{deduplication_field}: #{existing_record.respond_to?(deduplication_field) ? existing_record.send(deduplication_field) : existing_record}"
|
201
|
+
@success_count += 1
|
202
|
+
else
|
203
|
+
existing_record.errors.each do |attr, msg|
|
204
|
+
error_stream << "event: validation_failed, batch_id: #{batch_id}, collection_id: #{collection_id}, attribute: #{attr.capitalize}, message: #{msg}, record_title: record_title: #{attrs[:title] ? attrs[:title] : attrs}"
|
205
|
+
end
|
206
|
+
@failure_count += 1
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
156
210
|
# Create an object using the Hyrax actor stack
|
157
211
|
# We assume the object was created as expected if the actor stack returns true.
|
158
212
|
def create_for(record:)
|
data/lib/darlingtonia/version.rb
CHANGED