darlingtonia 3.0.5 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -2
- data/CHANGELOG.md +8 -1
- data/README.md +9 -1
- data/lib/darlingtonia/hyrax_record_importer.rb +56 -2
- data/lib/darlingtonia/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee7ce1d03349bd1c624e7ad3bd3c12e92db474f77559444f15a36a34f444a8aa
|
4
|
+
data.tar.gz: bbfcdc4b7b0cd0ac1b9cfc83e30cb03d19d626e8664a05a7be762d7ea3e721ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc8ca923806a61c2b99c2c8bd9d97942464a539afb637c3f4206ce02faf397d9bd1e893f65c89153816eeee7bb5c55a0e9a25f59820462bbf6665606504cb3c0
|
7
|
+
data.tar.gz: 067ffa15cfccfb7f045e33f015f1e7dd3f07526214153e953aab1427723f9436a03e12ab1e927306c05004178f9e62260990d3ba639fd95ca761209a5e918163
|
data/.rubocop.yml
CHANGED
@@ -21,13 +21,16 @@ Metrics/BlockLength:
|
|
21
21
|
- 'spec/**/*'
|
22
22
|
- 'lib/darlingtonia/spec/**/*'
|
23
23
|
|
24
|
+
Metrics/ClassLength:
|
25
|
+
Enabled: false
|
26
|
+
|
24
27
|
Metrics/CyclomaticComplexity:
|
25
28
|
Exclude:
|
26
29
|
- lib/darlingtonia/hyrax_basic_metadata_mapper.rb
|
30
|
+
- lib/darlingtonia/hyrax_record_importer.rb
|
27
31
|
|
28
32
|
Metrics/LineLength:
|
29
|
-
|
30
|
-
- 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
|
33
|
+
Enabled: false
|
31
34
|
|
32
35
|
Metrics/MethodLength:
|
33
36
|
Exclude:
|
@@ -35,6 +38,10 @@ Metrics/MethodLength:
|
|
35
38
|
- 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
|
36
39
|
- lib/darlingtonia/hyrax_record_importer.rb
|
37
40
|
|
41
|
+
Metrics/PerceivedComplexity:
|
42
|
+
Exclude:
|
43
|
+
- lib/darlingtonia/hyrax_record_importer.rb
|
44
|
+
|
38
45
|
Naming/AccessorMethodName:
|
39
46
|
Exclude:
|
40
47
|
- lib/darlingtonia/hyrax_record_importer.rb
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,13 @@
|
|
1
|
+
3.1.0 - Tue Feb 26, 2019
|
2
|
+
|
3
|
+
New Feature: `HyraxRecordImporter` now accepts a `deduplication_field` in the
|
4
|
+
attributes hash it receives when it is created. If a `deduplication_field`
|
5
|
+
is provided, the system will look for existing works with that field and matching
|
6
|
+
value and will update the record instead of creating a new record.
|
7
|
+
|
1
8
|
3.0.5 - Tue Feb 26, 2019
|
2
9
|
|
3
|
-
When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
|
10
|
+
When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
|
4
11
|
If user_key isn't found, fall back to querying by User.id for backward compatibility.
|
5
12
|
|
6
13
|
3.0.4 - Fri Feb 22, 2019
|
data/README.md
CHANGED
@@ -35,8 +35,16 @@ class MyImporter
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def import
|
38
|
+
attrs = {
|
39
|
+
collection_id: collection_id, # pass a collection id to the record importer and all records will be added to that collection
|
40
|
+
depositor_id: depositor_id, # pass a Hyrax user_key here and that Hyrax user will own all objects created during this import
|
41
|
+
deduplication_field: 'identifier' # pass a field with a persistent identifier (e.g., ARK) and it will check to see if a record with that identifier already
|
42
|
+
} # exists, update its metadata if so, and only if it doesn't find a record with that identifier will it make a new object.
|
43
|
+
|
38
44
|
file = File.open(@csv_file)
|
39
|
-
|
45
|
+
parser = Darlingtonia::CsvParser.new(file: file)
|
46
|
+
record_importer = Darlingtonia::HyraxRecordImporter.new(attributes: attrs)
|
47
|
+
Darlingtonia::Importer.new(parser: parser, record_importer: record_importer).import
|
40
48
|
file.close # unless a block is passed to File.open, the file must be explicitly closed
|
41
49
|
end
|
42
50
|
end
|
@@ -17,6 +17,11 @@ module Darlingtonia
|
|
17
17
|
# @return [String] an id number associated with the process that kicked off this import run
|
18
18
|
attr_accessor :batch_id
|
19
19
|
|
20
|
+
# @!attribute [rw] deduplication_field
|
21
|
+
# @return [String] if this is set, look for records with a match in this field
|
22
|
+
# and update the metadata instead of creating a new record. This will NOT re-import file attachments.
|
23
|
+
attr_accessor :deduplication_field
|
24
|
+
|
20
25
|
# @!attribute [rw] success_count
|
21
26
|
# @return [Integer] the number of records this importer has successfully created
|
22
27
|
attr_accessor :success_count
|
@@ -30,15 +35,21 @@ module Darlingtonia
|
|
30
35
|
# the CSV/mapper. These are useful for logging
|
31
36
|
# and tracking the output of an import job for
|
32
37
|
# a given collection, user, or batch.
|
38
|
+
# If a deduplication_field is provided, the system will
|
39
|
+
# look for existing works with that field and matching
|
40
|
+
# value and will update the record instead of creating a new record.
|
33
41
|
# @example
|
34
42
|
# attributes: { collection_id: '123',
|
35
43
|
# depositor_id: '456',
|
36
|
-
# batch_id: '789'
|
44
|
+
# batch_id: '789',
|
45
|
+
# deduplication_field: 'legacy_id'
|
46
|
+
# }
|
37
47
|
def initialize(error_stream: Darlingtonia.config.default_error_stream,
|
38
48
|
info_stream: Darlingtonia.config.default_info_stream,
|
39
49
|
attributes: {})
|
40
50
|
self.collection_id = attributes[:collection_id]
|
41
51
|
self.batch_id = attributes[:batch_id]
|
52
|
+
self.deduplication_field = attributes[:deduplication_field]
|
42
53
|
set_depositor(attributes[:depositor_id])
|
43
54
|
@success_count = 0
|
44
55
|
@failure_count = 0
|
@@ -55,12 +66,27 @@ module Darlingtonia
|
|
55
66
|
self.depositor = user
|
56
67
|
end
|
57
68
|
|
69
|
+
##
|
70
|
+
# @param record [ImportRecord]
|
71
|
+
# @return [ActiveFedora::Base]
|
72
|
+
# Search for any existing records that match on the deduplication_field
|
73
|
+
# Look up the existing work, if any, that the incoming record should update.
#
# @param record [ImportRecord] the incoming record; its mapper supplies the
#   value of the deduplication field
# @return [Object, nil] the single match returned by `import_type.where`, or
#   nil when no deduplication field is configured, the record does not expose
#   that field, the field has no value, or nothing matches
# @raise [RuntimeError] when more than one existing record matches
def find_existing_record(record)
  return unless deduplication_field
  return unless record.respond_to?(deduplication_field)

  # Read the mapped value once; public_send mirrors the respond_to? check above
  # and cannot reach private methods through a configurable field name.
  value = record.mapper.public_send(deduplication_field)

  # A mapper may hand back nil for an unmapped column; treat it like an empty
  # value instead of raising NoMethodError on nil.empty?.
  return if value.nil? || (value.respond_to?(:empty?) && value.empty?)

  existing_records = import_type.where(deduplication_field.to_sym => value.to_s)
  if existing_records.count > 1
    raise "More than one record matches deduplication_field #{deduplication_field} with value #{value}"
  end

  existing_records.first
end
|
81
|
+
|
58
82
|
##
|
59
83
|
# @param record [ImportRecord]
|
60
84
|
#
|
61
85
|
# @return [void]
|
62
86
|
def import(record:)
|
63
|
-
|
87
|
+
existing_record = find_existing_record(record)
|
88
|
+
create_for(record: record) unless existing_record
|
89
|
+
update_for(existing_record: existing_record, update_record: record) if existing_record
|
64
90
|
rescue Faraday::ConnectionFailed, Ldp::HttpError => e
|
65
91
|
error_stream << e
|
66
92
|
rescue RuntimeError => e
|
@@ -153,6 +179,34 @@ module Darlingtonia
|
|
153
179
|
|
154
180
|
private
|
155
181
|
|
182
|
+
# Update an existing object using the Hyrax actor stack
|
183
|
+
# We assume the object was updated as expected if the actor stack returns true.
|
184
|
+
# Apply an incoming record's attributes to an existing work via the Hyrax actor stack.
# When the actor stack's update returns true the success count is incremented;
# otherwise every error on the existing work is written to the error stream and
# the failure count is incremented.
#
# @param existing_record [Object] the work matched on the deduplication field
# @param update_record [ImportRecord] the incoming record supplying new attributes
# @return [Integer] the incremented success or failure count
def update_for(existing_record:, update_record:)
  incoming_label = update_record.respond_to?(deduplication_field) ? update_record.public_send(deduplication_field) : update_record
  info_stream << "event: record_update_started, batch_id: #{batch_id}, collection_id: #{collection_id}, " \
                 "#{deduplication_field}: #{incoming_label}"

  attrs = update_record.attributes.merge(depositor: @depositor.user_key)
  attrs = attrs.merge(member_of_collections_attributes: { '0' => { id: collection_id } }) if collection_id

  # Ensure nothing is passed in the files field,
  # since this is reserved for Hyrax and is where uploaded_files will be attached
  attrs.delete(:files)

  # based_near is replaced by the nested-attributes form built by based_near_attributes.
  based_near = attrs.delete(:based_near)
  attrs = attrs.merge(based_near_attributes: based_near_attributes(based_near)) unless based_near.nil? || based_near.empty?

  actor_env = Hyrax::Actors::Environment.new(existing_record, ::Ability.new(@depositor), attrs)

  if Hyrax::CurationConcern.actor.update(actor_env)
    existing_label = existing_record.respond_to?(deduplication_field) ? existing_record.public_send(deduplication_field) : existing_record
    info_stream << "event: record_updated, batch_id: #{batch_id}, record_id: #{existing_record.id}, " \
                   "collection_id: #{collection_id}, #{deduplication_field}: #{existing_label}"
    @success_count += 1
  else
    # Fall back to the whole attribute hash when there is no title to identify the record.
    title = attrs[:title] || attrs
    existing_record.errors.each do |attr, msg|
      error_stream << "event: validation_failed, batch_id: #{batch_id}, collection_id: #{collection_id}, " \
                      "attribute: #{attr.capitalize}, message: #{msg}, record_title: #{title}"
    end
    @failure_count += 1
  end
end
|
209
|
+
|
156
210
|
# Create an object using the Hyrax actor stack
|
157
211
|
# We assume the object was created as expected if the actor stack returns true.
|
158
212
|
def create_for(record:)
|
data/lib/darlingtonia/version.rb
CHANGED