darlingtonia 3.0.5 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6be5042f9ac88ade67585052eae9cc2dfa30855be0356bbee2f2895150c5d9a5
4
- data.tar.gz: 0466ed8a2f5fd289c591ca230f987d294b3710cac3becc4a0a3194c6c73ea31f
3
+ metadata.gz: ee7ce1d03349bd1c624e7ad3bd3c12e92db474f77559444f15a36a34f444a8aa
4
+ data.tar.gz: bbfcdc4b7b0cd0ac1b9cfc83e30cb03d19d626e8664a05a7be762d7ea3e721ec
5
5
  SHA512:
6
- metadata.gz: 18b96a5efae6c3f30c5965c88cd35e974db44e1abd155ab01fb19cdd1b53719d24c6ed4e0e29b45c4744f7534bd18fe18a7c674d4e9d1bc2cb92f07e7a200233
7
- data.tar.gz: 7a3a556e44d1ca0552b84f3db46abe19ec35260c2c6ddfbd0fe370341748e927e1345162bac1e5aba939a9b78343d8a4b1b85d0b2929991729d36ba07e0d6553
6
+ metadata.gz: bc8ca923806a61c2b99c2c8bd9d97942464a539afb637c3f4206ce02faf397d9bd1e893f65c89153816eeee7bb5c55a0e9a25f59820462bbf6665606504cb3c0
7
+ data.tar.gz: 067ffa15cfccfb7f045e33f015f1e7dd3f07526214153e953aab1427723f9436a03e12ab1e927306c05004178f9e62260990d3ba639fd95ca761209a5e918163
@@ -21,13 +21,16 @@ Metrics/BlockLength:
21
21
  - 'spec/**/*'
22
22
  - 'lib/darlingtonia/spec/**/*'
23
23
 
24
+ Metrics/ClassLength:
25
+ Enabled: false
26
+
24
27
  Metrics/CyclomaticComplexity:
25
28
  Exclude:
26
29
  - lib/darlingtonia/hyrax_basic_metadata_mapper.rb
30
+ - lib/darlingtonia/hyrax_record_importer.rb
27
31
 
28
32
  Metrics/LineLength:
29
- Exclude:
30
- - 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
33
+ Enabled: false
31
34
 
32
35
  Metrics/MethodLength:
33
36
  Exclude:
@@ -35,6 +38,10 @@ Metrics/MethodLength:
35
38
  - 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
36
39
  - lib/darlingtonia/hyrax_record_importer.rb
37
40
 
41
+ Metrics/PerceivedComplexity:
42
+ Exclude:
43
+ - lib/darlingtonia/hyrax_record_importer.rb
44
+
38
45
  Naming/AccessorMethodName:
39
46
  Exclude:
40
47
  - lib/darlingtonia/hyrax_record_importer.rb
@@ -1,6 +1,13 @@
1
+ 3.1.0 - Tue Feb 26, 2019
2
+
3
+ New Feature: `HyraxRecordImporter` now accepts a `deduplication_field` in the
4
+ attributes hash it receives when it is created. If a `deduplication_field`
5
+ is provided, the system will look for existing works with that field and matching
6
+ value and will update the record instead of creating a new record.
7
+
1
8
  3.0.5 - Tue Feb 26, 2019
2
9
 
3
- When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
10
+ When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
4
11
  If user_key isn't found, fall back to querying by User.id for backward compatibility.
5
12
 
6
13
  3.0.4 - Fri Feb 22, 2019
data/README.md CHANGED
@@ -35,8 +35,16 @@ class MyImporter
35
35
  end
36
36
 
37
37
  def import
38
+ attrs = {
39
+ collection_id: collection_id, # pass a collection id to the record importer and all records will be added to that collection
40
+ depositor_id: depositor_id, # pass a Hyrax user_key here and that Hyrax user will own all objects created during this import
41
+ deduplication_field: 'identifier' # pass a field with a persistent identifier (e.g., ARK) and it will check to see if a record with that identifier already
42
+ } # exists, update its metadata if so, and only if it doesn't find a record with that identifier will it make a new object.
43
+
38
44
  file = File.open(@csv_file)
39
- Darlingtonia::Importer.new(parser: Darlingtonia::CsvParser.new(file: file), record_importer: Darlingtonia::HyraxRecordImporter.new).import
45
+ parser = Darlingtonia::CsvParser.new(file: file)
46
+ record_importer = Darlingtonia::HyraxRecordImporter.new(attributes: attrs)
47
+ Darlingtonia::Importer.new(parser: parser, record_importer: record_importer).import
40
48
  file.close # unless a block is passed to File.open, the file must be explicitly closed
41
49
  end
42
50
  end
@@ -17,6 +17,11 @@ module Darlingtonia
17
17
  # @return [String] an id number associated with the process that kicked off this import run
18
18
  attr_accessor :batch_id
19
19
 
20
+ # @!attribute [rw] deduplication_field
21
+ # @return [String] if this is set, look for records with a match in this field
22
+ # and update the metadata instead of creating a new record. This will NOT re-import file attachments.
23
+ attr_accessor :deduplication_field
24
+
20
25
  # @!attribute [rw] success_count
21
26
  # @return [Integer] the number of records this importer has successfully created
22
27
  attr_accessor :success_count
@@ -30,15 +35,21 @@ module Darlingtonia
30
35
  # the CSV/mapper. These are useful for logging
31
36
  # and tracking the output of an import job for
32
37
  # a given collection, user, or batch.
38
+ # If a deduplication_field is provided, the system will
39
+ # look for existing works with that field and matching
40
+ # value and will update the record instead of creating a new record.
33
41
  # @example
34
42
  # attributes: { collection_id: '123',
35
43
  # depositor_id: '456',
36
- # batch_id: '789' }
44
+ # batch_id: '789',
45
+ # deduplication_field: 'legacy_id'
46
+ # }
37
47
  def initialize(error_stream: Darlingtonia.config.default_error_stream,
38
48
  info_stream: Darlingtonia.config.default_info_stream,
39
49
  attributes: {})
40
50
  self.collection_id = attributes[:collection_id]
41
51
  self.batch_id = attributes[:batch_id]
52
+ self.deduplication_field = attributes[:deduplication_field]
42
53
  set_depositor(attributes[:depositor_id])
43
54
  @success_count = 0
44
55
  @failure_count = 0
@@ -55,12 +66,27 @@ module Darlingtonia
55
66
  self.depositor = user
56
67
  end
57
68
 
##
# Search for an existing record that matches on the deduplication_field.
#
# Returns nil (so the caller creates a new record) when no
# deduplication_field is configured, when the record does not respond to
# that field, or when the mapped value is nil or empty.
#
# @param record [ImportRecord]
# @return [ActiveFedora::Base, nil] the single matching record, if any
# @raise [RuntimeError] if more than one existing record matches
def find_existing_record(record)
  return unless deduplication_field
  return unless record.respond_to?(deduplication_field)

  # Read the mapped value once and reuse it for the guard, the query and the
  # error message.
  value = record.mapper.send(deduplication_field)
  # A nil value is treated like an empty one: there is nothing to match on,
  # and calling #empty? on nil would raise NoMethodError.
  return if value.nil? || value.empty?

  existing_records = import_type.where(deduplication_field.to_sym => value.to_s)
  raise "More than one record matches deduplication_field #{deduplication_field} with value #{value}" if existing_records.count > 1
  existing_records.first
end
81
+
58
82
  ##
59
83
  # @param record [ImportRecord]
60
84
  #
61
85
  # @return [void]
62
86
  def import(record:)
63
- create_for(record: record)
87
+ existing_record = find_existing_record(record)
88
+ create_for(record: record) unless existing_record
89
+ update_for(existing_record: existing_record, update_record: record) if existing_record
64
90
  rescue Faraday::ConnectionFailed, Ldp::HttpError => e
65
91
  error_stream << e
66
92
  rescue RuntimeError => e
@@ -153,6 +179,34 @@ module Darlingtonia
153
179
 
154
180
  private
155
181
 
# Update an existing object using the Hyrax actor stack.
# We assume the object was updated as expected if the actor stack returns true.
# On success @success_count is incremented; otherwise each validation error on
# the existing record is written to the error stream and @failure_count is
# incremented.
#
# @param existing_record [ActiveFedora::Base] the already-persisted work to update
# @param update_record [ImportRecord] the incoming record whose attributes are applied
# @return [void]
def update_for(existing_record:, update_record:)
  info_stream << "event: record_update_started, batch_id: #{batch_id}, collection_id: #{collection_id}, #{deduplication_field}: #{update_record.respond_to?(deduplication_field) ? update_record.send(deduplication_field) : update_record}"
  # merge returns a new hash, so the incoming record's attributes are not mutated below.
  attrs = update_record.attributes.merge(depositor: @depositor.user_key)
  attrs = attrs.merge(member_of_collections_attributes: { '0' => { id: collection_id } }) if collection_id
  # Ensure nothing is passed in the files field,
  # since this is reserved for Hyrax and is where uploaded_files will be attached
  attrs.delete(:files)
  # based_near is passed to the actor stack as based_near_attributes instead.
  based_near = attrs.delete(:based_near)
  attrs = attrs.merge(based_near_attributes: based_near_attributes(based_near)) unless based_near.nil? || based_near.empty?
  actor_env = Hyrax::Actors::Environment.new(existing_record,
                                             ::Ability.new(@depositor),
                                             attrs)
  if Hyrax::CurationConcern.actor.update(actor_env)
    info_stream << "event: record_updated, batch_id: #{batch_id}, record_id: #{existing_record.id}, collection_id: #{collection_id}, #{deduplication_field}: #{existing_record.respond_to?(deduplication_field) ? existing_record.send(deduplication_field) : existing_record}"
    @success_count += 1
  else
    existing_record.errors.each do |attr, msg|
      # Fall back to the whole attribute hash when no title is available.
      error_stream << "event: validation_failed, batch_id: #{batch_id}, collection_id: #{collection_id}, attribute: #{attr.capitalize}, message: #{msg}, record_title: #{attrs[:title] || attrs}"
    end
    @failure_count += 1
  end
end
209
+
156
210
  # Create an object using the Hyrax actor stack
157
211
  # We assume the object was created as expected if the actor stack returns true.
158
212
  def create_for(record:)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Darlingtonia
4
- VERSION = '3.0.5'
4
+ VERSION = '3.1.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: darlingtonia
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Data Curation Experts