darlingtonia 3.0.5 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6be5042f9ac88ade67585052eae9cc2dfa30855be0356bbee2f2895150c5d9a5
4
- data.tar.gz: 0466ed8a2f5fd289c591ca230f987d294b3710cac3becc4a0a3194c6c73ea31f
3
+ metadata.gz: ee7ce1d03349bd1c624e7ad3bd3c12e92db474f77559444f15a36a34f444a8aa
4
+ data.tar.gz: bbfcdc4b7b0cd0ac1b9cfc83e30cb03d19d626e8664a05a7be762d7ea3e721ec
5
5
  SHA512:
6
- metadata.gz: 18b96a5efae6c3f30c5965c88cd35e974db44e1abd155ab01fb19cdd1b53719d24c6ed4e0e29b45c4744f7534bd18fe18a7c674d4e9d1bc2cb92f07e7a200233
7
- data.tar.gz: 7a3a556e44d1ca0552b84f3db46abe19ec35260c2c6ddfbd0fe370341748e927e1345162bac1e5aba939a9b78343d8a4b1b85d0b2929991729d36ba07e0d6553
6
+ metadata.gz: bc8ca923806a61c2b99c2c8bd9d97942464a539afb637c3f4206ce02faf397d9bd1e893f65c89153816eeee7bb5c55a0e9a25f59820462bbf6665606504cb3c0
7
+ data.tar.gz: 067ffa15cfccfb7f045e33f015f1e7dd3f07526214153e953aab1427723f9436a03e12ab1e927306c05004178f9e62260990d3ba639fd95ca761209a5e918163
@@ -21,13 +21,16 @@ Metrics/BlockLength:
21
21
  - 'spec/**/*'
22
22
  - 'lib/darlingtonia/spec/**/*'
23
23
 
24
+ Metrics/ClassLength:
25
+ Enabled: false
26
+
24
27
  Metrics/CyclomaticComplexity:
25
28
  Exclude:
26
29
  - lib/darlingtonia/hyrax_basic_metadata_mapper.rb
30
+ - lib/darlingtonia/hyrax_record_importer.rb
27
31
 
28
32
  Metrics/LineLength:
29
- Exclude:
30
- - 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
33
+ Enabled: false
31
34
 
32
35
  Metrics/MethodLength:
33
36
  Exclude:
@@ -35,6 +38,10 @@ Metrics/MethodLength:
35
38
  - 'lib/darlingtonia/hyrax_basic_metadata_mapper.rb'
36
39
  - lib/darlingtonia/hyrax_record_importer.rb
37
40
 
41
+ Metrics/PerceivedComplexity:
42
+ Exclude:
43
+ - lib/darlingtonia/hyrax_record_importer.rb
44
+
38
45
  Naming/AccessorMethodName:
39
46
  Exclude:
40
47
  - lib/darlingtonia/hyrax_record_importer.rb
@@ -1,6 +1,13 @@
1
+ 3.1.0 - Tue Feb 26, 2019
2
+
3
+ New Feature: `HyraxRecordImporter` now accepts a `deduplication_field` in the
4
+ attributes hash it receives when it is created. If a `deduplication_field`
5
+ is provided, the system will look for an existing work whose value in that field
6
+ matches and will update that record instead of creating a new one.
7
+
1
8
  3.0.5 - Tue Feb 26, 2019
2
9
 
3
- When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
10
+ When setting the depositor, query for user with `find_by_user_key`, which is the Hyrax convention.
4
11
  If user_key isn't found, fall back to querying by User.id for backward compatibility.
5
12
 
6
13
  3.0.4 - Fri Feb 22, 2019
data/README.md CHANGED
@@ -35,8 +35,16 @@ class MyImporter
35
35
  end
36
36
 
37
37
  def import
38
+ attrs = {
39
+ collection_id: collection_id, # pass a collection id to the record importer and all records will be added to that collection
40
+ depositor_id: depositor_id, # pass a Hyrax user_key here and that Hyrax user will own all objects created during this import
41
+ deduplication_field: 'identifier' # pass a field with a persistent identifier (e.g., ARK) and it will check to see if a record with that identifier already
42
+ } # exists, update its metadata if so, and only if it doesn't find a record with that identifier will it make a new object.
43
+
38
44
  file = File.open(@csv_file)
39
- Darlingtonia::Importer.new(parser: Darlingtonia::CsvParser.new(file: file), record_importer: Darlingtonia::HyraxRecordImporter.new).import
45
+ parser = Darlingtonia::CsvParser.new(file: file)
46
+ record_importer = Darlingtonia::HyraxRecordImporter.new(attributes: attrs)
47
+ Darlingtonia::Importer.new(parser: parser, record_importer: record_importer).import
40
48
  file.close # unless a block is passed to File.open, the file must be explicitly closed
41
49
  end
42
50
  end
@@ -17,6 +17,11 @@ module Darlingtonia
17
17
  # @return [String] an id number associated with the process that kicked off this import run
18
18
  attr_accessor :batch_id
19
19
 
20
+ # @!attribute [rw] deduplication_field
21
+ # @return [String] if this is set, look for records with a match in this field
22
+ # and update the metadata instead of creating a new record. This will NOT re-import file attachments.
23
+ attr_accessor :deduplication_field
24
+
20
25
  # @!attribute [rw] success_count
21
26
  # @return [Integer] the number of records this importer has successfully created or updated
22
27
  attr_accessor :success_count
@@ -30,15 +35,21 @@ module Darlingtonia
30
35
  # the CSV/mapper. These are useful for logging
31
36
  # and tracking the output of an import job for
32
37
  # a given collection, user, or batch.
38
+ # If a deduplication_field is provided, the system will
39
+ # look for existing works with that field and matching
40
+ # value and will update the record instead of creating a new record.
33
41
  # @example
34
42
  # attributes: { collection_id: '123',
35
43
  # depositor_id: '456',
36
- # batch_id: '789' }
44
+ # batch_id: '789',
45
+ # deduplication_field: 'legacy_id'
46
+ # }
37
47
  def initialize(error_stream: Darlingtonia.config.default_error_stream,
38
48
  info_stream: Darlingtonia.config.default_info_stream,
39
49
  attributes: {})
40
50
  self.collection_id = attributes[:collection_id]
41
51
  self.batch_id = attributes[:batch_id]
52
+ self.deduplication_field = attributes[:deduplication_field]
42
53
  set_depositor(attributes[:depositor_id])
43
54
  @success_count = 0
44
55
  @failure_count = 0
@@ -55,12 +66,27 @@ module Darlingtonia
55
66
  self.depositor = user
56
67
  end
57
68
 
69
+ ##
70
+ # @param record [ImportRecord]
71
+ # @return [ActiveFedora::Base, nil]
72
+ # Search for any existing records that match on the deduplication_field
73
+ def find_existing_record(record)
74
+ return unless deduplication_field
75
+ return unless record.respond_to?(deduplication_field)
76
+ return if record.mapper.send(deduplication_field).empty?
77
+ existing_records = import_type.where("#{deduplication_field}": record.mapper.send(deduplication_field).to_s)
78
+ raise "More than one record matches deduplication_field #{deduplication_field} with value #{record.mapper.send(deduplication_field)}" if existing_records.count > 1
79
+ existing_records&.first
80
+ end
81
+
58
82
  ##
59
83
  # @param record [ImportRecord]
60
84
  #
61
85
  # @return [void]
62
86
  def import(record:)
63
- create_for(record: record)
87
+ existing_record = find_existing_record(record)
88
+ create_for(record: record) unless existing_record
89
+ update_for(existing_record: existing_record, update_record: record) if existing_record
64
90
  rescue Faraday::ConnectionFailed, Ldp::HttpError => e
65
91
  error_stream << e
66
92
  rescue RuntimeError => e
@@ -153,6 +179,34 @@ module Darlingtonia
153
179
 
154
180
  private
155
181
 
182
+ # Update an existing object using the Hyrax actor stack
183
+ # We assume the object was updated as expected if the actor stack returns true.
184
+ def update_for(existing_record:, update_record:)
185
+ info_stream << "event: record_update_started, batch_id: #{batch_id}, collection_id: #{collection_id}, #{deduplication_field}: #{update_record.respond_to?(deduplication_field) ? update_record.send(deduplication_field) : update_record}"
186
+ additional_attrs = {
187
+ depositor: @depositor.user_key
188
+ }
189
+ attrs = update_record.attributes.merge(additional_attrs)
190
+ attrs = attrs.merge(member_of_collections_attributes: { '0' => { id: collection_id } }) if collection_id
191
+ # Ensure nothing is passed in the files field,
192
+ # since this is reserved for Hyrax and is where uploaded_files will be attached
193
+ attrs.delete(:files)
194
+ based_near = attrs.delete(:based_near)
195
+ attrs = attrs.merge(based_near_attributes: based_near_attributes(based_near)) unless based_near.nil? || based_near.empty?
196
+ actor_env = Hyrax::Actors::Environment.new(existing_record,
197
+ ::Ability.new(@depositor),
198
+ attrs)
199
+ if Hyrax::CurationConcern.actor.update(actor_env)
200
+ info_stream << "event: record_updated, batch_id: #{batch_id}, record_id: #{existing_record.id}, collection_id: #{collection_id}, #{deduplication_field}: #{existing_record.respond_to?(deduplication_field) ? existing_record.send(deduplication_field) : existing_record}"
201
+ @success_count += 1
202
+ else
203
+ existing_record.errors.each do |attr, msg|
204
+ error_stream << "event: validation_failed, batch_id: #{batch_id}, collection_id: #{collection_id}, attribute: #{attr.capitalize}, message: #{msg}, record_title: record_title: #{attrs[:title] ? attrs[:title] : attrs}"
205
+ end
206
+ @failure_count += 1
207
+ end
208
+ end
209
+
156
210
  # Create an object using the Hyrax actor stack
157
211
  # We assume the object was created as expected if the actor stack returns true.
158
212
  def create_for(record:)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Darlingtonia
4
- VERSION = '3.0.5'
4
+ VERSION = '3.1.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: darlingtonia
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Data Curation Experts