dmp-dynamo_adapter 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8393e469f04b0d19fc991c950216019c12fc01dca4d7a654ebea2322d422358
4
- data.tar.gz: b4bccc6f70c5673c4e0477321545641bbf192448f5daff315a86dfae24851e42
3
+ metadata.gz: 73072fe825acd0df7976f0b387ca8a441beb225ff387cc8390d8e3c289a028b5
4
+ data.tar.gz: f6e31294429df6ca86914a4f7ac8157fee8684ed03fac7fec521858384d649de
5
5
  SHA512:
6
- metadata.gz: 2b65cdc9b0c236695ee3c8d91784e4cd3a285def899e57089db5bc3cef3cb6407b68f4909d0ffd733499361d5bc70872e305c4174643b7b45a096f4285d7a05e
7
- data.tar.gz: 4fcdc51c40c03eea486d488a66904712223690526156eee1719ea4fe80a0ced5cd8f415e6fb09c9e855d292234ea8ade54cc253440de79f55af3dba29691d195
6
+ metadata.gz: 703bb8718f53211b4ee6f4d31fd34ada4a955ee316d86b7a78049ad760039c75e351d5a808ce41bb4d2983ee0c21b9372af3089cd322247398cbda75f3516491
7
+ data.tar.gz: c67e27dc354b2206a86f334e4c9a181208587b4be885e71f3644543a3f5219e3afe97a080f2386a9dec3dd633d4dcd7c1ec12c53324f470ffc39b6584700b8e1
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require 'dmp/metadata_handler'
5
+
6
module Dmp
  # Methods that handle PK generation: minting candidate DMP IDs, normalising
  # DOI-like identifiers and converting between a DMP ID and its :PK form.
  class DmpIdHandler
    # Two digits, a dot, five digits, a slash and then a suffix made of
    # letters, digits, '/', '_' or '.'
    DOI_REGEX = %r{[0-9]{2}\.[0-9]{5}/[a-zA-Z0-9/_.]+}.freeze

    # Number of random candidates tried before preregistration gives up
    MAX_PREREGISTER_ATTEMPTS = 10

    class << self
      # The DMP_ID_BASE_URL environment variable, guaranteed to end in '/'.
      #
      # @return [String]
      # @raise [KeyError] when DMP_ID_BASE_URL is not set
      def dmp_id_base_url
        base = ENV.fetch('DMP_ID_BASE_URL')
        base.end_with?('/') ? base : "#{base}/"
      end

      # Preassign a DMP ID that will later be sent to the DOI minting authority (EZID)
      #
      # Builds "<DMP_ID_SHOULDER>.<8 uppercase hex chars>" candidates and returns
      # the first one whose :PK is not already present.
      #
      # NOTE(review): `find_by_pk` is not defined in this class; it must be
      # supplied elsewhere (presumably by the DynamoDB adapter) - confirm.
      #
      # @return [String, nil] the new :PK (PK prefix + base URL + id), or nil
      #   when no unused id was found within MAX_PREREGISTER_ATTEMPTS tries
      def preregister_dmp_id
        MAX_PREREGISTER_ATTEMPTS.times do
          candidate = "#{ENV['DMP_ID_SHOULDER']}.#{SecureRandom.hex(4).upcase}"
          p_key = "#{Dmp::MetadataHandler::PK_DMP_PREFIX}#{dmp_id_base_url}#{candidate}"
          # Look up the exact key that would be handed back to the caller
          return p_key if find_by_pk(p_key: p_key).empty?
        end
        # Something went wrong and it was unable to identify a unique id
        nil
      end

      # Format the DMP ID in the way we want it
      #
      # @param value [String] a DOI, optionally prefixed (e.g. 'doi:') or a URL
      # @return [String, nil] nil when no DOI is found in :value, :value itself
      #   when it already starts with 'http', otherwise base URL + DOI
      def format_dmp_id(value:)
        text = value.to_s
        # The match always begins with a digit, so any 'doi:' prefix or leading
        # slash in :value is already excluded from it
        doi = text[DOI_REGEX]
        return nil if doi.nil?
        # If it's already a URL, return it as is
        return text if text.start_with?('http')

        "#{dmp_id_base_url}#{doi}"
      end

      # Append the :PK prefix to the :dmp_id
      #
      # @param json [Hash, nil] a dmp_id object with an 'identifier' entry
      # @return [String, nil] the :PK, or nil when there is no usable DOI
      def dmp_id_to_pk(json:)
        return nil if json.nil? || json['identifier'].nil?

        # If it's a DOI format it correctly
        dmp_id = format_dmp_id(value: json['identifier'].to_s)
        return nil if dmp_id.nil?

        Dmp::MetadataHandler.append_pk_prefix(dmp: dmp_id)
      end

      # Derive the DMP ID by removing the :PK prefix
      #
      # @param p_key [String, nil]
      # @return [Hash, nil] { type: 'doi', identifier: <PK without prefix> }
      def pk_to_dmp_id(p_key:)
        return nil if p_key.nil?

        { type: 'doi', identifier: Dmp::MetadataHandler.remove_pk_prefix(dmp: p_key) }
      end
    end
  end
end
@@ -3,8 +3,8 @@
3
3
  require 'json'
4
4
  require 'aws-sdk-dynamodb'
5
5
 
6
- require_relative './dmp_id_handler'
7
- require_relative './metadata_handler'
6
+ require 'dmp/dmp_id_handler'
7
+ require 'dmp/metadata_handler'
8
8
 
9
9
  module Dmp
10
10
  # DMP adapter for an AWS DynamoDB Table
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dmp/dmp_id_handler'
4
+
5
module Dmp
  # Handles alterations to DMP metadata elements: comparing versions, adding
  # and removing :PK prefixes, annotating incoming JSON with the DMPHub
  # bookkeeping attributes and splicing an update onto an existing version.
  class MetadataHandler
    PK_DMP_PREFIX = 'DMP#'.freeze
    PK_PROVENANCE_PREFIX = 'PROVENANCE#'.freeze

    SK_PREFIX = 'VERSION#'.freeze

    LATEST_VERSION = "#{SK_PREFIX}latest".freeze
    TOMBSTONE_VERSION = "#{SK_PREFIX}tombstone".freeze

    # Attributes that are disregarded when two versions are compared
    IGNORED_ON_COMPARE = %w[SK dmphub_modification_day dmphub_updated_at dmphub_created_at].freeze

    class << self
      # Determine if the objects are equal. This ignores the :SK,
      # :dmphub_modification_day, :dmphub_updated_at and :dmphub_created_at
      # attributes (an ignored attribute is only dropped when its value is not nil).
      #
      # @param dmp_a [Hash, nil]
      # @param dmp_b [Hash, nil]
      # @return [Boolean]
      def eql(dmp_a:, dmp_b:)
        dmp_a = {} if dmp_a.nil?
        dmp_b = {} if dmp_b.nil?
        # They are not equal if the :PK do not match (and aren't blank)
        return false if !dmp_a['PK'].nil? && !dmp_b['PK'].nil? && dmp_a['PK'] != dmp_b['PK']

        left = deep_copy(obj: dmp_a)
        right = deep_copy(obj: dmp_b)
        IGNORED_ON_COMPARE.each do |key|
          left.delete(key) unless left[key].nil?
          right.delete(key) unless right[key].nil?
        end
        left == right
      end

      # Append the PK prefix for the object
      #
      # @return [String, nil] nil when both or neither of :dmp / :provenance are given
      def append_pk_prefix(dmp: nil, provenance: nil)
        # If all the :PK types were passed return nil because we only want one
        return nil if !dmp.nil? && !provenance.nil?

        return "#{PK_DMP_PREFIX}#{remove_pk_prefix(dmp: dmp)}" unless dmp.nil?
        return "#{PK_PROVENANCE_PREFIX}#{remove_pk_prefix(provenance: provenance)}" unless provenance.nil?

        nil
      end

      # Strip off the PK prefix (every occurrence of it within the value)
      #
      # @return [String, nil] nil when both or neither of :dmp / :provenance are given
      def remove_pk_prefix(dmp: nil, provenance: nil)
        # If all the :PK types were passed return nil because we only want one
        return nil if !dmp.nil? && !provenance.nil?

        return dmp.gsub(PK_DMP_PREFIX, '') unless dmp.nil?
        return provenance.gsub(PK_PROVENANCE_PREFIX, '') unless provenance.nil?

        nil
      end

      # Add all attributes necessary for the DMPHub
      #
      # NOTE(review): an earlier revision computed the :PK implied by the
      # json's :dmp_id here with the comment "Fail if the :PK does not match
      # the :dmp_id", but never acted on the result. That unused computation
      # was removed; a mismatch is still NOT rejected - confirm the intent.
      #
      # @param provenance [String] id recorded as :dmphub_provenance_id when the json has none
      # @param p_key [String] the :PK used when the json does not carry one
      # @param json [Hash] the DMP metadata (left unmodified)
      # @return [Hash, nil] an annotated copy, or nil when any argument is nil
      # rubocop:disable Metrics/AbcSize
      def annotate_json(provenance:, p_key:, json:)
        return nil if provenance.nil? || p_key.nil? || json.nil?

        # One clock reading so the day and both timestamps always agree
        # NOTE(review): Time#iso8601 relies on the 'time' stdlib extension on
        # older Rubies; presumably loaded by a dependency - confirm.
        now = Time.now
        annotated = deep_copy(obj: json)
        annotated['PK'] ||= p_key
        annotated['SK'] = LATEST_VERSION

        # Ensure that the :dmp_id matches the :PK
        annotated['dmp_id'] = Dmp::DmpIdHandler.pk_to_dmp_id(p_key: annotated['PK'])

        # Update the modification timestamps (%m is the month; %M would be minutes)
        annotated['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
        annotated['dmphub_updated_at'] = now.iso8601
        # Only add the Creation date if it is blank
        annotated['dmphub_created_at'] = now.iso8601 if json['dmphub_created_at'].nil?
        return annotated unless json['dmphub_provenance_id'].nil?

        annotated['dmphub_provenance_id'] = provenance
        supplied_id = json['dmp_id'] || {}
        return annotated if !annotated['dmphub_provenance_identifier'].nil? || supplied_id['identifier'].nil?

        # Record the original Provenance system's identifier (as a copy so the
        # result shares no state with the caller's json)
        annotated['dmphub_provenance_identifier'] = deep_copy(obj: supplied_id)
        annotated
      end
      # rubocop:enable Metrics/AbcSize

      # Process an update on the DMP metadata
      #
      # NOTE(review): when the owner is the updater the NEW version is passed
      # as :base and the ORIGINAL as :mods. splice_for_owner then starts from
      # :base and takes the funding / related identifiers from :mods. That
      # direction is kept exactly as it was; verify it is the intended one.
      #
      # @param updater [String] provenance id of the system making the change
      # @param original_version [Hash, nil] the currently stored version
      # @param new_version [Hash] the incoming version
      # @return [Hash, nil] the version that should be stored
      def process_update(updater:, original_version:, new_version:)
        return nil if updater.nil? || new_version.nil?
        # If there is no :original_version then assume it's a new DMP
        return new_version if original_version.nil?
        # does not allow tombstoned DMPs to be updated
        return original_version if original_version['SK'] == TOMBSTONE_VERSION
        return original_version if eql(dmp_a: original_version, dmp_b: new_version)

        owner = original_version['dmphub_provenance_id']

        # If the system of provenance is making the change then just use the
        # new version as the base and then splice in any mods made by others.
        # Keywords are passed explicitly: Ruby 3 no longer converts a
        # positional Hash into keyword arguments.
        if owner == updater
          return splice_for_owner(owner: owner, updater: updater, base: new_version, mods: original_version)
        end

        # Otherwise use the original version as the base and then update the
        # metadata owned by the updater system
        splice_for_others(owner: owner, updater: updater, base: original_version, mods: new_version)
      end

      private

      # The funding list of the first project of :dmp, or [] when absent
      def funding_for(dmp:)
        project = (dmp['project'] || []).first || {}
        project['funding'] || []
      end

      # Splice changes from other systems back onto the system of provenance's changes
      #
      # Returns :mods untouched when the JSON of :base contains no
      # "dmphub_provenance_id":"PROVENANCE#..." entry. Otherwise returns a copy
      # of :base whose funding (and related identifiers, when :mods has them)
      # are those of :mods followed by the entries of :base that carry a
      # :dmphub_provenance_id.
      # rubocop:disable Metrics/AbcSize
      def splice_for_owner(owner:, updater:, base:, mods:)
        return base if owner.nil? || updater.nil? || mods.nil?
        return mods if base.nil?

        provenance_regex = %r{"dmphub_provenance_id":"#{PK_PROVENANCE_PREFIX}[a-zA-Z\-_]+"}
        # Just return it as is if there are no mods by other systems
        return mods unless base.to_json.match?(provenance_regex)

        spliced = deep_copy(obj: base)
        cloned_mods = deep_copy(obj: mods)

        # ensure that the :project is defined
        spliced['project'] = [{}] if spliced['project'].nil? || spliced['project'].empty?
        project = spliced['project'].first
        # get all the new funding and retain other system's funding metadata
        # (concatenated, so the list stays flat)
        tagged_fundings = (project['funding'] || []).reject { |fund| fund['dmphub_provenance_id'].nil? }
        project['funding'] = funding_for(dmp: cloned_mods) + tagged_fundings
        return spliced if cloned_mods['dmproadmap_related_identifiers'].nil?

        # process related_identifiers the same way
        tagged_relateds = (spliced['dmproadmap_related_identifiers'] || []).reject do |related|
          related['dmphub_provenance_id'].nil?
        end
        spliced['dmproadmap_related_identifiers'] = cloned_mods['dmproadmap_related_identifiers'] + tagged_relateds
        spliced
      end
      # rubocop:enable Metrics/AbcSize

      # Splice the updater's funding and related identifier changes onto a
      # copy of :base; every other attribute of :base is kept as it is.
      def splice_for_others(owner:, updater:, base:, mods:)
        return base if owner.nil? || updater.nil? || base.nil? || mods.nil?

        spliced = deep_copy(obj: base)
        # Make sure there is a project to hang the funding on
        spliced['project'] = [{}] if spliced['project'].nil? || spliced['project'].empty?

        # process funding
        spliced['project'].first['funding'] = update_funding(
          updater: updater, base: funding_for(dmp: spliced), mods: funding_for(dmp: mods)
        )
        mod_relateds = mods['dmproadmap_related_identifiers'] || []
        return spliced if mod_relateds.empty?

        # process related_identifiers
        spliced['dmproadmap_related_identifiers'] = update_related_identifiers(
          updater: updater, base: spliced['dmproadmap_related_identifiers'] || [], mods: mod_relateds
        )
        spliced
      end

      # Splice funding changes
      #
      # Each entry of :mods that has a status or a grant id replaces the most
      # recent 'applied'/'planned' entry of :base for the same funder (keeping
      # that entry's name) or is appended when there is none. An entry with a
      # grant id is marked 'granted' and its grant id is stamped with the
      # updater and the current time. Neither :base nor :mods is modified.
      # rubocop:disable Metrics/AbcSize
      def update_funding(updater:, base:, mods:)
        return base if updater.nil? || mods.nil? || mods.empty?

        spliced = base.nil? ? [] : deep_copy(obj: base)
        mods.each do |funding|
          # Ignore it if it has no status or grant id
          next if funding['funding_status'].nil? && funding['grant_id'].nil?

          # See if there is an existing funding record for the funder that's waiting on an update
          waiting = spliced.select do |orig|
            !orig['funder_id'].nil? &&
              orig['funder_id'] == funding['funder_id'] &&
              %w[applied planned].include?(orig['funding_status'])
          end
          # Always grab the most current
          current = waiting.max_by { |orig| orig['dmphub_created_at'].to_s }

          # Out with the old and in with the new
          spliced.delete(current) unless current.nil?
          # Work on a copy so the caller's :mods entry is left untouched
          item = deep_copy(obj: funding)
          # retain the original name
          item['name'] = current['name'] unless current.nil?

          unless item['grant_id'].nil?
            item['funding_status'] = 'granted'
            # Add the provenance to the entry
            item['grant_id']['dmphub_provenance_id'] = updater
            item['grant_id']['dmphub_created_at'] = Time.now.iso8601
          end
          spliced << item
        end
        spliced
      end
      # rubocop:enable Metrics/AbcSize

      # Splice related identifier changes
      #
      # Remove the updater's existing related identifiers and replace them
      # with copies of :mods, each tagged with the updater.
      def update_related_identifiers(updater:, base:, mods:)
        return base if updater.nil? || mods.nil? || mods.empty?

        kept = (base || []).reject { |related| related['dmphub_provenance_id'] == updater }
        # Add the provenance to the entry
        tagged = deep_copy(obj: mods).each { |related| related['dmphub_provenance_id'] = updater }
        deep_copy(obj: kept) + tagged
      end

      # Ruby's clone/dup methods do not clone/dup the children, so we need to do it here
      #
      # Arrays and Hashes are rebuilt recursively and everything else is
      # dup'ed. String and Symbol hash keys are reused, other keys are copied.
      # A fresh Hash is built rather than editing a dup in place, because
      # adding keys to a Hash while iterating over it raises.
      def deep_copy(obj:)
        case obj
        when Array
          obj.map { |item| deep_copy(obj: item) }
        when Hash
          obj.each_with_object({}) do |(key, value), copy|
            reusable = key.is_a?(String) || key.is_a?(Symbol)
            copy[reusable ? key : deep_copy(obj: key)] = deep_copy(obj: value)
          end
        else
          obj.dup
        end
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmp-dynamo_adapter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - briri
@@ -58,7 +58,9 @@ executables: []
58
58
  extensions: []
59
59
  extra_rdoc_files: []
60
60
  files:
61
+ - lib/dmp/dmp_id_handler.rb
61
62
  - lib/dmp/dynamo_adapter.rb
63
+ - lib/dmp/metadata_handler.rb
62
64
  homepage: https://github.com/CDLUC3/dmphub-v2/tree/main/gems/dmp-dynamo_adapter
63
65
  licenses:
64
66
  - MIT