dmp-dynamo_adapter 0.1.3 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c4c2a510b026b803c920fd6554a4196ca88ba8eaac1664adbe84205aac6f043a
4
- data.tar.gz: c2ac05ece5c801f7119e2544b88c860666cd1fd7010debe24191490f22c2cb5a
3
+ metadata.gz: 8fe879d1aefc8db1c3b1a8160171c2f3cd8f6ee1986d494abf69083af19a6133
4
+ data.tar.gz: 58fd4512a6c72bfaee714cfb19be7c9dd3f9a81b2df1742f5a7a35ff6b5fa045
5
5
  SHA512:
6
- metadata.gz: 2a5417148acb494de40019d4b323c5aae7f13c4c65699abdea86b47c6f98768a70f4cca8ac58bca6012ec92c23e6f4ef3c61c77f2c821bb0b6fcd64db4001282
7
- data.tar.gz: dba9ee2fc94ef98acebce7dc4f1954c0936afe7fb07bc5365b27b173da1b29a63e7c94ee06982e60a18542adc436c7a9a510cec76ea6c924f339bd94f879f12c
6
+ metadata.gz: 52c93f6323e77810246cceeba743a71866872931716f82b1dfe482ff613d1361ef596deed64ff8f22eec32ea38c7679d0c64916c7e1941d076eed1357537ac37
7
+ data.tar.gz: 01f5d12e5227007fc434520e135b82edbbb4601a517c6a9898cf510fe2dcca9163900145fb50b19d69a40cdc8e3bf81fd345c0e0361d24df2a650ab15a9bf2cb
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require 'dmp/metadata_handler'
5
+
6
module Dmp
  # Methods that handle PK generation and the conversion between DMP IDs
  # (DOIs) and partition keys.
  class DmpIdHandler
    # Matches a DOI-like token anywhere in a string: two digits, a dot, five
    # digits, a slash and then a suffix of letters, digits, '/', '_' or '.'.
    DOI_REGEX = %r{[0-9]{2}\.[0-9]{5}/[a-zA-Z0-9/_.]+}.freeze

    # How many random candidates :preregister_dmp_id tries before giving up
    MAX_PREREGISTER_ATTEMPTS = 10

    class << self
      # The value of ENV['DMP_ID_BASE_URL'], guaranteed to end with a '/'.
      # Raises NoMethodError when the variable is not set.
      def dmp_id_base_url
        base = ENV['DMP_ID_BASE_URL']
        base.end_with?('/') ? base : "#{base}/"
      end

      # Preassign a DMP ID that will later be sent to the DOI minting authority (EZID)
      #
      # Builds candidates of the form "<DMP_ID_SHOULDER>.<8 random hex chars>" and
      # returns the full PK ("DMP#<base url><candidate>") of the first candidate for
      # which :find_by_pk reports no existing record. Returns nil when every attempt
      # collided.
      #
      # NOTE(review): :find_by_pk is not defined in this class. It is presumably
      # supplied by the adapter; confirm it is reachable from here and that it
      # returns something that responds to :empty?.
      def preregister_dmp_id
        base = dmp_id_base_url

        MAX_PREREGISTER_ATTEMPTS.times do
          candidate = "#{ENV['DMP_ID_SHOULDER']}.#{SecureRandom.hex(4).upcase}"
          # Look up the exact PK we would hand out, not an empty placeholder
          p_key = "#{Dmp::MetadataHandler::PK_DMP_PREFIX}#{base}#{candidate}"
          return p_key if find_by_pk(p_key: p_key).empty?
        end

        # Something went wrong and it was unable to identify a unique id
        nil
      end

      # Format the DMP ID in the way we want it
      #
      # Returns nil when :value contains nothing that looks like a DOI, the value
      # itself when it is already a URL, and otherwise the DOI portion appended
      # to the base URL.
      def format_dmp_id(value:)
        text = value.to_s
        doi = text[DOI_REGEX]
        return nil if doi.nil?
        # If it's already a URL, return it as is
        return text if text.start_with?('http')

        # The regex match always starts with digits, so there is no 'doi:'
        # prefix or leading slash left to strip at this point
        "#{dmp_id_base_url}#{doi}"
      end

      # Append the :PK prefix to the :dmp_id
      #
      # :json is the `dmp_id` hash (it must have an 'identifier'). Returns nil
      # when the identifier is missing or is not a DOI.
      def dmp_id_to_pk(json:)
        return nil if json.nil? || json['identifier'].nil?

        # If it's a DOI format it correctly
        dmp_id = format_dmp_id(value: json['identifier'].to_s)
        return nil if dmp_id.nil?

        Dmp::MetadataHandler.append_pk_prefix(dmp: dmp_id)
      end

      # Derive the DMP ID by removing the :PK prefix
      #
      # Returns a hash with symbol keys :type and :identifier, or nil when
      # :p_key is nil.
      def pk_to_dmp_id(p_key:)
        return nil if p_key.nil?

        { type: 'doi', identifier: Dmp::MetadataHandler.remove_pk_prefix(dmp: p_key) }
      end
    end
  end
end
@@ -3,8 +3,8 @@
3
3
  require 'json'
4
4
  require 'aws-sdk-dynamodb'
5
5
 
6
- require 'dmp_id_handler'
7
- require 'metadata_handler'
6
+ require 'dmp/dmp_id_handler'
7
+ require 'dmp/metadata_handler'
8
8
 
9
9
  module Dmp
10
10
  # DMP adapter for an AWS DynamoDB Table
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
+
+ require 'dmp/dmp_id_handler'
4
+
5
module Dmp
  # Handles alterations to DMP metadata elements
  class MetadataHandler
    PK_DMP_PREFIX = 'DMP#'.freeze
    PK_PROVENANCE_PREFIX = 'PROVENANCE#'.freeze

    SK_PREFIX = 'VERSION#'.freeze

    LATEST_VERSION = "#{SK_PREFIX}latest".freeze
    TOMBSTONE_VERSION = "#{SK_PREFIX}tombstone".freeze

    # Top level attributes that :eql disregards when comparing two DMPs
    IGNORED_FOR_EQUALITY = %w[SK dmphub_modification_day dmphub_updated_at dmphub_created_at].freeze

    # Funding statuses that mark a funding entry as still waiting on an outcome
    PENDING_FUNDING_STATUSES = %w[applied planned].freeze

    class << self
      # Determine if the objects are equal. This ignores the :SK,
      # :dmphub_modification_day, :dmphub_updated_at and :dmphub_created_at
      # attributes. A nil DMP is treated as an empty Hash.
      def eql(dmp_a:, dmp_b:)
        dmp_a ||= {}
        dmp_b ||= {}
        # They are not equal if the :PK do not match (and aren't blank)
        return false if !dmp_a['PK'].nil? && !dmp_b['PK'].nil? && dmp_a['PK'] != dmp_b['PK']

        comparable(dmp_a) == comparable(dmp_b)
      end

      # Append the PK prefix for the object. Exactly one of :dmp or :provenance
      # must be given, otherwise nil is returned. An existing prefix is not doubled.
      def append_pk_prefix(dmp: nil, provenance: nil)
        # If all the :PK types were passed return nil because we only want one
        return nil if !dmp.nil? && !provenance.nil?

        return "#{PK_DMP_PREFIX}#{remove_pk_prefix(dmp: dmp)}" unless dmp.nil?
        return "#{PK_PROVENANCE_PREFIX}#{remove_pk_prefix(provenance: provenance)}" unless provenance.nil?

        nil
      end

      # Strip off the PK prefix. Exactly one of :dmp or :provenance must be
      # given, otherwise nil is returned.
      def remove_pk_prefix(dmp: nil, provenance: nil)
        # If all the :PK types were passed return nil because we only want one
        return nil if !dmp.nil? && !provenance.nil?

        return dmp.gsub(PK_DMP_PREFIX, '') unless dmp.nil?
        return provenance.gsub(PK_PROVENANCE_PREFIX, '') unless provenance.nil?

        nil
      end

      # Add all attributes necessary for the DMPHub
      #
      # Returns an annotated deep copy of :json (the input is not modified), or
      # nil when an argument is missing or when the :json already carries a :PK
      # but its :dmp_id does not resolve to :p_key.
      # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      def annotate_json(provenance:, p_key:, json:)
        return nil if provenance.nil? || p_key.nil? || json.nil?

        original_dmp_id = json['dmp_id'] || {}

        # Fail if the :PK does not match the :dmp_id if the json has a :PK
        id = Dmp::DmpIdHandler.dmp_id_to_pk(json: original_dmp_id)
        return nil if id != p_key && !json['PK'].nil?

        annotated = deep_copy(obj: json)
        annotated['PK'] = json['PK'] || p_key
        annotated['SK'] = LATEST_VERSION

        # Ensure that the :dmp_id matches the :PK
        annotated['dmp_id'] = Dmp::DmpIdHandler.pk_to_dmp_id(p_key: annotated['PK'])

        # Update the modification timestamps (one clock reading so they all agree)
        now = Time.now
        annotated['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
        annotated['dmphub_updated_at'] = now.iso8601
        # Only add the Creation date if it is blank
        annotated['dmphub_created_at'] = now.iso8601 if json['dmphub_created_at'].nil?
        return annotated unless json['dmphub_provenance_id'].nil?

        annotated['dmphub_provenance_id'] = provenance
        return annotated if !annotated['dmphub_provenance_identifier'].nil? ||
                            original_dmp_id['identifier'].nil?

        # Record the original Provenance system's identifier
        annotated['dmphub_provenance_identifier'] = deep_copy(obj: original_dmp_id)
        annotated
      end
      # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

      # Process an update on the DMP metadata
      #
      # Returns nil when :updater or :new_version is missing, :new_version when
      # there is no :original_version, :original_version when it is tombstoned
      # or unchanged, and otherwise a new Hash with the change spliced in.
      def process_update(updater:, original_version:, new_version:)
        return nil if updater.nil? || new_version.nil?
        # If there is no :original_version then assume it's a new DMP
        return new_version if original_version.nil?
        # does not allow tombstoned DMPs to be updated
        return original_version if original_version['SK'] == TOMBSTONE_VERSION
        return original_version if eql(dmp_a: original_version, dmp_b: new_version)

        owner = original_version['dmphub_provenance_id']

        # If the system of provenance is making the change then just use the
        # new version as the base and then splice in any mods made by others
        if owner == updater
          return splice_for_owner(owner: owner, updater: updater, base: new_version, mods: original_version)
        end

        # Otherwise use the original version as the base and then update the
        # metadata owned by the updater system
        splice_for_others(owner: owner, updater: updater, base: original_version, mods: new_version)
      end

      private

      # A shallow copy of the DMP without the attributes :eql ignores
      def comparable(dmp)
        dmp.reject { |key, _value| IGNORED_FOR_EQUALITY.include?(key) }
      end

      # The funding entries of the first project, or [] when there are none
      def fundings_of(dmp)
        project = (dmp['project'] || []).first || {}
        project['funding'] || []
      end

      # Entries that carry a :dmphub_provenance_id other than the owner's and
      # that are not already part of :existing
      def entries_from_others(entries:, owner:, existing:)
        entries.reject do |entry|
          entry['dmphub_provenance_id'].nil? ||
            entry['dmphub_provenance_id'] == owner ||
            existing.include?(entry)
        end
      end

      # Splice changes from other systems back onto the system of provenance's changes
      #
      # :base is the owner's new version and :mods is the stored version, which
      # may contain funding / related identifiers added by other systems. The
      # result is a copy of :base with those entries appended.
      #
      # NOTE(review): only entries with a top level :dmphub_provenance_id are
      # treated as belonging to another system. :update_funding records the
      # provenance on the :grant_id instead, so such fundings are not carried
      # over here; confirm whether that is intended.
      def splice_for_owner(owner:, updater:, base:, mods:)
        return base if owner.nil? || updater.nil? || mods.nil?
        return mods if base.nil?

        spliced = deep_copy(obj: base)

        # process funding (just attach all funding not owned by the system of provenance)
        other_fundings = entries_from_others(
          entries: fundings_of(mods), owner: owner, existing: fundings_of(spliced)
        )
        if other_fundings.any?
          # ensure that the :project and :funding are defined
          spliced['project'] = [{}] if spliced['project'].nil? || spliced['project'].empty?
          project = spliced['project'].first
          project['funding'] = (project['funding'] || []) + deep_copy(obj: other_fundings)
        end

        # process related_identifiers (just attach all related identifiers not owned by the system of provenance)
        base_relateds = spliced['dmproadmap_related_identifiers'] || []
        other_relateds = entries_from_others(
          entries: mods['dmproadmap_related_identifiers'] || [], owner: owner, existing: base_relateds
        )
        return spliced unless other_relateds.any?

        spliced['dmproadmap_related_identifiers'] = base_relateds + deep_copy(obj: other_relateds)
        spliced
      end

      # Splice the changes made by a system that is not the owner onto the
      # stored version (:base). Only funding and related identifiers are taken
      # from :mods; everything else in :base is kept.
      def splice_for_others(owner:, updater:, base:, mods:)
        return base if owner.nil? || updater.nil? || base.nil? || mods.nil?

        spliced = deep_copy(obj: base)

        # process funding
        mod_funds = fundings_of(mods)
        unless mod_funds.empty?
          # the stored version may not have a :project yet
          spliced['project'] = [{}] if spliced['project'].nil? || spliced['project'].empty?
          spliced['project'].first['funding'] = update_funding(
            updater: updater, base: fundings_of(spliced), mods: mod_funds
          )
        end

        mod_relateds = mods['dmproadmap_related_identifiers'] || []
        return spliced if mod_relateds.empty?

        # process related_identifiers
        spliced['dmproadmap_related_identifiers'] = update_related_identifiers(
          updater: updater, base: spliced['dmproadmap_related_identifiers'] || [], mods: mod_relateds
        )
        spliced
      end

      # Splice funding changes
      #
      # Each entry of :mods that has a :funding_status or :grant_id replaces the
      # newest pending (applied/planned) entry of :base for the same :funder_id,
      # or is appended when there is none. Neither :base nor :mods is modified.
      # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      def update_funding(updater:, base:, mods:)
        return base if updater.nil? || mods.nil? || mods.empty?

        spliced = base.nil? ? [] : deep_copy(obj: base)
        mods.each do |funding|
          # Ignore it if it has no status or grant id
          next if funding['funding_status'].nil? && funding['grant_id'].nil?

          # See if there is an existing funding record for the funder that's waiting on an update
          pending = spliced.select do |orig|
            !orig['funder_id'].nil? &&
              orig['funder_id'] == funding['funder_id'] &&
              PENDING_FUNDING_STATUSES.include?(orig['funding_status'])
          end
          # Always grab the most current
          existing = pending.max_by { |orig| orig['dmphub_created_at'].to_s }

          # Out with the old and in with the new
          spliced.delete(existing) unless existing.nil?
          entry = deep_copy(obj: funding)
          # retain the original name
          entry['name'] = existing['name'] unless existing.nil? || existing['name'].nil?

          unless entry['grant_id'].nil?
            # A grant id means the funding was awarded
            entry['funding_status'] = 'granted'
            if entry['grant_id'].is_a?(Hash)
              # Add the provenance to the entry
              entry['grant_id']['dmphub_provenance_id'] = updater
              entry['grant_id']['dmphub_created_at'] = Time.now.iso8601
            end
          end
          spliced << entry
        end
        spliced
      end
      # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

      # Splice related identifier changes
      #
      # Remove the updater's existing related identifiers and replace them with
      # the new set, each stamped with the updater's provenance.
      def update_related_identifiers(updater:, base:, mods:)
        return base if updater.nil? || mods.nil? || mods.empty?

        kept = (base || []).reject { |related| related['dmphub_provenance_id'] == updater }
        added = mods.map do |related|
          # Add the provenance to the entry
          deep_copy(obj: related).tap { |copy| copy['dmphub_provenance_id'] = updater }
        end
        deep_copy(obj: kept) + added
      end

      # Ruby's clone/dup methods do not clone/dup the children, so we need to do it here
      def deep_copy(obj:)
        case obj
        when Array
          obj.map { |item| deep_copy(obj: item) }
        when Hash
          # Build a fresh Hash rather than adding keys to one that is being iterated
          obj.each_with_object({}) do |(key, value), copy|
            copied_key = key.is_a?(String) || key.is_a?(Symbol) ? key : deep_copy(obj: key)
            copy[copied_key] = deep_copy(obj: value)
          end
        else
          obj.dup
        end
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmp-dynamo_adapter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - briri
@@ -58,7 +58,9 @@ executables: []
58
58
  extensions: []
59
59
  extra_rdoc_files: []
60
60
  files:
61
+ - lib/dmp/dmp_id_handler.rb
61
62
  - lib/dmp/dynamo_adapter.rb
63
+ - lib/dmp/metadata_handler.rb
62
64
  homepage: https://github.com/CDLUC3/dmphub-v2/tree/main/gems/dmp-dynamo_adapter
63
65
  licenses:
64
66
  - MIT