dmp-dynamo_adapter 0.1.3 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dmp/dmp_id_handler.rb +63 -0
- data/lib/dmp/dynamo_adapter.rb +2 -2
- data/lib/dmp/metadata_handler.rb +251 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8fe879d1aefc8db1c3b1a8160171c2f3cd8f6ee1986d494abf69083af19a6133
|
4
|
+
data.tar.gz: 58fd4512a6c72bfaee714cfb19be7c9dd3f9a81b2df1742f5a7a35ff6b5fa045
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52c93f6323e77810246cceeba743a71866872931716f82b1dfe482ff613d1361ef596deed64ff8f22eec32ea38c7679d0c64916c7e1941d076eed1357537ac37
|
7
|
+
data.tar.gz: 01f5d12e5227007fc434520e135b82edbbb4601a517c6a9898cf510fe2dcca9163900145fb50b19d69a40cdc8e3bf81fd345c0e0361d24df2a650ab15a9bf2cb
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
require 'dmp/metadata_handler'
|
5
|
+
|
6
|
+
module Dmp
|
7
|
+
# Methods that handle PK generation: minting candidate DMP IDs, normalising
# DOIs into full DMP ID URLs, and converting between a DMP ID and the
# primary key (:PK) form built by Dmp::MetadataHandler.
class DmpIdHandler
  DOI_REGEX = %r{[0-9]{2}\.[0-9]{5}/[a-zA-Z0-9/_.]+}.freeze

  # Upper bound on the number of random candidates tried by preregister_dmp_id
  MAX_PREREGISTER_ATTEMPTS = 10

  class << self
    # The base URL every DMP ID is prefixed with, always ending in '/'.
    #
    # @return [String]
    # @raise [KeyError] when the DMP_ID_BASE_URL environment variable is unset
    def dmp_id_base_url
      base = ENV.fetch('DMP_ID_BASE_URL')
      base.end_with?('/') ? base : "#{base}/"
    end

    # Preassign a DMP ID that will later be sent to the DOI minting authority (EZID).
    #
    # Builds "<DMP_ID_SHOULDER>.<8 random hex chars>" candidates and returns the
    # first one whose primary key is not already in use.
    #
    # NOTE(review): find_by_pk is not defined in this class; it is presumably
    # supplied by the adapter this handler is used with - confirm.
    # NOTE(review): the lookup uses the exact :PK that is returned (prefix +
    # base URL + id); confirm that this matches how records are keyed.
    #
    # @return [String, nil] the new :PK, or nil when no unused id could be
    #   found within MAX_PREREGISTER_ATTEMPTS tries
    def preregister_dmp_id
      MAX_PREREGISTER_ATTEMPTS.times do
        candidate = "#{ENV['DMP_ID_SHOULDER']}.#{SecureRandom.hex(4).upcase}"
        p_key = "#{Dmp::MetadataHandler::PK_DMP_PREFIX}#{dmp_id_base_url}#{candidate}"
        return p_key if find_by_pk(p_key: p_key).empty?
      end

      # Something went wrong and it was unable to identify a unique id
      nil
    end

    # Format the DMP ID in the way we want it.
    #
    # @param value [#to_s] a DOI in any notation (bare, "doi:" prefixed or URL)
    # @return [String, nil] the value itself when it is already a URL, the DOI
    #   appended to dmp_id_base_url otherwise, or nil when no DOI is present
    def format_dmp_id(value:)
      text = value.to_s
      # DOI_REGEX only captures the DOI itself, so any "doi:" scheme or leading
      # slash in front of it is already excluded from the match
      doi = text[DOI_REGEX]
      return nil if doi.nil?
      # If it's already a URL, return it as is
      return text if text.start_with?('http')

      "#{dmp_id_base_url}#{doi}"
    end

    # Append the :PK prefix to the :dmp_id
    #
    # @param json [Hash, nil] a dmp_id structure with an 'identifier' entry
    # @return [String, nil] the :PK, or nil when there is no usable DOI
    def dmp_id_to_pk(json:)
      return nil if json.nil? || json['identifier'].nil?

      # If it's a DOI format it correctly
      dmp_id = format_dmp_id(value: json['identifier'].to_s)
      return nil if dmp_id.nil? || dmp_id == ''

      Dmp::MetadataHandler.append_pk_prefix(dmp: dmp_id)
    end

    # Derive the DMP ID by removing the :PK prefix
    #
    # @param p_key [String, nil]
    # @return [Hash, nil] { type: 'doi', identifier: <DMP ID> }
    def pk_to_dmp_id(p_key:)
      return nil if p_key.nil?

      { type: 'doi', identifier: Dmp::MetadataHandler.remove_pk_prefix(dmp: p_key) }
    end
  end
end
|
63
|
+
end
|
data/lib/dmp/dynamo_adapter.rb
CHANGED
@@ -0,0 +1,251 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'dmp/dmp_id_handler'
|
4
|
+
|
5
|
+
module Dmp
|
6
|
+
# Handles alterations to DMP metadata elements: primary-key prefixes,
# bookkeeping attributes added before a DMP is stored, and merging an update
# with the stored version according to who made the change.
#
# NOTE(review): this class uses Time#iso8601 and #to_json; the only require
# in this file is 'dmp/dmp_id_handler', so 'time' and 'json' are presumably
# loaded by a dependency - confirm.
class MetadataHandler
  PK_DMP_PREFIX = 'DMP#'.freeze
  PK_PROVENANCE_PREFIX = 'PROVENANCE#'.freeze

  SK_PREFIX = 'VERSION#'.freeze

  LATEST_VERSION = "#{SK_PREFIX}latest".freeze
  TOMBSTONE_VERSION = "#{SK_PREFIX}tombstone".freeze

  # Attributes that never take part in a comparison between two versions
  IGNORED_ON_COMPARE = %w[SK dmphub_modification_day dmphub_updated_at dmphub_created_at].freeze

  class << self
    # Determine if the objects are equal. This ignores the :SK,
    # :dmphub_modification_day, :dmphub_updated_at and :dmphub_created_at
    # attributes (whether or not they hold a value).
    #
    # @param dmp_a [Hash, nil]
    # @param dmp_b [Hash, nil]
    # @return [Boolean]
    def eql(dmp_a:, dmp_b:)
      dmp_a = {} if dmp_a.nil?
      dmp_b = {} if dmp_b.nil?
      # They are not equal if the :PK do not match (and aren't blank)
      return false if !dmp_a['PK'].nil? && !dmp_b['PK'].nil? && dmp_a['PK'] != dmp_b['PK']

      comparable(dmp_a) == comparable(dmp_b)
    end

    # Append the PK prefix for the object. Exactly one of the arguments may be
    # given; the prefix is never doubled up.
    #
    # @return [String, nil] nil when both or neither argument is supplied
    def append_pk_prefix(dmp: nil, provenance: nil)
      # If all the :PK types were passed return nil because we only want one
      return nil if !dmp.nil? && !provenance.nil?

      return "#{PK_DMP_PREFIX}#{remove_pk_prefix(dmp: dmp)}" unless dmp.nil?
      return "#{PK_PROVENANCE_PREFIX}#{remove_pk_prefix(provenance: provenance)}" unless provenance.nil?

      nil
    end

    # Strip off the PK prefix. Only leading prefixes are removed; the same
    # characters appearing later in the value are left alone.
    #
    # @return [String, nil] nil when both or neither argument is supplied
    def remove_pk_prefix(dmp: nil, provenance: nil)
      # If all the :PK types were passed return nil because we only want one
      return nil if !dmp.nil? && !provenance.nil?

      return strip_prefix(dmp, PK_DMP_PREFIX) unless dmp.nil?
      return strip_prefix(provenance, PK_PROVENANCE_PREFIX) unless provenance.nil?

      nil
    end

    # Add all attributes necessary for the DMPHub. The incoming json is not
    # modified; an annotated copy is returned.
    #
    # NOTE(review): the original comment said this should fail when the json
    # carries a :PK that does not match its :dmp_id, but that check was never
    # enforced (its result was discarded). It is still not enforced here.
    #
    # @param provenance [String] id of the system submitting the json
    # @param p_key [String] the :PK to use when the json has none
    # @param json [Hash] the DMP metadata
    # @return [Hash, nil] nil when any argument is nil
    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    def annotate_json(provenance:, p_key:, json:)
      return nil if provenance.nil? || p_key.nil? || json.nil?

      now = Time.now
      annotated = deep_copy(obj: json)
      annotated['PK'] = json['PK'] || p_key
      annotated['SK'] = LATEST_VERSION

      # Ensure that the :dmp_id matches the :PK
      annotated['dmp_id'] = Dmp::DmpIdHandler.pk_to_dmp_id(p_key: annotated['PK'])

      # Update the modification timestamps (%m is the month, %M would be minutes)
      annotated['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
      annotated['dmphub_updated_at'] = now.iso8601
      # Only add the Creation date if it is blank
      annotated['dmphub_created_at'] = now.iso8601 if json['dmphub_created_at'].nil?
      return annotated unless json['dmphub_provenance_id'].nil?

      annotated['dmphub_provenance_id'] = provenance
      return annotated unless annotated['dmphub_provenance_identifier'].nil?

      original_id = json['dmp_id']
      return annotated unless original_id.is_a?(Hash) && !original_id['identifier'].nil?

      # Record the original Provenance system's identifier (as a copy so the
      # annotated record does not share state with the caller's json)
      annotated['dmphub_provenance_identifier'] = deep_copy(obj: original_id)
      annotated
    end
    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

    # Process an update on the DMP metadata.
    #
    # NOTE(review): when the owner is the updater the new version is passed as
    # :base and the stored version as :mods, exactly as before; the naming
    # inside splice_for_owner suggests the roles may be reversed - confirm.
    #
    # @param updater [String] provenance id of the system making the change
    # @param original_version [Hash, nil] the stored version, if any
    # @param new_version [Hash] the incoming version
    # @return [Hash, nil] the version to store, or nil when updater or
    #   new_version is nil
    def process_update(updater:, original_version:, new_version:)
      return nil if updater.nil? || new_version.nil?
      # If there is no :original_version then assume it's a new DMP
      return new_version if original_version.nil?
      # does not allow tombstoned DMPs to be updated
      return original_version if original_version['SK'] == TOMBSTONE_VERSION
      return original_version if eql(dmp_a: original_version, dmp_b: new_version)

      owner = original_version['dmphub_provenance_id']

      # If the system of provenance is making the change then just use the
      # new version as the base and then splice in any mods made by others.
      # The arguments are passed as real keywords: a positional Hash is no
      # longer converted to keywords on Ruby 3.
      if owner == updater
        return splice_for_owner(owner: owner, updater: updater, base: new_version, mods: original_version)
      end

      # Otherwise use the original version as the base and then update the
      # metadata owned by the updater system
      splice_for_others(owner: owner, updater: updater, base: original_version, mods: new_version)
    end

    private

    # A copy of the DMP without the attributes that are ignored by eql
    def comparable(dmp)
      dmp.reject { |key, _value| IGNORED_ON_COMPARE.include?(key) }
    end

    # Remove one or more leading occurrences of prefix from value
    def strip_prefix(value, prefix)
      value.to_s.sub(/\A(?:#{Regexp.escape(prefix)})+/, '')
    end

    # Make sure the DMP has at least one project and return the first one
    def first_project!(dmp)
      dmp['project'] = [{}] if dmp['project'].nil? || dmp['project'].empty?
      dmp['project'].first
    end

    # The funding list of the DMP's first project ([] when there is none)
    def fundings_of(dmp)
      projects = dmp['project']
      project = projects.is_a?(Array) ? projects.first : nil
      funding = project.is_a?(Hash) ? project['funding'] : nil
      funding.is_a?(Array) ? funding : []
    end

    # True when the funding record belongs to the funder and is still
    # waiting on an outcome (status applied or planned)
    def pending_for_funder?(record, funder_id)
      !record['funder_id'].nil? &&
        record['funder_id'] == funder_id &&
        %w[applied planned].include?(record['funding_status'])
    end

    # Splice changes from other systems back onto the system of provenance's changes.
    # The result is a copy of :base whose funding and related identifiers are
    # those of :mods followed by the entries of :base that carry a
    # :dmphub_provenance_id.
    def splice_for_owner(owner:, updater:, base:, mods:)
      return base if owner.nil? || updater.nil? || mods.nil?
      return mods if base.nil?

      provenance_regex = %r{"dmphub_provenance_id":"#{PK_PROVENANCE_PREFIX}[a-zA-Z\-_]+"}
      # Just return it as is if there are no mods by other systems
      return mods if base.to_json.match(provenance_regex).nil?

      spliced = deep_copy(obj: base)
      cloned_mods = deep_copy(obj: mods)

      # ensure that the :project and :funding are defined
      project = first_project!(spliced)
      project['funding'] = [] if project['funding'].nil?
      # process funding: take the funding from :mods and retain other
      # systems' funding metadata as individual entries (not a nested list)
      other_fundings = project['funding'].reject { |fund| fund['dmphub_provenance_id'].nil? }
      project['funding'] = fundings_of(cloned_mods) + other_fundings
      return spliced if cloned_mods['dmproadmap_related_identifiers'].nil?

      # process related_identifiers in the same way
      current_relateds = spliced['dmproadmap_related_identifiers'] || []
      other_relateds = current_relateds.reject { |id| id['dmphub_provenance_id'].nil? }
      spliced['dmproadmap_related_identifiers'] = cloned_mods['dmproadmap_related_identifiers'] + other_relateds
      spliced
    end

    # Splice the updater's funding and related identifier changes onto a copy
    # of :base; nothing else from :mods is taken over.
    def splice_for_others(owner:, updater:, base:, mods:)
      return base if owner.nil? || updater.nil? || base.nil? || mods.nil?

      spliced = deep_copy(obj: base)
      mod_relateds = mods['dmproadmap_related_identifiers'] || []

      # process funding (a project is created when the base has none)
      project = first_project!(spliced)
      project['funding'] = update_funding(
        updater: updater, base: fundings_of(spliced), mods: fundings_of(mods)
      )
      return spliced if mod_relateds.empty?

      # process related_identifiers
      spliced['dmproadmap_related_identifiers'] = update_related_identifiers(
        updater: updater, base: spliced['dmproadmap_related_identifiers'] || [], mods: mod_relateds
      )
      spliced
    end

    # Splice funding changes. Neither :base nor :mods is modified.
    #
    # For every incoming funding that has a status or a grant id, the most
    # recent applied/planned record of the same funder is replaced (its name
    # is retained). An entry with a grant id is marked 'granted' and its
    # grant id is stamped with the updater and the current time.
    def update_funding(updater:, base:, mods:)
      return base if updater.nil? || mods.nil? || mods.empty?

      spliced = base.nil? ? [] : deep_copy(obj: base)
      mods.each do |funding|
        # Ignore it if it has no status or grant id
        next if funding['funding_status'].nil? && funding['grant_id'].nil?

        # See if there is an existing funding record for the funder that's
        # waiting on an update, always grabbing the most current
        pending = spliced.select { |record| pending_for_funder?(record, funding['funder_id']) }
        existing = pending.max_by { |record| record['dmphub_created_at'].to_s }

        entry = deep_copy(obj: funding)
        unless existing.nil?
          # Out with the old and in with the new, but retain the original name
          spliced.delete(existing)
          entry['name'] = existing['name']
        end

        grant = entry['grant_id']
        unless grant.nil?
          entry['funding_status'] = 'granted'
          if grant.is_a?(Hash)
            # Add the provenance to the entry
            grant['dmphub_provenance_id'] = updater
            grant['dmphub_created_at'] = Time.now.iso8601
          end
        end
        spliced << entry
      end
      spliced
    end

    # Splice related identifier changes: the updater's existing related
    # identifiers are removed and replaced with the new set, each stamped
    # with the updater's provenance id.
    def update_related_identifiers(updater:, base:, mods:)
      return base if updater.nil? || mods.nil? || mods.empty?

      kept = (base || []).reject { |related| related['dmphub_provenance_id'] == updater }
      added = mods.map do |related|
        copy = deep_copy(obj: related)
        copy['dmphub_provenance_id'] = updater
        copy
      end
      deep_copy(obj: kept) + added
    end

    # Ruby's clone/dup methods do not clone/dup the children, so we need to do it here.
    # Arrays and Hashes are copied recursively into new containers (nothing is
    # added to a Hash while it is being iterated); anything else is dup'ed.
    def deep_copy(obj:)
      case obj
      when Array
        obj.map { |item| deep_copy(obj: item) }
      when Hash
        obj.each_with_object({}) do |(key, value), copy|
          simple_key = key.is_a?(String) || key.is_a?(Symbol)
          copy[simple_key ? key : deep_copy(obj: key)] = deep_copy(obj: value)
        end
      else
        obj.dup
      end
    end
  end
end
|
251
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmp-dynamo_adapter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- briri
|
@@ -58,7 +58,9 @@ executables: []
|
|
58
58
|
extensions: []
|
59
59
|
extra_rdoc_files: []
|
60
60
|
files:
|
61
|
+
- lib/dmp/dmp_id_handler.rb
|
61
62
|
- lib/dmp/dynamo_adapter.rb
|
63
|
+
- lib/dmp/metadata_handler.rb
|
62
64
|
homepage: https://github.com/CDLUC3/dmphub-v2/tree/main/gems/dmp-dynamo_adapter
|
63
65
|
licenses:
|
64
66
|
- MIT
|