dmp-dynamo_adapter 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dmp/dmp_id_handler.rb +63 -0
- data/lib/dmp/dynamo_adapter.rb +2 -2
- data/lib/dmp/metadata_handler.rb +251 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73072fe825acd0df7976f0b387ca8a441beb225ff387cc8390d8e3c289a028b5
|
4
|
+
data.tar.gz: f6e31294429df6ca86914a4f7ac8157fee8684ed03fac7fec521858384d649de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 703bb8718f53211b4ee6f4d31fd34ada4a955ee316d86b7a78049ad760039c75e351d5a808ce41bb4d2983ee0c21b9372af3089cd322247398cbda75f3516491
|
7
|
+
data.tar.gz: c67e27dc354b2206a86f334e4c9a181208587b4be885e71f3644543a3f5219e3afe97a080f2386a9dec3dd633d4dcd7c1ec12c53324f470ffc39b6584700b8e1
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
require 'dmp/metadata_handler'
|
5
|
+
|
6
|
+
module Dmp
|
7
|
+
# Methods that handle PK generation
|
8
|
+
class DmpIdHandler
|
9
|
+
DOI_REGEX = %r{[0-9]{2}\.[0-9]{5}/[a-zA-Z0-9/_.]+}.freeze
|
10
|
+
|
11
|
+
class << self
|
12
|
+
# Return the base URL used when building DMP IDs, always ending in '/'.
#
# The value comes from the DMP_ID_BASE_URL environment variable; a trailing
# slash is appended only when the configured value does not already have one.
#
# Raises KeyError (naming the variable) when DMP_ID_BASE_URL is not set,
# instead of failing later with a NoMethodError on nil.
def dmp_id_base_url
  base_url = ENV.fetch('DMP_ID_BASE_URL')
  base_url.end_with?('/') ? base_url : "#{base_url}/"
end
|
15
|
+
|
16
|
+
# Preassign a DMP ID that will later be sent to the DOI minting authority (EZID).
#
# Builds random candidates of the form "<DMP_ID_SHOULDER>.<8 hex chars>" and
# returns the primary key of the first candidate that find_by_pk reports as
# unused. At most 10 candidates are tried.
#
# Returns the primary key (PK prefix + base URL + DMP ID) as a String, or nil
# when every attempt collided with an existing record.
#
# NOTE(review): find_by_pk is not defined in this class; it is assumed to be
# provided by the surrounding adapter and to return a collection - confirm.
def preregister_dmp_id
  max_attempts = 10
  max_attempts.times do
    candidate = "#{ENV['DMP_ID_SHOULDER']}.#{SecureRandom.hex(4).upcase}"
    p_key = "#{Dmp::MetadataHandler::PK_DMP_PREFIX}#{dmp_id_base_url}#{candidate}"
    # Check the candidate itself (not a blank id), using the exact key that is
    # handed back to the caller, so a successful attempt is never discarded.
    return p_key if find_by_pk(p_key: p_key).empty?
  end

  # Something went wrong and it was unable to identify a unique id
  nil
end
|
31
|
+
|
32
|
+
# Format the DMP ID in the way we want it.
#
# value - the identifier to normalise (bare DOI, "doi:"-prefixed DOI or URL).
#
# Returns nil when value is nil or contains nothing matching DOI_REGEX,
# value itself when it already starts with 'http', and otherwise the matched
# DOI appended to dmp_id_base_url.
def format_dmp_id(value:)
  text = value.to_s
  doi = text[DOI_REGEX]
  return nil if doi.nil?
  # If it's already a URL, return it as is
  return text if text.start_with?('http')

  # The match always begins with the numeric DOI prefix, so it can never carry
  # a "doi:" label or a leading slash that would need stripping.
  "#{dmp_id_base_url}#{doi}"
end
|
43
|
+
|
44
|
+
# Turn a :dmp_id hash into the matching :PK value.
#
# json - a hash expected to carry the DMP ID under the 'identifier' key.
#
# Returns nil when json or its 'identifier' is nil, or when format_dmp_id
# cannot produce a DMP ID from it; otherwise the formatted DMP ID with the
# PK prefix applied by Dmp::MetadataHandler.append_pk_prefix.
def dmp_id_to_pk(json:)
  identifier = json.nil? ? nil : json['identifier']
  return nil if identifier.nil?

  # If it's a DOI format it correctly
  formatted = format_dmp_id(value: identifier.to_s)
  if formatted.nil? || formatted == ''
    nil
  else
    Dmp::MetadataHandler.append_pk_prefix(dmp: formatted)
  end
end
|
54
|
+
|
55
|
+
# Build a :dmp_id hash from a :PK value by stripping the PK prefix.
#
# p_key - the primary key string.
#
# Returns nil for a nil p_key, otherwise a hash with :type set to 'doi' and
# :identifier set to the key without its prefix.
def pk_to_dmp_id(p_key:)
  unless p_key.nil?
    identifier = Dmp::MetadataHandler.remove_pk_prefix(dmp: p_key)
    { type: 'doi', identifier: identifier }
  end
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/lib/dmp/dynamo_adapter.rb
CHANGED
@@ -0,0 +1,251 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'dmp/dmp_id_handler'
|
4
|
+
|
5
|
+
module Dmp
|
6
|
+
# Handles alterations to DMP metadata elements
|
7
|
+
class MetadataHandler
|
8
|
+
PK_DMP_PREFIX = 'DMP#'.freeze
|
9
|
+
PK_PROVENANCE_PREFIX = 'PROVENANCE#'.freeze
|
10
|
+
|
11
|
+
SK_PREFIX = 'VERSION#'.freeze
|
12
|
+
|
13
|
+
LATEST_VERSION = "#{SK_PREFIX}latest".freeze
|
14
|
+
TOMBSTONE_VERSION = "#{SK_PREFIX}tombstone".freeze
|
15
|
+
|
16
|
+
class << self
|
17
|
+
# Determine whether two DMP records are equal, ignoring the bookkeeping
# attributes :SK, :dmphub_modification_day, :dmphub_updated_at and
# :dmphub_created_at.
#
# dmp_a, dmp_b - the records to compare (a nil record is treated as {}).
#
# Returns false as soon as both records carry a :PK and the two differ;
# otherwise true when the remaining attributes are equal. Neither argument
# is modified.
def eql(dmp_a:, dmp_b:)
  dmp_a = {} if dmp_a.nil?
  dmp_b = {} if dmp_b.nil?
  # They are not equal if the :PK do not match (and aren't blank)
  return false if !dmp_a['PK'].nil? && !dmp_b['PK'].nil? && dmp_a['PK'] != dmp_b['PK']

  ignored = %w[SK dmphub_modification_day dmphub_updated_at dmphub_created_at]
  # Hash#reject builds a new hash, so no copy is needed. Dropping the keys
  # unconditionally makes an explicit nil equal to a missing key.
  comparable_a = dmp_a.reject { |key, _value| ignored.include?(key) }
  comparable_b = dmp_b.reject { |key, _value| ignored.include?(key) }
  comparable_a == comparable_b
end
|
35
|
+
|
36
|
+
# Put the matching PK prefix in front of a DMP or provenance identifier.
#
# Exactly one of dmp / provenance should be supplied. Any prefix already
# present is removed first (via remove_pk_prefix) so it is not doubled.
#
# Returns the prefixed string, or nil when neither or both arguments are given.
def append_pk_prefix(dmp: nil, provenance: nil)
  if dmp.nil?
    return nil if provenance.nil?

    "#{PK_PROVENANCE_PREFIX}#{remove_pk_prefix(provenance: provenance)}"
  elsif provenance.nil?
    "#{PK_DMP_PREFIX}#{remove_pk_prefix(dmp: dmp)}"
  end
end
|
46
|
+
|
47
|
+
# Remove the PK prefix text from a DMP or provenance key.
#
# Exactly one of dmp / provenance should be supplied. Every occurrence of the
# corresponding prefix text is removed from the string.
#
# Returns the stripped string, or nil when neither or both arguments are given.
def remove_pk_prefix(dmp: nil, provenance: nil)
  if dmp.nil?
    provenance&.gsub(PK_PROVENANCE_PREFIX, '')
  elsif provenance.nil?
    dmp.gsub(PK_DMP_PREFIX, '')
  end
end
|
57
|
+
|
58
|
+
# Add all attributes necessary for the DMPHub.
#
# provenance - value stored as :dmphub_provenance_id when the json has none.
# p_key      - primary key used when the json does not already carry a :PK.
# json       - the DMP metadata hash; it is copied, never modified.
#
# Returns nil when any argument is nil, otherwise an annotated copy of json
# with :PK, :SK, :dmp_id and the dmphub_* timestamps set. When json has no
# :dmphub_provenance_id, the provenance and (if present) the original
# :dmp_id are recorded as well.
#
# NOTE(review): the original code computed the :PK derived from json's
# :dmp_id "to fail on a mismatch" but never used the result, so no such
# failure ever happened. That dead computation is dropped here; confirm
# whether a mismatch should really be rejected.
# NOTE(review): Time#iso8601 needs the 'time' standard library on older
# Rubies and this file does not require it itself - confirm it is loaded.
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
def annotate_json(provenance:, p_key:, json:)
  return nil if provenance.nil? || p_key.nil? || json.nil?

  # Sample the clock once so all timestamps on the record agree
  now = Time.now
  annotated = deep_copy(obj: json)
  annotated['PK'] = json['PK'] || p_key
  annotated['SK'] = LATEST_VERSION

  # Ensure that the :dmp_id matches the :PK
  annotated['dmp_id'] = Dmp::DmpIdHandler.pk_to_dmp_id(p_key: annotated['PK'])

  # Update the modification timestamps (%m is the month; %M would be minutes)
  annotated['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
  annotated['dmphub_updated_at'] = now.iso8601
  # Only add the Creation date if it is blank
  annotated['dmphub_created_at'] = now.iso8601 if json['dmphub_created_at'].nil?
  return annotated unless json['dmphub_provenance_id'].nil?

  annotated['dmphub_provenance_id'] = provenance
  return annotated if !annotated['dmphub_provenance_identifier'].nil? ||
                      json.fetch('dmp_id', {})['identifier'].nil?

  # Record the original Provenance system's identifier
  annotated['dmphub_provenance_identifier'] = json['dmp_id']
  annotated
end
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
90
|
+
|
91
|
+
# Process an update on the DMP metadata.
#
# updater          - provenance id of the system submitting the change.
# original_version - the currently stored DMP (nil for a brand new DMP).
# new_version      - the incoming DMP metadata.
#
# Returns nil when updater or new_version is nil; new_version when there is
# no original; original_version when it is tombstoned or equal to the new
# version; otherwise the result of splicing the two versions together.
def process_update(updater:, original_version:, new_version:)
  return nil if updater.nil? || new_version.nil?
  # If there is no :original_version then assume it's a new DMP
  return new_version if original_version.nil?
  # does not allow tombstoned DMPs to be updated
  return original_version if original_version['SK'] == TOMBSTONE_VERSION
  return original_version if eql(dmp_a: original_version, dmp_b: new_version)

  owner = original_version['dmphub_provenance_id']

  # The splice helpers accept keyword arguments only, so pass keywords
  # explicitly; a positional options hash raises ArgumentError on Ruby 3.
  if owner == updater
    # If the system of provenance is making the change then just use the
    # new version as the base and then splice in any mods made by others
    return splice_for_owner(owner: owner, updater: updater, base: new_version, mods: original_version)
  end

  # Otherwise use the original version as the base and then update the
  # metadata owned by the updater system
  splice_for_others(owner: owner, updater: updater, base: original_version, mods: new_version)
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
# Splice changes from other systems back onto the system of provenance's changes.
#
# owner, updater - provenance ids (only checked for nil here).
# base           - the record searched for entries carrying a
#                  :dmphub_provenance_id and used as the starting point.
# mods           - the record whose funding / related identifiers are taken.
#
# Returns base when owner, updater or mods is nil; mods when base is nil or
# base holds no provenance-tagged entries; otherwise a copy of base whose
# first project's funding and whose :dmproadmap_related_identifiers are the
# ones from mods followed by base's provenance-tagged entries. Neither
# argument is modified.
def splice_for_owner(owner:, updater:, base:, mods:)
  return base if owner.nil? || updater.nil? || mods.nil?
  return mods if base.nil?

  provenance_regex = %r{"dmphub_provenance_id":"#{PK_PROVENANCE_PREFIX}[a-zA-Z\-_]+"}
  # Just return it as is if there are no mods by other systems
  return mods unless base.to_json.match?(provenance_regex)

  spliced = deep_copy(obj: base)
  cloned_mods = deep_copy(obj: mods)

  # ensure that the :project and :funding are defined
  spliced['project'] = [{}] if spliced['project'].nil? || spliced['project'].empty?
  project = spliced['project'].first
  project['funding'] = [] if project['funding'].nil?

  # get all the new funding and retain other system's funding metadata
  mod_project = (cloned_mods['project'] || []).first || {}
  mod_fundings = mod_project['funding'] || []
  other_fundings = project['funding'].reject { |fund| fund['dmphub_provenance_id'].nil? }
  # Concatenate: `<<` would nest the whole array as a single element
  project['funding'] = mod_fundings + other_fundings
  return spliced if cloned_mods['dmproadmap_related_identifiers'].nil?

  # process related_identifiers (just attach all related identifiers not owned by the system of provenance)
  mod_relateds = cloned_mods['dmproadmap_related_identifiers']
  base_relateds = spliced['dmproadmap_related_identifiers'] || []
  other_relateds = base_relateds.reject { |id| id['dmphub_provenance_id'].nil? }
  spliced['dmproadmap_related_identifiers'] = mod_relateds + other_relateds
  spliced
end
|
148
|
+
|
149
|
+
# Splice changes made by a system other than the owner onto the stored record.
#
# owner, updater - provenance ids (owner is only checked for nil here).
# base           - the stored record used as the starting point.
# mods           - the incoming record; only the first project's funding and
#                  the :dmproadmap_related_identifiers are read from it.
#
# Returns base when any argument is nil; otherwise a copy of base whose
# funding was merged by update_funding and whose related identifiers were
# merged by update_related_identifiers (the latter only when mods has any).
def splice_for_others(owner:, updater:, base:, mods:)
  return base if owner.nil? || updater.nil? || base.nil? || mods.nil?

  spliced = deep_copy(obj: base)
  projects = spliced['project']
  has_project = !projects.nil? && !projects.empty?
  base_funds = has_project ? projects.first.fetch('funding', []) : []
  base_relateds = spliced.fetch('dmproadmap_related_identifiers', [])

  mod_project = (mods['project'] || []).first || {}
  mod_funds = mod_project.fetch('funding', [])
  mod_relateds = mods['dmproadmap_related_identifiers'] || []

  # process funding
  funding = update_funding(updater: updater, base: base_funds, mods: mod_funds)
  if has_project
    projects.first['funding'] = funding
  elsif !funding.nil? && !funding.empty?
    # The stored record had no project at all: create one only when there is
    # funding to keep, instead of failing on a nil project.
    spliced['project'] = [{ 'funding' => funding }]
  end
  return spliced if mod_relateds.empty?

  # process related_identifiers
  spliced['dmproadmap_related_identifiers'] = update_related_identifiers(
    updater: updater, base: base_relateds, mods: mod_relateds
  )
  spliced
end
|
172
|
+
|
173
|
+
# Splice funding changes.
#
# updater - provenance id recorded on any grant that gets attached.
# base    - the existing funding entries (an Array of hashes, or nil).
# mods    - the incoming funding entries.
#
# For every incoming entry that has a :funding_status or a :grant_id, the most
# recently created existing entry for the same :funder_id that is still
# 'applied' or 'planned' is replaced (keeping its original :name). Entries
# carrying a :grant_id are marked 'granted' and their grant is stamped with
# the updater and a creation time.
#
# Returns base itself when updater is nil or mods is nil/empty, otherwise a
# new Array. Neither base nor mods is modified.
def update_funding(updater:, base:, mods:)
  return base if updater.nil? || mods.nil? || mods.empty?

  spliced = base.nil? ? [] : deep_copy(obj: base)
  mods.each do |funding|
    # Ignore it if it has no status or grant id
    next if funding['funding_status'].nil? && funding['grant_id'].nil?

    # See if there is an existing funding record for the funder that's waiting on an update
    candidates = spliced.select do |orig|
      !orig['funder_id'].nil? &&
        orig['funder_id'] == funding['funder_id'] &&
        %w[applied planned].include?(orig['funding_status'])
    end
    # Always grab the most current
    existing = candidates.max_by { |orig| orig.fetch('dmphub_created_at', '') }

    # Out with the old and in with the new. Work on a copy so the caller's
    # entry is left alone.
    spliced.delete(existing) unless existing.nil?
    entry = deep_copy(obj: funding)
    # retain the original name
    entry['name'] = existing['name'] unless existing.nil?

    unless entry['grant_id'].nil?
      # A grant id is present here, so the funding was granted
      entry['funding_status'] = 'granted'
      # Add the provenance to the entry
      entry['grant_id']['dmphub_provenance_id'] = updater
      entry['grant_id']['dmphub_created_at'] = Time.now.iso8601
    end
    spliced << entry
  end
  spliced
end
|
212
|
+
|
213
|
+
# Merge an updater's related identifiers into the existing list.
#
# updater - provenance id of the system supplying the identifiers.
# base    - the existing related identifiers (Array of hashes, or nil).
# mods    - the updater's complete new set of related identifiers.
#
# Returns base itself when updater is nil or mods is nil/empty. Otherwise
# returns a new Array: the base entries not owned by the updater, followed
# by copies of mods each stamped with the updater's :dmphub_provenance_id.
def update_related_identifiers(updater:, base:, mods:)
  return base if updater.nil? || mods.nil? || mods.empty?

  # Remove the updater's existing related identifiers and replace with the new set
  existing = base.nil? ? [] : deep_copy(obj: base)
  kept = existing.reject { |related| related['dmphub_provenance_id'] == updater }
  # Add the provenance to the entry
  stamped = deep_copy(obj: mods).each { |related| related['dmphub_provenance_id'] = updater }
  kept + stamped
end
|
228
|
+
|
229
|
+
# Ruby's clone/dup methods do not clone/dup the children, so we need to do it here.
#
# obj - the value to copy.
#
# Arrays and Hashes (including subclasses) are rebuilt recursively as a plain
# Array / Hash, with Hash keys copied as well as values. Any other object is
# copied with #dup. A new Hash is filled rather than editing a duplicate
# while iterating it, because adding keys during iteration raises.
# A Hash's default value / default proc is not carried over to the copy.
#
# Returns the copy.
def deep_copy(obj:)
  case obj
  when Array
    obj.map { |item| deep_copy(obj: item) }
  when Hash
    obj.each_with_object({}) do |(key, value), copy|
      copy[deep_copy(obj: key)] = deep_copy(obj: value)
    end
  else
    obj.dup
  end
end
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmp-dynamo_adapter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- briri
|
@@ -58,7 +58,9 @@ executables: []
|
|
58
58
|
extensions: []
|
59
59
|
extra_rdoc_files: []
|
60
60
|
files:
|
61
|
+
- lib/dmp/dmp_id_handler.rb
|
61
62
|
- lib/dmp/dynamo_adapter.rb
|
63
|
+
- lib/dmp/metadata_handler.rb
|
62
64
|
homepage: https://github.com/CDLUC3/dmphub-v2/tree/main/gems/dmp-dynamo_adapter
|
63
65
|
licenses:
|
64
66
|
- MIT
|