uc3-dmp-id 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/lib/uc3-dmp-id/asserter.rb +19 -9
- data/lib/uc3-dmp-id/comparator.rb +500 -0
- data/lib/uc3-dmp-id/creator.rb +10 -10
- data/lib/uc3-dmp-id/deleter.rb +5 -5
- data/lib/uc3-dmp-id/finder.rb +26 -26
- data/lib/uc3-dmp-id/helper.rb +16 -16
- data/lib/uc3-dmp-id/updater.rb +25 -24
- data/lib/uc3-dmp-id/validator.rb +2 -2
- data/lib/uc3-dmp-id/version.rb +1 -1
- data/lib/uc3-dmp-id/versioner.rb +7 -7
- metadata +22 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51832c144e5663dc01c805f92e81e50e63f2ce00200a8cad5525b7a34c9d7eb9
|
4
|
+
data.tar.gz: e197deb7f608ef478716a8aea113e853ab0d3903f1496f970a73d31d30b7e892
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31bc5d1bb73176c2afff25715590c50d0612f558880f2bde3a750ac5ea8d49674c5dd9b00eff36960454c972a1a31aca6b4f853a98e995dd65977e1f82903ad2
|
7
|
+
data.tar.gz: 86153dfeebc52570ecb83a012f4e7242e9bec607627c2dcefc5beb101a711199bc346c1388045127df89f13e65c83aa473c0fc77e37810d436ea939101b74f2b
|
data/README.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
1
|
# Uc3DmpId
|
2
2
|
|
3
3
|
Helper methods for working with DMP ID JSON records
|
4
|
+
|
5
|
+
After you have made changes, be sure to increment the version number in `lib/uc3-dmp-id/version.rb`.
|
6
|
+
|
7
|
+
To build and push this gem to RubyGems:
|
8
|
+
- Make sure you are logged into RubyGems in your terminal window (see their docs)
|
9
|
+
- Run `gem build uc3-dmp-id.gemspec` to build the gem
|
10
|
+
- Run `gem push uc3-dmp-id-[version].gem` to publish to RubyGems
|
11
|
+
|
12
|
+
After you have pushed a new version to RubyGems, you should rebuild and redeploy the AWS SAM application.
|
data/lib/uc3-dmp-id/asserter.rb
CHANGED
@@ -28,13 +28,13 @@ module Uc3DmpId
|
|
28
28
|
related_works = modified_version.fetch('dmproadmap_related_identifiers', [])
|
29
29
|
|
30
30
|
if related_works.any?
|
31
|
-
latest_version = _add_related_identifier(updater
|
32
|
-
identifiers: related_works, note
|
31
|
+
latest_version = _add_related_identifier(updater:, latest_version:,
|
32
|
+
identifiers: related_works, note:, logger:)
|
33
33
|
end
|
34
34
|
return latest_version unless !funding.nil? && funding.any?
|
35
35
|
|
36
|
-
_add_funding_mod(updater
|
37
|
-
note
|
36
|
+
_add_funding_mod(updater:, latest_version:, funding:,
|
37
|
+
note:, logger:)
|
38
38
|
end
|
39
39
|
# rubocop:enable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
40
40
|
|
@@ -106,7 +106,7 @@ module Uc3DmpId
|
|
106
106
|
end
|
107
107
|
|
108
108
|
latest_version['dmproadmap_related_identifiers'] = [] if latest_version['dmproadmap_related_identifiers'].nil?
|
109
|
-
assertion = _generate_assertion(updater
|
109
|
+
assertion = _generate_assertion(updater:, note:,
|
110
110
|
mods: JSON.parse({ dmproadmap_related_identifiers: additions }.to_json))
|
111
111
|
if logger.respond_to?(:debug)
|
112
112
|
logger.debug(message: 'Adding change to :dmphub_modifications.',
|
@@ -151,7 +151,7 @@ module Uc3DmpId
|
|
151
151
|
latest_version['dmphub_modifications'] = [] if latest_version['dmphub_modifications'].nil?
|
152
152
|
mod = JSON.parse({ funding: fund }.to_json)
|
153
153
|
mod['funding']['funding_status'] = 'granted'
|
154
|
-
assertion = _generate_assertion(updater
|
154
|
+
assertion = _generate_assertion(updater:, mods: mod, note:)
|
155
155
|
if logger.respond_to?(:debug)
|
156
156
|
logger.debug(message: 'Adding change to :dmphub_modifications.',
|
157
157
|
details: assertion)
|
@@ -168,7 +168,8 @@ module Uc3DmpId
|
|
168
168
|
# "id": "ABCD1234",
|
169
169
|
# "provenance": "dmphub",
|
170
170
|
# "timestamp": "2023-07-07T14:50:23+00:00",
|
171
|
-
# "note": "
|
171
|
+
# "note": "Data received from OpenAlex, matched by PI names and title keywords.",
|
172
|
+
#     "confidence": "Med",
|
172
173
|
# "dmproadmap_related_identifiers": {
|
173
174
|
# "work_type": "article",
|
174
175
|
# "descriptor": "is_cited_by",
|
@@ -183,7 +184,8 @@ module Uc3DmpId
|
|
183
184
|
# "id": "ABCD1234",
|
184
185
|
# "provenance": "dmphub",
|
185
186
|
# "timestamp": "2023-07-07T14:50:23+00:00",
|
186
|
-
# "note": "
|
187
|
+
# "note": "Data received from the NIH API, matched by the opportunity number.",
|
188
|
+
# "confidence": "High",
|
187
189
|
# "funding": {
|
188
190
|
# "funding_status": "granted",
|
189
191
|
# "grant_id": {
|
@@ -200,11 +202,19 @@ module Uc3DmpId
|
|
200
202
|
provenance: updater.gsub('PROVENANCE#', ''),
|
201
203
|
timestamp: Time.now.utc.iso8601,
|
202
204
|
status: 'pending',
|
203
|
-
note:
|
205
|
+
note:
|
204
206
|
}
|
205
207
|
mods.each_pair { |key, val| assertion[key] = val }
|
206
208
|
JSON.parse(assertion.to_json)
|
207
209
|
end
|
208
210
|
end
|
211
|
+
|
212
|
+
def _score_related_work(latest_version:, work:)
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
def _score_funding(latest_version:, funding:)
|
217
|
+
|
218
|
+
end
|
209
219
|
end
|
210
220
|
end
|
@@ -0,0 +1,500 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'text'
|
4
|
+
|
5
|
+
module Uc3DmpId
|
6
|
+
# Error raised by Comparator (e.g. when it is given an unusable augmenter or DMP).
class ComparatorError < StandardError
end
|
7
|
+
|
8
|
+
# Class that compares incoming data from an external source to the DMP
|
9
|
+
# It determines if they are likely related and applies a confidence rating
|
10
|
+
class Comparator
|
11
|
+
|
12
|
+
MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
|
13
|
+
MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'
|
14
|
+
|
15
|
+
# Words stripped from titles/abstracts before they are compared. Frozen so the shared
# constant cannot be mutated at runtime (the magic comment only freezes String literals).
STOP_WORDS = %w[a an and if of or the then they].freeze
|
16
|
+
|
17
|
+
# See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
|
18
|
+
# Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
|
19
|
+
|
20
|
+
attr_accessor :augmenter, :dmp, :details_hash, :logger
|
21
|
+
|
22
|
+
# Build a Comparator for a single DMP.
#
# @param args [Hash] keyword arguments:
#   :augmenter - Hash whose 'PK' starts with 'AUGMENTERS#' (required)
#   :dmp       - the DMP Hash, either bare or wrapped as { 'dmp' => { ... } }
#   :logger    - optional logger
# @raise [ComparatorError] if the augmenter is missing/invalid or no details could be extracted
def initialize(**args)
  @logger = args[:logger]
  @details_hash = {}

  @augmenter = args[:augmenter]
  # Anything that is not a Hash with a String 'PK' is reported as a missing augmenter
  # rather than surfacing as a NoMethodError/TypeError.
  augmenter_key = @augmenter.is_a?(Hash) ? @augmenter['PK'] : nil
  valid_augmenter = augmenter_key.is_a?(String) && augmenter_key.start_with?('AUGMENTERS#')
  raise ComparatorError, MSG_MISSING_AUGMENTER unless valid_augmenter

  # Unwrap { 'dmp' => {...} } when present. An explicit `dmp: nil` (or any non-Hash) is kept
  # as-is so that the extraction below rejects it and MSG_MISSING_DMP is raised.
  candidate = args[:dmp]
  wrapped = candidate.is_a?(Hash) ? candidate['dmp'] : nil
  @dmp = wrapped.nil? ? candidate : wrapped

  _extract_dmp_details(dmp: @dmp)
  raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
end
|
34
|
+
|
35
|
+
# Compare the incoming hash with the DMP details that were gathered during initialization.
|
36
|
+
#
|
37
|
+
# The Hash should contain:
|
38
|
+
# {
|
39
|
+
# title: "Example research project",
|
40
|
+
#     abstract: "Lorem ipsum pseudo abstract",
|
41
|
+
#     keywords: ["foo", "bar"],
|
42
|
+
# people: [
|
43
|
+
# {
|
44
|
+
# id: "https://orcid.org/blah",
|
45
|
+
# last_name: "doe",
|
46
|
+
# affiliation: { id: "https://ror.org/blah", name: "Foo" }
|
47
|
+
# }
|
48
|
+
# ],
|
49
|
+
# fundings: [
|
50
|
+
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
51
|
+
# ],
|
52
|
+
# repositories: [
|
53
|
+
# { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
|
54
|
+
# ]
|
55
|
+
# }
|
56
|
+
# Score how likely the incoming record is related to the DMP captured at initialization.
#
# @param hash [Hash] incoming record; read with String keys ('title', 'abstract', 'keywords',
#   'people', 'fundings', 'repositories')
# @return [Hash] { confidence:, score:, notes:, source: } where confidence is one of
#   'None', 'Low', 'Medium', 'High' or 'Absolute'
def compare(hash:)
  response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
  return response unless hash.is_a?(Hash) && !hash['title'].nil?

  # Compare the grant ids. If we have a match return the response immediately since that is
  # a very positive match!
  response = _grants_match?(array: hash['fundings'], response:)
  return response if response[:confidence] != 'None'

  response = _opportunities_match?(array: hash['fundings'], response:)
  response = _orcids_match?(array: hash['people'], response:)
  response = _last_name_and_affiliation_match?(array: hash['people'], response:)

  # Only process the following if we had some matching contributors, affiliations or
  # opportunity numbers. The matchers only ever add to the score, so one check is enough.
  if response[:score].positive?
    response = _repository_match?(array: hash['repositories'], response:)
    # Keywords come from the incoming 'keywords' list (not the repositories)
    response = _keyword_match?(array: hash['keywords'], response:)
    response = _text_match?(type: 'title', text: hash['title'], response:)
    response = _text_match?(type: 'abstract', text: hash['abstract'], response:)
  end

  # If the score is less than 3 then we have no confidence that it is a match
  return response if response[:score] <= 2

  # Set the confidence level based on the score
  response[:confidence] = if response[:score] > 15
                            'High'
                          elsif response[:score] > 10
                            'Medium'
                          else
                            'Low'
                          end
  response
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# Populate @details_hash with the normalized pieces of the DMP used for comparisons.
#
# Does nothing (returns nil, leaving @details_hash untouched) unless the DMP is a Hash
# with both a 'title' and a 'contact'.
#
# @param dmp [Hash] the DMP metadata (String keys)
def _extract_dmp_details(dmp:)
  return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?

  # Tolerate missing keys, explicit nulls and single objects where a list is expected
  projects = [dmp['project']].flatten.grep(Hash)
  datasets = [dmp['dataset']].flatten.grep(Hash)
  # An empty project list must still fall back to the DMP level title/description
  project = projects.first || {}
  dmp_id = dmp['dmp_id'].is_a?(Hash) ? dmp['dmp_id'] : {}

  fundings = projects.map { |proj| proj['funding'] }.flatten.compact.uniq
  hosts = datasets.map { |dset| dset['distribution'] }.flatten.grep(Hash).map { |dist| dist['host'] }
  people = [dmp['contact'], dmp['contributor']]

  # Extract all of the important bits about the DMP
  @details_hash = {
    created: dmp.fetch('created', Time.now.iso8601),
    title: _cleanse_text(text: project['title'] || dmp['title']),
    abstract: _cleanse_text(text: project['description'] || dmp['description']),
    keywords: datasets.map { |dset| dset['keyword'] }.flatten.compact.uniq,
    identifiers: [dmp_id['identifier']],
    last_names: [],
    affiliation_ids: [],
    affiliations: [],
    funder_names: [],
    funder_ids: [],
    opportunity_ids: [],
    grant_ids: [],
    repositories: []
  }
  _extract_people(array: people.flatten.compact.uniq)
  _extract_funding(array: fundings)
  _extract_repositories(repos: hosts.flatten.compact.uniq)

  # Clean up the results by flattening and removing duplicates from the Arrays
  @details_hash.transform_values! { |val| val.is_a?(Array) ? val.flatten.compact.uniq : val }
  @logger&.debug(message: "Extracted the following from the DMP", details: @details_hash)
end
|
119
|
+
|
120
|
+
# Extract all of the funding information
|
121
|
+
# Extract the funder ids/names, opportunity numbers and grant ids into @details_hash.
#
# @param array [Array<Hash>] the DMP's funding entries (non-Hash entries are skipped)
# @return [Array] the incoming array, or [] when it was not an Array
def _extract_funding(array:)
  return [] unless array.is_a?(Array)

  normalize = ->(val) { val.nil? ? nil : val.to_s.downcase.strip }
  # JSON nulls arrive as nil rather than a missing key, so do not rely on fetch defaults
  identifier_of = ->(val) { val.is_a?(Hash) ? val['identifier'] : nil }

  array.each do |funding|
    next unless funding.is_a?(Hash)

    funder_id = funding['funder_id'].is_a?(Hash) ? funding['funder_id'] : {}
    funder_identifier = normalize.call(funder_id['identifier'])
    ror = normalize.call(funder_id['type']) == 'ror' ? funder_identifier : nil
    # Look the ROR up in its normalized form so case/whitespace differences still resolve
    fundref = ror.nil? ? funder_identifier : ROR_FUNDREF_ID_CROSSWALK[ror.to_sym]
    opportunity = normalize.call(identifier_of.call(funding['dmproadmap_funding_opportunity_id']))
    grant = normalize.call(identifier_of.call(funding['grant_id']))
    # Grant ids are sometimes URLs, so also keep the trailing path segment
    grant_suffix = grant&.split('/')&.last&.strip

    @details_hash[:identifiers] << ror
    @details_hash[:identifiers] << normalize.call(fundref)
    @details_hash[:identifiers] << grant
    @details_hash[:identifiers] << grant_suffix
    @details_hash[:identifiers] << opportunity

    @details_hash[:funder_names] << normalize.call(funding['name'])
    @details_hash[:funder_ids] << fundref
    @details_hash[:opportunity_ids] << opportunity
    @details_hash[:grant_ids] << [grant, grant_suffix]
  end
  array
end
|
146
|
+
|
147
|
+
# Extract all of the ORCIDs, last names, and affiliation ids and names
|
148
|
+
# Extract the ids, last names and affiliation ids/names of the contact and contributors
# into @details_hash.
#
# @param array [Array<Hash>] contact/contributor entries (non-Hash entries are skipped)
# @return [Array] the incoming array, or [] when it was not an Array
def _extract_people(array:)
  return [] unless array.is_a?(Array)

  normalize = ->(val) { val.nil? ? nil : val.to_s.downcase.strip }
  # JSON nulls arrive as nil rather than a missing key, so do not rely on fetch defaults
  identifier_of = ->(val) { val.is_a?(Hash) ? val['identifier'] : nil }

  array.each do |entry|
    next unless entry.is_a?(Hash)

    id = normalize.call(identifier_of.call(entry['contributor_id'] || entry['contact_id']))
    affil = entry['dmproadmap_affiliation'].is_a?(Hash) ? entry['dmproadmap_affiliation'] : {}
    ror = normalize.call(identifier_of.call(affil['affiliation_id']))
    name = normalize.call(entry['name']) || ''
    # Names are either "last, first" or "first last"
    last_name = name.include?(', ') ? name.split(', ').first : name.split.last
    # Drop any trailing parenthetical, e.g. "Example University (example.edu)"
    affil_name = normalize.call(affil['name'].to_s.split(' (').first)

    @details_hash[:identifiers] << [id, ror]
    @details_hash[:last_names] << last_name
    @details_hash[:affiliation_ids] << ror
    @details_hash[:affiliations] << affil_name
  end
  array
end
|
167
|
+
|
168
|
+
# Extract all of the re3data ids, URLs and names
|
169
|
+
# Extract the repository URLs, host ids and names into @details_hash.
#
# @param repos [Array<Hash>] the DMP's distribution hosts (non-Hash entries are skipped)
# @return [Array] the incoming array, or [] when it was not an Array
def _extract_repositories(repos:)
  return [] unless repos.is_a?(Array)

  normalize = ->(val) { val.nil? ? nil : val.to_s.downcase.strip }

  repos.each do |repo|
    next unless repo.is_a?(Hash)

    # A JSON null host id arrives as nil (not a missing key), so guard before indexing
    host_id = repo['dmproadmap_host_id']
    host_identifier = host_id.is_a?(Hash) ? host_id['identifier'] : nil

    @details_hash[:identifiers] << [normalize.call(repo['url']), normalize.call(host_identifier)]
    @details_hash[:repositories] << normalize.call(repo.fetch('name', ''))
  end
  repos
end
|
182
|
+
|
183
|
+
# Returns whether or not the incoming grant id(s) match the DMPs grant id. Expecting:
|
184
|
+
# [
|
185
|
+
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
186
|
+
# ]
|
187
|
+
# Flag the response as an 'Absolute' match when any incoming grant id equals one of the
# DMP's grant ids.
#
# @param array [Array<Hash>] incoming fundings, each with a 'grant' Array of ids
# @param response [Hash] the running comparison result (mutated and returned)
# @return [Hash] the response
def _grants_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # Normalize the ids: tolerate non-String values (e.g. numeric grant numbers) and drop
  # blanks so that an empty id can never produce a match
  ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
             .flat_map { |funding| funding['grant'].flatten }
             .compact
             .map { |id| id.to_s.downcase.strip }
             .reject(&:empty?)
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:grant_ids, []), array_b: ids)
  return response if matched <= 0

  response[:confidence] = 'Absolute'
  response[:score] = 100
  response[:notes] << 'the grant ID matched'
  response
end
|
202
|
+
|
203
|
+
# Returns whether or not the incoming grant id(s) match the DMPs opportunity id. Expecting:
|
204
|
+
# [
|
205
|
+
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
206
|
+
# ]
|
207
|
+
# Add 5 to the score when any incoming grant id equals one of the DMP's funding
# opportunity numbers.
#
# @param array [Array<Hash>] incoming fundings, each with a 'grant' Array of ids
# @param response [Hash] the running comparison result (mutated and returned)
# @return [Hash] the response
def _opportunities_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # Normalize the ids: tolerate non-String values and drop blanks so that an empty id
  # can never produce a match
  ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
             .flat_map { |funding| funding['grant'].flatten }
             .compact
             .map { |id| id.to_s.downcase.strip }
             .reject(&:empty?)
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:opportunity_ids, []), array_b: ids)
  return response if matched <= 0

  response[:score] += 5
  response[:notes] << 'the funding opportunity number matched'
  response
end
|
221
|
+
|
222
|
+
# Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
|
223
|
+
# [
|
224
|
+
# {
|
225
|
+
# id: "https://orcid.org/blah",
|
226
|
+
# last_name: "doe",
|
227
|
+
# affiliation: { id: "https://ror.org/blah", name: "Foo" }
|
228
|
+
# }
|
229
|
+
# ]
|
230
|
+
# Add 2 to the score for every incoming person id that is among the DMP's identifiers.
#
# @param array [Array<Hash>] incoming people, each optionally carrying an 'id'
# @param response [Hash] the running comparison result (mutated and returned)
# @return [Hash] the response
def _orcids_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # Tolerate non-String ids and drop blanks so that an empty id can never produce a match
  ids = array.select { |person| person.is_a?(Hash) }
             .map { |person| person['id'] }
             .flatten
             .compact
             .map { |id| id.to_s.downcase.strip }
             .reject(&:empty?)
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
  return response if matched <= 0

  response[:score] += (matched * 2)
  response[:notes] << 'contributor ORCIDs matched'
  response
end
|
244
|
+
|
245
|
+
# Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
|
246
|
+
# [
|
247
|
+
# {
|
248
|
+
# id: "https://orcid.org/blah",
|
249
|
+
# last_name: "doe",
|
250
|
+
# affiliation: { id: "https://ror.org/blah", name: "Foo" }
|
251
|
+
# }
|
252
|
+
# ]
|
253
|
+
# Add 1 to the score for every incoming last name, affiliation ROR and affiliation name
# that also appears on the DMP.
#
# @param array [Array<Hash>] incoming people with 'last_name' and 'affiliation' ({ 'id', 'name' })
# @param response [Hash] the running comparison result (mutated and returned)
# @return [Hash] the response
def _last_name_and_affiliation_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # Tolerate non-String values and drop blanks so that an empty value can never match
  clean = lambda do |values|
    values.flatten.compact.map { |val| val.to_s.downcase.strip }.reject(&:empty?).uniq
  end

  people = array.select { |person| person.is_a?(Hash) }
  # Only Hash affiliations carry an id/name
  affiliations = people.map { |person| person['affiliation'] }.flatten.grep(Hash).uniq
  last_names = clean.call(people.map { |person| person['last_name'] })
  rors = clean.call(affiliations.map { |affil| affil['id'] })
  affil_names = clean.call(affiliations.map { |affil| affil['name'] })

  # Check the person last names and affiliation name and RORs
  last_names_matched = _compare_arrays(array_a: @details_hash.fetch(:last_names, []), array_b: last_names)
  rors_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliation_ids, []), array_b: rors)
  affil_names_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliations, []), array_b: affil_names)
  return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0

  response[:score] += last_names_matched + rors_matched + affil_names_matched
  response[:notes] << 'contributor names and affiliations matched'
  response
end
|
272
|
+
|
273
|
+
# Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
|
274
|
+
# [
|
275
|
+
# { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
|
276
|
+
# ]
|
277
|
+
# Add 1 to the score for every incoming repository id/URL that is among the DMP's identifiers.
#
# @param array [Array<Hash>] incoming repositories; 'id' may be an Array of ids, a single id or absent
# @param response [Hash] the running comparison result (mutated and returned)
# @return [Hash] the response
def _repository_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # We only care about repositories with ids/urls, so entries without an 'id' are skipped
  # (rather than raising) and blank ids are dropped
  ids = array.select { |repo| repo.is_a?(Hash) }
             .flat_map { |repo| [repo['id']].flatten }
             .compact
             .map { |id| id.to_s.downcase.strip }
             .reject(&:empty?)
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
  return response if matched <= 0

  response[:score] += matched
  response[:notes] << 'repositories matched'
  response
end
|
292
|
+
|
293
|
+
# Returns whether or not the list of keywords exist in the DMP. Expecting:
|
294
|
+
# keywords: ["foo", "bar"]
|
295
|
+
# Add 1 to the score when at least one incoming keyword is also a DMP keyword.
#
# @param array [Array<String>] incoming keywords (non-String entries are ignored)
# @param response [Hash] the running comparison result (mutated and returned)
# @return [Hash] the response
def _keyword_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  normalize = lambda do |words|
    words.flatten.grep(String).map { |word| word.downcase.strip }.reject(&:empty?).uniq
  end

  # Normalize BOTH sides: the DMP keywords are stored as authored, so comparing them raw
  # against the lower-cased incoming keywords would miss "Genomics" vs "genomics"
  dmp_keywords = @details_hash.fetch(:keywords, [])
  dmp_keywords = normalize.call(dmp_keywords) if dmp_keywords.is_a?(Array)
  matched = _compare_arrays(array_a: dmp_keywords, array_b: normalize.call(array))
  return response if matched <= 0

  response[:score] += 1
  response[:notes] << 'keywords matched'
  response
end
|
306
|
+
|
307
|
+
# Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
|
308
|
+
# Compare the incoming :text against the DMP's cleansed title/abstract using
# Text::WhiteSimilarity. Adds 5 to the score for a similarity >= 0.75 and 2 for >= 0.5.
#
# @param type [String] which DMP detail to compare against ('title' or 'abstract')
# @param text [String] the incoming text
# @param response [Hash] the running comparison result (mutated and returned)
# @param logger [#debug, nil] optional logger; falls back to the instance's logger
# @return [Hash] the response
def _text_match?(type: 'title', text:, response:, logger: nil)
  dmp_text = @details_hash[type.to_sym]
  return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty? &&
                         !dmp_text.nil?

  cleansed = _cleanse_text(text:)
  nlp_score = Text::WhiteSimilarity.new.similarity(dmp_text, cleansed)

  details = {
    "dmp_#{type}": dmp_text,
    "incoming_#{type}": cleansed,
    nlp_score:
  }
  (logger || @logger)&.debug(message: "Text::WhiteSimilarity score", details:)

  # Written as a positive check so that a non-comparable score such as NaN (every
  # comparison with NaN is false) is treated as "not similar" instead of slipping past
  # a `< 0.5` guard
  return response unless nlp_score.is_a?(Numeric) && nlp_score >= 0.5

  response[:score] += nlp_score >= 0.75 ? 5 : 2
  response[:notes] << "#{type}s are similar"
  response
end
|
327
|
+
|
328
|
+
# Change the incoming text to lower case, remove spaces and STOP_WORDS
|
329
|
+
# Lower-case the text, collapse runs of whitespace to single spaces and drop STOP_WORDS.
# Returns nil for anything that is not a String.
def _cleanse_text(text:)
  return nil unless text.is_a?(String)

  meaningful_words = text.downcase.split - STOP_WORDS
  meaningful_words.join(' ')
end
|
334
|
+
|
335
|
+
# Do an introspection of the 2 arrays and return the number of matches
|
336
|
+
# Count the distinct, non-blank values that appear in both arrays.
#
# nil and empty values are ignored: a missing identifier on both sides is not evidence
# that two records are related.
#
# @param array_a [Array]
# @param array_b [Array]
# @return [Integer] the number of shared values (0 when either argument is not an Array)
def _compare_arrays(array_a: [], array_b: [])
  return 0 unless array_a.is_a?(Array) && array_b.is_a?(Array)

  (array_a & array_b).count do |value|
    !value.nil? && !(value.respond_to?(:empty?) && value.empty?)
  end
end
|
342
|
+
|
343
|
+
# TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
|
344
|
+
# Maps a funder's ROR id (Symbol key, lower case) to its Crossref funder id.
# Every key must be a complete "https://ror.org/<id>" URL because lookups are made with
# the full ROR id taken from the DMP.
ROR_FUNDREF_ID_CROSSWALK = {
  # NIH ID Crosswalk
  "https://ror.org/01cwqze88": "https://doi.org/10.13039/100000002",
  "https://ror.org/04mhx6838": "https://doi.org/10.13039/100000055",
  "https://ror.org/012pb6c26": "https://doi.org/10.13039/100000050",
  "https://ror.org/03wkg3b53": "https://doi.org/10.13039/100000053",
  "https://ror.org/0060t0j89": "https://doi.org/10.13039/100000092",
  "https://ror.org/00372qc85": "https://doi.org/10.13039/100000070",
  "https://ror.org/00190t495": "https://doi.org/10.13039/100008460",
  "https://ror.org/00j4k1h63": "https://doi.org/10.13039/100000066",
  "https://ror.org/01y3zfr79": "https://doi.org/10.13039/100000056",
  "https://ror.org/04q48ey07": "https://doi.org/10.13039/100000057",
  "https://ror.org/0493hgw16": "https://doi.org/10.13039/100006545",
  "https://ror.org/04vfsmv21": "https://doi.org/10.13039/100000098",
  "https://ror.org/03jh5a977": "https://doi.org/10.13039/100000093",
  "https://ror.org/04xeg9z08": "https://doi.org/10.13039/100000025",
  "https://ror.org/01s5ya894": "https://doi.org/10.13039/100000065",
  "https://ror.org/02meqm098": "https://doi.org/10.13039/100000002",
  "https://ror.org/049v75w11": "https://doi.org/10.13039/100000049",
  "https://ror.org/004a2wv92": "https://doi.org/10.13039/100000072",
  "https://ror.org/00adh9b73": "https://doi.org/10.13039/100000062",
  "https://ror.org/043z4tv69": "https://doi.org/10.13039/100000060",
  "https://ror.org/00x19de83": "https://doi.org/10.13039/100000002",
  "https://ror.org/02jzrsm59": "https://doi.org/10.13039/100000027",
  "https://ror.org/006zn3t30": "https://doi.org/10.13039/100000069",
  "https://ror.org/04byxyr05": "https://doi.org/10.13039/100000071",
  "https://ror.org/04pw6fb54": "https://doi.org/10.13039/100006108",
  "https://ror.org/05aq6yn88": "https://doi.org/10.13039/100006955",
  "https://ror.org/02xey9a22": "https://doi.org/10.13039/100000061",
  "https://ror.org/00fj8a872": "https://doi.org/10.13039/100000052",
  "https://ror.org/01wtjyf13": "https://doi.org/10.13039/100000063",
  "https://ror.org/04r5s4b52": "https://doi.org/10.13039/100005440",
  "https://ror.org/046zezr58": "https://doi.org/10.13039/100006085",
  "https://ror.org/02e3wq066": "https://doi.org/10.13039/100006086",
  "https://ror.org/031gy6182": "https://doi.org/10.13039/100000002",
  "https://ror.org/054j5yq82": "https://doi.org/10.13039/100000002",
  "https://ror.org/02yrzyf97": "https://doi.org/10.13039/100000002",

  # NSF ID Crosswalk
  "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
  "https://ror.org/04aqat463": "https://doi.org/10.13039/100000001",
  "https://ror.org/01rcfpa16": "https://doi.org/10.13039/100005441",
  "https://ror.org/014eweh95": "https://doi.org/10.13039/100005445",
  "https://ror.org/001xhss06": "https://doi.org/10.13039/100000076",
  "https://ror.org/04qn9mx93": "https://doi.org/10.13039/100000153",
  "https://ror.org/03g87he71": "https://doi.org/10.13039/100000155",
  "https://ror.org/01tnvpc68": "https://doi.org/10.13039/100000156",
  "https://ror.org/01rvays47": "https://doi.org/10.13039/100000154",
  "https://ror.org/002jdaq33": "https://doi.org/10.13039/100000152",
  "https://ror.org/025kzpk63": "https://doi.org/10.13039/100000083",
  "https://ror.org/04nh1dc89": "https://doi.org/10.13039/100007523",
  "https://ror.org/01mng8331": "https://doi.org/10.13039/100000143",
  "https://ror.org/02rdzmk74": "https://doi.org/10.13039/100000144",
  "https://ror.org/053a2cp42": "https://doi.org/10.13039/100000145",
  "https://ror.org/014bj5w56": "https://doi.org/10.13039/100000081",
  "https://ror.org/00whkrf32": "https://doi.org/10.13039/100000082",
  "https://ror.org/05s7cqk18": "https://doi.org/10.13039/100000173",
  "https://ror.org/02kd4km72": "https://doi.org/10.13039/100000172",
  "https://ror.org/03mamvh39": "https://doi.org/10.13039/100000171",
  "https://ror.org/00b6sbb32": "https://doi.org/10.13039/100000084",
  "https://ror.org/0471zv972": "https://doi.org/10.13039/100000146",
  "https://ror.org/028yd4c30": "https://doi.org/10.13039/100000147",
  "https://ror.org/01krpsy48": "https://doi.org/10.13039/100000148",
  "https://ror.org/050rnw378": "https://doi.org/10.13039/100000149",
  "https://ror.org/0388pet74": "https://doi.org/10.13039/100000150",
  "https://ror.org/03xyg3m20": "https://doi.org/10.13039/100000151",
  "https://ror.org/05p847d66": "https://doi.org/10.13039/100000085",
  "https://ror.org/037gd6g64": "https://doi.org/10.13039/100000159",
  "https://ror.org/05v01mk25": "https://doi.org/10.13039/100000160",
  "https://ror.org/05wqqhv83": "https://doi.org/10.13039/100000141",
  "https://ror.org/05nwjp114": "https://doi.org/10.13039/100007352",
  "https://ror.org/05fnzca26": "https://doi.org/10.13039/100000162",
  "https://ror.org/02trddg58": "https://doi.org/10.13039/100000163",
  "https://ror.org/029b7h395": "https://doi.org/10.13039/100000086",
  "https://ror.org/04mg8wm74": "https://doi.org/10.13039/100000164",
  "https://ror.org/01ar8dr59": "https://doi.org/10.13039/100000165",
  "https://ror.org/01pc7k308": "https://doi.org/10.13039/100000078",
  "https://ror.org/051fftw81": "https://doi.org/10.13039/100000121",
  "https://ror.org/04ap5x931": "https://doi.org/10.13039/100000166",
  "https://ror.org/00apvva27": "https://doi.org/10.13039/100005716",
  "https://ror.org/04nseet23": "https://doi.org/10.13039/100000179",
  "https://ror.org/04k9mqs78": "https://doi.org/10.13039/100000106",
  "https://ror.org/01k638r21": "https://doi.org/10.13039/100000089",
  "https://ror.org/01gmp5538": "https://doi.org/10.13039/100005447",
  "https://ror.org/01vnjbg30": "https://doi.org/10.13039/100005449",
  "https://ror.org/03h7mcc28": "https://doi.org/10.13039/100000088",
  "https://ror.org/05wgkzg12": "https://doi.org/10.13039/100000169",
  "https://ror.org/0445wmv88": "https://doi.org/10.13039/100000170",
  "https://ror.org/02dz2hb46": "https://doi.org/10.13039/100000077",
  "https://ror.org/034m1ez10": "https://doi.org/10.13039/100000107",
  "https://ror.org/02a65dj82": "https://doi.org/10.13039/100005717",
  "https://ror.org/020fhsn68": "https://doi.org/10.13039/100000001",
  "https://ror.org/03z9hh605": "https://doi.org/10.13039/100000174",
  "https://ror.org/04ya3kq71": "https://doi.org/10.13039/100007521",
  "https://ror.org/04evh7y43": "https://doi.org/10.13039/100005443",
  "https://ror.org/04h67aa53": "https://doi.org/10.13039/100000177",
  "https://ror.org/025dabr11": "https://doi.org/10.13039/100005446",
  "https://ror.org/04vw0kz07": "https://doi.org/10.13039/100005448",
  "https://ror.org/054ydxh33": "https://doi.org/10.13039/100005554",
  "https://ror.org/01sharn77": "https://doi.org/10.13039/100006091",
  "https://ror.org/02ch5q898": "https://doi.org/10.13039/100000001",

  # NASA ID Crosswalk
  "https://ror.org/0171mag52": "https://doi.org/10.13039/100006198",
  "https://ror.org/027k65916": "https://doi.org/10.13039/100006196",
  "https://ror.org/027ka1x80": "https://doi.org/10.13039/100000104",
  "https://ror.org/02acart68": "https://doi.org/10.13039/100006195",
  "https://ror.org/059fqnc42": "https://doi.org/10.13039/100006193",
  "https://ror.org/01cyfxe35": "https://doi.org/10.13039/100016595",
  "https://ror.org/04xx4z452": "https://doi.org/10.13039/100006203",
  "https://ror.org/0399mhs52": "https://doi.org/10.13039/100006199",
  "https://ror.org/02epydz83": "https://doi.org/10.13039/100006197",
  "https://ror.org/03j9e2j92": "https://doi.org/10.13039/100006205",
  "https://ror.org/02s42x260": "https://doi.org/10.13039/100000104",
  "https://ror.org/01p7gwa14": "https://doi.org/10.13039/100000104",
  "https://ror.org/01qxmdg18": "https://doi.org/10.13039/100000104",
  "https://ror.org/006ndaj41": "https://doi.org/10.13039/100000104",
  "https://ror.org/03em45j53": "https://doi.org/10.13039/100007346",
  "https://ror.org/045t78n53": "https://doi.org/10.13039/100000104",
  "https://ror.org/00r57r863": "https://doi.org/10.13039/100000104",
  "https://ror.org/0401vze59": "https://doi.org/10.13039/100007726",
  "https://ror.org/04hccab49": "https://doi.org/10.13039/100000104",
  "https://ror.org/04437j066": "https://doi.org/10.13039/100000104",
  "https://ror.org/028b18z22": "https://doi.org/10.13039/100000104",
  "https://ror.org/00ryjtt64": "https://doi.org/10.13039/100000104",

  # DOE ID Crosswalk
  "https://ror.org/01bj3aw27": "https://doi.org/10.13039/100000015",
  "https://ror.org/03q1rgc19": "https://doi.org/10.13039/100006133",
  "https://ror.org/02xznz413": "https://doi.org/10.13039/100006134",
  "https://ror.org/03sk1we31": "https://doi.org/10.13039/100006168",
  "https://ror.org/00f93gc02": "https://doi.org/10.13039/100006177",
  "https://ror.org/05tj7dm33": "https://doi.org/10.13039/100006147",
  "https://ror.org/0012c7r22": "https://doi.org/10.13039/100006192",
  "https://ror.org/00mmn6b08": "https://doi.org/10.13039/100006132",
  "https://ror.org/03ery9d53": "https://doi.org/10.13039/100006120",
  "https://ror.org/033jmdj81": "https://doi.org/10.13039/100000015",
  "https://ror.org/03rd4h240": "https://doi.org/10.13039/100006130",
  "https://ror.org/0054t4769": "https://doi.org/10.13039/100006200",
  "https://ror.org/03eecgp81": "https://doi.org/10.13039/100006174",
  "https://ror.org/00heb4d89": "https://doi.org/10.13039/100006135",
  "https://ror.org/05ek3m339": "https://doi.org/10.13039/100006150",
  "https://ror.org/00km40770": "https://doi.org/10.13039/100006138",
  "https://ror.org/02ah1da87": "https://doi.org/10.13039/100006137",
  "https://ror.org/05hsv7e61": "https://doi.org/10.13039/100000015",
  "https://ror.org/01c9ay627": "https://doi.org/10.13039/100006165",
  "https://ror.org/04z2gev20": "https://doi.org/10.13039/100006183",
  "https://ror.org/02z1qvq09": "https://doi.org/10.13039/100006144",
  "https://ror.org/03jf3w726": "https://doi.org/10.13039/100006186",
  "https://ror.org/04848jz84": "https://doi.org/10.13039/100006142",
  "https://ror.org/04s778r16": "https://doi.org/10.13039/100006171",
  "https://ror.org/04nnxen11": "https://doi.org/10.13039/100000015",
  "https://ror.org/05csy5p27": "https://doi.org/10.13039/100010268",
  "https://ror.org/05efnac71": "https://doi.org/10.13039/100000015"
}.freeze
|
499
|
+
end
|
500
|
+
end
|
data/lib/uc3-dmp-id/creator.rb
CHANGED
@@ -23,23 +23,23 @@ module Uc3DmpId
|
|
23
23
|
raise CreatorError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil?
|
24
24
|
|
25
25
|
# Validate the incoming JSON first
|
26
|
-
json = Helper.parse_json(json:
|
27
|
-
errs = Validator.validate(mode: 'author', json:
|
26
|
+
json = Helper.parse_json(json:)
|
27
|
+
errs = Validator.validate(mode: 'author', json:)
|
28
28
|
raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any? && errs.first != Validator::MSG_VALID_JSON
|
29
29
|
|
30
30
|
# Try to find it by the :dmp_id first and Fail if found
|
31
31
|
dmp_id = Helper.dmp_id_to_pk(json: json.fetch('dmp', {})['dmp_id'])
|
32
|
-
result = Finder.exists?(p_key: dmp_id, logger:
|
32
|
+
result = Finder.exists?(p_key: dmp_id, logger:) unless dmp_id.nil?
|
33
33
|
raise CreatorError, Helper::MSG_DMP_EXISTS if result.is_a?(Hash)
|
34
34
|
|
35
35
|
# raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS unless json['PK'].nil?
|
36
36
|
|
37
37
|
client = Uc3DmpDynamo::Client.new
|
38
|
-
p_key = _preregister_dmp_id(client
|
38
|
+
p_key = _preregister_dmp_id(client:, provenance:, json:, logger:)
|
39
39
|
raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?
|
40
40
|
|
41
41
|
# Add the DMPHub specific attributes and then save
|
42
|
-
annotated = Helper.annotate_dmp_json(provenance
|
42
|
+
annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: json['dmp'])
|
43
43
|
logger.info(message: "Creating DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
44
44
|
|
45
45
|
# Set the :created and :modified timestamps
|
@@ -48,10 +48,10 @@ module Uc3DmpId
|
|
48
48
|
annotated['modified'] = now
|
49
49
|
|
50
50
|
# Create the item
|
51
|
-
resp = client.put_item(json: annotated, logger:
|
51
|
+
resp = client.put_item(json: annotated, logger:)
|
52
52
|
raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
|
53
53
|
|
54
|
-
_post_process(json: annotated, logger:
|
54
|
+
_post_process(json: annotated, logger:)
|
55
55
|
Helper.cleanse_dmp_json(json: JSON.parse({ dmp: annotated }.to_json))
|
56
56
|
end
|
57
57
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
@@ -75,7 +75,7 @@ module Uc3DmpId
|
|
75
75
|
counter = 0
|
76
76
|
while dmp_id == '' && counter <= 10
|
77
77
|
prefix = "#{ENV.fetch('DMP_ID_SHOULDER', nil)}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
|
78
|
-
dmp_id = prefix unless Finder.exists?(client
|
78
|
+
dmp_id = prefix unless Finder.exists?(client:, p_key: prefix)
|
79
79
|
counter += 1
|
80
80
|
end
|
81
81
|
# Something went wrong and it was unable to identify a unique id
|
@@ -94,7 +94,7 @@ module Uc3DmpId
|
|
94
94
|
|
95
95
|
# Publish the change to the EventBridge
|
96
96
|
publisher = Uc3DmpEventBridge::Publisher.new
|
97
|
-
publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:
|
97
|
+
publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:)
|
98
98
|
|
99
99
|
# Determine if there are any related identifiers that we should try to fetch a citation for
|
100
100
|
citable_identifiers = Helper.citable_related_identifiers(dmp: json)
|
@@ -108,7 +108,7 @@ module Uc3DmpId
|
|
108
108
|
}
|
109
109
|
logger.debug(message: 'Fetching citations', details: citable_identifiers) if logger.respond_to?(:debug)
|
110
110
|
publisher.publish(source: 'DmpCreator', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
|
111
|
-
logger:
|
111
|
+
logger:)
|
112
112
|
true
|
113
113
|
end
|
114
114
|
end
|
data/lib/uc3-dmp-id/deleter.rb
CHANGED
@@ -19,7 +19,7 @@ module Uc3DmpId
|
|
19
19
|
|
20
20
|
# Fetch the latest version of the DMP ID by its PK
|
21
21
|
client = Uc3DmpDynamo::Client.new
|
22
|
-
dmp = Finder.by_pk(p_key
|
22
|
+
dmp = Finder.by_pk(p_key:, client:, cleanse: false, logger:)
|
23
23
|
raise DeleterError, Helper::MSG_DMP_NOT_FOUND unless dmp.is_a?(Hash) && !dmp['dmp'].nil?
|
24
24
|
|
25
25
|
# Only allow this if the provenance is the owner of the DMP!
|
@@ -38,16 +38,16 @@ module Uc3DmpId
|
|
38
38
|
dmp['dmp']['dmphub_tombstoned_at'] = now
|
39
39
|
|
40
40
|
# Create the Tombstone version
|
41
|
-
resp = client.put_item(json: dmp['dmp'], logger:
|
41
|
+
resp = client.put_item(json: dmp['dmp'], logger:)
|
42
42
|
raise DeleterError, Helper::MSG_DMP_NO_TOMBSTONE if resp.nil?
|
43
43
|
|
44
44
|
# Delete the Latest version
|
45
|
-
client.delete_item(p_key
|
45
|
+
client.delete_item(p_key:, s_key: Helper::DMP_LATEST_VERSION, logger:)
|
46
46
|
|
47
47
|
# TODO: We should do a check here to see if it was successful!
|
48
48
|
|
49
49
|
# Notify EZID about the removal
|
50
|
-
_post_process(json: dmp, logger:
|
50
|
+
_post_process(json: dmp, logger:)
|
51
51
|
|
52
52
|
# Return the tombstoned record
|
53
53
|
Helper.cleanse_dmp_json(json: dmp)
|
@@ -66,7 +66,7 @@ module Uc3DmpId
|
|
66
66
|
|
67
67
|
# Publish the change to the EventBridge
|
68
68
|
publisher = Uc3DmpEventBridge::Publisher.new
|
69
|
-
publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:
|
69
|
+
publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:)
|
70
70
|
true
|
71
71
|
end
|
72
72
|
end
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -20,15 +20,15 @@ module Uc3DmpId
|
|
20
20
|
# TODO: Replace this with ElasticSearch
|
21
21
|
def search_dmps(args:, logger: nil)
|
22
22
|
client = Uc3DmpDynamo::Client.new
|
23
|
-
return _by_owner(owner_org: args['owner_orcid'], client
|
23
|
+
return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
|
24
24
|
|
25
25
|
unless args['owner_org_ror'].nil?
|
26
|
-
return _by_owner_org(owner_org: args['owner_org_ror'], client
|
27
|
-
logger:
|
26
|
+
return _by_owner_org(owner_org: args['owner_org_ror'], client:,
|
27
|
+
logger:)
|
28
28
|
end
|
29
29
|
unless args['modification_day'].nil?
|
30
|
-
return _by_mod_day(day: args['modification_day'], client
|
31
|
-
logger:
|
30
|
+
return _by_mod_day(day: args['modification_day'], client:,
|
31
|
+
logger:)
|
32
32
|
end
|
33
33
|
|
34
34
|
[]
|
@@ -38,20 +38,20 @@ module Uc3DmpId
|
|
38
38
|
# -------------------------------------------------------------------------
|
39
39
|
# rubocop:disable Metrics/AbcSize
|
40
40
|
def by_json(json:, client: nil, cleanse: true, logger: nil)
|
41
|
-
json = Helper.parse_json(json:
|
41
|
+
json = Helper.parse_json(json:)&.fetch('dmp', {})
|
42
42
|
raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
|
43
43
|
|
44
44
|
p_key = json['PK']
|
45
45
|
# Translate the incoming :dmp_id into a PK
|
46
46
|
p_key = Helper.dmp_id_to_pk(json: json.fetch('dmp_id', {})) if p_key.nil?
|
47
|
-
client =
|
47
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
48
48
|
|
49
49
|
# TODO: Re-enable this once we figure out Dynamo indexes
|
50
50
|
# find_by_dmphub_provenance_id -> if no PK and no dmp_id result
|
51
51
|
# return by_provenance_identifier(json: json, client: client, logger: logger) if p_key.nil?
|
52
52
|
|
53
53
|
# find_by_PK
|
54
|
-
p_key.nil? ? nil : by_pk(p_key
|
54
|
+
p_key.nil? ? nil : by_pk(p_key:, s_key: json['SK'], client:, cleanse:, logger:)
|
55
55
|
end
|
56
56
|
# rubocop:enable Metrics/AbcSize
|
57
57
|
|
@@ -62,20 +62,20 @@ module Uc3DmpId
|
|
62
62
|
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
63
63
|
|
64
64
|
s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
|
65
|
-
client =
|
65
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
66
66
|
resp = client.get_item(
|
67
67
|
key: {
|
68
|
-
PK: Helper.append_pk_prefix(p_key:
|
69
|
-
SK: Helper.append_sk_prefix(s_key:
|
68
|
+
PK: Helper.append_pk_prefix(p_key:),
|
69
|
+
SK: Helper.append_sk_prefix(s_key:)
|
70
70
|
},
|
71
|
-
logger:
|
71
|
+
logger:
|
72
72
|
)
|
73
73
|
return resp unless resp.is_a?(Hash)
|
74
74
|
|
75
75
|
dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
|
76
76
|
return nil if dmp['dmp']['PK'].nil?
|
77
77
|
|
78
|
-
dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp
|
78
|
+
dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp:, client:, logger:) if cleanse
|
79
79
|
cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
|
80
80
|
end
|
81
81
|
# rubocop:enable Metrics/AbcSize
|
@@ -85,13 +85,13 @@ module Uc3DmpId
|
|
85
85
|
def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil)
|
86
86
|
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
87
87
|
|
88
|
-
client =
|
88
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
89
89
|
client.pk_exists?(
|
90
90
|
key: {
|
91
|
-
PK: Helper.append_pk_prefix(p_key:
|
92
|
-
SK: Helper.append_sk_prefix(s_key:
|
91
|
+
PK: Helper.append_pk_prefix(p_key:),
|
92
|
+
SK: Helper.append_sk_prefix(s_key:)
|
93
93
|
},
|
94
|
-
logger:
|
94
|
+
logger:
|
95
95
|
)
|
96
96
|
end
|
97
97
|
|
@@ -115,15 +115,15 @@ module Uc3DmpId
|
|
115
115
|
filter_expression: 'SK = :version',
|
116
116
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
117
117
|
}
|
118
|
-
client =
|
119
|
-
resp = client.query(args
|
118
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
119
|
+
resp = client.query(args:, logger:)
|
120
120
|
return resp unless resp.is_a?(Hash)
|
121
121
|
|
122
122
|
dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
|
123
123
|
return nil if dmp['dmp']['PK'].nil?
|
124
124
|
|
125
125
|
# If we got a hit, fetch the DMP and return it.
|
126
|
-
by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse
|
126
|
+
by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse:, logger:)
|
127
127
|
end
|
128
128
|
# rubocop:enable Metrics/AbcSize
|
129
129
|
|
@@ -149,8 +149,8 @@ module Uc3DmpId
|
|
149
149
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
150
150
|
}
|
151
151
|
logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
|
152
|
-
client =
|
153
|
-
_process_search_response(response: client.query(args
|
152
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
153
|
+
_process_search_response(response: client.query(args:, logger:))
|
154
154
|
end
|
155
155
|
|
156
156
|
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
|
@@ -174,8 +174,8 @@ module Uc3DmpId
|
|
174
174
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
175
175
|
}
|
176
176
|
logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
|
177
|
-
client =
|
178
|
-
_process_search_response(response: client.query(args
|
177
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
178
|
+
_process_search_response(response: client.query(args:, logger:))
|
179
179
|
end
|
180
180
|
|
181
181
|
# Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
|
@@ -195,8 +195,8 @@ module Uc3DmpId
|
|
195
195
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
196
196
|
}
|
197
197
|
logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
|
198
|
-
client =
|
199
|
-
_process_search_response(response: client.query(args
|
198
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
199
|
+
_process_search_response(response: client.query(args:, logger:))
|
200
200
|
end
|
201
201
|
|
202
202
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
data/lib/uc3-dmp-id/helper.rb
CHANGED
@@ -7,17 +7,17 @@ module Uc3DmpId
|
|
7
7
|
# Helper functions for working with DMP IDs
|
8
8
|
class Helper
|
9
9
|
PK_DMP_PREFIX = 'DMP#'
|
10
|
-
PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
|
10
|
+
PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
|
11
11
|
|
12
12
|
SK_DMP_PREFIX = 'VERSION#'
|
13
|
-
SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}
|
13
|
+
SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/
|
14
14
|
|
15
15
|
# TODO: Verify the assumed structure of the DOI is valid
|
16
|
-
DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
|
17
|
-
URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
|
16
|
+
DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
|
17
|
+
URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
|
18
18
|
|
19
|
-
DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
|
20
|
-
DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
|
19
|
+
DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest".freeze
|
20
|
+
DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone".freeze
|
21
21
|
|
22
22
|
DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
|
23
23
|
DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
|
@@ -41,7 +41,7 @@ module Uc3DmpId
|
|
41
41
|
# Append the PK prefix for the object
|
42
42
|
# -------------------------------------------------------------------------------------
|
43
43
|
def append_pk_prefix(p_key:)
|
44
|
-
p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:
|
44
|
+
p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:)}" : nil
|
45
45
|
end
|
46
46
|
|
47
47
|
# Strip off the PK prefix
|
@@ -53,7 +53,7 @@ module Uc3DmpId
|
|
53
53
|
# Append the SK prefix for the object
|
54
54
|
# -------------------------------------------------------------------------------------
|
55
55
|
def append_sk_prefix(s_key:)
|
56
|
-
s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:
|
56
|
+
s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:)}" : nil
|
57
57
|
end
|
58
58
|
|
59
59
|
# Strip off the SK prefix
|
@@ -82,7 +82,7 @@ module Uc3DmpId
|
|
82
82
|
return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
|
83
83
|
|
84
84
|
dmp_id = dmp_id.gsub('doi:', '')
|
85
|
-
dmp_id = dmp_id
|
85
|
+
dmp_id = dmp_id[1..dmp_id.length] if dmp_id.start_with?('/')
|
86
86
|
base_domain = with_protocol ? dmp_id_base_url : dmp_id_base_url.gsub(%r{https?://}, '')
|
87
87
|
"#{base_domain}#{dmp_id}"
|
88
88
|
end
|
@@ -95,7 +95,7 @@ module Uc3DmpId
|
|
95
95
|
p_key = param if param.start_with?(dmp_id_base_url) || param.start_with?(base_domain)
|
96
96
|
p_key = CGI.unescape(p_key.nil? ? param : p_key)
|
97
97
|
p_key = format_dmp_id(value: p_key)
|
98
|
-
append_pk_prefix(p_key:
|
98
|
+
append_pk_prefix(p_key:)
|
99
99
|
end
|
100
100
|
|
101
101
|
# Append the :PK prefix to the :dmp_id
|
@@ -115,7 +115,7 @@ module Uc3DmpId
|
|
115
115
|
|
116
116
|
{
|
117
117
|
type: 'doi',
|
118
|
-
identifier: format_dmp_id(value: remove_pk_prefix(p_key:
|
118
|
+
identifier: format_dmp_id(value: remove_pk_prefix(p_key:), with_protocol: true)
|
119
119
|
}
|
120
120
|
end
|
121
121
|
|
@@ -180,7 +180,7 @@ module Uc3DmpId
|
|
180
180
|
# Add DMPHub specific fields to the DMP ID JSON
|
181
181
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
182
182
|
def annotate_dmp_json(provenance:, p_key:, json:)
|
183
|
-
json = parse_json(json:
|
183
|
+
json = parse_json(json:)
|
184
184
|
bool_vals = [1, '1', true, 'true', 'yes']
|
185
185
|
return json if provenance.nil? || p_key.nil? || !json.is_a?(Hash)
|
186
186
|
|
@@ -189,14 +189,14 @@ module Uc3DmpId
|
|
189
189
|
return json if id != p_key && !json['PK'].nil?
|
190
190
|
|
191
191
|
annotated = deep_copy_dmp(obj: json)
|
192
|
-
annotated['PK'] = json['PK'] || append_pk_prefix(p_key:
|
192
|
+
annotated['PK'] = json['PK'] || append_pk_prefix(p_key:)
|
193
193
|
annotated['SK'] = DMP_LATEST_VERSION
|
194
194
|
|
195
195
|
# Ensure that the :dmp_id matches the :PK
|
196
196
|
annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
|
197
197
|
|
198
|
-
owner_id = extract_owner_id(json:
|
199
|
-
owner_org = extract_owner_org(json:
|
198
|
+
owner_id = extract_owner_id(json:)
|
199
|
+
owner_org = extract_owner_org(json:)
|
200
200
|
|
201
201
|
# Set the :dmproadmap_featured flag appropriately
|
202
202
|
featured = annotated.fetch('dmproadmap_featured', 'no')
|
@@ -219,7 +219,7 @@ module Uc3DmpId
|
|
219
219
|
annotated['dmphub_provenance_identifier'] = annotated.fetch('dmproadmap_links', {})['get']
|
220
220
|
else
|
221
221
|
annotated['dmphub_provenance_identifier'] = format_provenance_id(
|
222
|
-
provenance
|
222
|
+
provenance:, value: json.fetch('dmp_id', {})['identifier']
|
223
223
|
)
|
224
224
|
end
|
225
225
|
annotated
|
data/lib/uc3-dmp-id/updater.rb
CHANGED
@@ -16,18 +16,18 @@ module Uc3DmpId
|
|
16
16
|
def update(provenance:, p_key:, json: {}, note: nil, logger: nil)
|
17
17
|
raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
|
18
18
|
|
19
|
-
mods = Helper.parse_json(json:
|
20
|
-
p_key = Helper.append_pk_prefix(p_key:
|
19
|
+
mods = Helper.parse_json(json:).fetch('dmp', {})
|
20
|
+
p_key = Helper.append_pk_prefix(p_key:)
|
21
21
|
logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
|
22
22
|
|
23
23
|
# Fetch the latest version of the DMP ID
|
24
24
|
client = Uc3DmpDynamo::Client.new
|
25
|
-
latest_version = Finder.by_pk(p_key
|
26
|
-
latest_version = latest_version
|
25
|
+
latest_version = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
|
26
|
+
latest_version = latest_version.fetch('dmp', {}) unless latest_version['dmp'].nil?
|
27
27
|
logger.debug(message: "Latest version for PK #{p_key}", details: latest_version) if logger.respond_to?(:debug)
|
28
28
|
|
29
29
|
# Verify that the DMP ID is updateable with the info passed in
|
30
|
-
errs = _updateable?(provenance
|
30
|
+
errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
|
31
31
|
mods: mods['dmp'])
|
32
32
|
logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
|
33
33
|
raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
|
@@ -37,35 +37,35 @@ module Uc3DmpId
|
|
37
37
|
# Version the DMP ID record (if applicable).
|
38
38
|
owner = latest_version['dmphub_provenance_id']
|
39
39
|
updater = provenance['PK']
|
40
|
-
version = Versioner.generate_version(client
|
41
|
-
updater
|
40
|
+
version = Versioner.generate_version(client:, latest_version:, owner:,
|
41
|
+
updater:, logger:)
|
42
42
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
|
43
43
|
|
44
44
|
# Remove the version info because we don't want to save it on the record
|
45
45
|
version.delete('dmphub_versions')
|
46
46
|
|
47
47
|
# Splice the assertions
|
48
|
-
version = _process_modifications(owner
|
49
|
-
logger:
|
48
|
+
version = _process_modifications(owner:, updater:, version:, mods:, note:,
|
49
|
+
logger:)
|
50
50
|
# Set the :modified timestamps
|
51
51
|
now = Time.now.utc
|
52
52
|
version['modified'] = now.iso8601
|
53
53
|
version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
|
54
54
|
|
55
55
|
# Save the changes
|
56
|
-
resp = client.put_item(json: version, logger:
|
56
|
+
resp = client.put_item(json: version, logger:)
|
57
57
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
|
58
58
|
|
59
59
|
# Send the updates to EZID
|
60
|
-
_post_process(provenance
|
60
|
+
_post_process(provenance:, json: version, logger:)
|
61
61
|
|
62
62
|
# Return the new version record
|
63
63
|
logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
64
64
|
|
65
65
|
# Append the :dmphub_versions Array
|
66
66
|
json = JSON.parse({ dmp: version }.to_json)
|
67
|
-
json = Versioner.append_versions(p_key
|
68
|
-
Helper.cleanse_dmp_json(json:
|
67
|
+
json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
|
68
|
+
Helper.cleanse_dmp_json(json:)
|
69
69
|
end
|
70
70
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
71
71
|
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -77,21 +77,21 @@ module Uc3DmpId
|
|
77
77
|
raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
|
78
78
|
|
79
79
|
# fetch the existing latest version of the DMP ID
|
80
|
-
client = Uc3DmpDynamo::Client.new(logger:
|
81
|
-
dmp = Finder.by_pk(p_key
|
80
|
+
client = Uc3DmpDynamo::Client.new(logger:)
|
81
|
+
dmp = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
|
82
82
|
logger.info(message: 'Existing latest record', details: dmp) if logger.respond_to?(:debug)
|
83
83
|
raise UpdaterError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil? &&
|
84
84
|
provenance['PK'] == dmp['dmp']['dmphub_provenance_id']
|
85
85
|
|
86
86
|
# Add the download URL for the PDF as a related identifier on the DMP ID record
|
87
|
-
annotated = Helper.annotate_dmp_json(provenance
|
87
|
+
annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: dmp['dmp'])
|
88
88
|
annotated['dmproadmap_related_identifiers'] = [] if annotated['dmproadmap_related_identifiers'].nil?
|
89
89
|
annotated['dmproadmap_related_identifiers'] << JSON.parse({
|
90
90
|
descriptor: 'is_metadata_for', work_type: 'output_management_plan', type: 'url', identifier: url
|
91
91
|
}.to_json)
|
92
92
|
|
93
93
|
# Save the changes without creating a new version!
|
94
|
-
resp = client.put_item(json: annotated, logger:
|
94
|
+
resp = client.put_item(json: annotated, logger:)
|
95
95
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
|
96
96
|
|
97
97
|
logger.info(message: "Added DMP ID narrative for PK: #{p_key}, Narrative: #{url}") if logger.respond_to?(:debug)
|
@@ -111,8 +111,9 @@ module Uc3DmpId
|
|
111
111
|
return [Helper::MSG_DMP_FORBIDDEN] unless provenance.is_a?(Hash) && !provenance['PK'].nil?
|
112
112
|
# Verify that the JSON is for the same DMP in the PK
|
113
113
|
return [Helper::MSG_DMP_FORBIDDEN] unless Helper.dmp_id_to_pk(json: mods.fetch('dmp_id', {})) == p_key
|
114
|
+
|
114
115
|
# Bail out if the DMP ID could not be found or the PKs do not match for some reason
|
115
|
-
|
116
|
+
[Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
|
116
117
|
end
|
117
118
|
# rubocop:enable Metrics/AbcSize
|
118
119
|
|
@@ -123,14 +124,14 @@ module Uc3DmpId
|
|
123
124
|
|
124
125
|
updated = if owner == updater
|
125
126
|
# Splice together any assertions that may have been made while the user was editing the DMP ID
|
126
|
-
Asserter.splice(latest_version: version, modified_version: mods, logger:
|
127
|
+
Asserter.splice(latest_version: version, modified_version: mods, logger:)
|
127
128
|
else
|
128
129
|
# Attach the incoming changes as an assertion to the DMP ID since the updater is NOT the owner
|
129
|
-
Asserter.add(updater
|
130
|
-
logger:
|
130
|
+
Asserter.add(updater:, latest_version: version, modified_version: mods, note:,
|
131
|
+
logger:)
|
131
132
|
end
|
132
133
|
|
133
|
-
_merge_versions(latest_version: version, mods: updated, logger:
|
134
|
+
_merge_versions(latest_version: version, mods: updated, logger:)
|
134
135
|
end
|
135
136
|
# rubocop:enable Metrics/ParameterLists
|
136
137
|
|
@@ -172,7 +173,7 @@ module Uc3DmpId
|
|
172
173
|
logger.debug(message: 'Sending event for EZID publication',
|
173
174
|
details: json)
|
174
175
|
end
|
175
|
-
publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:
|
176
|
+
publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:) if publishable
|
176
177
|
|
177
178
|
# Determine if there are any related identifiers that we should try to fetch a citation for
|
178
179
|
citable_identifiers = Helper.citable_related_identifiers(dmp: json)
|
@@ -189,7 +190,7 @@ module Uc3DmpId
|
|
189
190
|
details: citable_identifiers)
|
190
191
|
end
|
191
192
|
publisher.publish(source: 'DmpUpdater', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
|
192
|
-
logger:
|
193
|
+
logger:)
|
193
194
|
true
|
194
195
|
end
|
195
196
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
data/lib/uc3-dmp-id/validator.rb
CHANGED
@@ -23,11 +23,11 @@ module Uc3DmpId
|
|
23
23
|
# Validate the specified DMP's :json against the schema for the specified :mode
|
24
24
|
# rubocop:disable Metrics/AbcSize
|
25
25
|
def validate(mode:, json:)
|
26
|
-
json = Helper.parse_json(json:
|
26
|
+
json = Helper.parse_json(json:)
|
27
27
|
return [MSG_EMPTY_JSON] if json.nil? || !VALIDATION_MODES.include?(mode)
|
28
28
|
|
29
29
|
# Load the appropriate JSON schema for the mode
|
30
|
-
schema = _load_schema(mode:
|
30
|
+
schema = _load_schema(mode:)
|
31
31
|
return [MSG_NO_SCHEMA] if schema.nil?
|
32
32
|
|
33
33
|
# Validate the JSON
|
data/lib/uc3-dmp-id/version.rb
CHANGED
data/lib/uc3-dmp-id/versioner.rb
CHANGED
@@ -18,13 +18,13 @@ module Uc3DmpId
|
|
18
18
|
|
19
19
|
args = {
|
20
20
|
key_conditions: {
|
21
|
-
PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:
|
21
|
+
PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:)], comparison_operator: 'EQ' }
|
22
22
|
},
|
23
23
|
projection_expression: 'modified',
|
24
24
|
scan_index_forward: false
|
25
25
|
}
|
26
|
-
client =
|
27
|
-
client.query(args
|
26
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
27
|
+
client.query(args:, logger:)
|
28
28
|
end
|
29
29
|
|
30
30
|
# Generate a snapshot of the current latest version of the DMP ID using the existing :modified as
|
@@ -57,8 +57,8 @@ module Uc3DmpId
|
|
57
57
|
prior['SK'] = "#{Helper::SK_DMP_PREFIX}#{latest_version['modified'] || Time.now.utc.iso8601}"
|
58
58
|
|
59
59
|
# Create the prior version record ()
|
60
|
-
client =
|
61
|
-
resp = client.put_item(json: prior, logger:
|
60
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
61
|
+
resp = client.put_item(json: prior, logger:)
|
62
62
|
return nil if resp.nil?
|
63
63
|
|
64
64
|
msg = "#{SOURCE} created version PK: #{prior['PK']} SK: #{prior['SK']}"
|
@@ -74,7 +74,7 @@ module Uc3DmpId
|
|
74
74
|
json = Helper.parse_json(json: dmp)
|
75
75
|
return json unless p_key.is_a?(String) && !p_key.strip.empty? && json.is_a?(Hash) && !json['dmp'].nil?
|
76
76
|
|
77
|
-
results = get_versions(p_key
|
77
|
+
results = get_versions(p_key:, client:, logger:)
|
78
78
|
return json unless results.length > 1
|
79
79
|
|
80
80
|
# TODO: we may want to include milliseconds in the future if we get increased volume so that
|
@@ -82,7 +82,7 @@ module Uc3DmpId
|
|
82
82
|
versions = results.map do |ver|
|
83
83
|
next if ver['modified'].nil?
|
84
84
|
|
85
|
-
base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:
|
85
|
+
base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:)}"
|
86
86
|
{
|
87
87
|
timestamp: ver['modified'],
|
88
88
|
url: dmp['dmp']['modified'] == ver['modified'] ? base_url : "#{base_url}?version=#{ver['modified']}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: text
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: uc3-dmp-dynamo
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,6 +90,7 @@ files:
|
|
76
90
|
- README.md
|
77
91
|
- lib/uc3-dmp-id.rb
|
78
92
|
- lib/uc3-dmp-id/asserter.rb
|
93
|
+
- lib/uc3-dmp-id/comparator.rb
|
79
94
|
- lib/uc3-dmp-id/creator.rb
|
80
95
|
- lib/uc3-dmp-id/deleter.rb
|
81
96
|
- lib/uc3-dmp-id/finder.rb
|
@@ -91,7 +106,7 @@ licenses:
|
|
91
106
|
- MIT
|
92
107
|
metadata:
|
93
108
|
rubygems_mfa_required: 'false'
|
94
|
-
post_install_message:
|
109
|
+
post_install_message:
|
95
110
|
rdoc_options: []
|
96
111
|
require_paths:
|
97
112
|
- lib
|
@@ -100,15 +115,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
100
115
|
requirements:
|
101
116
|
- - ">="
|
102
117
|
- !ruby/object:Gem::Version
|
103
|
-
version: '2
|
118
|
+
version: '3.2'
|
104
119
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
120
|
requirements:
|
106
121
|
- - ">="
|
107
122
|
- !ruby/object:Gem::Version
|
108
123
|
version: '0'
|
109
124
|
requirements: []
|
110
|
-
rubygems_version: 3.
|
111
|
-
signing_key:
|
125
|
+
rubygems_version: 3.4.10
|
126
|
+
signing_key:
|
112
127
|
specification_version: 4
|
113
128
|
summary: DMPTool gem that provides support for DMP ID records
|
114
129
|
test_files: []
|