uc3-dmp-id 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/lib/uc3-dmp-id/asserter.rb +19 -9
- data/lib/uc3-dmp-id/comparator.rb +500 -0
- data/lib/uc3-dmp-id/creator.rb +10 -10
- data/lib/uc3-dmp-id/deleter.rb +5 -5
- data/lib/uc3-dmp-id/finder.rb +26 -26
- data/lib/uc3-dmp-id/helper.rb +16 -16
- data/lib/uc3-dmp-id/updater.rb +25 -24
- data/lib/uc3-dmp-id/validator.rb +2 -2
- data/lib/uc3-dmp-id/version.rb +1 -1
- data/lib/uc3-dmp-id/versioner.rb +7 -7
- metadata +22 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51832c144e5663dc01c805f92e81e50e63f2ce00200a8cad5525b7a34c9d7eb9
|
4
|
+
data.tar.gz: e197deb7f608ef478716a8aea113e853ab0d3903f1496f970a73d31d30b7e892
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31bc5d1bb73176c2afff25715590c50d0612f558880f2bde3a750ac5ea8d49674c5dd9b00eff36960454c972a1a31aca6b4f853a98e995dd65977e1f82903ad2
|
7
|
+
data.tar.gz: 86153dfeebc52570ecb83a012f4e7242e9bec607627c2dcefc5beb101a711199bc346c1388045127df89f13e65c83aa473c0fc77e37810d436ea939101b74f2b
|
data/README.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
1
|
# Uc3DmpId
|
2
2
|
|
3
3
|
Helper methods for working with DMP ID JSON records
|
4
|
+
|
5
|
+
After you have made changes, be sure to increment the version number in `lib/uc3-dmp-id/version.rb`.
|
6
|
+
|
7
|
+
To build and push this gem to RubyGems:
|
8
|
+
- Make sure you are logged into RubyGems in your terminal window (see their docs)
|
9
|
+
- Run `gem build uc3-dmp-id.gemspec` to build the gem
|
10
|
+
- Run `gem push uc3-dmp-id-[version].gem` to publish to RubyGems
|
11
|
+
|
12
|
+
After you have pushed a new version to RubyGems, you should rebuild and redeploy the AWS SAM application.
|
data/lib/uc3-dmp-id/asserter.rb
CHANGED
@@ -28,13 +28,13 @@ module Uc3DmpId
|
|
28
28
|
related_works = modified_version.fetch('dmproadmap_related_identifiers', [])
|
29
29
|
|
30
30
|
if related_works.any?
|
31
|
-
latest_version = _add_related_identifier(updater
|
32
|
-
identifiers: related_works, note
|
31
|
+
latest_version = _add_related_identifier(updater:, latest_version:,
|
32
|
+
identifiers: related_works, note:, logger:)
|
33
33
|
end
|
34
34
|
return latest_version unless !funding.nil? && funding.any?
|
35
35
|
|
36
|
-
_add_funding_mod(updater
|
37
|
-
note
|
36
|
+
_add_funding_mod(updater:, latest_version:, funding:,
|
37
|
+
note:, logger:)
|
38
38
|
end
|
39
39
|
# rubocop:enable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
40
40
|
|
@@ -106,7 +106,7 @@ module Uc3DmpId
|
|
106
106
|
end
|
107
107
|
|
108
108
|
latest_version['dmproadmap_related_identifiers'] = [] if latest_version['dmproadmap_related_identifiers'].nil?
|
109
|
-
assertion = _generate_assertion(updater
|
109
|
+
assertion = _generate_assertion(updater:, note:,
|
110
110
|
mods: JSON.parse({ dmproadmap_related_identifiers: additions }.to_json))
|
111
111
|
if logger.respond_to?(:debug)
|
112
112
|
logger.debug(message: 'Adding change to :dmphub_modifications.',
|
@@ -151,7 +151,7 @@ module Uc3DmpId
|
|
151
151
|
latest_version['dmphub_modifications'] = [] if latest_version['dmphub_modifications'].nil?
|
152
152
|
mod = JSON.parse({ funding: fund }.to_json)
|
153
153
|
mod['funding']['funding_status'] = 'granted'
|
154
|
-
assertion = _generate_assertion(updater
|
154
|
+
assertion = _generate_assertion(updater:, mods: mod, note:)
|
155
155
|
if logger.respond_to?(:debug)
|
156
156
|
logger.debug(message: 'Adding change to :dmphub_modifications.',
|
157
157
|
details: assertion)
|
@@ -168,7 +168,8 @@ module Uc3DmpId
|
|
168
168
|
# "id": "ABCD1234",
|
169
169
|
# "provenance": "dmphub",
|
170
170
|
# "timestamp": "2023-07-07T14:50:23+00:00",
|
171
|
-
# "note": "
|
171
|
+
# "note": "Data received from OpenAlex, matched by PI names and title keywords.",
|
172
|
+
# "confidence": "Med",
|
172
173
|
# "dmproadmap_related_identifiers": {
|
173
174
|
# "work_type": "article",
|
174
175
|
# "descriptor": "is_cited_by",
|
@@ -183,7 +184,8 @@ module Uc3DmpId
|
|
183
184
|
# "id": "ABCD1234",
|
184
185
|
# "provenance": "dmphub",
|
185
186
|
# "timestamp": "2023-07-07T14:50:23+00:00",
|
186
|
-
# "note": "
|
187
|
+
# "note": "Data received from the NIH API, matched by the opportunity number.",
|
188
|
+
# "confidence": "High",
|
187
189
|
# "funding": {
|
188
190
|
# "funding_status": "granted",
|
189
191
|
# "grant_id": {
|
@@ -200,11 +202,19 @@ module Uc3DmpId
|
|
200
202
|
provenance: updater.gsub('PROVENANCE#', ''),
|
201
203
|
timestamp: Time.now.utc.iso8601,
|
202
204
|
status: 'pending',
|
203
|
-
note:
|
205
|
+
note:
|
204
206
|
}
|
205
207
|
mods.each_pair { |key, val| assertion[key] = val }
|
206
208
|
JSON.parse(assertion.to_json)
|
207
209
|
end
|
208
210
|
end
|
211
|
+
|
212
|
+
# Placeholder: the body is empty, so this currently returns nil for any :latest_version / :work.
# TODO(review): intended scoring behavior is not visible here - confirm before relying on it.
def _score_related_work(latest_version:, work:)
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
# Placeholder: the body is empty, so this currently returns nil for any :latest_version / :funding.
# TODO(review): intended scoring behavior is not visible here - confirm before relying on it.
def _score_funding(latest_version:, funding:)
|
217
|
+
|
218
|
+
end
|
209
219
|
end
|
210
220
|
end
|
@@ -0,0 +1,500 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'text'
|
4
|
+
|
5
|
+
module Uc3DmpId
  # Raised when a Comparator cannot be built from the supplied augmenter or DMP
  class ComparatorError < StandardError; end

  # Class that compares incoming data from an external source to the DMP.
  # It determines if they are likely related and applies a confidence rating.
  class Comparator
    MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
    MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'

    # Words that are removed from titles and abstracts before they are compared
    STOP_WORDS = %w[a an and if of or the then they].freeze

    # See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
    # Some APIs do not support ROR fully for funder ids, so we need to be able to reference both

    attr_accessor :augmenter, :dmp, :details_hash, :logger

    # Expects:
    #   augmenter: a Hash whose 'PK' starts with 'AUGMENTERS#' (its 'name' is reported as the :source)
    #   dmp:       the DMP as a Hash, either bare or wrapped as `{ 'dmp' => { ... } }`
    #   logger:    (optional) an object that responds to :debug
    #
    # Raises a ComparatorError if the augmenter is missing/invalid or if the DMP is not a Hash that
    # has at least a 'title' and a 'contact'.
    def initialize(**args)
      @logger = args[:logger]
      @details_hash = {}

      @augmenter = args[:augmenter]
      raise ComparatorError, MSG_MISSING_AUGMENTER unless _valid_augmenter?(@augmenter)

      # Accept both the wrapped and the bare form. A nil or non-Hash :dmp falls through to the
      # MSG_MISSING_DMP error below instead of blowing up with a NoMethodError.
      incoming = args[:dmp]
      wrapped = incoming.is_a?(Hash) ? incoming['dmp'] : nil
      @dmp = wrapped.nil? ? incoming : wrapped
      _extract_dmp_details(dmp: @dmp)
      raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
    end

    # Compare the incoming hash with the DMP details that were gathered during initialization.
    #
    # The Hash is read with String keys and should contain:
    #  {
    #    title: "Example research project",
    #    abstract: "Lorem ipsum pseudo abstract",
    #    keywords: ["foo", "bar"],
    #    people: [
    #      {
    #        id: "https://orcid.org/blah",
    #        last_name: "doe",
    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
    #      }
    #    ],
    #    fundings: [
    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
    #    ],
    #    repositories: [
    #      { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
    #    ]
    #  }
    #
    # Returns a Hash: { confidence: 'None'|'Low'|'Medium'|'High'|'Absolute', score:, notes: [], source: }
    def compare(hash:)
      response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
      return response unless hash.is_a?(Hash) && !hash['title'].nil?

      # Compare the grant ids. If we have a match return the response immediately since that is
      # a very positive match!
      response = _grants_match?(array: hash['fundings'], response:)
      return response if response[:confidence] != 'None'

      response = _opportunities_match?(array: hash['fundings'], response:)
      response = _orcids_match?(array: hash['people'], response:)
      response = _last_name_and_affiliation_match?(array: hash['people'], response:)

      # Only process the following if we had some matching contributors, affiliations or
      # opportunity numbers (the score can only grow from here on, so one check is enough)
      if response[:score].positive?
        response = _repository_match?(array: hash['repositories'], response:)
        response = _keyword_match?(array: hash['keywords'], response:)
        response = _text_match?(type: 'title', text: hash['title'], response:)
        response = _text_match?(type: 'abstract', text: hash['abstract'], response:)
      end
      # If the score is less than 3 then we have no confidence that it is a match
      return response if response[:score] <= 2

      # Set the confidence level based on the score
      response[:confidence] = _confidence_for(score: response[:score])
      response
    end

    private

    # An augmenter is usable when it is a Hash whose 'PK' starts with 'AUGMENTERS#'
    def _valid_augmenter?(augmenter)
      augmenter.is_a?(Hash) && augmenter['PK'].to_s.start_with?('AUGMENTERS#')
    end

    # Translate a score (already known to be greater than 2) into a confidence label
    def _confidence_for(score:)
      return 'High' if score > 15
      return 'Medium' if score > 10

      'Low'
    end

    # Return the Hash entries found in :value (a single Hash, an Array, or nil); anything else is dropped
    def _hashes_in(value)
      [value].flatten.select { |item| item.is_a?(Hash) }
    end

    # Return the 'identifier' of an identifier Hash (e.g. { type: 'ror', identifier: '...' }) or nil
    def _identifier(value)
      value.is_a?(Hash) ? value['identifier'] : nil
    end

    # Lower-case and strip a scalar value. Returns nil for nil, blank or non-scalar values so that
    # they get removed by the later `compact` and can never produce an accidental match.
    def _normalize(value)
      return nil unless value.is_a?(String) || value.is_a?(Symbol) || value.is_a?(Numeric)

      text = value.to_s.downcase.strip
      text.empty? ? nil : text
    end

    # Collect the normalized grant ids from the incoming fundings
    def _incoming_grant_ids(fundings)
      fundings.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
              .flat_map { |funding| funding['grant'].flatten.map { |id| _normalize(id) } }
              .compact.uniq
    end

    # Populate @details_hash from the DMP. Leaves it untouched (and returns nil) when the DMP is
    # not a Hash or has no 'title' / 'contact'.
    def _extract_dmp_details(dmp:)
      return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?

      projects = _hashes_in(dmp['project'])
      datasets = _hashes_in(dmp['dataset'])
      # Fall back to the DMP level title/description when there is no project (or it has none)
      project = projects.first || {}
      fundings = projects.flat_map { |proj| _hashes_in(proj['funding']) }.uniq
      hosts = datasets.flat_map { |dset| _hashes_in(dset['distribution']).map { |dist| dist['host'] } }
      people = [dmp['contact'], dmp['contributor']].flatten.compact.uniq

      # Extract all of the important bits about the DMP
      @details_hash = {
        created: dmp.fetch('created') { Time.now.iso8601 },
        title: _cleanse_text(text: project['title'] || dmp['title']),
        abstract: _cleanse_text(text: project['description'] || dmp['description']),
        # Normalized the same way as the incoming keywords so that they can actually match
        keywords: datasets.flat_map { |dset| [dset['keyword']].flatten.map { |word| _normalize(word) } },
        identifiers: [_identifier(dmp['dmp_id'])],
        last_names: [],
        affiliation_ids: [],
        affiliations: [],
        funder_names: [],
        funder_ids: [],
        opportunity_ids: [],
        grant_ids: [],
        repositories: []
      }
      _extract_people(array: people)
      _extract_funding(array: fundings)
      _extract_repositories(repos: hosts.flatten.compact.uniq)

      # Clean up the results by flattening and removing duplicates from the Arrays
      @details_hash.transform_values! { |val| val.is_a?(Array) ? val.flatten.compact.uniq : val }
      @logger&.debug(message: 'Extracted the following from the DMP', details: @details_hash)
    end

    # Extract all of the funding information
    def _extract_funding(array:)
      return [] unless array.is_a?(Array)

      array.each do |funding|
        next unless funding.is_a?(Hash)

        funder_id = funding['funder_id'].is_a?(Hash) ? funding['funder_id'] : {}
        identifier = _normalize(funder_id['identifier'])
        ror = identifier if _normalize(funder_id['type']) == 'ror'
        # The crosswalk is keyed by the normalized ROR URL (as a Symbol)
        fundref = ror.nil? ? identifier : ROR_FUNDREF_ID_CROSSWALK[ror.to_sym]
        opportunity = _normalize(_identifier(funding['dmproadmap_funding_opportunity_id']))
        grant = _normalize(_identifier(funding['grant_id']))
        # Grant ids are often URLs, so also keep the last path segment
        grant_suffix = _normalize(grant&.split('/')&.last)

        @details_hash[:identifiers].push(ror, fundref, grant, grant_suffix, opportunity)
        @details_hash[:funder_names] << _normalize(funding['name'])
        @details_hash[:funder_ids] << fundref
        @details_hash[:opportunity_ids] << opportunity
        @details_hash[:grant_ids].push(grant, grant_suffix)
      end
      array
    end

    # Extract all of the ORCIDs, last names, and affiliation ids and names
    def _extract_people(array:)
      return [] unless array.is_a?(Array)

      array.each do |entry|
        next unless entry.is_a?(Hash)

        id = _normalize(_identifier(entry['contributor_id'] || entry['contact_id']))
        affil = entry['dmproadmap_affiliation'].is_a?(Hash) ? entry['dmproadmap_affiliation'] : {}
        ror = _normalize(_identifier(affil['affiliation_id']))
        name = _normalize(entry['name'])
        # Supports both "last, first" and "first last"
        last_name = (name.include?(', ') ? name.split(', ').first : name.split.last) unless name.nil?

        @details_hash[:identifiers].push(id, ror)
        @details_hash[:last_names] << last_name
        @details_hash[:affiliation_ids] << ror
        # Drop any trailing parenthetical, e.g. "Example University (example.edu)"
        @details_hash[:affiliations] << _normalize(affil['name'].to_s.split(' (').first)
      end
      array
    end

    # Extract all of the re3data ids, URLs and names
    def _extract_repositories(repos:)
      return [] unless repos.is_a?(Array)

      repos.each do |repo|
        next unless repo.is_a?(Hash)

        @details_hash[:identifiers].push(_normalize(repo['url']),
                                         _normalize(_identifier(repo['dmproadmap_host_id'])))
        @details_hash[:repositories] << _normalize(repo['name'])
      end
      repos
    end

    # Returns whether or not the incoming grant id(s) match the DMPs grant id. Expecting:
    #  [
    #    { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
    #  ]
    # A match is treated as conclusive: the confidence becomes 'Absolute' and the score 100.
    def _grants_match?(array:, response:)
      return response unless array.is_a?(Array) && response.is_a?(Hash)

      matched = _compare_arrays(array_a: @details_hash.fetch(:grant_ids, []),
                                array_b: _incoming_grant_ids(array))
      return response if matched <= 0

      response[:confidence] = 'Absolute'
      response[:score] = 100
      response[:notes] << 'the grant ID matched'
      response
    end

    # Returns whether or not the incoming grant id(s) match the DMPs opportunity id. Expecting:
    #  [
    #    { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
    #  ]
    def _opportunities_match?(array:, response:)
      return response unless array.is_a?(Array) && response.is_a?(Hash)

      matched = _compare_arrays(array_a: @details_hash.fetch(:opportunity_ids, []),
                                array_b: _incoming_grant_ids(array))
      return response if matched <= 0

      response[:score] += 5
      response[:notes] << 'the funding opportunity number matched'
      response
    end

    # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
    #  [
    #    {
    #      id: "https://orcid.org/blah",
    #      last_name: "doe",
    #      affiliation: { id: "https://ror.org/blah", name: "Foo" }
    #    }
    #  ]
    # Each matching id adds 2 to the score.
    def _orcids_match?(array:, response:)
      return response unless array.is_a?(Array) && response.is_a?(Hash)

      ids = array.select { |person| person.is_a?(Hash) }
                 .map { |person| _normalize(person['id']) }
                 .compact.uniq

      matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
      return response if matched <= 0

      response[:score] += (matched * 2)
      response[:notes] << 'contributor ORCIDs matched'
      response
    end

    # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
    #  [
    #    {
    #      id: "https://orcid.org/blah",
    #      last_name: "doe",
    #      affiliation: { id: "https://ror.org/blah", name: "Foo" }
    #    }
    #  ]
    # Each matching last name, affiliation ROR and affiliation name adds 1 to the score.
    def _last_name_and_affiliation_match?(array:, response:)
      return response unless array.is_a?(Array) && response.is_a?(Hash)

      people = array.select { |person| person.is_a?(Hash) }
      affiliations = _hashes_in(people.map { |person| person['affiliation'] }).uniq
      last_names = people.map { |person| _normalize(person['last_name']) }.compact.uniq
      rors = affiliations.map { |affil| _normalize(affil['id']) }.compact.uniq
      affil_names = affiliations.map { |affil| _normalize(affil['name']) }.compact.uniq

      # Check the person last names and affiliation name and RORs
      last_names_matched = _compare_arrays(array_a: @details_hash.fetch(:last_names, []), array_b: last_names)
      rors_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliation_ids, []), array_b: rors)
      affil_names_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliations, []), array_b: affil_names)
      return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0

      response[:score] += last_names_matched + rors_matched + affil_names_matched
      response[:notes] << 'contributor names and affiliations matched'
      response
    end

    # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
    #  [
    #    { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
    #  ]
    def _repository_match?(array:, response:)
      return response unless array.is_a?(Array) && response.is_a?(Hash)

      # We only care about repositories with ids/urls. `Array()` tolerates a missing id as well
      # as a single id that was not wrapped in an Array.
      ids = array.select { |repo| repo.is_a?(Hash) }
                 .flat_map { |repo| Array(repo['id']).map { |id| _normalize(id) } }
                 .compact.uniq

      matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
      return response if matched <= 0

      response[:score] += matched
      response[:notes] << 'repositories matched'
      response
    end

    # Returns whether or not the list of keywords exist in the DMP. Expecting:
    #  keywords: ["foo", "bar"]
    # Any number of matching keywords adds exactly 1 to the score.
    def _keyword_match?(array:, response:)
      return response unless array.is_a?(Array) && response.is_a?(Hash)

      keywords = array.flatten.map { |word| _normalize(word) }.compact.uniq
      matched = _compare_arrays(array_a: @details_hash.fetch(:keywords, []), array_b: keywords)
      return response if matched <= 0

      response[:score] += 1
      response[:notes] << 'keywords matched'
      response
    end

    # Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
    # Adds 5 to the score for a similarity of at least 0.75 and 2 for a similarity of at least 0.5.
    #
    # The :logger argument is accepted for backward compatibility only; the instance's @logger is used.
    def _text_match?(type: 'title', text:, response:, logger: nil)
      dmp_text = @details_hash[type.to_sym]
      return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty? &&
                             !dmp_text.nil?

      cleansed = _cleanse_text(text:)
      # There is nothing left to compare when either side only consisted of stop words
      return response if cleansed.empty? || dmp_text.to_s.empty?

      nlp_score = Text::WhiteSimilarity.new.similarity(dmp_text, cleansed)
      details = { "dmp_#{type}": dmp_text, "incoming_#{type}": cleansed, nlp_score: }
      @logger&.debug(message: 'Text::WhiteSimilarity score', details:)
      # Written as a positive test so that a non-comparable result (e.g. NaN) counts as "no match"
      return response unless nlp_score.is_a?(Numeric) && nlp_score >= 0.5

      response[:score] += nlp_score >= 0.75 ? 5 : 2
      response[:notes] << "#{type}s are similar"
      response
    end

    # Change the incoming text to lower case, collapse whitespace and remove the STOP_WORDS
    def _cleanse_text(text:)
      return nil unless text.is_a?(String)

      text.downcase.split.reject { |word| STOP_WORDS.include?(word) }.join(' ').strip
    end

    # Do an introspection of the 2 arrays and return the number of (non-nil) matches
    def _compare_arrays(array_a: [], array_b: [])
      return 0 unless array_a.is_a?(Array) && array_b.is_a?(Array)

      (array_a & array_b).compact.size
    end

    # TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
    #
    # Keys are ROR URLs (as Symbols) and values are the matching Crossref funder DOIs
    ROR_FUNDREF_ID_CROSSWALK = {
      # NIH ID Crosswalk
      "https://ror.org/01cwqze88": "https://doi.org/10.13039/100000002",
      "https://ror.org/04mhx6838": "https://doi.org/10.13039/100000055",
      "https://ror.org/012pb6c26": "https://doi.org/10.13039/100000050",
      "https://ror.org/03wkg3b53": "https://doi.org/10.13039/100000053",
      "https://ror.org/0060t0j89": "https://doi.org/10.13039/100000092",
      "https://ror.org/00372qc85": "https://doi.org/10.13039/100000070",
      "https://ror.org/00190t495": "https://doi.org/10.13039/100008460",
      "https://ror.org/00j4k1h63": "https://doi.org/10.13039/100000066",
      "https://ror.org/01y3zfr79": "https://doi.org/10.13039/100000056",
      "https://ror.org/04q48ey07": "https://doi.org/10.13039/100000057",
      "https://ror.org/0493hgw16": "https://doi.org/10.13039/100006545",
      "https://ror.org/04vfsmv21": "https://doi.org/10.13039/100000098",
      "https://ror.org/03jh5a977": "https://doi.org/10.13039/100000093",
      "https://ror.org/04xeg9z08": "https://doi.org/10.13039/100000025",
      "https://ror.org/01s5ya894": "https://doi.org/10.13039/100000065",
      "https://ror.org/02meqm098": "https://doi.org/10.13039/100000002",
      "https://ror.org/049v75w11": "https://doi.org/10.13039/100000049",
      "https://ror.org/004a2wv92": "https://doi.org/10.13039/100000072",
      "https://ror.org/00adh9b73": "https://doi.org/10.13039/100000062",
      "https://ror.org/043z4tv69": "https://doi.org/10.13039/100000060",
      "https://ror.org/00x19de83": "https://doi.org/10.13039/100000002",
      "https://ror.org/02jzrsm59": "https://doi.org/10.13039/100000027",
      "https://ror.org/006zn3t30": "https://doi.org/10.13039/100000069",
      "https://ror.org/04byxyr05": "https://doi.org/10.13039/100000071",
      "https://ror.org/04pw6fb54": "https://doi.org/10.13039/100006108",
      "https://ror.org/05aq6yn88": "https://doi.org/10.13039/100006955",
      "https://ror.org/02xey9a22": "https://doi.org/10.13039/100000061",
      "https://ror.org/00fj8a872": "https://doi.org/10.13039/100000052",
      "https://ror.org/01wtjyf13": "https://doi.org/10.13039/100000063",
      "https://ror.org/04r5s4b52": "https://doi.org/10.13039/100005440",
      "https://ror.org/046zezr58": "https://doi.org/10.13039/100006085",
      "https://ror.org/02e3wq066": "https://doi.org/10.13039/100006086",
      "https://ror.org/031gy6182": "https://doi.org/10.13039/100000002",
      "https://ror.org/054j5yq82": "https://doi.org/10.13039/100000002",
      "https://ror.org/02yrzyf97": "https://doi.org/10.13039/100000002",

      # NSF ID Crosswalk
      "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
      "https://ror.org/04aqat463": "https://doi.org/10.13039/100000001",
      "https://ror.org/01rcfpa16": "https://doi.org/10.13039/100005441",
      "https://ror.org/014eweh95": "https://doi.org/10.13039/100005445",
      "https://ror.org/001xhss06": "https://doi.org/10.13039/100000076",
      "https://ror.org/04qn9mx93": "https://doi.org/10.13039/100000153",
      "https://ror.org/03g87he71": "https://doi.org/10.13039/100000155",
      "https://ror.org/01tnvpc68": "https://doi.org/10.13039/100000156",
      "https://ror.org/01rvays47": "https://doi.org/10.13039/100000154",
      "https://ror.org/002jdaq33": "https://doi.org/10.13039/100000152",
      "https://ror.org/025kzpk63": "https://doi.org/10.13039/100000083",
      "https://ror.org/04nh1dc89": "https://doi.org/10.13039/100007523",
      "https://ror.org/01mng8331": "https://doi.org/10.13039/100000143",
      "https://ror.org/02rdzmk74": "https://doi.org/10.13039/100000144",
      "https://ror.org/053a2cp42": "https://doi.org/10.13039/100000145",
      "https://ror.org/014bj5w56": "https://doi.org/10.13039/100000081",
      "https://ror.org/00whkrf32": "https://doi.org/10.13039/100000082",
      "https://ror.org/05s7cqk18": "https://doi.org/10.13039/100000173",
      "https://ror.org/02kd4km72": "https://doi.org/10.13039/100000172",
      "https://ror.org/03mamvh39": "https://doi.org/10.13039/100000171",
      "https://ror.org/00b6sbb32": "https://doi.org/10.13039/100000084",
      "https://ror.org/0471zv972": "https://doi.org/10.13039/100000146",
      "https://ror.org/028yd4c30": "https://doi.org/10.13039/100000147",
      "https://ror.org/01krpsy48": "https://doi.org/10.13039/100000148",
      "https://ror.org/050rnw378": "https://doi.org/10.13039/100000149",
      "https://ror.org/0388pet74": "https://doi.org/10.13039/100000150",
      "https://ror.org/03xyg3m20": "https://doi.org/10.13039/100000151",
      "https://ror.org/05p847d66": "https://doi.org/10.13039/100000085",
      "https://ror.org/037gd6g64": "https://doi.org/10.13039/100000159",
      "https://ror.org/05v01mk25": "https://doi.org/10.13039/100000160",
      "https://ror.org/05wqqhv83": "https://doi.org/10.13039/100000141",
      "https://ror.org/05nwjp114": "https://doi.org/10.13039/100007352",
      "https://ror.org/05fnzca26": "https://doi.org/10.13039/100000162",
      "https://ror.org/02trddg58": "https://doi.org/10.13039/100000163",
      "https://ror.org/029b7h395": "https://doi.org/10.13039/100000086",
      "https://ror.org/04mg8wm74": "https://doi.org/10.13039/100000164",
      "https://ror.org/01ar8dr59": "https://doi.org/10.13039/100000165",
      "https://ror.org/01pc7k308": "https://doi.org/10.13039/100000078",
      "https://ror.org/051fftw81": "https://doi.org/10.13039/100000121",
      "https://ror.org/04ap5x931": "https://doi.org/10.13039/100000166",
      "https://ror.org/00apvva27": "https://doi.org/10.13039/100005716",
      "https://ror.org/04nseet23": "https://doi.org/10.13039/100000179",
      "https://ror.org/04k9mqs78": "https://doi.org/10.13039/100000106",
      "https://ror.org/01k638r21": "https://doi.org/10.13039/100000089",
      "https://ror.org/01gmp5538": "https://doi.org/10.13039/100005447",
      "https://ror.org/01vnjbg30": "https://doi.org/10.13039/100005449",
      "https://ror.org/03h7mcc28": "https://doi.org/10.13039/100000088",
      "https://ror.org/05wgkzg12": "https://doi.org/10.13039/100000169",
      "https://ror.org/0445wmv88": "https://doi.org/10.13039/100000170",
      "https://ror.org/02dz2hb46": "https://doi.org/10.13039/100000077",
      "https://ror.org/034m1ez10": "https://doi.org/10.13039/100000107",
      "https://ror.org/02a65dj82": "https://doi.org/10.13039/100005717",
      "https://ror.org/020fhsn68": "https://doi.org/10.13039/100000001",
      "https://ror.org/03z9hh605": "https://doi.org/10.13039/100000174",
      "https://ror.org/04ya3kq71": "https://doi.org/10.13039/100007521",
      "https://ror.org/04evh7y43": "https://doi.org/10.13039/100005443",
      "https://ror.org/04h67aa53": "https://doi.org/10.13039/100000177",
      "https://ror.org/025dabr11": "https://doi.org/10.13039/100005446",
      "https://ror.org/04vw0kz07": "https://doi.org/10.13039/100005448",
      "https://ror.org/054ydxh33": "https://doi.org/10.13039/100005554",
      "https://ror.org/01sharn77": "https://doi.org/10.13039/100006091",
      "https://ror.org/02ch5q898": "https://doi.org/10.13039/100000001",

      # NASA ID Crosswalk
      "https://ror.org/0171mag52": "https://doi.org/10.13039/100006198",
      "https://ror.org/027k65916": "https://doi.org/10.13039/100006196",
      "https://ror.org/027ka1x80": "https://doi.org/10.13039/100000104",
      "https://ror.org/02acart68": "https://doi.org/10.13039/100006195",
      "https://ror.org/059fqnc42": "https://doi.org/10.13039/100006193",
      "https://ror.org/01cyfxe35": "https://doi.org/10.13039/100016595",
      "https://ror.org/04xx4z452": "https://doi.org/10.13039/100006203",
      "https://ror.org/0399mhs52": "https://doi.org/10.13039/100006199",
      "https://ror.org/02epydz83": "https://doi.org/10.13039/100006197",
      "https://ror.org/03j9e2j92": "https://doi.org/10.13039/100006205",
      "https://ror.org/02s42x260": "https://doi.org/10.13039/100000104",
      "https://ror.org/01p7gwa14": "https://doi.org/10.13039/100000104",
      "https://ror.org/01qxmdg18": "https://doi.org/10.13039/100000104",
      "https://ror.org/006ndaj41": "https://doi.org/10.13039/100000104",
      "https://ror.org/03em45j53": "https://doi.org/10.13039/100007346",
      "https://ror.org/045t78n53": "https://doi.org/10.13039/100000104",
      "https://ror.org/00r57r863": "https://doi.org/10.13039/100000104",
      "https://ror.org/0401vze59": "https://doi.org/10.13039/100007726",
      "https://ror.org/04hccab49": "https://doi.org/10.13039/100000104",
      "https://ror.org/04437j066": "https://doi.org/10.13039/100000104",
      "https://ror.org/028b18z22": "https://doi.org/10.13039/100000104",
      "https://ror.org/00ryjtt64": "https://doi.org/10.13039/100000104",

      # DOE ID Crosswalk
      "https://ror.org/01bj3aw27": "https://doi.org/10.13039/100000015",
      "https://ror.org/03q1rgc19": "https://doi.org/10.13039/100006133",
      "https://ror.org/02xznz413": "https://doi.org/10.13039/100006134",
      "https://ror.org/03sk1we31": "https://doi.org/10.13039/100006168",
      "https://ror.org/00f93gc02": "https://doi.org/10.13039/100006177",
      "https://ror.org/05tj7dm33": "https://doi.org/10.13039/100006147",
      "https://ror.org/0012c7r22": "https://doi.org/10.13039/100006192",
      "https://ror.org/00mmn6b08": "https://doi.org/10.13039/100006132",
      "https://ror.org/03ery9d53": "https://doi.org/10.13039/100006120",
      "https://ror.org/033jmdj81": "https://doi.org/10.13039/100000015",
      "https://ror.org/03rd4h240": "https://doi.org/10.13039/100006130",
      "https://ror.org/0054t4769": "https://doi.org/10.13039/100006200",
      "https://ror.org/03eecgp81": "https://doi.org/10.13039/100006174",
      "https://ror.org/00heb4d89": "https://doi.org/10.13039/100006135",
      "https://ror.org/05ek3m339": "https://doi.org/10.13039/100006150",
      "https://ror.org/00km40770": "https://doi.org/10.13039/100006138",
      "https://ror.org/02ah1da87": "https://doi.org/10.13039/100006137",
      "https://ror.org/05hsv7e61": "https://doi.org/10.13039/100000015",
      "https://ror.org/01c9ay627": "https://doi.org/10.13039/100006165",
      "https://ror.org/04z2gev20": "https://doi.org/10.13039/100006183",
      "https://ror.org/02z1qvq09": "https://doi.org/10.13039/100006144",
      "https://ror.org/03jf3w726": "https://doi.org/10.13039/100006186",
      "https://ror.org/04848jz84": "https://doi.org/10.13039/100006142",
      "https://ror.org/04s778r16": "https://doi.org/10.13039/100006171",
      "https://ror.org/04nnxen11": "https://doi.org/10.13039/100000015",
      "https://ror.org/05csy5p27": "https://doi.org/10.13039/100010268",
      "https://ror.org/05efnac71": "https://doi.org/10.13039/100000015"
    }.freeze
  end
end
|
data/lib/uc3-dmp-id/creator.rb
CHANGED
@@ -23,23 +23,23 @@ module Uc3DmpId
|
|
23
23
|
raise CreatorError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil?
|
24
24
|
|
25
25
|
# Validate the incoming JSON first
|
26
|
-
json = Helper.parse_json(json:
|
27
|
-
errs = Validator.validate(mode: 'author', json:
|
26
|
+
json = Helper.parse_json(json:)
|
27
|
+
errs = Validator.validate(mode: 'author', json:)
|
28
28
|
raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any? && errs.first != Validator::MSG_VALID_JSON
|
29
29
|
|
30
30
|
# Try to find it by the :dmp_id first and Fail if found
|
31
31
|
dmp_id = Helper.dmp_id_to_pk(json: json.fetch('dmp', {})['dmp_id'])
|
32
|
-
result = Finder.exists?(p_key: dmp_id, logger:
|
32
|
+
result = Finder.exists?(p_key: dmp_id, logger:) unless dmp_id.nil?
|
33
33
|
raise CreatorError, Helper::MSG_DMP_EXISTS if result.is_a?(Hash)
|
34
34
|
|
35
35
|
# raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS unless json['PK'].nil?
|
36
36
|
|
37
37
|
client = Uc3DmpDynamo::Client.new
|
38
|
-
p_key = _preregister_dmp_id(client
|
38
|
+
p_key = _preregister_dmp_id(client:, provenance:, json:, logger:)
|
39
39
|
raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?
|
40
40
|
|
41
41
|
# Add the DMPHub specific attributes and then save
|
42
|
-
annotated = Helper.annotate_dmp_json(provenance
|
42
|
+
annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: json['dmp'])
|
43
43
|
logger.info(message: "Creating DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
44
44
|
|
45
45
|
# Set the :created and :modified timestamps
|
@@ -48,10 +48,10 @@ module Uc3DmpId
|
|
48
48
|
annotated['modified'] = now
|
49
49
|
|
50
50
|
# Create the item
|
51
|
-
resp = client.put_item(json: annotated, logger:
|
51
|
+
resp = client.put_item(json: annotated, logger:)
|
52
52
|
raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
|
53
53
|
|
54
|
-
_post_process(json: annotated, logger:
|
54
|
+
_post_process(json: annotated, logger:)
|
55
55
|
Helper.cleanse_dmp_json(json: JSON.parse({ dmp: annotated }.to_json))
|
56
56
|
end
|
57
57
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
@@ -75,7 +75,7 @@ module Uc3DmpId
|
|
75
75
|
counter = 0
|
76
76
|
while dmp_id == '' && counter <= 10
|
77
77
|
prefix = "#{ENV.fetch('DMP_ID_SHOULDER', nil)}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
|
78
|
-
dmp_id = prefix unless Finder.exists?(client
|
78
|
+
dmp_id = prefix unless Finder.exists?(client:, p_key: prefix)
|
79
79
|
counter += 1
|
80
80
|
end
|
81
81
|
# Something went wrong and it was unable to identify a unique id
|
@@ -94,7 +94,7 @@ module Uc3DmpId
|
|
94
94
|
|
95
95
|
# Publish the change to the EventBridge
|
96
96
|
publisher = Uc3DmpEventBridge::Publisher.new
|
97
|
-
publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:
|
97
|
+
publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:)
|
98
98
|
|
99
99
|
# Determine if there are any related identifiers that we should try to fetch a citation for
|
100
100
|
citable_identifiers = Helper.citable_related_identifiers(dmp: json)
|
@@ -108,7 +108,7 @@ module Uc3DmpId
|
|
108
108
|
}
|
109
109
|
logger.debug(message: 'Fetching citations', details: citable_identifiers) if logger.respond_to?(:debug)
|
110
110
|
publisher.publish(source: 'DmpCreator', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
|
111
|
-
logger:
|
111
|
+
logger:)
|
112
112
|
true
|
113
113
|
end
|
114
114
|
end
|
data/lib/uc3-dmp-id/deleter.rb
CHANGED
@@ -19,7 +19,7 @@ module Uc3DmpId
|
|
19
19
|
|
20
20
|
# Fetch the latest version of the DMP ID by it's PK
|
21
21
|
client = Uc3DmpDynamo::Client.new
|
22
|
-
dmp = Finder.by_pk(p_key
|
22
|
+
dmp = Finder.by_pk(p_key:, client:, cleanse: false, logger:)
|
23
23
|
raise DeleterError, Helper::MSG_DMP_NOT_FOUND unless dmp.is_a?(Hash) && !dmp['dmp'].nil?
|
24
24
|
|
25
25
|
# Only allow this if the provenance is the owner of the DMP!
|
@@ -38,16 +38,16 @@ module Uc3DmpId
|
|
38
38
|
dmp['dmp']['dmphub_tombstoned_at'] = now
|
39
39
|
|
40
40
|
# Create the Tombstone version
|
41
|
-
resp = client.put_item(json: dmp['dmp'], logger:
|
41
|
+
resp = client.put_item(json: dmp['dmp'], logger:)
|
42
42
|
raise DeleterError, Helper::MSG_DMP_NO_TOMBSTONE if resp.nil?
|
43
43
|
|
44
44
|
# Delete the Latest version
|
45
|
-
client.delete_item(p_key
|
45
|
+
client.delete_item(p_key:, s_key: Helper::DMP_LATEST_VERSION, logger:)
|
46
46
|
|
47
47
|
# TODO: We should do a check here to see if it was successful!
|
48
48
|
|
49
49
|
# Notify EZID about the removal
|
50
|
-
_post_process(json: dmp, logger:
|
50
|
+
_post_process(json: dmp, logger:)
|
51
51
|
|
52
52
|
# Return the tombstoned record
|
53
53
|
Helper.cleanse_dmp_json(json: dmp)
|
@@ -66,7 +66,7 @@ module Uc3DmpId
|
|
66
66
|
|
67
67
|
# Publish the change to the EventBridge
|
68
68
|
publisher = Uc3DmpEventBridge::Publisher.new
|
69
|
-
publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:
|
69
|
+
publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:)
|
70
70
|
true
|
71
71
|
end
|
72
72
|
end
|
data/lib/uc3-dmp-id/finder.rb
CHANGED
@@ -20,15 +20,15 @@ module Uc3DmpId
|
|
20
20
|
# TODO: Replace this with ElasticSearch
|
21
21
|
def search_dmps(args:, logger: nil)
|
22
22
|
client = Uc3DmpDynamo::Client.new
|
23
|
-
return _by_owner(owner_org: args['owner_orcid'], client
|
23
|
+
return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
|
24
24
|
|
25
25
|
unless args['owner_org_ror'].nil?
|
26
|
-
return _by_owner_org(owner_org: args['owner_org_ror'], client
|
27
|
-
logger:
|
26
|
+
return _by_owner_org(owner_org: args['owner_org_ror'], client:,
|
27
|
+
logger:)
|
28
28
|
end
|
29
29
|
unless args['modification_day'].nil?
|
30
|
-
return _by_mod_day(day: args['modification_day'], client
|
31
|
-
logger:
|
30
|
+
return _by_mod_day(day: args['modification_day'], client:,
|
31
|
+
logger:)
|
32
32
|
end
|
33
33
|
|
34
34
|
[]
|
@@ -38,20 +38,20 @@ module Uc3DmpId
|
|
38
38
|
# -------------------------------------------------------------------------
|
39
39
|
# rubocop:disable Metrics/AbcSize
|
40
40
|
def by_json(json:, client: nil, cleanse: true, logger: nil)
|
41
|
-
json = Helper.parse_json(json:
|
41
|
+
json = Helper.parse_json(json:)&.fetch('dmp', {})
|
42
42
|
raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
|
43
43
|
|
44
44
|
p_key = json['PK']
|
45
45
|
# Translate the incoming :dmp_id into a PK
|
46
46
|
p_key = Helper.dmp_id_to_pk(json: json.fetch('dmp_id', {})) if p_key.nil?
|
47
|
-
client =
|
47
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
48
48
|
|
49
49
|
# TODO: Re-enable this once we figure out Dynamo indexes
|
50
50
|
# find_by_dmphub_provenance_id -> if no PK and no dmp_id result
|
51
51
|
# return by_provenance_identifier(json: json, client: client, logger: logger) if p_key.nil?
|
52
52
|
|
53
53
|
# find_by_PK
|
54
|
-
p_key.nil? ? nil : by_pk(p_key
|
54
|
+
p_key.nil? ? nil : by_pk(p_key:, s_key: json['SK'], client:, cleanse:, logger:)
|
55
55
|
end
|
56
56
|
# rubocop:enable Metrics/AbcSize
|
57
57
|
|
@@ -62,20 +62,20 @@ module Uc3DmpId
|
|
62
62
|
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
63
63
|
|
64
64
|
s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
|
65
|
-
client =
|
65
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
66
66
|
resp = client.get_item(
|
67
67
|
key: {
|
68
|
-
PK: Helper.append_pk_prefix(p_key:
|
69
|
-
SK: Helper.append_sk_prefix(s_key:
|
68
|
+
PK: Helper.append_pk_prefix(p_key:),
|
69
|
+
SK: Helper.append_sk_prefix(s_key:)
|
70
70
|
},
|
71
|
-
logger:
|
71
|
+
logger:
|
72
72
|
)
|
73
73
|
return resp unless resp.is_a?(Hash)
|
74
74
|
|
75
75
|
dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
|
76
76
|
return nil if dmp['dmp']['PK'].nil?
|
77
77
|
|
78
|
-
dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp
|
78
|
+
dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp:, client:, logger:) if cleanse
|
79
79
|
cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
|
80
80
|
end
|
81
81
|
# rubocop:enable Metrics/AbcSize
|
@@ -85,13 +85,13 @@ module Uc3DmpId
|
|
85
85
|
def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil)
|
86
86
|
raise FinderError, MSG_MISSING_PK if p_key.nil?
|
87
87
|
|
88
|
-
client =
|
88
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
89
89
|
client.pk_exists?(
|
90
90
|
key: {
|
91
|
-
PK: Helper.append_pk_prefix(p_key:
|
92
|
-
SK: Helper.append_sk_prefix(s_key:
|
91
|
+
PK: Helper.append_pk_prefix(p_key:),
|
92
|
+
SK: Helper.append_sk_prefix(s_key:)
|
93
93
|
},
|
94
|
-
logger:
|
94
|
+
logger:
|
95
95
|
)
|
96
96
|
end
|
97
97
|
|
@@ -115,15 +115,15 @@ module Uc3DmpId
|
|
115
115
|
filter_expression: 'SK = :version',
|
116
116
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
117
117
|
}
|
118
|
-
client =
|
119
|
-
resp = client.query(args
|
118
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
119
|
+
resp = client.query(args:, logger:)
|
120
120
|
return resp unless resp.is_a?(Hash)
|
121
121
|
|
122
122
|
dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
|
123
123
|
return nil if dmp['dmp']['PK'].nil?
|
124
124
|
|
125
125
|
# If we got a hit, fetch the DMP and return it.
|
126
|
-
by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse
|
126
|
+
by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse:, logger:)
|
127
127
|
end
|
128
128
|
# rubocop:enable Metrics/AbcSize
|
129
129
|
|
@@ -149,8 +149,8 @@ module Uc3DmpId
|
|
149
149
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
150
150
|
}
|
151
151
|
logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
|
152
|
-
client =
|
153
|
-
_process_search_response(response: client.query(args
|
152
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
153
|
+
_process_search_response(response: client.query(args:, logger:))
|
154
154
|
end
|
155
155
|
|
156
156
|
# Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
|
@@ -174,8 +174,8 @@ module Uc3DmpId
|
|
174
174
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
175
175
|
}
|
176
176
|
logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
|
177
|
-
client =
|
178
|
-
_process_search_response(response: client.query(args
|
177
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
178
|
+
_process_search_response(response: client.query(args:, logger:))
|
179
179
|
end
|
180
180
|
|
181
181
|
# Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
|
@@ -195,8 +195,8 @@ module Uc3DmpId
|
|
195
195
|
expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
|
196
196
|
}
|
197
197
|
logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
|
198
|
-
client =
|
199
|
-
_process_search_response(response: client.query(args
|
198
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
199
|
+
_process_search_response(response: client.query(args:, logger:))
|
200
200
|
end
|
201
201
|
|
202
202
|
# Transform the search results so that we do not include any of the DMPHub specific metadata
|
data/lib/uc3-dmp-id/helper.rb
CHANGED
@@ -7,17 +7,17 @@ module Uc3DmpId
|
|
7
7
|
# Helper functions for working with DMP IDs
|
8
8
|
class Helper
|
9
9
|
PK_DMP_PREFIX = 'DMP#'
|
10
|
-
PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
|
10
|
+
PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
|
11
11
|
|
12
12
|
SK_DMP_PREFIX = 'VERSION#'
|
13
|
-
SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}
|
13
|
+
SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/
|
14
14
|
|
15
15
|
# TODO: Verify the assumed structure of the DOI is valid
|
16
|
-
DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
|
17
|
-
URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
|
16
|
+
DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
|
17
|
+
URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
|
18
18
|
|
19
|
-
DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
|
20
|
-
DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
|
19
|
+
DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest".freeze
|
20
|
+
DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone".freeze
|
21
21
|
|
22
22
|
DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
|
23
23
|
DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
|
@@ -41,7 +41,7 @@ module Uc3DmpId
|
|
41
41
|
# Append the PK prefix for the object
|
42
42
|
# -------------------------------------------------------------------------------------
|
43
43
|
def append_pk_prefix(p_key:)
|
44
|
-
p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:
|
44
|
+
p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:)}" : nil
|
45
45
|
end
|
46
46
|
|
47
47
|
# Strip off the PK prefix
|
@@ -53,7 +53,7 @@ module Uc3DmpId
|
|
53
53
|
# Append the SK prefix for the object
|
54
54
|
# -------------------------------------------------------------------------------------
|
55
55
|
def append_sk_prefix(s_key:)
|
56
|
-
s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:
|
56
|
+
s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:)}" : nil
|
57
57
|
end
|
58
58
|
|
59
59
|
# Strip off the SK prefix
|
@@ -82,7 +82,7 @@ module Uc3DmpId
|
|
82
82
|
return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
|
83
83
|
|
84
84
|
dmp_id = dmp_id.gsub('doi:', '')
|
85
|
-
dmp_id = dmp_id
|
85
|
+
dmp_id = dmp_id[1..dmp_id.length] if dmp_id.start_with?('/')
|
86
86
|
base_domain = with_protocol ? dmp_id_base_url : dmp_id_base_url.gsub(%r{https?://}, '')
|
87
87
|
"#{base_domain}#{dmp_id}"
|
88
88
|
end
|
@@ -95,7 +95,7 @@ module Uc3DmpId
|
|
95
95
|
p_key = param if param.start_with?(dmp_id_base_url) || param.start_with?(base_domain)
|
96
96
|
p_key = CGI.unescape(p_key.nil? ? param : p_key)
|
97
97
|
p_key = format_dmp_id(value: p_key)
|
98
|
-
append_pk_prefix(p_key:
|
98
|
+
append_pk_prefix(p_key:)
|
99
99
|
end
|
100
100
|
|
101
101
|
# Append the :PK prefix to the :dmp_id
|
@@ -115,7 +115,7 @@ module Uc3DmpId
|
|
115
115
|
|
116
116
|
{
|
117
117
|
type: 'doi',
|
118
|
-
identifier: format_dmp_id(value: remove_pk_prefix(p_key:
|
118
|
+
identifier: format_dmp_id(value: remove_pk_prefix(p_key:), with_protocol: true)
|
119
119
|
}
|
120
120
|
end
|
121
121
|
|
@@ -180,7 +180,7 @@ module Uc3DmpId
|
|
180
180
|
# Add DMPHub specific fields to the DMP ID JSON
|
181
181
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
182
182
|
def annotate_dmp_json(provenance:, p_key:, json:)
|
183
|
-
json = parse_json(json:
|
183
|
+
json = parse_json(json:)
|
184
184
|
bool_vals = [1, '1', true, 'true', 'yes']
|
185
185
|
return json if provenance.nil? || p_key.nil? || !json.is_a?(Hash)
|
186
186
|
|
@@ -189,14 +189,14 @@ module Uc3DmpId
|
|
189
189
|
return json if id != p_key && !json['PK'].nil?
|
190
190
|
|
191
191
|
annotated = deep_copy_dmp(obj: json)
|
192
|
-
annotated['PK'] = json['PK'] || append_pk_prefix(p_key:
|
192
|
+
annotated['PK'] = json['PK'] || append_pk_prefix(p_key:)
|
193
193
|
annotated['SK'] = DMP_LATEST_VERSION
|
194
194
|
|
195
195
|
# Ensure that the :dmp_id matches the :PK
|
196
196
|
annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
|
197
197
|
|
198
|
-
owner_id = extract_owner_id(json:
|
199
|
-
owner_org = extract_owner_org(json:
|
198
|
+
owner_id = extract_owner_id(json:)
|
199
|
+
owner_org = extract_owner_org(json:)
|
200
200
|
|
201
201
|
# Set the :dmproadmap_featured flag appropriately
|
202
202
|
featured = annotated.fetch('dmproadmap_featured', 'no')
|
@@ -219,7 +219,7 @@ module Uc3DmpId
|
|
219
219
|
annotated['dmphub_provenance_identifier'] = annotated.fetch('dmproadmap_links', {})['get']
|
220
220
|
else
|
221
221
|
annotated['dmphub_provenance_identifier'] = format_provenance_id(
|
222
|
-
provenance
|
222
|
+
provenance:, value: json.fetch('dmp_id', {})['identifier']
|
223
223
|
)
|
224
224
|
end
|
225
225
|
annotated
|
data/lib/uc3-dmp-id/updater.rb
CHANGED
@@ -16,18 +16,18 @@ module Uc3DmpId
|
|
16
16
|
def update(provenance:, p_key:, json: {}, note: nil, logger: nil)
|
17
17
|
raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
|
18
18
|
|
19
|
-
mods = Helper.parse_json(json:
|
20
|
-
p_key = Helper.append_pk_prefix(p_key:
|
19
|
+
mods = Helper.parse_json(json:).fetch('dmp', {})
|
20
|
+
p_key = Helper.append_pk_prefix(p_key:)
|
21
21
|
logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
|
22
22
|
|
23
23
|
# Fetch the latest version of the DMP ID
|
24
24
|
client = Uc3DmpDynamo::Client.new
|
25
|
-
latest_version = Finder.by_pk(p_key
|
26
|
-
latest_version = latest_version
|
25
|
+
latest_version = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
|
26
|
+
latest_version = latest_version.fetch('dmp', {}) unless latest_version['dmp'].nil?
|
27
27
|
logger.debug(message: "Latest version for PK #{p_key}", details: latest_version) if logger.respond_to?(:debug)
|
28
28
|
|
29
29
|
# Verify that the DMP ID is updateable with the info passed in
|
30
|
-
errs = _updateable?(provenance
|
30
|
+
errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
|
31
31
|
mods: mods['dmp'])
|
32
32
|
logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
|
33
33
|
raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
|
@@ -37,35 +37,35 @@ module Uc3DmpId
|
|
37
37
|
# Version the DMP ID record (if applicable).
|
38
38
|
owner = latest_version['dmphub_provenance_id']
|
39
39
|
updater = provenance['PK']
|
40
|
-
version = Versioner.generate_version(client
|
41
|
-
updater
|
40
|
+
version = Versioner.generate_version(client:, latest_version:, owner:,
|
41
|
+
updater:, logger:)
|
42
42
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
|
43
43
|
|
44
44
|
# Remove the version info because we don't want to save it on the record
|
45
45
|
version.delete('dmphub_versions')
|
46
46
|
|
47
47
|
# Splice the assertions
|
48
|
-
version = _process_modifications(owner
|
49
|
-
logger:
|
48
|
+
version = _process_modifications(owner:, updater:, version:, mods:, note:,
|
49
|
+
logger:)
|
50
50
|
# Set the :modified timestamps
|
51
51
|
now = Time.now.utc
|
52
52
|
version['modified'] = now.iso8601
|
53
53
|
version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
|
54
54
|
|
55
55
|
# Save the changes
|
56
|
-
resp = client.put_item(json: version, logger:
|
56
|
+
resp = client.put_item(json: version, logger:)
|
57
57
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
|
58
58
|
|
59
59
|
# Send the updates to EZID
|
60
|
-
_post_process(provenance
|
60
|
+
_post_process(provenance:, json: version, logger:)
|
61
61
|
|
62
62
|
# Return the new version record
|
63
63
|
logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
|
64
64
|
|
65
65
|
# Append the :dmphub_versions Array
|
66
66
|
json = JSON.parse({ dmp: version }.to_json)
|
67
|
-
json = Versioner.append_versions(p_key
|
68
|
-
Helper.cleanse_dmp_json(json:
|
67
|
+
json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
|
68
|
+
Helper.cleanse_dmp_json(json:)
|
69
69
|
end
|
70
70
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
71
71
|
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -77,21 +77,21 @@ module Uc3DmpId
|
|
77
77
|
raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
|
78
78
|
|
79
79
|
# fetch the existing latest version of the DMP ID
|
80
|
-
client = Uc3DmpDynamo::Client.new(logger:
|
81
|
-
dmp = Finder.by_pk(p_key
|
80
|
+
client = Uc3DmpDynamo::Client.new(logger:)
|
81
|
+
dmp = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
|
82
82
|
logger.info(message: 'Existing latest record', details: dmp) if logger.respond_to?(:debug)
|
83
83
|
raise UpdaterError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil? &&
|
84
84
|
provenance['PK'] == dmp['dmp']['dmphub_provenance_id']
|
85
85
|
|
86
86
|
# Add the download URl for the PDF as a related identifier on the DMP ID record
|
87
|
-
annotated = Helper.annotate_dmp_json(provenance
|
87
|
+
annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: dmp['dmp'])
|
88
88
|
annotated['dmproadmap_related_identifiers'] = [] if annotated['dmproadmap_related_identifiers'].nil?
|
89
89
|
annotated['dmproadmap_related_identifiers'] << JSON.parse({
|
90
90
|
descriptor: 'is_metadata_for', work_type: 'output_management_plan', type: 'url', identifier: url
|
91
91
|
}.to_json)
|
92
92
|
|
93
93
|
# Save the changes without creating a new version!
|
94
|
-
resp = client.put_item(json: annotated, logger:
|
94
|
+
resp = client.put_item(json: annotated, logger:)
|
95
95
|
raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
|
96
96
|
|
97
97
|
logger.info(message: "Added DMP ID narrative for PK: #{p_key}, Narrative: #{url}") if logger.respond_to?(:debug)
|
@@ -111,8 +111,9 @@ module Uc3DmpId
|
|
111
111
|
return [Helper::MSG_DMP_FORBIDDEN] unless provenance.is_a?(Hash) && !provenance['PK'].nil?
|
112
112
|
# Verify that the JSON is for the same DMP in the PK
|
113
113
|
return [Helper::MSG_DMP_FORBIDDEN] unless Helper.dmp_id_to_pk(json: mods.fetch('dmp_id', {})) == p_key
|
114
|
+
|
114
115
|
# Bail out if the DMP ID could not be found or the PKs do not match for some reason
|
115
|
-
|
116
|
+
[Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
|
116
117
|
end
|
117
118
|
# rubocop:enable Metrics/AbcSize
|
118
119
|
|
@@ -123,14 +124,14 @@ module Uc3DmpId
|
|
123
124
|
|
124
125
|
updated = if owner == updater
|
125
126
|
# Splice together any assertions that may have been made while the user was editing the DMP ID
|
126
|
-
Asserter.splice(latest_version: version, modified_version: mods, logger:
|
127
|
+
Asserter.splice(latest_version: version, modified_version: mods, logger:)
|
127
128
|
else
|
128
129
|
# Attach the incoming changes as an assertion to the DMP ID since the updater is NOT the owner
|
129
|
-
Asserter.add(updater
|
130
|
-
logger:
|
130
|
+
Asserter.add(updater:, latest_version: version, modified_version: mods, note:,
|
131
|
+
logger:)
|
131
132
|
end
|
132
133
|
|
133
|
-
_merge_versions(latest_version: version, mods: updated, logger:
|
134
|
+
_merge_versions(latest_version: version, mods: updated, logger:)
|
134
135
|
end
|
135
136
|
# rubocop:enable Metrics/ParameterLists
|
136
137
|
|
@@ -172,7 +173,7 @@ module Uc3DmpId
|
|
172
173
|
logger.debug(message: 'Sending event for EZID publication',
|
173
174
|
details: json)
|
174
175
|
end
|
175
|
-
publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:
|
176
|
+
publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:) if publishable
|
176
177
|
|
177
178
|
# Determine if there are any related identifiers that we should try to fetch a citation for
|
178
179
|
citable_identifiers = Helper.citable_related_identifiers(dmp: json)
|
@@ -189,7 +190,7 @@ module Uc3DmpId
|
|
189
190
|
details: citable_identifiers)
|
190
191
|
end
|
191
192
|
publisher.publish(source: 'DmpUpdater', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
|
192
|
-
logger:
|
193
|
+
logger:)
|
193
194
|
true
|
194
195
|
end
|
195
196
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
data/lib/uc3-dmp-id/validator.rb
CHANGED
@@ -23,11 +23,11 @@ module Uc3DmpId
|
|
23
23
|
# Validate the specified DMP's :json against the schema for the specified :mode
|
24
24
|
# rubocop:disable Metrics/AbcSize
|
25
25
|
def validate(mode:, json:)
|
26
|
-
json = Helper.parse_json(json:
|
26
|
+
json = Helper.parse_json(json:)
|
27
27
|
return [MSG_EMPTY_JSON] if json.nil? || !VALIDATION_MODES.include?(mode)
|
28
28
|
|
29
29
|
# Load the appropriate JSON schema for the mode
|
30
|
-
schema = _load_schema(mode:
|
30
|
+
schema = _load_schema(mode:)
|
31
31
|
return [MSG_NO_SCHEMA] if schema.nil?
|
32
32
|
|
33
33
|
# Validate the JSON
|
data/lib/uc3-dmp-id/version.rb
CHANGED
data/lib/uc3-dmp-id/versioner.rb
CHANGED
@@ -18,13 +18,13 @@ module Uc3DmpId
|
|
18
18
|
|
19
19
|
args = {
|
20
20
|
key_conditions: {
|
21
|
-
PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:
|
21
|
+
PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:)], comparison_operator: 'EQ' }
|
22
22
|
},
|
23
23
|
projection_expression: 'modified',
|
24
24
|
scan_index_forward: false
|
25
25
|
}
|
26
|
-
client =
|
27
|
-
client.query(args
|
26
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
27
|
+
client.query(args:, logger:)
|
28
28
|
end
|
29
29
|
|
30
30
|
# Generate a snapshot of the current latest version of the DMP ID using the existing :modified as
|
@@ -57,8 +57,8 @@ module Uc3DmpId
|
|
57
57
|
prior['SK'] = "#{Helper::SK_DMP_PREFIX}#{latest_version['modified'] || Time.now.utc.iso8601}"
|
58
58
|
|
59
59
|
# Create the prior version record ()
|
60
|
-
client =
|
61
|
-
resp = client.put_item(json: prior, logger:
|
60
|
+
client = Uc3DmpDynamo::Client.new if client.nil?
|
61
|
+
resp = client.put_item(json: prior, logger:)
|
62
62
|
return nil if resp.nil?
|
63
63
|
|
64
64
|
msg = "#{SOURCE} created version PK: #{prior['PK']} SK: #{prior['SK']}"
|
@@ -74,7 +74,7 @@ module Uc3DmpId
|
|
74
74
|
json = Helper.parse_json(json: dmp)
|
75
75
|
return json unless p_key.is_a?(String) && !p_key.strip.empty? && json.is_a?(Hash) && !json['dmp'].nil?
|
76
76
|
|
77
|
-
results = get_versions(p_key
|
77
|
+
results = get_versions(p_key:, client:, logger:)
|
78
78
|
return json unless results.length > 1
|
79
79
|
|
80
80
|
# TODO: we may want to include milliseconds in the future if we get increased volume so that
|
@@ -82,7 +82,7 @@ module Uc3DmpId
|
|
82
82
|
versions = results.map do |ver|
|
83
83
|
next if ver['modified'].nil?
|
84
84
|
|
85
|
-
base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:
|
85
|
+
base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:)}"
|
86
86
|
{
|
87
87
|
timestamp: ver['modified'],
|
88
88
|
url: dmp['dmp']['modified'] == ver['modified'] ? base_url : "#{base_url}?version=#{ver['modified']}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: text
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: uc3-dmp-dynamo
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,6 +90,7 @@ files:
|
|
76
90
|
- README.md
|
77
91
|
- lib/uc3-dmp-id.rb
|
78
92
|
- lib/uc3-dmp-id/asserter.rb
|
93
|
+
- lib/uc3-dmp-id/comparator.rb
|
79
94
|
- lib/uc3-dmp-id/creator.rb
|
80
95
|
- lib/uc3-dmp-id/deleter.rb
|
81
96
|
- lib/uc3-dmp-id/finder.rb
|
@@ -91,7 +106,7 @@ licenses:
|
|
91
106
|
- MIT
|
92
107
|
metadata:
|
93
108
|
rubygems_mfa_required: 'false'
|
94
|
-
post_install_message:
|
109
|
+
post_install_message:
|
95
110
|
rdoc_options: []
|
96
111
|
require_paths:
|
97
112
|
- lib
|
@@ -100,15 +115,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
100
115
|
requirements:
|
101
116
|
- - ">="
|
102
117
|
- !ruby/object:Gem::Version
|
103
|
-
version: '2
|
118
|
+
version: '3.2'
|
104
119
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
120
|
requirements:
|
106
121
|
- - ">="
|
107
122
|
- !ruby/object:Gem::Version
|
108
123
|
version: '0'
|
109
124
|
requirements: []
|
110
|
-
rubygems_version: 3.
|
111
|
-
signing_key:
|
125
|
+
rubygems_version: 3.4.10
|
126
|
+
signing_key:
|
112
127
|
specification_version: 4
|
113
128
|
summary: DMPTool gem that provides support for DMP ID records
|
114
129
|
test_files: []
|