uc3-dmp-id 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/asserter.rb +12 -2
- data/lib/uc3-dmp-id/comparator.rb +500 -0
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +17 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 51832c144e5663dc01c805f92e81e50e63f2ce00200a8cad5525b7a34c9d7eb9
|
|
4
|
+
data.tar.gz: e197deb7f608ef478716a8aea113e853ab0d3903f1496f970a73d31d30b7e892
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 31bc5d1bb73176c2afff25715590c50d0612f558880f2bde3a750ac5ea8d49674c5dd9b00eff36960454c972a1a31aca6b4f853a98e995dd65977e1f82903ad2
|
|
7
|
+
data.tar.gz: 86153dfeebc52570ecb83a012f4e7242e9bec607627c2dcefc5beb101a711199bc346c1388045127df89f13e65c83aa473c0fc77e37810d436ea939101b74f2b
|
data/lib/uc3-dmp-id/asserter.rb
CHANGED
|
@@ -168,7 +168,8 @@ module Uc3DmpId
|
|
|
168
168
|
# "id": "ABCD1234",
|
|
169
169
|
# "provenance": "dmphub",
|
|
170
170
|
# "timestamp": "2023-07-07T14:50:23+00:00",
|
|
171
|
-
# "note": "
|
|
171
|
+
# "note": "Data received from OpenAlex, matched by PI names and title keywords.",
|
|
172
|
+
# "confidence": "Med",
|
|
172
173
|
# "dmproadmap_related_identifiers": {
|
|
173
174
|
# "work_type": "article",
|
|
174
175
|
# "descriptor": "is_cited_by",
|
|
@@ -183,7 +184,8 @@ module Uc3DmpId
|
|
|
183
184
|
# "id": "ABCD1234",
|
|
184
185
|
# "provenance": "dmphub",
|
|
185
186
|
# "timestamp": "2023-07-07T14:50:23+00:00",
|
|
186
|
-
# "note": "
|
|
187
|
+
# "note": "Data received from the NIH API, matched by the opportunity number.",
|
|
188
|
+
# "confidence": "High",
|
|
187
189
|
# "funding": {
|
|
188
190
|
# "funding_status": "granted",
|
|
189
191
|
# "grant_id": {
|
|
@@ -206,5 +208,13 @@ module Uc3DmpId
|
|
|
206
208
|
JSON.parse(assertion.to_json)
|
|
207
209
|
end
|
|
208
210
|
end
|
|
211
|
+
|
|
212
|
+
# Placeholder for scoring a related work against the latest DMP version.
# The body is empty, so calling it currently does nothing and returns nil.
# NOTE(review): intended scoring rules are not visible here — confirm before implementing.
def _score_related_work(latest_version:, work:)
|
|
213
|
+
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# Placeholder for scoring a funding assertion against the latest DMP version.
# The body is empty, so calling it currently does nothing and returns nil.
# NOTE(review): intended scoring rules are not visible here — confirm before implementing.
def _score_funding(latest_version:, funding:)
|
|
217
|
+
|
|
218
|
+
end
|
|
209
219
|
end
|
|
210
220
|
end
|
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'text'
|
|
4
|
+
|
|
5
|
+
module Uc3DmpId
|
|
6
|
+
class ComparatorError < StandardError; end
|
|
7
|
+
|
|
8
|
+
# Class that compares incoming data from an external source to the DMP
|
|
9
|
+
# It determines if they are likely related and applies a confidence rating
|
|
10
|
+
class Comparator
|
|
11
|
+
|
|
12
|
+
# Error messages raised (as ComparatorError) by the constructor
MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'

# Words that are dropped from titles/abstracts before they are compared.
# Frozen so that the shared list cannot be mutated at runtime.
STOP_WORDS = %w[a an and if of or the then they].freeze
|
|
16
|
+
|
|
17
|
+
# See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
|
|
18
|
+
# Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
|
|
19
|
+
|
|
20
|
+
attr_accessor :augmenter, :dmp, :details_hash, :logger
|
|
21
|
+
|
|
22
|
+
# Build a Comparator for one augmenter and one DMP.
#
# Recognized keyword args:
#   :logger    - optional; only `debug(message:, details:)` is ever called on it
#   :augmenter - required Hash whose 'PK' starts with 'AUGMENTERS#'
#   :dmp       - the DMP Hash, either bare or wrapped as { 'dmp' => { ... } }
#
# Raises ComparatorError when the augmenter is missing/invalid or when no usable
# details could be extracted from the DMP.
def initialize(**args)
  @logger = args[:logger]
  @details_hash = {}

  @augmenter = args[:augmenter]
  valid_augmenter = @augmenter.is_a?(Hash) && @augmenter['PK'].to_s.start_with?('AUGMENTERS#')
  raise ComparatorError, MSG_MISSING_AUGMENTER unless valid_augmenter

  # Unwrap { 'dmp' => {...} } when present. A nil or non-Hash :dmp is passed through
  # untouched so that it ends in the MSG_MISSING_DMP error below rather than a
  # NoMethodError/TypeError while probing for the 'dmp' key.
  incoming = args[:dmp]
  @dmp = incoming.is_a?(Hash) && !incoming['dmp'].nil? ? incoming['dmp'] : incoming
  _extract_dmp_details(dmp: @dmp)
  raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
end
|
|
34
|
+
|
|
35
|
+
# Compare the incoming hash with the DMP details that were gathered during initialization.
|
|
36
|
+
#
|
|
37
|
+
# The Hash should contain:
|
|
38
|
+
# {
|
|
39
|
+
# title: "Example research project",
|
|
40
|
+
# abstract: "Lorem ipsum pseudo abstract",
|
|
41
|
+
# keywords: ["foo", "bar"],
|
|
42
|
+
# people: [
|
|
43
|
+
# {
|
|
44
|
+
# id: "https://orcid.org/blah",
|
|
45
|
+
# last_name: "doe",
|
|
46
|
+
# affiliation: { id: "https://ror.org/blah", name: "Foo" }
|
|
47
|
+
# }
|
|
48
|
+
# ],
|
|
49
|
+
# fundings: [
|
|
50
|
+
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
|
51
|
+
# ],
|
|
52
|
+
# repositories: [
|
|
53
|
+
# { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
|
|
54
|
+
# ]
|
|
55
|
+
# }
|
|
56
|
+
# Score how likely the incoming record is related to the DMP captured at initialization.
#
# The incoming Hash is read with String keys: 'title', 'abstract', 'keywords',
# 'people', 'fundings' and 'repositories'. A non-Hash or a Hash without a 'title'
# yields the default "no match" response.
#
# Returns a Hash: { confidence:, score:, notes:, source: } where confidence is one of
# 'None', 'Low', 'Medium', 'High' or (on a grant id match) 'Absolute'.
def compare(hash:)
  response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
  return response unless hash.is_a?(Hash) && !hash['title'].nil?

  # A grant id match is a very positive match, so return the response immediately
  response = _grants_match?(array: hash['fundings'], response:)
  return response if response[:confidence] != 'None'

  response = _opportunities_match?(array: hash['fundings'], response:)
  response = _orcids_match?(array: hash['people'], response:)
  response = _last_name_and_affiliation_match?(array: hash['people'], response:)

  # Only process the weaker signals if we had some matching contributors, affiliations
  # or opportunity numbers. The matchers only ever add to the score, so one check suffices.
  if response[:score].positive?
    response = _repository_match?(array: hash['repositories'], response:)
    # Keywords come from the 'keywords' entry (previously 'repositories' was passed by mistake)
    response = _keyword_match?(array: hash['keywords'], response:)
    response = _text_match?(type: 'title', text: hash['title'], response:)
    response = _text_match?(type: 'abstract', text: hash['abstract'], response:)
  end

  # If the score is less than 3 then we have no confidence that it is a match
  score = response[:score]
  return response if score <= 2

  # Set the confidence level based on the score
  response[:confidence] = if score > 15
                            'High'
                          elsif score > 10
                            'Medium'
                          else
                            'Low'
                          end
  response
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
# Populate @details_hash with the normalized bits of the DMP used for matching.
#
# Does nothing (returns nil, leaving @details_hash untouched) unless the DMP is a Hash
# that has both a 'title' and a 'contact'.
def _extract_dmp_details(dmp:)
  return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?

  projects = dmp.fetch('project', [{}])
  projects = [{}] unless projects.is_a?(Array)
  projects = projects.select { |proj| proj.is_a?(Hash) }
  # With an empty project list (or a project lacking the field) fall back to the
  # DMP-level title/description instead of ending up with nil.
  first_project = projects.first || {}

  fundings = projects.map { |proj| proj.fetch('funding', []) }.flatten.compact.uniq
  hosts = dmp.fetch('dataset', []).map { |dset| dset.fetch('distribution', []).map { |d| d['host'] } }
  people = [dmp['contact']]
  people << dmp.fetch('contributor', [])

  # Extract all of the important bits about the DMP
  @details_hash = {
    created: dmp.fetch('created', Time.now.iso8601),
    title: _cleanse_text(text: first_project['title'] || dmp['title']),
    abstract: _cleanse_text(text: first_project['description'] || dmp['description']),
    keywords: dmp.fetch('dataset', []).map { |ds| ds.fetch('keyword', []) }.flatten.compact.uniq,
    identifiers: [dmp.fetch('dmp_id', {})['identifier']],
    last_names: [],
    affiliation_ids: [],
    affiliations: [],
    funder_names: [],
    funder_ids: [],
    opportunity_ids: [],
    grant_ids: [],
    repositories: []
  }
  _extract_people(array: people.flatten.compact.uniq)
  _extract_funding(array: fundings)
  _extract_repositories(repos: hosts.flatten.compact.uniq)

  # Clean up the results by flattening and removing duplicates from the Arrays
  @details_hash.transform_values! { |value| value.is_a?(Array) ? value.flatten.compact.uniq : value }
  @logger&.debug(message: "Extracted the following from the DMP", details: @details_hash)
end
|
|
119
|
+
|
|
120
|
+
# Extract all of the funding information
|
|
121
|
+
# Extract all of the funding information (funder ids/names, opportunity and grant ids)
# from the DMP's funding entries into @details_hash.
#
# Returns [] when :array is not an Array, otherwise the Array that was passed in.
def _extract_funding(array:)
  return [] unless array.is_a?(Array)

  # Read 'identifier' from a nested id Hash; anything else (nil, String, ...) yields nil
  identifier_of = ->(value) { value.is_a?(Hash) ? value['identifier'] : nil }

  array.each do |funding|
    next unless funding.is_a?(Hash)

    funder_id = funding['funder_id'].is_a?(Hash) ? funding['funder_id'] : {}
    funder_identifier = funder_id['identifier']&.downcase&.strip
    # Normalize the ROR *before* the crosswalk lookup; the crosswalk keys are lower case
    ror = funder_identifier if funder_id['type']&.downcase&.strip == 'ror'
    fundref = ror.nil? ? funder_identifier : ROR_FUNDREF_ID_CROSSWALK[ror.to_sym]

    opportunity = identifier_of.call(funding['dmproadmap_funding_opportunity_id'])&.downcase&.strip
    grant = identifier_of.call(funding['grant_id'])
    grant_full = grant&.downcase&.strip
    # Grant ids are often URLs, so also keep the last path segment on its own
    grant_tail = grant&.split('/')&.last&.downcase&.strip

    @details_hash[:identifiers] << ror
    @details_hash[:identifiers] << fundref&.downcase&.strip
    @details_hash[:identifiers] << grant_full
    @details_hash[:identifiers] << grant_tail
    @details_hash[:identifiers] << opportunity

    @details_hash[:funder_names] << funding['name']&.downcase&.strip
    @details_hash[:funder_ids] << fundref
    @details_hash[:opportunity_ids] << opportunity
    @details_hash[:grant_ids] << [grant_full, grant_tail]
  end
  array
end
|
|
146
|
+
|
|
147
|
+
# Extract all of the ORCIDs, last names, and affiliation ids and names
|
|
148
|
+
# Extract all of the ORCIDs, last names, and affiliation ids and names from the
# DMP's contact/contributor entries into @details_hash.
#
# Returns [] when :array is not an Array, otherwise the Array that was passed in.
def _extract_people(array:)
  return [] unless array.is_a?(Array)

  # Read 'identifier' from a nested id Hash; anything else (nil, String, ...) yields nil
  identifier_of = ->(value) { value.is_a?(Hash) ? value['identifier'] : nil }

  array.each do |entry|
    next unless entry.is_a?(Hash)

    id = identifier_of.call(entry.fetch('contributor_id', entry['contact_id']))&.downcase&.strip
    affil = entry['dmproadmap_affiliation'].is_a?(Hash) ? entry['dmproadmap_affiliation'] : {}
    ror = identifier_of.call(affil['affiliation_id'])&.downcase&.strip
    # to_s guards against an explicit nil name, which used to raise NoMethodError
    name = entry['name'].to_s.downcase.strip
    # "last, first" and "first last" are both supported
    last_name = name.include?(', ') ? name.split(', ').first : name.split.last

    @details_hash[:identifiers] << [id, ror]
    @details_hash[:last_names] << last_name
    @details_hash[:affiliation_ids] << ror
    # Drop any trailing " (...)" qualifier from the affiliation name
    @details_hash[:affiliations] << affil['name']&.split(' (')&.first&.downcase&.strip
  end
  array
end
|
|
167
|
+
|
|
168
|
+
# Extract all of the re3data ids, URLs and names
|
|
169
|
+
# Extract all of the repository host ids, URLs and names from the DMP's dataset
# distribution hosts into @details_hash.
#
# Returns [] when :repos is not an Array, otherwise the Array that was passed in.
def _extract_repositories(repos:)
  return [] unless repos.is_a?(Array)

  repos.each do |repo|
    next unless repo.is_a?(Hash)

    # A host without (or with a nil) 'dmproadmap_host_id' simply contributes no id
    host_id = repo['dmproadmap_host_id']
    host_identifier = host_id.is_a?(Hash) ? host_id['identifier'] : nil

    @details_hash[:identifiers] << [repo['url']&.downcase&.strip, host_identifier&.downcase&.strip]
    @details_hash[:repositories] << repo.fetch('name', '')&.downcase&.strip
  end
  repos
end
|
|
182
|
+
|
|
183
|
+
# Returns whether or not the incoming grant id(s) match the DMPs grant id. Expecting:
|
|
184
|
+
# [
|
|
185
|
+
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
|
186
|
+
# ]
|
|
187
|
+
# Flags the response as an 'Absolute' match (score 100) when any incoming grant id
# equals one of the DMP's grant ids. Only funding entries that are Hashes with an
# Array under 'grant' are considered. The response Hash is updated in place and returned.
def _grants_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  with_grants = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
  incoming_ids = with_grants.map do |funding|
    funding['grant'].map { |grant_id| grant_id&.downcase&.strip }
  end
  incoming_ids = incoming_ids.flatten.compact.uniq

  known_ids = @details_hash.fetch(:grant_ids, [])
  hits = _compare_arrays(array_a: known_ids, array_b: incoming_ids)
  return response unless hits.positive?

  response[:confidence] = 'Absolute'
  response[:score] = 100
  response[:notes] << 'the grant ID matched'
  response
end
|
|
202
|
+
|
|
203
|
+
# Returns whether or not the incoming grant id(s) match the DMPs opportunity id. Expecting:
|
|
204
|
+
# [
|
|
205
|
+
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
|
206
|
+
# ]
|
|
207
|
+
# Adds 5 to the score when any incoming grant id equals one of the DMP's funding
# opportunity ids. Only funding entries that are Hashes with an Array under 'grant'
# are considered. The response Hash is updated in place and returned.
def _opportunities_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  candidates = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
  incoming_ids = candidates.map do |funding|
    funding['grant'].map { |value| value&.downcase&.strip }
  end
  incoming_ids = incoming_ids.flatten.compact.uniq

  known_ids = @details_hash.fetch(:opportunity_ids, [])
  hits = _compare_arrays(array_a: known_ids, array_b: incoming_ids)
  return response unless hits.positive?

  response[:score] += 5
  response[:notes] << 'the funding opportunity number matched'
  response
end
|
|
221
|
+
|
|
222
|
+
# Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
|
|
223
|
+
# [
|
|
224
|
+
# {
|
|
225
|
+
# id: "https://orcid.org/blah",
|
|
226
|
+
# last_name: "doe",
|
|
227
|
+
# affiliation: { id: "https://ror.org/blah", name: "Foo" }
|
|
228
|
+
# }
|
|
229
|
+
# ]
|
|
230
|
+
# Adds 2 points per incoming person 'id' that is found among the DMP's identifiers.
# Non-Hash entries are ignored. The response Hash is updated in place and returned.
def _orcids_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  people = array.select { |person| person.is_a?(Hash) }
  incoming_ids = people.map { |person| person['id']&.downcase&.strip }
  incoming_ids = incoming_ids.flatten.compact.uniq

  known_ids = @details_hash.fetch(:identifiers, [])
  hits = _compare_arrays(array_a: known_ids, array_b: incoming_ids)
  return response unless hits.positive?

  response[:score] += hits * 2
  response[:notes] << 'contributor ORCIDs matched'
  response
end
|
|
244
|
+
|
|
245
|
+
# Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
|
|
246
|
+
# [
|
|
247
|
+
# {
|
|
248
|
+
# id: "https://orcid.org/blah",
|
|
249
|
+
# last_name: "doe",
|
|
250
|
+
# affiliation: { id: "https://ror.org/blah", name: "Foo" }
|
|
251
|
+
# }
|
|
252
|
+
# ]
|
|
253
|
+
# Adds one point per matching last name, affiliation ROR and affiliation name.
# Non-Hash people are ignored. The response Hash is updated in place and returned.
def _last_name_and_affiliation_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  tidy = ->(values) { values.flatten.compact.uniq }
  people = array.select { |person| person.is_a?(Hash) }

  affiliations = tidy.call(people.map { |person| person['affiliation'] })
  last_names = tidy.call(people.map { |person| person['last_name']&.downcase&.strip })
  rors = tidy.call(affiliations.map { |affil| affil['id']&.downcase&.strip })
  affil_names = tidy.call(affiliations.map { |affil| affil['name']&.downcase&.strip })

  # Check the person last names and affiliation name and RORs
  counts = [
    _compare_arrays(array_a: @details_hash.fetch(:last_names, []), array_b: last_names),
    _compare_arrays(array_a: @details_hash.fetch(:affiliation_ids, []), array_b: rors),
    _compare_arrays(array_a: @details_hash.fetch(:affiliations, []), array_b: affil_names)
  ]
  return response if counts.all? { |count| count <= 0 }

  response[:score] += counts.sum
  response[:notes] << 'contributor names and affiliations matched'
  response
end
|
|
272
|
+
|
|
273
|
+
# Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
|
|
274
|
+
# [
|
|
275
|
+
# { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
|
|
276
|
+
# ]
|
|
277
|
+
# Adds one point per incoming repository id/URL that is found among the DMP's identifiers.
# The response Hash is updated in place and returned.
def _repository_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # We only care about repositories with ids/urls. Array() turns a missing 'id' into []
  # (instead of raising NoMethodError on nil) and tolerates a single, non-Array id.
  ids = array.select { |repo| repo.is_a?(Hash) }
             .map { |repo| Array(repo['id']).map { |id| id&.downcase&.strip } }
             .flatten.compact.uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
  return response if matched <= 0

  response[:score] += matched
  response[:notes] << 'repositories matched'
  response
end
|
|
292
|
+
|
|
293
|
+
# Returns whether or not the list of keywords exist in the DMP. Expecting:
|
|
294
|
+
# keywords: ["foo", "bar"]
|
|
295
|
+
# Adds a single point when at least one incoming keyword is found among the DMP's keywords.
# The response Hash is updated in place and returned.
def _keyword_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  incoming = array.map { |word| word&.downcase&.strip }
  incoming = incoming.flatten.compact.uniq

  known = @details_hash.fetch(:keywords, [])
  hits = _compare_arrays(array_a: known, array_b: incoming)
  return response unless hits.positive?

  response[:score] += 1
  response[:notes] << 'keywords matched'
  response
end
|
|
306
|
+
|
|
307
|
+
# Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
|
|
308
|
+
# Compares :text with the DMP's cleansed :title or :abstract (selected by :type) using
# Text::WhiteSimilarity. A similarity of at least 0.75 adds 5 points, at least 0.5 adds 2;
# anything lower leaves the response unchanged. The :logger argument is accepted but not
# used; debug output goes to @logger.
def _text_match?(type: 'title', text:, response:, logger: nil)
  return response unless response.is_a?(Hash) && text.is_a?(String)
  return response if text.strip.empty?

  dmp_text = @details_hash[type.to_sym]
  return response if dmp_text.nil?

  incoming_text = _cleanse_text(text:)
  similarity = Text::WhiteSimilarity.new.similarity(dmp_text, incoming_text)

  details = {
    "dmp_#{type}": dmp_text,
    "incoming_#{type}": incoming_text,
    nlp_score: similarity
  }
  @logger&.debug(message: "Text::WhiteSimilarity score", details:)
  return response if similarity < 0.5

  points = similarity >= 0.75 ? 5 : 2
  response[:score] += points
  response[:notes] << "#{type}s are similar"
  response
end
|
|
327
|
+
|
|
328
|
+
# Change the incoming text to lower case, remove spaces and STOP_WORDS
|
|
329
|
+
# Lower-cases the text, drops every STOP_WORDS entry and re-joins the remaining words
# with single spaces. Returns nil when :text is not a String.
def _cleanse_text(text:)
  return nil unless text.is_a?(String)

  words = text.downcase.split
  kept = words - STOP_WORDS
  kept.join(' ').strip
end
|
|
334
|
+
|
|
335
|
+
# Do an introspection of the 2 arrays and return the number of matches
|
|
336
|
+
# Return the number of distinct values that appear in both arrays.
#
# nil and blank String entries are ignored, so two records that merely share an
# empty value are not counted as a match. Returns 0 unless both arguments are Arrays.
def _compare_arrays(array_a: [], array_b: [])
  return 0 unless array_a.is_a?(Array) && array_b.is_a?(Array)

  meaningful = lambda do |values|
    values.reject { |value| value.nil? || (value.is_a?(String) && value.strip.empty?) }
  end
  # Array#& always returns an Array (never nil) of unique common elements
  (meaningful.call(array_a) & meaningful.call(array_b)).size
end
|
|
342
|
+
|
|
343
|
+
# TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
|
|
344
|
+
# Crosswalk from ROR ids (Symbol keys) to Crossref funder ids (String values).
# Every key must be a full, lower case "https://ror.org/..." URL because lookups are
# done with the funder's ROR converted to a Symbol. The NSF and NASA keys previously
# read "https://.org/..." and therefore could never match.
ROR_FUNDREF_ID_CROSSWALK = {
  # NIH ID Crosswalk
  "https://ror.org/01cwqze88": "https://doi.org/10.13039/100000002",
  "https://ror.org/04mhx6838": "https://doi.org/10.13039/100000055",
  "https://ror.org/012pb6c26": "https://doi.org/10.13039/100000050",
  "https://ror.org/03wkg3b53": "https://doi.org/10.13039/100000053",
  "https://ror.org/0060t0j89": "https://doi.org/10.13039/100000092",
  "https://ror.org/00372qc85": "https://doi.org/10.13039/100000070",
  "https://ror.org/00190t495": "https://doi.org/10.13039/100008460",
  "https://ror.org/00j4k1h63": "https://doi.org/10.13039/100000066",
  "https://ror.org/01y3zfr79": "https://doi.org/10.13039/100000056",
  "https://ror.org/04q48ey07": "https://doi.org/10.13039/100000057",
  "https://ror.org/0493hgw16": "https://doi.org/10.13039/100006545",
  "https://ror.org/04vfsmv21": "https://doi.org/10.13039/100000098",
  "https://ror.org/03jh5a977": "https://doi.org/10.13039/100000093",
  "https://ror.org/04xeg9z08": "https://doi.org/10.13039/100000025",
  "https://ror.org/01s5ya894": "https://doi.org/10.13039/100000065",
  "https://ror.org/02meqm098": "https://doi.org/10.13039/100000002",
  "https://ror.org/049v75w11": "https://doi.org/10.13039/100000049",
  "https://ror.org/004a2wv92": "https://doi.org/10.13039/100000072",
  "https://ror.org/00adh9b73": "https://doi.org/10.13039/100000062",
  "https://ror.org/043z4tv69": "https://doi.org/10.13039/100000060",
  "https://ror.org/00x19de83": "https://doi.org/10.13039/100000002",
  "https://ror.org/02jzrsm59": "https://doi.org/10.13039/100000027",
  "https://ror.org/006zn3t30": "https://doi.org/10.13039/100000069",
  "https://ror.org/04byxyr05": "https://doi.org/10.13039/100000071",
  "https://ror.org/04pw6fb54": "https://doi.org/10.13039/100006108",
  "https://ror.org/05aq6yn88": "https://doi.org/10.13039/100006955",
  "https://ror.org/02xey9a22": "https://doi.org/10.13039/100000061",
  "https://ror.org/00fj8a872": "https://doi.org/10.13039/100000052",
  "https://ror.org/01wtjyf13": "https://doi.org/10.13039/100000063",
  "https://ror.org/04r5s4b52": "https://doi.org/10.13039/100005440",
  "https://ror.org/046zezr58": "https://doi.org/10.13039/100006085",
  "https://ror.org/02e3wq066": "https://doi.org/10.13039/100006086",
  "https://ror.org/031gy6182": "https://doi.org/10.13039/100000002",
  "https://ror.org/054j5yq82": "https://doi.org/10.13039/100000002",
  "https://ror.org/02yrzyf97": "https://doi.org/10.13039/100000002",

  # NSF ID Crosswalk
  "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
  "https://ror.org/04aqat463": "https://doi.org/10.13039/100000001",
  "https://ror.org/01rcfpa16": "https://doi.org/10.13039/100005441",
  "https://ror.org/014eweh95": "https://doi.org/10.13039/100005445",
  "https://ror.org/001xhss06": "https://doi.org/10.13039/100000076",
  "https://ror.org/04qn9mx93": "https://doi.org/10.13039/100000153",
  "https://ror.org/03g87he71": "https://doi.org/10.13039/100000155",
  "https://ror.org/01tnvpc68": "https://doi.org/10.13039/100000156",
  "https://ror.org/01rvays47": "https://doi.org/10.13039/100000154",
  "https://ror.org/002jdaq33": "https://doi.org/10.13039/100000152",
  "https://ror.org/025kzpk63": "https://doi.org/10.13039/100000083",
  "https://ror.org/04nh1dc89": "https://doi.org/10.13039/100007523",
  "https://ror.org/01mng8331": "https://doi.org/10.13039/100000143",
  "https://ror.org/02rdzmk74": "https://doi.org/10.13039/100000144",
  "https://ror.org/053a2cp42": "https://doi.org/10.13039/100000145",
  "https://ror.org/014bj5w56": "https://doi.org/10.13039/100000081",
  "https://ror.org/00whkrf32": "https://doi.org/10.13039/100000082",
  "https://ror.org/05s7cqk18": "https://doi.org/10.13039/100000173",
  "https://ror.org/02kd4km72": "https://doi.org/10.13039/100000172",
  "https://ror.org/03mamvh39": "https://doi.org/10.13039/100000171",
  "https://ror.org/00b6sbb32": "https://doi.org/10.13039/100000084",
  "https://ror.org/0471zv972": "https://doi.org/10.13039/100000146",
  "https://ror.org/028yd4c30": "https://doi.org/10.13039/100000147",
  "https://ror.org/01krpsy48": "https://doi.org/10.13039/100000148",
  "https://ror.org/050rnw378": "https://doi.org/10.13039/100000149",
  "https://ror.org/0388pet74": "https://doi.org/10.13039/100000150",
  "https://ror.org/03xyg3m20": "https://doi.org/10.13039/100000151",
  "https://ror.org/05p847d66": "https://doi.org/10.13039/100000085",
  "https://ror.org/037gd6g64": "https://doi.org/10.13039/100000159",
  "https://ror.org/05v01mk25": "https://doi.org/10.13039/100000160",
  "https://ror.org/05wqqhv83": "https://doi.org/10.13039/100000141",
  "https://ror.org/05nwjp114": "https://doi.org/10.13039/100007352",
  "https://ror.org/05fnzca26": "https://doi.org/10.13039/100000162",
  "https://ror.org/02trddg58": "https://doi.org/10.13039/100000163",
  "https://ror.org/029b7h395": "https://doi.org/10.13039/100000086",
  "https://ror.org/04mg8wm74": "https://doi.org/10.13039/100000164",
  "https://ror.org/01ar8dr59": "https://doi.org/10.13039/100000165",
  "https://ror.org/01pc7k308": "https://doi.org/10.13039/100000078",
  "https://ror.org/051fftw81": "https://doi.org/10.13039/100000121",
  "https://ror.org/04ap5x931": "https://doi.org/10.13039/100000166",
  "https://ror.org/00apvva27": "https://doi.org/10.13039/100005716",
  "https://ror.org/04nseet23": "https://doi.org/10.13039/100000179",
  "https://ror.org/04k9mqs78": "https://doi.org/10.13039/100000106",
  "https://ror.org/01k638r21": "https://doi.org/10.13039/100000089",
  "https://ror.org/01gmp5538": "https://doi.org/10.13039/100005447",
  "https://ror.org/01vnjbg30": "https://doi.org/10.13039/100005449",
  "https://ror.org/03h7mcc28": "https://doi.org/10.13039/100000088",
  "https://ror.org/05wgkzg12": "https://doi.org/10.13039/100000169",
  "https://ror.org/0445wmv88": "https://doi.org/10.13039/100000170",
  "https://ror.org/02dz2hb46": "https://doi.org/10.13039/100000077",
  "https://ror.org/034m1ez10": "https://doi.org/10.13039/100000107",
  "https://ror.org/02a65dj82": "https://doi.org/10.13039/100005717",
  "https://ror.org/020fhsn68": "https://doi.org/10.13039/100000001",
  "https://ror.org/03z9hh605": "https://doi.org/10.13039/100000174",
  "https://ror.org/04ya3kq71": "https://doi.org/10.13039/100007521",
  "https://ror.org/04evh7y43": "https://doi.org/10.13039/100005443",
  "https://ror.org/04h67aa53": "https://doi.org/10.13039/100000177",
  "https://ror.org/025dabr11": "https://doi.org/10.13039/100005446",
  "https://ror.org/04vw0kz07": "https://doi.org/10.13039/100005448",
  "https://ror.org/054ydxh33": "https://doi.org/10.13039/100005554",
  "https://ror.org/01sharn77": "https://doi.org/10.13039/100006091",
  "https://ror.org/02ch5q898": "https://doi.org/10.13039/100000001",

  # NASA ID Crosswalk
  "https://ror.org/0171mag52": "https://doi.org/10.13039/100006198",
  "https://ror.org/027k65916": "https://doi.org/10.13039/100006196",
  "https://ror.org/027ka1x80": "https://doi.org/10.13039/100000104",
  "https://ror.org/02acart68": "https://doi.org/10.13039/100006195",
  "https://ror.org/059fqnc42": "https://doi.org/10.13039/100006193",
  "https://ror.org/01cyfxe35": "https://doi.org/10.13039/100016595",
  "https://ror.org/04xx4z452": "https://doi.org/10.13039/100006203",
  "https://ror.org/0399mhs52": "https://doi.org/10.13039/100006199",
  "https://ror.org/02epydz83": "https://doi.org/10.13039/100006197",
  "https://ror.org/03j9e2j92": "https://doi.org/10.13039/100006205",
  "https://ror.org/02s42x260": "https://doi.org/10.13039/100000104",
  "https://ror.org/01p7gwa14": "https://doi.org/10.13039/100000104",
  "https://ror.org/01qxmdg18": "https://doi.org/10.13039/100000104",
  "https://ror.org/006ndaj41": "https://doi.org/10.13039/100000104",
  "https://ror.org/03em45j53": "https://doi.org/10.13039/100007346",
  "https://ror.org/045t78n53": "https://doi.org/10.13039/100000104",
  "https://ror.org/00r57r863": "https://doi.org/10.13039/100000104",
  "https://ror.org/0401vze59": "https://doi.org/10.13039/100007726",
  "https://ror.org/04hccab49": "https://doi.org/10.13039/100000104",
  "https://ror.org/04437j066": "https://doi.org/10.13039/100000104",
  "https://ror.org/028b18z22": "https://doi.org/10.13039/100000104",
  "https://ror.org/00ryjtt64": "https://doi.org/10.13039/100000104",

  # DOE ID Crosswalk
  "https://ror.org/01bj3aw27": "https://doi.org/10.13039/100000015",
  "https://ror.org/03q1rgc19": "https://doi.org/10.13039/100006133",
  "https://ror.org/02xznz413": "https://doi.org/10.13039/100006134",
  "https://ror.org/03sk1we31": "https://doi.org/10.13039/100006168",
  "https://ror.org/00f93gc02": "https://doi.org/10.13039/100006177",
  "https://ror.org/05tj7dm33": "https://doi.org/10.13039/100006147",
  "https://ror.org/0012c7r22": "https://doi.org/10.13039/100006192",
  "https://ror.org/00mmn6b08": "https://doi.org/10.13039/100006132",
  "https://ror.org/03ery9d53": "https://doi.org/10.13039/100006120",
  "https://ror.org/033jmdj81": "https://doi.org/10.13039/100000015",
  "https://ror.org/03rd4h240": "https://doi.org/10.13039/100006130",
  "https://ror.org/0054t4769": "https://doi.org/10.13039/100006200",
  "https://ror.org/03eecgp81": "https://doi.org/10.13039/100006174",
  "https://ror.org/00heb4d89": "https://doi.org/10.13039/100006135",
  "https://ror.org/05ek3m339": "https://doi.org/10.13039/100006150",
  "https://ror.org/00km40770": "https://doi.org/10.13039/100006138",
  "https://ror.org/02ah1da87": "https://doi.org/10.13039/100006137",
  "https://ror.org/05hsv7e61": "https://doi.org/10.13039/100000015",
  "https://ror.org/01c9ay627": "https://doi.org/10.13039/100006165",
  "https://ror.org/04z2gev20": "https://doi.org/10.13039/100006183",
  "https://ror.org/02z1qvq09": "https://doi.org/10.13039/100006144",
  "https://ror.org/03jf3w726": "https://doi.org/10.13039/100006186",
  "https://ror.org/04848jz84": "https://doi.org/10.13039/100006142",
  "https://ror.org/04s778r16": "https://doi.org/10.13039/100006171",
  "https://ror.org/04nnxen11": "https://doi.org/10.13039/100000015",
  "https://ror.org/05csy5p27": "https://doi.org/10.13039/100010268",
  "https://ror.org/05efnac71": "https://doi.org/10.13039/100000015"
}.freeze
|
|
499
|
+
end
|
|
500
|
+
end
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uc3-dmp-id
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Brian Riley
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-10-
|
|
11
|
+
date: 2023-10-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|
|
@@ -38,6 +38,20 @@ dependencies:
|
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '3.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: text
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.3'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.3'
|
|
41
55
|
- !ruby/object:Gem::Dependency
|
|
42
56
|
name: uc3-dmp-dynamo
|
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -76,6 +90,7 @@ files:
|
|
|
76
90
|
- README.md
|
|
77
91
|
- lib/uc3-dmp-id.rb
|
|
78
92
|
- lib/uc3-dmp-id/asserter.rb
|
|
93
|
+
- lib/uc3-dmp-id/comparator.rb
|
|
79
94
|
- lib/uc3-dmp-id/creator.rb
|
|
80
95
|
- lib/uc3-dmp-id/deleter.rb
|
|
81
96
|
- lib/uc3-dmp-id/finder.rb
|