uc3-dmp-id 0.1.25 → 0.1.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/comparator.rb +59 -65
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17f46e6944531f941dd897052fcf78719c8dd5596ec829614f31fb80a5daba91
|
4
|
+
data.tar.gz: b5d84feae8896a1ba0dffbefb23d213a315828330510368099a329c787b3abd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0ce4ad0d1a07e58cd13d25c8b8f68f7d7f71094fd73a89651c099c2d8ae2e49a71aad3556da1cbd215dcfd6d450d3dab2d6505363daea9c17565c61307b8a36
|
7
|
+
data.tar.gz: 3d14bd02b6ca70548d7e0f5567df945c39693184fb5633f4191b54c84dd406de940ad2cbd1b9774aaf6e73d595d70b8f9e60dcd6c0ccce4524539143a01da729
|
@@ -29,48 +29,54 @@ module Uc3DmpId
|
|
29
29
|
|
30
30
|
# Compare the incoming hash with the DMP details that were gathered during initialization.
|
31
31
|
#
|
32
|
-
# The Hash should
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
32
|
+
# The incoming Hash should match the documents found in OpenSearch. For example:
|
33
|
+
# {
|
34
|
+
# "people": ["john doe", "jdoe@example.com"],
|
35
|
+
# "people_ids": ["https://orcid.org/0000-0000-0000-ZZZZ"],
|
36
|
+
# "affiliations": ["example college"],
|
37
|
+
# "affiliation_ids": ["https://ror.org/00000zzzz"],
|
38
|
+
# "funder_ids": ["https://doi.org/10.13039/00000000000"],
|
39
|
+
# "funders": ["example funder (example.gov)"],
|
40
|
+
# "funder_opportunity_ids": ["485yt8325ty"],
|
41
|
+
# "grant_ids": [],
|
42
|
+
# "funding_status": "planned",
|
43
|
+
# "dmp_id": "doi.org/11.22222/A1B2c3po",
|
44
|
+
# "title": "example data management plan",
|
45
|
+
# "visibility": "private",
|
46
|
+
# "featured": 0,
|
47
|
+
# "description": "the example project abstract",
|
48
|
+
# "project_start": "2022-01-03",
|
49
|
+
# "project_end": "2024-12-23",
|
50
|
+
# "created": "2023-08-07",
|
51
|
+
# "modified": "2023-08-07",
|
52
|
+
# "registered": "2023-08-07"
|
53
|
+
# }
|
51
54
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
52
55
|
def compare(hash:)
|
53
|
-
|
56
|
+
scoring = []
|
57
|
+
return scoring unless hash.is_a?(Hash) && !hash['title'].nil?
|
54
58
|
|
55
|
-
|
56
|
-
# a very positive match!
|
57
|
-
scoring = @dmps.map do |entry|
|
59
|
+
@dmps.each do |entry|
|
58
60
|
dmp = entry.fetch('_source', {})
|
61
|
+
|
62
|
+
# Compare the grant ids. If we have a match return the response immediately since that is
|
63
|
+
# a very positive match!
|
59
64
|
response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
|
60
|
-
response = _grants_match?(array: hash
|
61
|
-
|
65
|
+
response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
|
66
|
+
scoring << respoonse if response[:confidence] != 'None'
|
67
|
+
next if response[:confidence] != 'None'
|
62
68
|
|
63
|
-
|
64
|
-
response =
|
65
|
-
response =
|
69
|
+
# Compare the people involved, their affiliations and any funding opportunity numbers
|
70
|
+
response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
|
71
|
+
response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
|
72
|
+
response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
66
73
|
|
67
|
-
# Only process the following if we had some matching
|
68
|
-
response = _repository_match?(
|
69
|
-
# response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
|
74
|
+
# Only process the following if we had some matching people, affiliations or opportunity nbrs
|
75
|
+
response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
|
70
76
|
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
|
71
|
-
response = _text_match?(type: 'abstract', text: hash['
|
77
|
+
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
|
72
78
|
# If the score is less than 3 then we have no confidence that it is a match
|
73
|
-
|
79
|
+
next if response[:score] <= 2
|
74
80
|
|
75
81
|
# Set the confidence level based on the score
|
76
82
|
response[:confidence] = if response[:score] > 10
|
@@ -78,7 +84,7 @@ module Uc3DmpId
|
|
78
84
|
else
|
79
85
|
(response[:score] > 5 ? 'Medium' : 'Low')
|
80
86
|
end
|
81
|
-
response
|
87
|
+
scoring << response
|
82
88
|
end
|
83
89
|
|
84
90
|
# TODO: introduce a tie-breaker here (maybe the closest to the project_end date)
|
@@ -160,28 +166,20 @@ module Uc3DmpId
|
|
160
166
|
# rubocop:enable Metrics/AbcSize
|
161
167
|
|
162
168
|
# Returns whether or not the incoming list of creators/contributors matches those on the DMP. Expecting:
|
163
|
-
#
|
164
|
-
#
|
165
|
-
#
|
166
|
-
#
|
167
|
-
#
|
168
|
-
# }
|
169
|
-
# ]
|
169
|
+
# {
|
170
|
+
# people: ["john doe", "jdoe@example.com"],
|
171
|
+
# affiliations: ["example college"],
|
172
|
+
# affiliation_ids: ["https://ror.org/blah"]
|
173
|
+
# }
|
170
174
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
171
|
-
def _last_name_and_affiliation_match?(
|
172
|
-
return response unless
|
173
|
-
return response unless
|
174
|
-
|
175
|
-
array = array.select { |repo| repo.is_a?(Hash) }
|
176
|
-
affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
|
177
|
-
last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
|
178
|
-
rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
|
179
|
-
affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
|
175
|
+
def _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
176
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
177
|
+
return response unless hash['people'].is_a?(Array)
|
180
178
|
|
181
179
|
# Check the person last names and affiliation name and RORs
|
182
|
-
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b:
|
183
|
-
rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b:
|
184
|
-
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b:
|
180
|
+
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
|
181
|
+
rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
|
182
|
+
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
|
185
183
|
return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
|
186
184
|
|
187
185
|
response[:score] += last_names_matched + rors_matched + affil_names_matched
|
@@ -191,20 +189,16 @@ module Uc3DmpId
|
|
191
189
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
192
190
|
|
193
191
|
# Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
|
194
|
-
#
|
195
|
-
#
|
196
|
-
#
|
192
|
+
# {
|
193
|
+
# repo_ids: ["http://some.repo.org", "https://doi.org/re3data123"],
|
194
|
+
# repos: ["repo"]
|
195
|
+
# }
|
197
196
|
# rubocop:disable Metrics/AbcSize
|
198
|
-
def _repository_match?(
|
199
|
-
return response unless
|
200
|
-
return response unless
|
201
|
-
|
202
|
-
# We only care about repositories with ids/urls
|
203
|
-
ids = array.select { |repo| repo.is_a?(Hash) }
|
204
|
-
.map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
|
205
|
-
.flatten.compact.uniq
|
197
|
+
def _repository_match?(hash:, dmp:, response:)
|
198
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
199
|
+
return response unless hash['repo_ids'].is_a?(Array)
|
206
200
|
|
207
|
-
matched = _compare_arrays(array_a: dmp['
|
201
|
+
matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
|
208
202
|
return response if matched <= 0
|
209
203
|
|
210
204
|
response[:score] += matched
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|