uc3-dmp-id 0.1.37 → 0.1.39
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/comparator.rb +28 -10
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95b6443cf60f6baa7312ffb70dccf20018669ddcfdb468d24b923159313126b8
|
4
|
+
data.tar.gz: 121969eb56f2dcaa026fe0e866241170b6951336602e8f498829d865ad967032
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa222d18770474937bf0e06e911508adf783b6bad7158f0162a67ba888b9d33c6aa6fd575aad47ff49dc34b56a5fdaddafbd9698d1a5e06e75ade8528421447a
|
7
|
+
data.tar.gz: e248c1e81c1fb622e7fd9c54aa37ca2aa6c93644ada4c9cc028cd800befaf9a5475303fa9ee2618e8eefc7e77854c4a8ab542d05a3ee26304e8f2a7eb50b3c0c
|
@@ -70,14 +70,15 @@ module Uc3DmpId
|
|
70
70
|
# Compare the people involved, their affiliations and any funding opportunity numbers
|
71
71
|
response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
|
72
72
|
response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
|
73
|
-
response =
|
73
|
+
response = _last_name_match?(hash:, dmp:, response:)
|
74
|
+
response = _affiliation_match?(hash:, dmp:, response:)
|
74
75
|
|
75
76
|
# Only process the following if we had some matching people, affiliations or opportunity nbrs
|
76
77
|
response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
|
77
78
|
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
|
78
79
|
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
|
79
|
-
# If the score is less than
|
80
|
-
next if response[:score] <=
|
80
|
+
# If the score is less than 3 then we have no confidence that it is a match
|
81
|
+
# next if response[:score] <= 2
|
81
82
|
|
82
83
|
# Set the confidence level based on the score
|
83
84
|
response[:dmp_id] = "DMP##{dmp['dmp_id']}"
|
@@ -178,29 +179,46 @@ module Uc3DmpId
|
|
178
179
|
# affiliation_ids: ["https://ror.org/blah"]
|
179
180
|
# }
|
180
181
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
181
|
-
def
|
182
|
+
def _last_name_match?(hash:, dmp:, response:)
|
182
183
|
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
183
184
|
return response unless hash['people'].is_a?(Array)
|
184
185
|
|
185
186
|
# Check the person last names and affiliation name and RORs
|
186
187
|
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
|
188
|
+
return response if last_names_matched <= 0
|
189
|
+
|
190
|
+
@logger&.debug(
|
191
|
+
message: 'Contributor name match',
|
192
|
+
details: {
|
193
|
+
people: { dmp: dmp['people'], work: hash['people'] }
|
194
|
+
}
|
195
|
+
)
|
196
|
+
response[:score] += last_names_matched * 2
|
197
|
+
response[:notes] << 'contributor names matched'
|
198
|
+
response
|
199
|
+
end
|
200
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
201
|
+
|
202
|
+
def _affiliation_match?(hash:, dmp:, response:)
|
203
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
204
|
+
return response unless hash['affiliations'].is_a?(Array) || hash['affiliation_ids'].is_a?(Array)
|
205
|
+
|
206
|
+
# Check the affiliation names and RORs
|
187
207
|
rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
|
188
208
|
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
|
189
|
-
return response if
|
209
|
+
return response if rors_matched <= 0 && affil_names_matched <= 0
|
190
210
|
|
191
211
|
@logger&.debug(
|
192
|
-
message: '
|
212
|
+
message: 'Affiliation name match',
|
193
213
|
details: {
|
194
|
-
people: { dmp: dmp['people'], work: hash['people'] },
|
195
214
|
rors: { dmp: dmp['affiliation_ids'], work: hash['affiliation_ids'] },
|
196
215
|
places: { dmp: dmp['affiliations'], work: hash['affiliations'] }
|
197
216
|
}
|
198
217
|
)
|
199
|
-
response[:score] +=
|
200
|
-
response[:notes] << '
|
218
|
+
response[:score] += rors_matched + affil_names_matched
|
219
|
+
response[:notes] << 'affiliations matched'
|
201
220
|
response
|
202
221
|
end
|
203
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
204
222
|
|
205
223
|
# Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
|
206
224
|
# {
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.37
|
4
|
+
version: 0.1.39
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|