uc3-dmp-id 0.1.35 → 0.1.37
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/comparator.rb +21 -4
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b8c0ff9aa6add1fc2da937d9f76872ae6bb287adf5df008d9c0d434f25fd717
|
4
|
+
data.tar.gz: a0479d353eb1d64cfab1bdb718f3efcf6a1d66cec8ea444ab735e74708f88984
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a578c6b89bb5d94172d5e2c24255f81326871616767039a4aa4de6d1c00da4a9d16fcf20f26a4479d8d1d5017e90dfcb27ca9c6c4bb5b1ed5421928bfb518eeb
|
7
|
+
data.tar.gz: 3dc8d1999fb6f26cdf8fc465d160513982ac7607bfb7c5e4c5f719b4f43d3b5d82261c5c4ce87cdd10be1d1631276b212b54cfd60b3f5523b389b3f4da25dbe5
|
data/lib/uc3-dmp-id/comparator.rb
CHANGED
@@ -24,6 +24,8 @@ module Uc3DmpId
|
|
24
24
|
@details_hash = {}
|
25
25
|
|
26
26
|
@dmps = args.fetch(:dmps, [])
|
27
|
+
|
28
|
+
@logger&.debug(message: 'Comparator DMPs', details: @dmps)
|
27
29
|
raise ComparatorError, MSG_MISSING_DMPS if @dmps.empty?
|
28
30
|
end
|
29
31
|
|
@@ -57,11 +59,12 @@ module Uc3DmpId
|
|
57
59
|
return scoring unless hash.is_a?(Hash) && !hash['title'].nil?
|
58
60
|
|
59
61
|
@dmps.each do |dmp|
|
62
|
+
@logger&.debug(message: 'Incoming external work', details: hash)
|
60
63
|
# Compare the grant ids. If we have a match return the response immediately since that is
|
61
64
|
# a very positive match!
|
62
65
|
response = { confidence: 'None', score: 0, notes: [] }
|
63
66
|
response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
|
64
|
-
scoring <<
|
67
|
+
scoring << response if response[:confidence] != 'None'
|
65
68
|
next if response[:confidence] != 'None'
|
66
69
|
|
67
70
|
# Compare the people involved, their affiliations and any funding opportunity numbers
|
@@ -73,8 +76,8 @@ module Uc3DmpId
|
|
73
76
|
response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
|
74
77
|
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
|
75
78
|
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
|
76
|
-
# If the score is less than
|
77
|
-
next if response[:score] <=
|
79
|
+
# If the score is less than 4 then we have no confidence that it is a match
|
80
|
+
next if response[:score] <= 3
|
78
81
|
|
79
82
|
# Set the confidence level based on the score
|
80
83
|
response[:dmp_id] = "DMP##{dmp['dmp_id']}"
|
@@ -110,6 +113,7 @@ module Uc3DmpId
|
|
110
113
|
matched = _compare_arrays(array_a: dmp['grant_ids'], array_b: ids)
|
111
114
|
return response if matched <= 0
|
112
115
|
|
116
|
+
@logger&.debug(message: 'Grant ID match!', details: { dmp: dmp['grant_ids'], work: ids })
|
113
117
|
response[:confidence] = 'Absolute'
|
114
118
|
response[:score] = 100
|
115
119
|
response[:notes] << 'the grant ID matched'
|
@@ -133,6 +137,7 @@ module Uc3DmpId
|
|
133
137
|
matched = _compare_arrays(array_a: dmp['funder_opportunity_ids'], array_b: ids)
|
134
138
|
return response if matched <= 0
|
135
139
|
|
140
|
+
@logger&.debug(message: 'Opportunities match!', details: { dmp: dmp['funder_opportunity_ids'], work: ids })
|
136
141
|
response[:score] += 5
|
137
142
|
response[:notes] << 'the funding opportunity number matched'
|
138
143
|
response
|
@@ -159,6 +164,7 @@ module Uc3DmpId
|
|
159
164
|
matched = _compare_arrays(array_a: dmp['people_ids'], array_b: ids)
|
160
165
|
return response if matched <= 0
|
161
166
|
|
167
|
+
@logger&.debug(message: 'ORCID match!', details: { dmp: dmp['people_ids'], work: ids })
|
162
168
|
response[:score] += (matched * 2)
|
163
169
|
response[:notes] << 'contributor ORCIDs matched'
|
164
170
|
response
|
@@ -182,8 +188,16 @@ module Uc3DmpId
|
|
182
188
|
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
|
183
189
|
return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
|
184
190
|
|
191
|
+
@logger&.debug(
|
192
|
+
message: 'Contributor name match',
|
193
|
+
details: {
|
194
|
+
people: { dmp: dmp['people'], work: hash['people'] },
|
195
|
+
rors: { dmp: dmp['affiliation_ids'], work: hash['affiliation_ids'] },
|
196
|
+
places: { dmp: dmp['affiliations'], work: hash['affiliations'] }
|
197
|
+
}
|
198
|
+
)
|
185
199
|
response[:score] += last_names_matched + rors_matched + affil_names_matched
|
186
|
-
response[:notes] << 'contributor names and affiliations matched'
|
200
|
+
response[:notes] << 'contributor names and/or affiliations matched'
|
187
201
|
response
|
188
202
|
end
|
189
203
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
@@ -201,6 +215,7 @@ module Uc3DmpId
|
|
201
215
|
matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
|
202
216
|
return response if matched <= 0
|
203
217
|
|
218
|
+
@logger&.debug(message: 'Repos match!', details: { dmp: dmp['repo_ids'], work: hash['repo_ids'] })
|
204
219
|
response[:score] += matched
|
205
220
|
response[:notes] << 'repositories matched'
|
206
221
|
response
|
@@ -224,6 +239,8 @@ module Uc3DmpId
|
|
224
239
|
# @logger&.debug(message: 'Text::WhiteSimilarity score', details:)
|
225
240
|
return response if details[:nlp_score] < 0.5
|
226
241
|
|
242
|
+
@logger&.debug(message: 'Titles match', details: { dmp: dmp['title'], work: text }) if type == 'title'
|
243
|
+
@logger&.debug(message: 'Abstracts match', details: { dmp: dmp['description'], work: text }) unless type == 'title'
|
227
244
|
response[:score] += details[:nlp_score] >= 0.75 ? 5 : 2
|
228
245
|
response[:notes] << "#{type}s are similar"
|
229
246
|
response
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.35
|
4
|
+
version: 0.1.37
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|