uc3-dmp-id 0.1.26 → 0.1.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/comparator.rb +14 -11
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17f46e6944531f941dd897052fcf78719c8dd5596ec829614f31fb80a5daba91
|
4
|
+
data.tar.gz: b5d84feae8896a1ba0dffbefb23d213a315828330510368099a329c787b3abd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0ce4ad0d1a07e58cd13d25c8b8f68f7d7f71094fd73a89651c099c2d8ae2e49a71aad3556da1cbd215dcfd6d450d3dab2d6505363daea9c17565c61307b8a36
|
7
|
+
data.tar.gz: 3d14bd02b6ca70548d7e0f5567df945c39693184fb5633f4191b54c84dd406de940ad2cbd1b9774aaf6e73d595d70b8f9e60dcd6c0ccce4524539143a01da729
|
@@ -53,27 +53,30 @@ module Uc3DmpId
|
|
53
53
|
# }
|
54
54
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
55
55
|
def compare(hash:)
|
56
|
-
|
56
|
+
scoring = []
|
57
|
+
return scoring unless hash.is_a?(Hash) && !hash['title'].nil?
|
57
58
|
|
58
|
-
|
59
|
-
# a very positive match!
|
60
|
-
scoring = @dmps.map do |entry|
|
59
|
+
@dmps.each do |entry|
|
61
60
|
dmp = entry.fetch('_source', {})
|
61
|
+
|
62
|
+
# Compare the grant ids. If we have a match return the response immediately since that is
|
63
|
+
# a very positive match!
|
62
64
|
response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
|
63
65
|
response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
|
64
|
-
|
66
|
+
scoring << respoonse if response[:confidence] != 'None'
|
67
|
+
next if response[:confidence] != 'None'
|
65
68
|
|
69
|
+
# Compare the people involved, their affiliations and any funding opportunity numbers
|
66
70
|
response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
|
67
71
|
response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
|
68
72
|
response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
69
73
|
|
70
|
-
# Only process the following if we had some matching
|
74
|
+
# Only process the following if we had some matching people, affiliations or opportunity nbrs
|
71
75
|
response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
|
72
|
-
# response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
|
73
76
|
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
|
74
77
|
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
|
75
78
|
# If the score is less than 3 then we have no confidence that it is a match
|
76
|
-
|
79
|
+
next if response[:score] <= 2
|
77
80
|
|
78
81
|
# Set the confidence level based on the score
|
79
82
|
response[:confidence] = if response[:score] > 10
|
@@ -81,7 +84,7 @@ module Uc3DmpId
|
|
81
84
|
else
|
82
85
|
(response[:score] > 5 ? 'Medium' : 'Low')
|
83
86
|
end
|
84
|
-
response
|
87
|
+
scoring << response
|
85
88
|
end
|
86
89
|
|
87
90
|
# TODO: introduce a tie-breaker here (maybe the one closest to the project_end date)
|
@@ -171,7 +174,7 @@ module Uc3DmpId
|
|
171
174
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
172
175
|
def _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
173
176
|
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
174
|
-
return response unless hash['people'].is_a?(Array)
|
177
|
+
return response unless hash['people'].is_a?(Array)
|
175
178
|
|
176
179
|
# Check the person last names and affiliation name and RORs
|
177
180
|
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
|
@@ -193,7 +196,7 @@ module Uc3DmpId
|
|
193
196
|
# rubocop:disable Metrics/AbcSize
|
194
197
|
def _repository_match?(hash:, dmp:, response:)
|
195
198
|
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
196
|
-
return response unless hash['repo_ids'].is_a?(Array)
|
199
|
+
return response unless hash['repo_ids'].is_a?(Array)
|
197
200
|
|
198
201
|
matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
|
199
202
|
return response if matched <= 0
|
data/lib/uc3-dmp-id/version.rb
CHANGED