uc3-dmp-id 0.1.25 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/comparator.rb +47 -56
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f692820577dff088fec1d1df2e4975a00a6b30952fa66979b1fa8449523d092e
|
|
4
|
+
data.tar.gz: 8bf2326f3a6fddf9454e915c596eb40231c939337e517de73f6ea998f04a9d6f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fcc0689438e54715882ed7c30aa7d608e9c3abaa1e61fec7589a213c7665f257be933f93a482dc4d589756517c94a7f9b4803930f0a2a1bd51d877cd890b8a90
|
|
7
|
+
data.tar.gz: df18ed7a17053c9527e6a111150b81c2a6d285bda1263b854b0cc3fae5b703e3ef9dd3213b62230431c898c948cf80f6e821da3515089e6cfc156622223bdf7e
|
|
@@ -29,25 +29,28 @@ module Uc3DmpId
|
|
|
29
29
|
|
|
30
30
|
# Compare the incoming hash with the DMP details that were gathered during initialization.
|
|
31
31
|
#
|
|
32
|
-
# The Hash should
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
#
|
|
40
|
-
#
|
|
41
|
-
#
|
|
42
|
-
#
|
|
43
|
-
#
|
|
44
|
-
#
|
|
45
|
-
#
|
|
46
|
-
#
|
|
47
|
-
#
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
#
|
|
32
|
+
# The incoming Hash should match the documents found in OpenSearch. For example:
|
|
33
|
+
# {
|
|
34
|
+
# "people": ["john doe", "jdoe@example.com"],
|
|
35
|
+
# "people_ids": ["https://orcid.org/0000-0000-0000-ZZZZ"],
|
|
36
|
+
# "affiliations": ["example college"],
|
|
37
|
+
# "affiliation_ids": ["https://ror.org/00000zzzz"],
|
|
38
|
+
# "funder_ids": ["https://doi.org/10.13039/00000000000"],
|
|
39
|
+
# "funders": ["example funder (example.gov)"],
|
|
40
|
+
# "funder_opportunity_ids": ["485yt8325ty"],
|
|
41
|
+
# "grant_ids": [],
|
|
42
|
+
# "funding_status": "planned",
|
|
43
|
+
# "dmp_id": "doi.org/11.22222/A1B2c3po",
|
|
44
|
+
# "title": "example data management plan",
|
|
45
|
+
# "visibility": "private",
|
|
46
|
+
# "featured": 0,
|
|
47
|
+
# "description": "the example project abstract",
|
|
48
|
+
# "project_start": "2022-01-03",
|
|
49
|
+
# "project_end": "2024-12-23",
|
|
50
|
+
# "created": "2023-08-07",
|
|
51
|
+
# "modified": "2023-08-07",
|
|
52
|
+
# "registered": "2023-08-07"
|
|
53
|
+
# }
|
|
51
54
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
52
55
|
def compare(hash:)
|
|
53
56
|
return [] unless hash.is_a?(Hash) && !hash['title'].nil?
|
|
@@ -57,18 +60,18 @@ module Uc3DmpId
|
|
|
57
60
|
scoring = @dmps.map do |entry|
|
|
58
61
|
dmp = entry.fetch('_source', {})
|
|
59
62
|
response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
|
|
60
|
-
response = _grants_match?(array: hash
|
|
63
|
+
response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
|
|
61
64
|
return response if response[:confidence] != 'None'
|
|
62
65
|
|
|
63
|
-
response = _opportunities_match?(array: hash
|
|
64
|
-
response = _orcids_match?(array: hash
|
|
65
|
-
response = _last_name_and_affiliation_match?(
|
|
66
|
+
response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
|
|
67
|
+
response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
|
|
68
|
+
response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
|
66
69
|
|
|
67
70
|
# Only process the following if we had some matching contributors, affiliations or opportunity numbers
|
|
68
|
-
response = _repository_match?(
|
|
71
|
+
response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
|
|
69
72
|
# response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
|
|
70
73
|
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
|
|
71
|
-
response = _text_match?(type: 'abstract', text: hash['
|
|
74
|
+
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
|
|
72
75
|
# If the score is less than 3 then we have no confidence that it is a match
|
|
73
76
|
return nil if response[:score] <= 2
|
|
74
77
|
|
|
@@ -160,28 +163,20 @@ module Uc3DmpId
|
|
|
160
163
|
# rubocop:enable Metrics/AbcSize
|
|
161
164
|
|
|
162
165
|
# Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
|
|
163
|
-
#
|
|
164
|
-
#
|
|
165
|
-
#
|
|
166
|
-
#
|
|
167
|
-
#
|
|
168
|
-
# }
|
|
169
|
-
# ]
|
|
166
|
+
# {
|
|
167
|
+
# people: ["john doe", "jdoe@example.com"],
|
|
168
|
+
# affiliations: ["example college"],
|
|
169
|
+
# affiliation_ids: ["https://ror.org/blah"]
|
|
170
|
+
# }
|
|
170
171
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
171
|
-
def _last_name_and_affiliation_match?(
|
|
172
|
-
return response unless
|
|
173
|
-
return response unless
|
|
174
|
-
|
|
175
|
-
array = array.select { |repo| repo.is_a?(Hash) }
|
|
176
|
-
affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
|
|
177
|
-
last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
|
|
178
|
-
rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
|
|
179
|
-
affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
|
|
172
|
+
def _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
|
173
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
|
174
|
+
return response unless hash['people'].is_a?(Array) && !dmp['people'].empty?
|
|
180
175
|
|
|
181
176
|
# Check the person last names and affiliation name and RORs
|
|
182
|
-
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b:
|
|
183
|
-
rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b:
|
|
184
|
-
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b:
|
|
177
|
+
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
|
|
178
|
+
rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
|
|
179
|
+
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
|
|
185
180
|
return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
|
|
186
181
|
|
|
187
182
|
response[:score] += last_names_matched + rors_matched + affil_names_matched
|
|
@@ -191,20 +186,16 @@ module Uc3DmpId
|
|
|
191
186
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
192
187
|
|
|
193
188
|
# Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
|
|
194
|
-
#
|
|
195
|
-
#
|
|
196
|
-
#
|
|
189
|
+
# {
|
|
190
|
+
# repo_ids: ["http://some.repo.org", "https://doi.org/re3data123"],
|
|
191
|
+
# repos: ["repo"]
|
|
192
|
+
# }
|
|
197
193
|
# rubocop:disable Metrics/AbcSize
|
|
198
|
-
def _repository_match?(
|
|
199
|
-
return response unless
|
|
200
|
-
return response unless
|
|
201
|
-
|
|
202
|
-
# We only care about repositories with ids/urls
|
|
203
|
-
ids = array.select { |repo| repo.is_a?(Hash) }
|
|
204
|
-
.map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
|
|
205
|
-
.flatten.compact.uniq
|
|
194
|
+
def _repository_match?(hash:, dmp:, response:)
|
|
195
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
|
196
|
+
return response unless hash['repo_ids'].is_a?(Array) && !dmp['repo_ids'].empty?
|
|
206
197
|
|
|
207
|
-
matched = _compare_arrays(array_a: dmp['
|
|
198
|
+
matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
|
|
208
199
|
return response if matched <= 0
|
|
209
200
|
|
|
210
201
|
response[:score] += matched
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uc3-dmp-id
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.26
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Brian Riley
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-03-
|
|
11
|
+
date: 2024-03-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|