uc3-dmp-id 0.1.25 → 0.1.27

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69c311d2bc8bd7acee827939e1d99c4fdf233ddf5e29386f682ac60f67478a6a
4
- data.tar.gz: 829d403c8ada7d01f444494d163bee0fe1c34db320a93bb34abfcc98aebeb122
3
+ metadata.gz: 17f46e6944531f941dd897052fcf78719c8dd5596ec829614f31fb80a5daba91
4
+ data.tar.gz: b5d84feae8896a1ba0dffbefb23d213a315828330510368099a329c787b3abd8
5
5
  SHA512:
6
- metadata.gz: 8dc439bf6244f758ceb5c1afb5c2d825b58705342b3a146dfcae9af7dcde86352705d703a2aa0e003922f99c61ec2757aaf7c0ea26279c12efdfedbb6cec8b1b
7
- data.tar.gz: fd03efac1ba2cacc9be9d40334c835259461a84bb32715e5cbc9723c202d56a52aecdcc8d67ed29a1f050ee0bac853e973955f2211ee41a9da34984a5d13b342
6
+ metadata.gz: c0ce4ad0d1a07e58cd13d25c8b8f68f7d7f71094fd73a89651c099c2d8ae2e49a71aad3556da1cbd215dcfd6d450d3dab2d6505363daea9c17565c61307b8a36
7
+ data.tar.gz: 3d14bd02b6ca70548d7e0f5567df945c39693184fb5633f4191b54c84dd406de940ad2cbd1b9774aaf6e73d595d70b8f9e60dcd6c0ccce4524539143a01da729
@@ -29,48 +29,54 @@ module Uc3DmpId
29
29
 
30
30
  # Compare the incoming hash with the DMP details that were gathered during initialization.
31
31
  #
32
- # The Hash should contain:
33
- # {
34
- # title: "Example research project",
35
- # abstract: "Lorem ipsum psuedo abstract",
36
- # keywords: ["foo", "bar"],z
37
- # people: [
38
- # {
39
- # id: "https://orcid.org/blah",
40
- # last_name: "doe",
41
- # affiliation: { id: "https://ror.org/blah", name: "Foo" }
42
- # }
43
- # ],
44
- # fundings: [
45
- # { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
46
- # ],
47
- # repositories: [
48
- # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
49
- # ]
50
- # }
32
+ # The incoming Hash should match the documents found in OpenSearch. For example:
33
+ # {
34
+ # "people": ["john doe", "jdoe@example.com"],
35
+ # "people_ids": ["https://orcid.org/0000-0000-0000-ZZZZ"],
36
+ # "affiliations": ["example college"],
37
+ # "affiliation_ids": ["https://ror.org/00000zzzz"],
38
+ # "funder_ids": ["https://doi.org/10.13039/00000000000"],
39
+ # "funders": ["example funder (example.gov)"],
40
+ # "funder_opportunity_ids": ["485yt8325ty"],
41
+ # "grant_ids": [],
42
+ # "funding_status": "planned",
43
+ # "dmp_id": "doi.org/11.22222/A1B2c3po",
44
+ # "title": "example data management plan",
45
+ # "visibility": "private",
46
+ # "featured": 0,
47
+ # "description": "the example project abstract",
48
+ # "project_start": "2022-01-03",
49
+ # "project_end": "2024-12-23",
50
+ # "created": "2023-08-07",
51
+ # "modified": "2023-08-07",
52
+ # "registered": "2023-08-07"
53
+ # }
51
54
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
52
55
  def compare(hash:)
53
- return [] unless hash.is_a?(Hash) && !hash['title'].nil?
56
+ scoring = []
57
+ return scoring unless hash.is_a?(Hash) && !hash['title'].nil?
54
58
 
55
- # Compare the grant ids. If we have a match return the response immediately since that is
56
- # a very positive match!
57
- scoring = @dmps.map do |entry|
59
+ @dmps.each do |entry|
58
60
  dmp = entry.fetch('_source', {})
61
+
62
+ # Compare the grant ids. If we have a match, record the response and skip the remaining checks since that is
63
+ # a very positive match!
59
64
  response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
60
- response = _grants_match?(array: hash['fundings'], dmp:, response:)
61
- return response if response[:confidence] != 'None'
65
+ response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
66
+ scoring << response if response[:confidence] != 'None'
67
+ next if response[:confidence] != 'None'
62
68
 
63
- response = _opportunities_match?(array: hash['fundings'], dmp:, response:)
64
- response = _orcids_match?(array: hash['people'], dmp:, response:)
65
- response = _last_name_and_affiliation_match?(array: hash['people'], dmp:, response:)
69
+ # Compare the people involved, their affiliations and any funding opportunity numbers
70
+ response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
71
+ response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
72
+ response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
66
73
 
67
- # Only process the following if we had some matching contributors, affiliations or opportuniy nbrs
68
- response = _repository_match?(array: hash['repositories'], dmp:, response:) if response[:score].positive?
69
- # response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
74
+ # Only process the following if we had some matching people, affiliations or opportunity nbrs
75
+ response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
70
76
  response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
71
- response = _text_match?(type: 'abstract', text: hash['abstract'], dmp:, response:) if response[:score].positive?
77
+ response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
72
78
  # If the score is less than 3 then we have no confidence that it is a match
73
- return nil if response[:score] <= 2
79
+ next if response[:score] <= 2
74
80
 
75
81
  # Set the confidence level based on the score
76
82
  response[:confidence] = if response[:score] > 10
@@ -78,7 +84,7 @@ module Uc3DmpId
78
84
  else
79
85
  (response[:score] > 5 ? 'Medium' : 'Low')
80
86
  end
81
- response
87
+ scoring << response
82
88
  end
83
89
 
84
90
  # TODO: introduce a tie-breaker here (maybe the closest to the project_end date)
@@ -160,28 +166,20 @@ module Uc3DmpId
160
166
  # rubocop:enable Metrics/AbcSize
161
167
 
162
168
  # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
163
- # [
164
- # {
165
- # id: "https://orcid.org/blah",
166
- # last_name: "doe",
167
- # affiliation: { id: "https://ror.org/blah", name: "Foo" }
168
- # }
169
- # ]
169
+ # {
170
+ # people: ["john doe", "jdoe@example.com"],
171
+ # affiliations: ["example college"],
172
+ # affiliation_ids: ["https://ror.org/blah"]
173
+ # }
170
174
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
171
- def _last_name_and_affiliation_match?(array:, dmp:, response:)
172
- return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
173
- return response unless dmp['people'].is_a?(Array) && !dmp['people'].empty?
174
-
175
- array = array.select { |repo| repo.is_a?(Hash) }
176
- affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
177
- last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
178
- rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
179
- affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
175
+ def _last_name_and_affiliation_match?(hash:, dmp:, response:)
176
+ return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
177
+ return response unless hash['people'].is_a?(Array)
180
178
 
181
179
  # Check the person last names and affiliation name and RORs
182
- last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: last_names)
183
- rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: rors)
184
- affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: affil_names)
180
+ last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
181
+ rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
182
+ affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
185
183
  return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
186
184
 
187
185
  response[:score] += last_names_matched + rors_matched + affil_names_matched
@@ -191,20 +189,16 @@ module Uc3DmpId
191
189
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
192
190
 
193
191
  # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
194
- # [
195
- # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
196
- # ]
192
+ # {
193
+ # repo_ids: ["http://some.repo.org", "https://doi.org/re3data123"],
194
+ # repos: ["repo"]
195
+ # }
197
196
  # rubocop:disable Metrics/AbcSize
198
- def _repository_match?(array:, dmp:, response:)
199
- return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
200
- return response unless dmp['repositories'].is_a?(Array) && !dmp['repositories'].empty?
201
-
202
- # We only care about repositories with ids/urls
203
- ids = array.select { |repo| repo.is_a?(Hash) }
204
- .map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
205
- .flatten.compact.uniq
197
+ def _repository_match?(hash:, dmp:, response:)
198
+ return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
199
+ return response unless hash['repo_ids'].is_a?(Array)
206
200
 
207
- matched = _compare_arrays(array_a: dmp['repositories'], array_b: ids)
201
+ matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
208
202
  return response if matched <= 0
209
203
 
210
204
  response[:score] += matched
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.25'
4
+ VERSION = '0.1.27'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.25
4
+ version: 0.1.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-09 00:00:00.000000000 Z
11
+ date: 2024-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json