uc3-dmp-id 0.1.25 → 0.1.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69c311d2bc8bd7acee827939e1d99c4fdf233ddf5e29386f682ac60f67478a6a
4
- data.tar.gz: 829d403c8ada7d01f444494d163bee0fe1c34db320a93bb34abfcc98aebeb122
3
+ metadata.gz: f692820577dff088fec1d1df2e4975a00a6b30952fa66979b1fa8449523d092e
4
+ data.tar.gz: 8bf2326f3a6fddf9454e915c596eb40231c939337e517de73f6ea998f04a9d6f
5
5
  SHA512:
6
- metadata.gz: 8dc439bf6244f758ceb5c1afb5c2d825b58705342b3a146dfcae9af7dcde86352705d703a2aa0e003922f99c61ec2757aaf7c0ea26279c12efdfedbb6cec8b1b
7
- data.tar.gz: fd03efac1ba2cacc9be9d40334c835259461a84bb32715e5cbc9723c202d56a52aecdcc8d67ed29a1f050ee0bac853e973955f2211ee41a9da34984a5d13b342
6
+ metadata.gz: fcc0689438e54715882ed7c30aa7d608e9c3abaa1e61fec7589a213c7665f257be933f93a482dc4d589756517c94a7f9b4803930f0a2a1bd51d877cd890b8a90
7
+ data.tar.gz: df18ed7a17053c9527e6a111150b81c2a6d285bda1263b854b0cc3fae5b703e3ef9dd3213b62230431c898c948cf80f6e821da3515089e6cfc156622223bdf7e
@@ -29,25 +29,28 @@ module Uc3DmpId
29
29
 
30
30
  # Compare the incoming hash with the DMP details that were gathered during initialization.
31
31
  #
32
- # The Hash should contain:
33
- # {
34
- # title: "Example research project",
35
- # abstract: "Lorem ipsum psuedo abstract",
36
- # keywords: ["foo", "bar"],z
37
- # people: [
38
- # {
39
- # id: "https://orcid.org/blah",
40
- # last_name: "doe",
41
- # affiliation: { id: "https://ror.org/blah", name: "Foo" }
42
- # }
43
- # ],
44
- # fundings: [
45
- # { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
46
- # ],
47
- # repositories: [
48
- # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
49
- # ]
50
- # }
32
+ # The incoming Hash should match the documents found in OpenSearch. For example:
33
+ # {
34
+ # "people": ["john doe", "jdoe@example.com"],
35
+ # "people_ids": ["https://orcid.org/0000-0000-0000-ZZZZ"],
36
+ # "affiliations": ["example college"],
37
+ # "affiliation_ids": ["https://ror.org/00000zzzz"],
38
+ # "funder_ids": ["https://doi.org/10.13039/00000000000"],
39
+ # "funders": ["example funder (example.gov)"],
40
+ # "funder_opportunity_ids": ["485yt8325ty"],
41
+ # "grant_ids": [],
42
+ # "funding_status": "planned",
43
+ # "dmp_id": "doi.org/11.22222/A1B2c3po",
44
+ # "title": "example data management plan",
45
+ # "visibility": "private",
46
+ # "featured": 0,
47
+ # "description": "the example project abstract",
48
+ # "project_start": "2022-01-03",
49
+ # "project_end": "2024-12-23",
50
+ # "created": "2023-08-07",
51
+ # "modified": "2023-08-07",
52
+ # "registered": "2023-08-07"
53
+ # }
51
54
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
52
55
  def compare(hash:)
53
56
  return [] unless hash.is_a?(Hash) && !hash['title'].nil?
@@ -57,18 +60,18 @@ module Uc3DmpId
57
60
  scoring = @dmps.map do |entry|
58
61
  dmp = entry.fetch('_source', {})
59
62
  response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
60
- response = _grants_match?(array: hash['fundings'], dmp:, response:)
63
+ response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
61
64
  return response if response[:confidence] != 'None'
62
65
 
63
- response = _opportunities_match?(array: hash['fundings'], dmp:, response:)
64
- response = _orcids_match?(array: hash['people'], dmp:, response:)
65
- response = _last_name_and_affiliation_match?(array: hash['people'], dmp:, response:)
66
+ response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
67
+ response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
68
+ response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
66
69
 
67
70
  # Only process the following if we had some matching contributors, affiliations or opportunity numbers
68
- response = _repository_match?(array: hash['repositories'], dmp:, response:) if response[:score].positive?
71
+ response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
69
72
  # response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
70
73
  response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
71
- response = _text_match?(type: 'abstract', text: hash['abstract'], dmp:, response:) if response[:score].positive?
74
+ response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
72
75
  # If the score is less than 3 then we have no confidence that it is a match
73
76
  return nil if response[:score] <= 2
74
77
 
@@ -160,28 +163,20 @@ module Uc3DmpId
160
163
  # rubocop:enable Metrics/AbcSize
161
164
 
162
165
  # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
163
- # [
164
- # {
165
- # id: "https://orcid.org/blah",
166
- # last_name: "doe",
167
- # affiliation: { id: "https://ror.org/blah", name: "Foo" }
168
- # }
169
- # ]
166
+ # {
167
+ # people: ["john doe", "jdoe@example.com"],
168
+ # affiliations: ["example college"],
169
+ # affiliation_ids: ["https://ror.org/blah"]
170
+ # }
170
171
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
171
- def _last_name_and_affiliation_match?(array:, dmp:, response:)
172
- return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
173
- return response unless dmp['people'].is_a?(Array) && !dmp['people'].empty?
174
-
175
- array = array.select { |repo| repo.is_a?(Hash) }
176
- affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
177
- last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
178
- rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
179
- affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
172
+ def _last_name_and_affiliation_match?(hash:, dmp:, response:)
173
+ return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
174
+ return response unless hash['people'].is_a?(Array) && !dmp['people'].empty?
180
175
 
181
176
  # Check the person last names and affiliation name and RORs
182
- last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: last_names)
183
- rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: rors)
184
- affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: affil_names)
177
+ last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
178
+ rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
179
+ affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
185
180
  return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
186
181
 
187
182
  response[:score] += last_names_matched + rors_matched + affil_names_matched
@@ -191,20 +186,16 @@ module Uc3DmpId
191
186
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
192
187
 
193
188
  # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
194
- # [
195
- # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
196
- # ]
189
+ # {
190
+ # repo_ids: ["http://some.repo.org", "https://doi.org/re3data123"],
191
+ # repos: ["repo"]
192
+ # }
197
193
  # rubocop:disable Metrics/AbcSize
198
- def _repository_match?(array:, dmp:, response:)
199
- return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
200
- return response unless dmp['repositories'].is_a?(Array) && !dmp['repositories'].empty?
201
-
202
- # We only care about repositories with ids/urls
203
- ids = array.select { |repo| repo.is_a?(Hash) }
204
- .map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
205
- .flatten.compact.uniq
194
+ def _repository_match?(hash:, dmp:, response:)
195
+ return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
196
+ return response unless hash['repo_ids'].is_a?(Array) && !dmp['repo_ids'].empty?
206
197
 
207
- matched = _compare_arrays(array_a: dmp['repositories'], array_b: ids)
198
+ matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
208
199
  return response if matched <= 0
209
200
 
210
201
  response[:score] += matched
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.25'
4
+ VERSION = '0.1.26'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.25
4
+ version: 0.1.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-09 00:00:00.000000000 Z
11
+ date: 2024-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json