uc3-dmp-id 0.1.25 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69c311d2bc8bd7acee827939e1d99c4fdf233ddf5e29386f682ac60f67478a6a
4
- data.tar.gz: 829d403c8ada7d01f444494d163bee0fe1c34db320a93bb34abfcc98aebeb122
3
+ metadata.gz: f692820577dff088fec1d1df2e4975a00a6b30952fa66979b1fa8449523d092e
4
+ data.tar.gz: 8bf2326f3a6fddf9454e915c596eb40231c939337e517de73f6ea998f04a9d6f
5
5
  SHA512:
6
- metadata.gz: 8dc439bf6244f758ceb5c1afb5c2d825b58705342b3a146dfcae9af7dcde86352705d703a2aa0e003922f99c61ec2757aaf7c0ea26279c12efdfedbb6cec8b1b
7
- data.tar.gz: fd03efac1ba2cacc9be9d40334c835259461a84bb32715e5cbc9723c202d56a52aecdcc8d67ed29a1f050ee0bac853e973955f2211ee41a9da34984a5d13b342
6
+ metadata.gz: fcc0689438e54715882ed7c30aa7d608e9c3abaa1e61fec7589a213c7665f257be933f93a482dc4d589756517c94a7f9b4803930f0a2a1bd51d877cd890b8a90
7
+ data.tar.gz: df18ed7a17053c9527e6a111150b81c2a6d285bda1263b854b0cc3fae5b703e3ef9dd3213b62230431c898c948cf80f6e821da3515089e6cfc156622223bdf7e
@@ -29,25 +29,28 @@ module Uc3DmpId
29
29
 
30
30
  # Compare the incoming hash with the DMP details that were gathered during initialization.
31
31
  #
32
- # The Hash should contain:
33
- # {
34
- # title: "Example research project",
35
- # abstract: "Lorem ipsum psuedo abstract",
36
- # keywords: ["foo", "bar"],z
37
- # people: [
38
- # {
39
- # id: "https://orcid.org/blah",
40
- # last_name: "doe",
41
- # affiliation: { id: "https://ror.org/blah", name: "Foo" }
42
- # }
43
- # ],
44
- # fundings: [
45
- # { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
46
- # ],
47
- # repositories: [
48
- # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
49
- # ]
50
- # }
32
+ # The incoming Hash should match the documents found in OpenSearch. For example:
33
+ # {
34
+ # "people": ["john doe", "jdoe@example.com"],
35
+ # "people_ids": ["https://orcid.org/0000-0000-0000-ZZZZ"],
36
+ # "affiliations": ["example college"],
37
+ # "affiliation_ids": ["https://ror.org/00000zzzz"],
38
+ # "funder_ids": ["https://doi.org/10.13039/00000000000"],
39
+ # "funders": ["example funder (example.gov)"],
40
+ # "funder_opportunity_ids": ["485yt8325ty"],
41
+ # "grant_ids": [],
42
+ # "funding_status": "planned",
43
+ # "dmp_id": "doi.org/11.22222/A1B2c3po",
44
+ # "title": "example data management plan",
45
+ # "visibility": "private",
46
+ # "featured": 0,
47
+ # "description": "the example project abstract",
48
+ # "project_start": "2022-01-03",
49
+ # "project_end": "2024-12-23",
50
+ # "created": "2023-08-07",
51
+ # "modified": "2023-08-07",
52
+ # "registered": "2023-08-07"
53
+ # }
51
54
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
52
55
  def compare(hash:)
53
56
  return [] unless hash.is_a?(Hash) && !hash['title'].nil?
@@ -57,18 +60,18 @@ module Uc3DmpId
57
60
  scoring = @dmps.map do |entry|
58
61
  dmp = entry.fetch('_source', {})
59
62
  response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
60
- response = _grants_match?(array: hash['fundings'], dmp:, response:)
63
+ response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
61
64
  return response if response[:confidence] != 'None'
62
65
 
63
- response = _opportunities_match?(array: hash['fundings'], dmp:, response:)
64
- response = _orcids_match?(array: hash['people'], dmp:, response:)
65
- response = _last_name_and_affiliation_match?(array: hash['people'], dmp:, response:)
66
+ response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
67
+ response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
68
+ response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
66
69
 
67
70
  # Only process the following if we had some matching contributors, affiliations or opportunity numbers
68
- response = _repository_match?(array: hash['repositories'], dmp:, response:) if response[:score].positive?
71
+ response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
69
72
  # response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
70
73
  response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
71
- response = _text_match?(type: 'abstract', text: hash['abstract'], dmp:, response:) if response[:score].positive?
74
+ response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
72
75
  # If the score is less than 3 then we have no confidence that it is a match
73
76
  return nil if response[:score] <= 2
74
77
 
@@ -160,28 +163,20 @@ module Uc3DmpId
160
163
  # rubocop:enable Metrics/AbcSize
161
164
 
162
165
  # Returns whether or not the incoming list of creators/contributors matches those on the DMP. Expecting:
163
- # [
164
- # {
165
- # id: "https://orcid.org/blah",
166
- # last_name: "doe",
167
- # affiliation: { id: "https://ror.org/blah", name: "Foo" }
168
- # }
169
- # ]
166
+ # {
167
+ # people: ["john doe", "jdoe@example.com"],
168
+ # affiliations: ["example college"],
169
+ # affiliation_ids: ["https://ror.org/blah"]
170
+ # }
170
171
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
171
- def _last_name_and_affiliation_match?(array:, dmp:, response:)
172
- return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
173
- return response unless dmp['people'].is_a?(Array) && !dmp['people'].empty?
174
-
175
- array = array.select { |repo| repo.is_a?(Hash) }
176
- affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
177
- last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
178
- rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
179
- affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
172
+ def _last_name_and_affiliation_match?(hash:, dmp:, response:)
173
+ return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
174
+ return response unless hash['people'].is_a?(Array) && !dmp['people'].empty?
180
175
 
181
176
  # Check the person last names and affiliation name and RORs
182
- last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: last_names)
183
- rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: rors)
184
- affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: affil_names)
177
+ last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
178
+ rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
179
+ affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
185
180
  return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
186
181
 
187
182
  response[:score] += last_names_matched + rors_matched + affil_names_matched
@@ -191,20 +186,16 @@ module Uc3DmpId
191
186
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
192
187
 
193
188
  # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
194
- # [
195
- # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
196
- # ]
189
+ # {
190
+ # repo_ids: ["http://some.repo.org", "https://doi.org/re3data123"],
191
+ # repos: ["repo"]
192
+ # }
197
193
  # rubocop:disable Metrics/AbcSize
198
- def _repository_match?(array:, dmp:, response:)
199
- return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
200
- return response unless dmp['repositories'].is_a?(Array) && !dmp['repositories'].empty?
201
-
202
- # We only care about repositories with ids/urls
203
- ids = array.select { |repo| repo.is_a?(Hash) }
204
- .map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
205
- .flatten.compact.uniq
194
+ def _repository_match?(hash:, dmp:, response:)
195
+ return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
196
+ return response unless hash['repo_ids'].is_a?(Array) && !dmp['repo_ids'].empty?
206
197
 
207
- matched = _compare_arrays(array_a: dmp['repositories'], array_b: ids)
198
+ matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
208
199
  return response if matched <= 0
209
200
 
210
201
  response[:score] += matched
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.25'
4
+ VERSION = '0.1.26'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.25
4
+ version: 0.1.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-09 00:00:00.000000000 Z
11
+ date: 2024-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json