uc3-dmp-id 0.1.24 → 0.1.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/uc3-dmp-id/comparator.rb +93 -216
- data/lib/uc3-dmp-id/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f692820577dff088fec1d1df2e4975a00a6b30952fa66979b1fa8449523d092e
|
4
|
+
data.tar.gz: 8bf2326f3a6fddf9454e915c596eb40231c939337e517de73f6ea998f04a9d6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fcc0689438e54715882ed7c30aa7d608e9c3abaa1e61fec7589a213c7665f257be933f93a482dc4d589756517c94a7f9b4803930f0a2a1bd51d877cd890b8a90
|
7
|
+
data.tar.gz: df18ed7a17053c9527e6a111150b81c2a6d285bda1263b854b0cc3fae5b703e3ef9dd3213b62230431c898c948cf80f6e821da3515089e6cfc156622223bdf7e
|
@@ -9,203 +9,102 @@ module Uc3DmpId
|
|
9
9
|
# Class that compares incoming data from an external source to the DMP
|
10
10
|
# It determines if they are likely related and applies a confidence rating
|
11
11
|
class Comparator
|
12
|
-
|
12
|
+
MSG_MISSING_DMPS = 'No DMPs were defined. Expected an Array of OpenSearch documents!'
|
13
13
|
|
14
14
|
STOP_WORDS = %w[a an and if of or the then they].freeze
|
15
15
|
|
16
16
|
# See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
|
17
17
|
# Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
|
18
18
|
|
19
|
-
attr_accessor :
|
19
|
+
attr_accessor :dmps, :logger
|
20
20
|
|
21
|
+
# Expecting an Array of OpenSearch documents as :dmps in the :args
|
21
22
|
def initialize(**args)
|
22
23
|
@logger = args[:logger]
|
23
24
|
@details_hash = {}
|
24
25
|
|
25
|
-
@
|
26
|
-
|
27
|
-
raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
|
26
|
+
@dmps = args.fetch(:dmps, [])
|
27
|
+
raise ComparatorError, MSG_MISSING_DMPS if @dmps.empty?
|
28
28
|
end
|
29
29
|
|
30
30
|
# Compare the incoming hash with the DMP details that were gathered during initialization.
|
31
31
|
#
|
32
|
-
# The Hash should
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
32
|
+
# The incoming Hash should match the documents found in OpenSearch. For example:
|
33
|
+
# {
|
34
|
+
# "people": ["john doe", "jdoe@example.com"],
|
35
|
+
# "people_ids": ["https://orcid.org/0000-0000-0000-ZZZZ"],
|
36
|
+
# "affiliations": ["example college"],
|
37
|
+
# "affiliation_ids": ["https://ror.org/00000zzzz"],
|
38
|
+
# "funder_ids": ["https://doi.org/10.13039/00000000000"],
|
39
|
+
# "funders": ["example funder (example.gov)"],
|
40
|
+
# "funder_opportunity_ids": ["485yt8325ty"],
|
41
|
+
# "grant_ids": [],
|
42
|
+
# "funding_status": "planned",
|
43
|
+
# "dmp_id": "doi.org/11.22222/A1B2c3po",
|
44
|
+
# "title": "example data management plan",
|
45
|
+
# "visibility": "private",
|
46
|
+
# "featured": 0,
|
47
|
+
# "description": "the example project abstract",
|
48
|
+
# "project_start": "2022-01-03",
|
49
|
+
# "project_end": "2024-12-23",
|
50
|
+
# "created": "2023-08-07",
|
51
|
+
# "modified": "2023-08-07",
|
52
|
+
# "registered": "2023-08-07"
|
53
|
+
# }
|
51
54
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
52
55
|
def compare(hash:)
|
53
|
-
|
54
|
-
return response unless hash.is_a?(Hash) && !hash['title'].nil?
|
56
|
+
return [] unless hash.is_a?(Hash) && !hash['title'].nil?
|
55
57
|
|
56
58
|
# Compare the grant ids. If we have a match return the response immediately since that is
|
57
59
|
# a very positive match!
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
private
|
84
|
-
|
85
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
86
|
-
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
87
|
-
def _extract_dmp_details(dmp:)
|
88
|
-
return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?
|
89
|
-
|
90
|
-
projects = dmp.fetch('project', [{}])
|
91
|
-
fundings = projects.map { |proj| proj.fetch('funding', []) }.flatten.compact.uniq
|
92
|
-
hosts = dmp.fetch('dataset', []).map { |dset| dset.fetch('distribution', []).map { |d| d['host'] } }
|
93
|
-
people = [dmp['contact']]
|
94
|
-
people << dmp.fetch('contributor', [])
|
95
|
-
|
96
|
-
# Extract all of the important bits about the DMP
|
97
|
-
@details_hash = {
|
98
|
-
created: dmp.fetch('created', Time.now.iso8601),
|
99
|
-
title: _cleanse_text(text: projects&.first&.fetch('title', dmp['title'])),
|
100
|
-
abstract: _cleanse_text(text: projects&.first&.fetch('description', dmp['description'])),
|
101
|
-
keywords: dmp.fetch('dataset', []).map { |ds| ds.fetch('keyword', []) }.flatten.compact.uniq,
|
102
|
-
identifiers: [dmp.fetch('dmp_id', {})['identifier']],
|
103
|
-
last_names: [],
|
104
|
-
orcids: [],
|
105
|
-
affiliation_ids: [],
|
106
|
-
affiliations: [],
|
107
|
-
funder_names: [],
|
108
|
-
funder_ids: [],
|
109
|
-
opportunity_ids: [],
|
110
|
-
grant_ids: [],
|
111
|
-
repositories: []
|
112
|
-
}
|
113
|
-
_extract_people(array: people&.flatten&.compact&.uniq)
|
114
|
-
_extract_funding(array: fundings)
|
115
|
-
_extract_repositories(repos: hosts.flatten.compact.uniq)
|
116
|
-
|
117
|
-
# Clean up the results by flattening and removing duplicates from the Arrays
|
118
|
-
@details_hash.each_key do |key|
|
119
|
-
@details_hash[key] = @details_hash[key].flatten.compact.uniq if @details_hash[key].is_a?(Array)
|
120
|
-
end
|
121
|
-
@logger&.debug(message: 'Extracted the following from the DMP', details: @details_hash)
|
122
|
-
end
|
123
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
124
|
-
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
125
|
-
|
126
|
-
# Extract all of the funding information
|
127
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
128
|
-
def _extract_funding(array:)
|
129
|
-
return [] unless array.is_a?(Array)
|
130
|
-
|
131
|
-
array.each do |funding|
|
132
|
-
next unless funding.is_a?(Hash)
|
133
|
-
|
134
|
-
funder_id = funding.fetch('funder_id', {})
|
135
|
-
ror = funder_id['identifier'] if funder_id['type']&.downcase&.strip == 'ror'
|
136
|
-
fundref = ror.nil? ? funder_id['identifier']&.downcase&.strip : ROR_FUNDREF_ID_CROSSWALK[:"#{ror}"]
|
137
|
-
opportunity = funding.fetch('dmproadmap_funding_opportunity_id', {})['identifier']
|
138
|
-
grant = funding.fetch('grant_id', {})['identifier']
|
139
|
-
|
140
|
-
@details_hash[:identifiers] << ror&.downcase&.strip
|
141
|
-
@details_hash[:identifiers] << fundref&.downcase&.strip
|
142
|
-
@details_hash[:identifiers] << grant&.downcase&.strip
|
143
|
-
@details_hash[:identifiers] << grant&.split('/')&.last&.downcase&.strip
|
144
|
-
@details_hash[:identifiers] << opportunity&.downcase&.strip
|
145
|
-
|
146
|
-
@details_hash[:funder_names] << funding['name']&.downcase&.split(' (').first&.strip
|
147
|
-
@details_hash[:funder_ids] << fundref
|
148
|
-
@details_hash[:opportunity_ids] << opportunity&.downcase&.strip
|
149
|
-
@details_hash[:grant_ids] << [grant&.downcase&.strip, grant&.split('/')&.last&.downcase&.strip]
|
60
|
+
scoring = @dmps.map do |entry|
|
61
|
+
dmp = entry.fetch('_source', {})
|
62
|
+
response = { dmp_id: dmp['_id'], confidence: 'None', score: 0, notes: [] }
|
63
|
+
response = _grants_match?(array: hash.fetch('grant_ids', []), dmp:, response:)
|
64
|
+
return response if response[:confidence] != 'None'
|
65
|
+
|
66
|
+
response = _opportunities_match?(array: hash.fetch('funder_opportunity_ids', []), dmp:, response:)
|
67
|
+
response = _orcids_match?(array: hash.fetch('people_ids', []), dmp:, response:)
|
68
|
+
response = _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
69
|
+
|
70
|
+
# Only process the following if we had some matching contributors, affiliations or opportunity numbers
|
71
|
+
response = _repository_match?(hash:, dmp:, response:) if response[:score].positive?
|
72
|
+
# response = _keyword_match?(array: hash['keywords'], response:) if response[:score].positive?
|
73
|
+
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
|
74
|
+
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
|
75
|
+
# If the score is less than 3 then we have no confidence that it is a match
|
76
|
+
return nil if response[:score] <= 2
|
77
|
+
|
78
|
+
# Set the confidence level based on the score
|
79
|
+
response[:confidence] = if response[:score] > 10
|
80
|
+
'High'
|
81
|
+
else
|
82
|
+
(response[:score] > 5 ? 'Medium' : 'Low')
|
83
|
+
end
|
84
|
+
response
|
150
85
|
end
|
151
|
-
array
|
152
|
-
end
|
153
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
154
86
|
|
155
|
-
|
156
|
-
|
157
|
-
def _extract_people(array:)
|
158
|
-
return [] unless array.is_a?(Array)
|
159
|
-
|
160
|
-
array.each do |entry|
|
161
|
-
next unless entry.is_a?(Hash)
|
162
|
-
|
163
|
-
id = entry.fetch('contributor_id', entry.fetch('contact_id', {}))['identifier']&.downcase&.strip
|
164
|
-
affil = entry.fetch('dmproadmap_affiliation', {})
|
165
|
-
ror = affil.fetch('affiliation_id', {})['identifier']&.downcase&.strip
|
166
|
-
name = entry.fetch('name', '')&.downcase&.strip
|
167
|
-
last_name = name.include?(', ') ? name.split(', ').first : name.split.last
|
168
|
-
|
169
|
-
@details_hash[:orcids] << id unless id.nil?
|
170
|
-
@details_hash[:identifiers] << [id, ror&.downcase&.strip]
|
171
|
-
@details_hash[:last_names] << last_name
|
172
|
-
@details_hash[:affiliation_ids] << ror
|
173
|
-
@details_hash[:affiliations] << affil.fetch('name', '')&.split(' (')&.first&.downcase&.strip
|
174
|
-
end
|
175
|
-
array
|
87
|
+
# TODO: introduce a tie-breaker here (maybe the closest to the project_end date)
|
88
|
+
scoring.compact.sort { |a, b| b[:score] <=> a[:score] }&.first
|
176
89
|
end
|
177
90
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
178
91
|
|
179
|
-
|
180
|
-
# rubocop:disable Metrics/AbcSize
|
181
|
-
def _extract_repositories(repos:)
|
182
|
-
return [] unless repos.is_a?(Array)
|
183
|
-
|
184
|
-
repos.each do |repo|
|
185
|
-
next unless repo.is_a?(Hash)
|
186
|
-
|
187
|
-
@details_hash[:identifiers] << [
|
188
|
-
repo['url']&.downcase&.strip, repo.fetch('dmproadmap_host_id', {})['identifier']&.downcase&.strip
|
189
|
-
]
|
190
|
-
@details_hash[:repositories] << repo.fetch('name', '')&.downcase&.strip
|
191
|
-
end
|
192
|
-
repos
|
193
|
-
end
|
194
|
-
# rubocop:enable Metrics/AbcSize
|
92
|
+
private
|
195
93
|
|
196
94
|
# Returns whether or not the incoming grant id(s) match the DMP's grant id. Expecting:
|
197
95
|
# [
|
198
96
|
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
199
97
|
# ]
|
200
98
|
# rubocop:disable Metrics/AbcSize
|
201
|
-
def _grants_match?(array:, response:)
|
202
|
-
return response unless array.is_a?(Array) && response.is_a?(Hash)
|
99
|
+
def _grants_match?(array:, dmp:, response:)
|
100
|
+
return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
101
|
+
return response unless dmp['grant_ids'].is_a?(Array) && !dmp['grant_ids'].empty?
|
203
102
|
|
204
103
|
ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
|
205
104
|
.map { |funding| funding['grant'].map { |id| id&.downcase&.strip } }
|
206
105
|
.flatten.compact.uniq
|
207
106
|
|
208
|
-
matched = _compare_arrays(array_a:
|
107
|
+
matched = _compare_arrays(array_a: dmp['grant_ids'], array_b: ids)
|
209
108
|
return response if matched <= 0
|
210
109
|
|
211
110
|
response[:confidence] = 'Absolute'
|
@@ -220,14 +119,15 @@ module Uc3DmpId
|
|
220
119
|
# { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
|
221
120
|
# ]
|
222
121
|
# rubocop:disable Metrics/AbcSize
|
223
|
-
def _opportunities_match?(array:, response:)
|
224
|
-
return response unless array.is_a?(Array) && response.is_a?(Hash)
|
122
|
+
def _opportunities_match?(array:, dmp:, response:)
|
123
|
+
return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
124
|
+
return response unless dmp['funder_opportunity_ids'].is_a?(Array) && !dmp['funder_opportunity_ids'].empty?
|
225
125
|
|
226
126
|
ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
|
227
127
|
.map { |funding| funding['grant'].map { |id| id&.downcase&.strip } }
|
228
128
|
.flatten.compact.uniq
|
229
129
|
|
230
|
-
matched = _compare_arrays(array_a:
|
130
|
+
matched = _compare_arrays(array_a: dmp['funder_opportunity_ids'], array_b: ids)
|
231
131
|
return response if matched <= 0
|
232
132
|
|
233
133
|
response[:score] += 5
|
@@ -245,14 +145,15 @@ module Uc3DmpId
|
|
245
145
|
# }
|
246
146
|
# ]
|
247
147
|
# rubocop:disable Metrics/AbcSize
|
248
|
-
def _orcids_match?(array:, response:)
|
249
|
-
return response unless array.is_a?(Array) && response.is_a?(Hash)
|
148
|
+
def _orcids_match?(array:, dmp:, response:)
|
149
|
+
return response unless array.is_a?(Array) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
150
|
+
return response unless dmp['people_ids'].is_a?(Array) && !dmp['people_ids'].empty?
|
250
151
|
|
251
152
|
ids = array.select { |repo| repo.is_a?(Hash) }
|
252
153
|
.map { |person| person['id']&.downcase&.strip }
|
253
154
|
.flatten.compact.uniq
|
254
155
|
|
255
|
-
matched = _compare_arrays(array_a:
|
156
|
+
matched = _compare_arrays(array_a: dmp['people_ids'], array_b: ids)
|
256
157
|
return response if matched <= 0
|
257
158
|
|
258
159
|
response[:score] += (matched * 2)
|
@@ -262,28 +163,21 @@ module Uc3DmpId
|
|
262
163
|
# rubocop:enable Metrics/AbcSize
|
263
164
|
|
264
165
|
# Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
|
265
|
-
#
|
266
|
-
#
|
267
|
-
#
|
268
|
-
#
|
269
|
-
#
|
270
|
-
# }
|
271
|
-
# ]
|
166
|
+
# {
|
167
|
+
# people: ["john doe", "jdoe@example.com"],
|
168
|
+
# affiliations: ["example college"],
|
169
|
+
# affiliation_ids: ["https://ror.org/blah"]
|
170
|
+
# }
|
272
171
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
273
|
-
def _last_name_and_affiliation_match?(
|
274
|
-
return response unless
|
275
|
-
|
276
|
-
array = array.select { |repo| repo.is_a?(Hash) }
|
277
|
-
affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
|
278
|
-
last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
|
279
|
-
rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
|
280
|
-
affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
|
172
|
+
def _last_name_and_affiliation_match?(hash:, dmp:, response:)
|
173
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
174
|
+
return response unless hash['people'].is_a?(Array) && !dmp['people'].empty?
|
281
175
|
|
282
176
|
# Check the person last names and affiliation name and RORs
|
283
|
-
last_names_matched = _compare_arrays(array_a:
|
284
|
-
rors_matched = _compare_arrays(array_a:
|
285
|
-
affil_names_matched = _compare_arrays(array_a:
|
286
|
-
return response if last_names_matched <= 0
|
177
|
+
last_names_matched = _compare_arrays(array_a: dmp['people'], array_b: hash['people'])
|
178
|
+
rors_matched = _compare_arrays(array_a: dmp.fetch('affiliation_ids', []), array_b: hash['affiliation_ids'])
|
179
|
+
affil_names_matched = _compare_arrays(array_a: dmp.fetch('affiliations', []), array_b: hash['affiliations'])
|
180
|
+
return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
|
287
181
|
|
288
182
|
response[:score] += last_names_matched + rors_matched + affil_names_matched
|
289
183
|
response[:notes] << 'contributor names and affiliations matched'
|
@@ -292,19 +186,16 @@ module Uc3DmpId
|
|
292
186
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
293
187
|
|
294
188
|
# Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
|
295
|
-
#
|
296
|
-
#
|
297
|
-
#
|
189
|
+
# {
|
190
|
+
# repo_ids: ["http://some.repo.org", "https://doi.org/re3data123"],
|
191
|
+
# repos: ["repo"]
|
192
|
+
# }
|
298
193
|
# rubocop:disable Metrics/AbcSize
|
299
|
-
def _repository_match?(
|
300
|
-
return response unless
|
301
|
-
|
302
|
-
# We only care about repositories with ids/urls
|
303
|
-
ids = array.select { |repo| repo.is_a?(Hash) }
|
304
|
-
.map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
|
305
|
-
.flatten.compact.uniq
|
194
|
+
def _repository_match?(hash:, dmp:, response:)
|
195
|
+
return response unless hash.is_a?(Hash) && dmp.is_a?(Hash) && response.is_a?(Hash)
|
196
|
+
return response unless hash['repo_ids'].is_a?(Array) && !dmp['repo_ids'].empty?
|
306
197
|
|
307
|
-
matched = _compare_arrays(array_a:
|
198
|
+
matched = _compare_arrays(array_a: dmp['repo_ids'], array_b: hash['repo_ids'])
|
308
199
|
return response if matched <= 0
|
309
200
|
|
310
201
|
response[:score] += matched
|
@@ -313,33 +204,19 @@ module Uc3DmpId
|
|
313
204
|
end
|
314
205
|
# rubocop:enable Metrics/AbcSize
|
315
206
|
|
316
|
-
# Returns whether or not the list of keywords exist in the DMP. Expecting:
|
317
|
-
# keywords: ["foo", "bar"]
|
318
|
-
def _keyword_match?(array:, response:)
|
319
|
-
return response unless array.is_a?(Array) && response.is_a?(Hash)
|
320
|
-
|
321
|
-
keywords = array.map { |word| word&.downcase&.strip }&.flatten&.compact&.uniq
|
322
|
-
matched = _compare_arrays(array_a: @details_hash.fetch(:keywords, []), array_b: keywords)
|
323
|
-
return response if matched <= 0
|
324
|
-
|
325
|
-
response[:score] += 1
|
326
|
-
response[:notes] << 'keywords matched'
|
327
|
-
response
|
328
|
-
end
|
329
|
-
|
330
207
|
# Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
|
331
208
|
# rubocop:disable Metrics/AbcSize
|
332
|
-
def _text_match?(text:, response:, type: 'title')
|
333
|
-
return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty? &&
|
334
|
-
!@details_hash[type.to_sym].nil?
|
209
|
+
def _text_match?(text:, dmp:, response:, type: 'title')
|
210
|
+
return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty? && dmp.is_a?(Hash)
|
335
211
|
|
336
212
|
nlp_processor = Text::WhiteSimilarity.new
|
337
213
|
cleansed = _cleanse_text(text:)
|
338
214
|
|
215
|
+
dmp_val = type == 'title' ? dmp['title'] : dmp['description']
|
339
216
|
details = {
|
340
|
-
"dmp_#{type}":
|
217
|
+
"dmp_#{type}": dmp_val,
|
341
218
|
"incoming_#{type}": cleansed,
|
342
|
-
nlp_score: nlp_processor.similarity(
|
219
|
+
nlp_score: nlp_processor.similarity(dmp_val, cleansed)
|
343
220
|
}
|
344
221
|
@logger&.debug(message: 'Text::WhiteSimilarity score', details:)
|
345
222
|
return response if details[:nlp_score] < 0.5
|
data/lib/uc3-dmp-id/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uc3-dmp-id
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Riley
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|