uc3-dmp-id 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1bdcb2754d5168bdeedf8583b91054cf62fc019d4479686da168d4ca70d83729
4
- data.tar.gz: b2af99461aa7614212aae435db0db174eeb50125006d0dacc87698ecaf41b7b8
3
+ metadata.gz: 51832c144e5663dc01c805f92e81e50e63f2ce00200a8cad5525b7a34c9d7eb9
4
+ data.tar.gz: e197deb7f608ef478716a8aea113e853ab0d3903f1496f970a73d31d30b7e892
5
5
  SHA512:
6
- metadata.gz: 43cdce10f8bccc41fc979c67a4758fd5726206c9066acefcbc8abe65b9d36f79fc69eedb461a9ce584cf61f85ddd0dca13617d99a39e9c831eb56ea725a00eb0
7
- data.tar.gz: 393d0e083ca8cfbf2039ba11861576a2eff489d48d8c541279ae30a3f9e7215d39175c49d1203456f56bb00a6a7e3da6c0211d4c3fc7e4f64f32f88e3d96accf
6
+ metadata.gz: 31bc5d1bb73176c2afff25715590c50d0612f558880f2bde3a750ac5ea8d49674c5dd9b00eff36960454c972a1a31aca6b4f853a98e995dd65977e1f82903ad2
7
+ data.tar.gz: 86153dfeebc52570ecb83a012f4e7242e9bec607627c2dcefc5beb101a711199bc346c1388045127df89f13e65c83aa473c0fc77e37810d436ea939101b74f2b
data/README.md CHANGED
@@ -1,3 +1,12 @@
1
1
  # Uc3DmpId
2
2
 
3
3
  Helper methods for working with DMP ID JSON records
4
+
5
+ After you have made changes, be sure to increment the version number in `lib/uc3-dmp-id/version.rb`.
6
+
7
+ To build and push this gem to RubyGems:
8
+ - Make sure you are logged into RubyGems in your terminal window (see their docs)
9
+ - Run `gem build uc3-dmp-id.gemspec` to build the gem
10
+ - Run `gem push uc3-dmp-id-[version].gem` to publish to RubyGems
11
+
12
+ After you have pushed a new version to RubyGems, you should rebuild and redeploy the AWS SAM application.
@@ -28,13 +28,13 @@ module Uc3DmpId
28
28
  related_works = modified_version.fetch('dmproadmap_related_identifiers', [])
29
29
 
30
30
  if related_works.any?
31
- latest_version = _add_related_identifier(updater: updater, latest_version: latest_version,
32
- identifiers: related_works, note: note, logger: logger)
31
+ latest_version = _add_related_identifier(updater:, latest_version:,
32
+ identifiers: related_works, note:, logger:)
33
33
  end
34
34
  return latest_version unless !funding.nil? && funding.any?
35
35
 
36
- _add_funding_mod(updater: updater, latest_version: latest_version, funding: funding,
37
- note: note, logger: logger)
36
+ _add_funding_mod(updater:, latest_version:, funding:,
37
+ note:, logger:)
38
38
  end
39
39
  # rubocop:enable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
40
40
 
@@ -106,7 +106,7 @@ module Uc3DmpId
106
106
  end
107
107
 
108
108
  latest_version['dmproadmap_related_identifiers'] = [] if latest_version['dmproadmap_related_identifiers'].nil?
109
- assertion = _generate_assertion(updater: updater, note: note,
109
+ assertion = _generate_assertion(updater:, note:,
110
110
  mods: JSON.parse({ dmproadmap_related_identifiers: additions }.to_json))
111
111
  if logger.respond_to?(:debug)
112
112
  logger.debug(message: 'Adding change to :dmphub_modifications.',
@@ -151,7 +151,7 @@ module Uc3DmpId
151
151
  latest_version['dmphub_modifications'] = [] if latest_version['dmphub_modifications'].nil?
152
152
  mod = JSON.parse({ funding: fund }.to_json)
153
153
  mod['funding']['funding_status'] = 'granted'
154
- assertion = _generate_assertion(updater: updater, mods: mod, note: note)
154
+ assertion = _generate_assertion(updater:, mods: mod, note:)
155
155
  if logger.respond_to?(:debug)
156
156
  logger.debug(message: 'Adding change to :dmphub_modifications.',
157
157
  details: assertion)
@@ -168,7 +168,8 @@ module Uc3DmpId
168
168
  # "id": "ABCD1234",
169
169
  # "provenance": "dmphub",
170
170
  # "timestamp": "2023-07-07T14:50:23+00:00",
171
- # "note": "data received from the NIH API",
171
+ # "note": "Data received from OpenAlex, matched by PI names and title keywords.",
172
+ # "confidence": "Med",
172
173
  # "dmproadmap_related_identifiers": {
173
174
  # "work_type": "article",
174
175
  # "descriptor": "is_cited_by",
@@ -183,7 +184,8 @@ module Uc3DmpId
183
184
  # "id": "ABCD1234",
184
185
  # "provenance": "dmphub",
185
186
  # "timestamp": "2023-07-07T14:50:23+00:00",
186
- # "note": "data received from the NIH API",
187
+ # "note": "Data received from the NIH API, matched by the opportunity number.",
188
+ # "confidence": "High",
187
189
  # "funding": {
188
190
  # "funding_status": "granted",
189
191
  # "grant_id": {
@@ -200,11 +202,19 @@ module Uc3DmpId
200
202
  provenance: updater.gsub('PROVENANCE#', ''),
201
203
  timestamp: Time.now.utc.iso8601,
202
204
  status: 'pending',
203
- note: note
205
+ note:
204
206
  }
205
207
  mods.each_pair { |key, val| assertion[key] = val }
206
208
  JSON.parse(assertion.to_json)
207
209
  end
208
210
  end
211
+
212
# Placeholder (presumably for scoring a related work against the latest DMP version —
# not yet implemented). The body is empty, so a call returns nil.
def _score_related_work(latest_version:, work:); end
215
+
216
# Placeholder (presumably for scoring a funding entry against the latest DMP version —
# not yet implemented). The body is empty, so a call returns nil.
def _score_funding(latest_version:, funding:); end
209
219
  end
210
220
  end
@@ -0,0 +1,500 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'text'
4
+
5
+ module Uc3DmpId
6
+ class ComparatorError < StandardError; end
7
+
8
+ # Class that compares incoming data from an external source to the DMP
9
+ # It determines if they are likely related and applies a confidence rating
10
+ class Comparator
11
+
12
# Messages used when a ComparatorError is raised during initialization
MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'

# Words that are dropped from titles/abstracts before they are compared.
# Frozen so the shared list cannot be mutated by accident.
STOP_WORDS = %w[a an and if of or the then they].freeze
16
+
17
+ # See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
18
+ # Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
19
+
20
+ attr_accessor :augmenter, :dmp, :details_hash, :logger
21
+
22
# Build a Comparator for a single DMP.
#
# Recognized keyword arguments (read from +args+):
#   :augmenter - record whose 'PK' starts with 'AUGMENTERS#' (required)
#   :dmp       - the DMP metadata, either bare or wrapped as { 'dmp' => { ... } }
#   :logger    - optional logger
#
# Raises ComparatorError when the augmenter is missing/invalid, or when no details
# could be extracted from the DMP.
def initialize(**args)
  @logger = args[:logger]
  @details_hash = {}

  @augmenter = args[:augmenter]
  # to_s lets a missing or non-String PK fail the check instead of raising NoMethodError
  valid_augmenter = !@augmenter.nil? && @augmenter['PK'].to_s.start_with?('AUGMENTERS#')
  raise ComparatorError, MSG_MISSING_AUGMENTER unless valid_augmenter

  # Unwrap the { 'dmp' => ... } envelope when present. Checking the type first means an
  # explicit `dmp: nil` ends in the ComparatorError below rather than a NoMethodError.
  incoming = args[:dmp]
  wrapped = incoming.is_a?(Hash) ? incoming['dmp'] : nil
  @dmp = wrapped.nil? ? incoming : wrapped
  _extract_dmp_details(dmp: @dmp)
  raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
end
34
+
35
# Compare an incoming record with the DMP details that were gathered during initialization.
#
# The Hash is read with String keys and is expected to look like:
#   {
#     "title" => "Example research project",
#     "abstract" => "Lorem ipsum pseudo abstract",
#     "keywords" => ["foo", "bar"],
#     "people" => [
#       {
#         "id" => "https://orcid.org/blah",
#         "last_name" => "doe",
#         "affiliation" => { "id" => "https://ror.org/blah", "name" => "Foo" }
#       }
#     ],
#     "fundings" => [
#       { "id" => "https://doi.org/crossref123", "name" => "Bar", "grant" => ["1234", "http://foo.bar/543"] }
#     ],
#     "repositories" => [
#       { "id" => ["http://some.repo.org", "https://doi.org/re3data123"], "name" => "Repo" }
#     ]
#   }
#
# Returns a Hash with :confidence ('None', 'Low', 'Medium', 'High' or whatever the grant
# matcher sets), :score, :notes (Array of Strings) and :source (the augmenter's 'name').
# A response with confidence 'None' is returned when +hash+ is not a Hash or has no 'title'.
def compare(hash:)
  response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
  return response unless hash.is_a?(Hash) && !hash['title'].nil?

  # A grant id match is decisive, so return as soon as the grant matcher changes the confidence
  response = _grants_match?(array: hash['fundings'], response:)
  return response if response[:confidence] != 'None'

  response = _opportunities_match?(array: hash['fundings'], response:)
  response = _orcids_match?(array: hash['people'], response:)
  response = _last_name_and_affiliation_match?(array: hash['people'], response:)

  # Only look at the weaker signals if a contributor, affiliation or opportunity number matched
  if response[:score].positive?
    response = _repository_match?(array: hash['repositories'], response:)
    response = _keyword_match?(array: hash['keywords'], response:)
    response = _text_match?(type: 'title', text: hash['title'], response:)
    response = _text_match?(type: 'abstract', text: hash['abstract'], response:)
  end
  # A score below 3 gives no confidence that this is a match
  return response if response[:score] <= 2

  response[:confidence] = if response[:score] > 15
                            'High'
                          elsif response[:score] > 10
                            'Medium'
                          else
                            'Low'
                          end
  response
end
81
+
82
+ private
83
+
84
# Gather the comparable facts about the DMP into @details_hash.
#
# dmp - Hash of DMP metadata. Nothing is extracted (and nil is returned) unless it is a
#       Hash with non-nil 'title' and 'contact' entries.
#
# The title/abstract come from the first project when it has them and otherwise from the
# DMP itself. Keywords are lower-cased and stripped so they are stored in the same
# normalized form as the other values.
def _extract_dmp_details(dmp:)
  return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?

  # Treat missing, nil or malformed collections as empty instead of raising
  only_hashes = ->(value) { value.is_a?(Array) ? value.select { |item| item.is_a?(Hash) } : [] }
  projects = only_hashes.call(dmp['project'])
  datasets = only_hashes.call(dmp['dataset'])
  project = projects.first || {}
  fundings = projects.map { |proj| proj['funding'] }.flatten.compact.uniq
  hosts = datasets.map { |dset| only_hashes.call(dset['distribution']).map { |dist| dist['host'] } }
  people = [dmp['contact'], dmp['contributor']].flatten.compact.uniq
  keywords = datasets.map { |dset| dset['keyword'] }.flatten.compact
  dmp_id = dmp['dmp_id'].is_a?(Hash) ? dmp['dmp_id']['identifier'] : nil

  # Extract all of the important bits about the DMP
  @details_hash = {
    # Block form so the fallback timestamp is only computed when 'created' is absent
    created: dmp.fetch('created') { Time.now.iso8601 },
    title: _cleanse_text(text: project['title'] || dmp['title']),
    abstract: _cleanse_text(text: project['description'] || dmp['description']),
    keywords: keywords.map { |word| word.to_s.downcase.strip },
    identifiers: [dmp_id],
    last_names: [],
    affiliation_ids: [],
    affiliations: [],
    funder_names: [],
    funder_ids: [],
    opportunity_ids: [],
    grant_ids: [],
    repositories: []
  }
  _extract_people(array: people)
  _extract_funding(array: fundings)
  _extract_repositories(repos: hosts.flatten.compact.uniq)

  # Clean up the results by flattening and removing nils/duplicates from the Arrays
  @details_hash.transform_values! { |value| value.is_a?(Array) ? value.flatten.compact.uniq : value }
  @logger&.debug(message: "Extracted the following from the DMP", details: @details_hash)
end
119
+
120
# Record funder, opportunity and grant identifiers from the DMP's funding entries.
#
# array - Array of funding Hashes; entries that are not Hashes are skipped.
#
# Every value is lower-cased and stripped before it is stored. Grant ids are stored both
# whole and as the segment after the final '/'. Appends to @details_hash and returns the
# array that was passed in ([] when it is not an Array).
def _extract_funding(array:)
  return [] unless array.is_a?(Array)

  normalize = ->(value) { value.nil? ? nil : value.to_s.downcase.strip }
  # Nested id objects may be missing or nil; treat anything that is not a Hash as absent
  identifier_of = ->(entry) { entry.is_a?(Hash) ? normalize.call(entry['identifier']) : nil }

  array.each do |funding|
    next unless funding.is_a?(Hash)

    funder_id = funding['funder_id'].is_a?(Hash) ? funding['funder_id'] : {}
    identifier = normalize.call(funder_id['identifier'])
    ror = identifier if normalize.call(funder_id['type']) == 'ror'
    # The crosswalk keys are lower case, so the lookup must use the normalized ROR
    fundref = ror.nil? ? identifier : ROR_FUNDREF_ID_CROSSWALK[ror.to_sym]
    opportunity = identifier_of.call(funding['dmproadmap_funding_opportunity_id'])
    grant = identifier_of.call(funding['grant_id'])
    grant_suffix = grant&.split('/')&.last&.strip

    @details_hash[:identifiers].push(ror, fundref&.downcase&.strip, grant, grant_suffix, opportunity)
    @details_hash[:funder_names] << normalize.call(funding['name'])
    @details_hash[:funder_ids] << fundref
    @details_hash[:opportunity_ids] << opportunity
    @details_hash[:grant_ids] << [grant, grant_suffix]
  end
  array
end
146
+
147
# Record the identifiers, last names and affiliation ids/names of the DMP's people.
#
# array - Array of contact/contributor Hashes; entries that are not Hashes are skipped.
#
# Values are lower-cased and stripped. Appends to @details_hash and returns the array that
# was passed in ([] when it is not an Array).
def _extract_people(array:)
  return [] unless array.is_a?(Array)

  normalize = ->(value) { value.nil? ? nil : value.to_s.downcase.strip }
  # Nested id objects may be missing or nil; treat anything that is not a Hash as absent
  identifier_of = ->(entry) { entry.is_a?(Hash) ? normalize.call(entry['identifier']) : nil }

  array.each do |entry|
    next unless entry.is_a?(Hash)

    id = identifier_of.call(entry['contributor_id'] || entry['contact_id'])
    affil = entry['dmproadmap_affiliation'].is_a?(Hash) ? entry['dmproadmap_affiliation'] : {}
    ror = identifier_of.call(affil['affiliation_id'])
    name = normalize.call(entry['name']) || ''
    # "Last, First" puts the surname first; otherwise take the final word
    last_name = name.include?(', ') ? name.split(', ').first : name.split.last

    @details_hash[:identifiers] << [id, ror]
    @details_hash[:last_names] << last_name
    @details_hash[:affiliation_ids] << ror
    # Drop any trailing " (…)" suffix from the affiliation name
    @details_hash[:affiliations] << affil['name']&.to_s&.split(' (')&.first&.downcase&.strip
  end
  array
end
167
+
168
# Record the URL, host identifier and name of each repository (dataset host) on the DMP.
#
# repos - Array of host Hashes; entries that are not Hashes are skipped.
#
# Values are lower-cased and stripped. Appends to @details_hash and returns the array that
# was passed in ([] when it is not an Array).
def _extract_repositories(repos:)
  return [] unless repos.is_a?(Array)

  normalize = ->(value) { value.nil? ? nil : value.to_s.downcase.strip }

  repos.each do |repo|
    next unless repo.is_a?(Hash)

    # 'dmproadmap_host_id' may be absent or nil, so only read it when it is a Hash
    host_id = repo['dmproadmap_host_id']
    host_identifier = host_id.is_a?(Hash) ? host_id['identifier'] : nil

    @details_hash[:identifiers] << [normalize.call(repo['url']), normalize.call(host_identifier)]
    @details_hash[:repositories] << normalize.call(repo.fetch('name', ''))
  end
  repos
end
182
+
183
# Marks the response as an 'Absolute' match (score 100) when any incoming grant id equals
# one of the DMP's grant ids. Expecting:
#   [
#     { "id" => "https://doi.org/crossref123", "name" => "Bar", "grant" => ["1234", "http://foo.bar/543"] }
#   ]
#
# Returns the response, untouched when there is no match or the arguments are not an
# Array and a Hash respectively.
def _grants_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # to_s so that numeric grant numbers are compared rather than raising NoMethodError
  ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
             .map { |funding| funding['grant'] }
             .flatten.compact
             .map { |id| id.to_s.downcase.strip }
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:grant_ids, []), array_b: ids)
  return response if matched <= 0

  response[:confidence] = 'Absolute'
  response[:score] = 100
  response[:notes] << 'the grant ID matched'
  response
end
202
+
203
# Adds 5 to the score when any incoming grant id equals one of the DMP's funding
# opportunity ids. Expecting:
#   [
#     { "id" => "https://doi.org/crossref123", "name" => "Bar", "grant" => ["1234", "http://foo.bar/543"] }
#   ]
#
# Returns the response, untouched when there is no match or the arguments are not an
# Array and a Hash respectively.
def _opportunities_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # to_s so that numeric ids are compared rather than raising NoMethodError
  ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
             .map { |funding| funding['grant'] }
             .flatten.compact
             .map { |id| id.to_s.downcase.strip }
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:opportunity_ids, []), array_b: ids)
  return response if matched <= 0

  response[:score] += 5
  response[:notes] << 'the funding opportunity number matched'
  response
end
221
+
222
# Adds 2 to the score for every incoming person id that is also among the identifiers
# collected from the DMP. Expecting:
#   [
#     {
#       "id" => "https://orcid.org/blah",
#       "last_name" => "doe",
#       "affiliation" => { "id" => "https://ror.org/blah", "name" => "Foo" }
#     }
#   ]
#
# Returns the response, untouched when there is no match or the arguments are not an
# Array and a Hash respectively.
def _orcids_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # to_s so that a non-String id cannot raise NoMethodError
  ids = array.select { |person| person.is_a?(Hash) }
             .map { |person| person['id'] }
             .flatten.compact
             .map { |id| id.to_s.downcase.strip }
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
  return response if matched <= 0

  response[:score] += (matched * 2)
  response[:notes] << 'contributor ORCIDs matched'
  response
end
244
+
245
# Adds one point to the score for every incoming last name, affiliation id and affiliation
# name that is also present on the DMP. Expecting:
#   [
#     {
#       "id" => "https://orcid.org/blah",
#       "last_name" => "doe",
#       "affiliation" => { "id" => "https://ror.org/blah", "name" => "Foo" }
#     }
#   ]
#
# Returns the response, untouched when nothing matched or the arguments are not an Array
# and a Hash respectively.
def _last_name_and_affiliation_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  normalize = ->(values) { values.flatten.compact.map { |value| value.to_s.downcase.strip }.uniq }
  people = array.select { |person| person.is_a?(Hash) }
  # Only Hash affiliations carry an id/name; indexing a String with ['id'] is a substring lookup
  affiliations = people.map { |person| person['affiliation'] }.flatten.select { |affil| affil.is_a?(Hash) }.uniq

  incoming = {
    last_names: normalize.call(people.map { |person| person['last_name'] }),
    affiliation_ids: normalize.call(affiliations.map { |affil| affil['id'] }),
    affiliations: normalize.call(affiliations.map { |affil| affil['name'] })
  }

  # Check the person last names and the affiliation names and ids
  matched = incoming.sum do |key, values|
    _compare_arrays(array_a: @details_hash.fetch(key, []), array_b: values)
  end
  return response if matched <= 0

  response[:score] += matched
  response[:notes] << 'contributor names and affiliations matched'
  response
end
272
+
273
# Adds one point to the score for every incoming repository id/URL that is also among the
# identifiers collected from the DMP. Expecting:
#   [
#     { "id" => ["http://some.repo.org", "https://doi.org/re3data123"], "name" => "Repo" }
#   ]
#
# Returns the response, untouched when there is no match or the arguments are not an
# Array and a Hash respectively.
def _repository_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # We only care about repositories with ids/urls. Array() lets 'id' be missing or a
  # single String instead of raising NoMethodError.
  ids = array.select { |repo| repo.is_a?(Hash) }
             .flat_map { |repo| Array(repo['id']) }
             .select { |id| id.is_a?(String) }
             .map { |id| id.downcase.strip }
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
  return response if matched <= 0

  response[:score] += matched
  response[:notes] << 'repositories matched'
  response
end
292
+
293
# Adds a single point to the score when at least one incoming keyword is also a keyword on
# the DMP. Expecting:
#   ["foo", "bar"]
#
# Returns the response, untouched when there is no match or the arguments are not an
# Array and a Hash respectively.
def _keyword_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # Ignore anything that is not a String so malformed input cannot raise NoMethodError
  keywords = array.flatten
                  .select { |word| word.is_a?(String) }
                  .map { |word| word.downcase.strip }
                  .uniq
  matched = _compare_arrays(array_a: @details_hash.fetch(:keywords, []), array_b: keywords)
  return response if matched <= 0

  response[:score] += 1
  response[:notes] << 'keywords matched'
  response
end
306
+
307
# Uses Text::WhiteSimilarity to decide whether +text+ resembles the DMP's stored :title or
# :abstract (selected by +type+).
#
# type     - 'title' or 'abstract'; used as the key into @details_hash and in the note
# text     - the incoming text to compare
# response - the response Hash being built up
# logger   - optional logger for the debug message; falls back to @logger
#
# Adds 5 to the score when the similarity is >= 0.75 and 2 when it is >= 0.5. Returns the
# response, untouched when either text is missing/empty or the similarity is below 0.5.
def _text_match?(type: 'title', text:, response:, logger: nil)
  return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty?

  dmp_text = @details_hash[type.to_sym]
  cleansed = _cleanse_text(text:)
  # Two texts that are both empty after cleansing are not evidence of a match
  return response if dmp_text.to_s.empty? || cleansed.to_s.empty?

  nlp_score = Text::WhiteSimilarity.new.similarity(dmp_text, cleansed)
  details = { "dmp_#{type}": dmp_text, "incoming_#{type}": cleansed, nlp_score: }
  (logger || @logger)&.debug(message: "Text::WhiteSimilarity score", details:)

  # NaN compares false against every threshold and would otherwise be scored as "similar".
  # NOTE(review): believed possible when neither text yields letter pairs — confirm against the gem.
  unusable = nlp_score.nil? || (nlp_score.respond_to?(:nan?) && nlp_score.nan?)
  return response if unusable || nlp_score < 0.5

  response[:score] += nlp_score >= 0.75 ? 5 : 2
  response[:notes] << "#{type}s are similar"
  response
end
327
+
328
# Normalize text for comparison: lower-case it, drop every STOP_WORDS entry and join the
# remaining words with single spaces. Returns nil when +text+ is not a String.
def _cleanse_text(text:)
  return nil unless text.is_a?(String)

  meaningful_words = text.downcase.split - STOP_WORDS
  meaningful_words.join(' ')
end
334
+
335
# Count the distinct values that appear in both arrays.
# Returns 0 when either argument is not an Array.
def _compare_arrays(array_a: [], array_b: [])
  both_arrays = array_a.is_a?(Array) && array_b.is_a?(Array)
  both_arrays ? (array_a & array_b).size : 0
end
342
+
343
# TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
#
# Maps a funder's ROR URL (as a Symbol key) to its Crossref funder DOI.
# Every key must be a full lower-case `https://ror.org/...` URL; the NSF and NASA
# sections previously read `https://.org/...`, which could never match a lookup.
ROR_FUNDREF_ID_CROSSWALK = {
  # NIH ID Crosswalk
  "https://ror.org/01cwqze88": "https://doi.org/10.13039/100000002",
  "https://ror.org/04mhx6838": "https://doi.org/10.13039/100000055",
  "https://ror.org/012pb6c26": "https://doi.org/10.13039/100000050",
  "https://ror.org/03wkg3b53": "https://doi.org/10.13039/100000053",
  "https://ror.org/0060t0j89": "https://doi.org/10.13039/100000092",
  "https://ror.org/00372qc85": "https://doi.org/10.13039/100000070",
  "https://ror.org/00190t495": "https://doi.org/10.13039/100008460",
  "https://ror.org/00j4k1h63": "https://doi.org/10.13039/100000066",
  "https://ror.org/01y3zfr79": "https://doi.org/10.13039/100000056",
  "https://ror.org/04q48ey07": "https://doi.org/10.13039/100000057",
  "https://ror.org/0493hgw16": "https://doi.org/10.13039/100006545",
  "https://ror.org/04vfsmv21": "https://doi.org/10.13039/100000098",
  "https://ror.org/03jh5a977": "https://doi.org/10.13039/100000093",
  "https://ror.org/04xeg9z08": "https://doi.org/10.13039/100000025",
  "https://ror.org/01s5ya894": "https://doi.org/10.13039/100000065",
  "https://ror.org/02meqm098": "https://doi.org/10.13039/100000002",
  "https://ror.org/049v75w11": "https://doi.org/10.13039/100000049",
  "https://ror.org/004a2wv92": "https://doi.org/10.13039/100000072",
  "https://ror.org/00adh9b73": "https://doi.org/10.13039/100000062",
  "https://ror.org/043z4tv69": "https://doi.org/10.13039/100000060",
  "https://ror.org/00x19de83": "https://doi.org/10.13039/100000002",
  "https://ror.org/02jzrsm59": "https://doi.org/10.13039/100000027",
  "https://ror.org/006zn3t30": "https://doi.org/10.13039/100000069",
  "https://ror.org/04byxyr05": "https://doi.org/10.13039/100000071",
  "https://ror.org/04pw6fb54": "https://doi.org/10.13039/100006108",
  "https://ror.org/05aq6yn88": "https://doi.org/10.13039/100006955",
  "https://ror.org/02xey9a22": "https://doi.org/10.13039/100000061",
  "https://ror.org/00fj8a872": "https://doi.org/10.13039/100000052",
  "https://ror.org/01wtjyf13": "https://doi.org/10.13039/100000063",
  "https://ror.org/04r5s4b52": "https://doi.org/10.13039/100005440",
  "https://ror.org/046zezr58": "https://doi.org/10.13039/100006085",
  "https://ror.org/02e3wq066": "https://doi.org/10.13039/100006086",
  "https://ror.org/031gy6182": "https://doi.org/10.13039/100000002",
  "https://ror.org/054j5yq82": "https://doi.org/10.13039/100000002",
  "https://ror.org/02yrzyf97": "https://doi.org/10.13039/100000002",

  # NSF ID Crosswalk
  "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
  "https://ror.org/04aqat463": "https://doi.org/10.13039/100000001",
  "https://ror.org/01rcfpa16": "https://doi.org/10.13039/100005441",
  "https://ror.org/014eweh95": "https://doi.org/10.13039/100005445",
  "https://ror.org/001xhss06": "https://doi.org/10.13039/100000076",
  "https://ror.org/04qn9mx93": "https://doi.org/10.13039/100000153",
  "https://ror.org/03g87he71": "https://doi.org/10.13039/100000155",
  "https://ror.org/01tnvpc68": "https://doi.org/10.13039/100000156",
  "https://ror.org/01rvays47": "https://doi.org/10.13039/100000154",
  "https://ror.org/002jdaq33": "https://doi.org/10.13039/100000152",
  "https://ror.org/025kzpk63": "https://doi.org/10.13039/100000083",
  "https://ror.org/04nh1dc89": "https://doi.org/10.13039/100007523",
  "https://ror.org/01mng8331": "https://doi.org/10.13039/100000143",
  "https://ror.org/02rdzmk74": "https://doi.org/10.13039/100000144",
  "https://ror.org/053a2cp42": "https://doi.org/10.13039/100000145",
  "https://ror.org/014bj5w56": "https://doi.org/10.13039/100000081",
  "https://ror.org/00whkrf32": "https://doi.org/10.13039/100000082",
  "https://ror.org/05s7cqk18": "https://doi.org/10.13039/100000173",
  "https://ror.org/02kd4km72": "https://doi.org/10.13039/100000172",
  "https://ror.org/03mamvh39": "https://doi.org/10.13039/100000171",
  "https://ror.org/00b6sbb32": "https://doi.org/10.13039/100000084",
  "https://ror.org/0471zv972": "https://doi.org/10.13039/100000146",
  "https://ror.org/028yd4c30": "https://doi.org/10.13039/100000147",
  "https://ror.org/01krpsy48": "https://doi.org/10.13039/100000148",
  "https://ror.org/050rnw378": "https://doi.org/10.13039/100000149",
  "https://ror.org/0388pet74": "https://doi.org/10.13039/100000150",
  "https://ror.org/03xyg3m20": "https://doi.org/10.13039/100000151",
  "https://ror.org/05p847d66": "https://doi.org/10.13039/100000085",
  "https://ror.org/037gd6g64": "https://doi.org/10.13039/100000159",
  "https://ror.org/05v01mk25": "https://doi.org/10.13039/100000160",
  "https://ror.org/05wqqhv83": "https://doi.org/10.13039/100000141",
  "https://ror.org/05nwjp114": "https://doi.org/10.13039/100007352",
  "https://ror.org/05fnzca26": "https://doi.org/10.13039/100000162",
  "https://ror.org/02trddg58": "https://doi.org/10.13039/100000163",
  "https://ror.org/029b7h395": "https://doi.org/10.13039/100000086",
  "https://ror.org/04mg8wm74": "https://doi.org/10.13039/100000164",
  "https://ror.org/01ar8dr59": "https://doi.org/10.13039/100000165",
  "https://ror.org/01pc7k308": "https://doi.org/10.13039/100000078",
  "https://ror.org/051fftw81": "https://doi.org/10.13039/100000121",
  "https://ror.org/04ap5x931": "https://doi.org/10.13039/100000166",
  "https://ror.org/00apvva27": "https://doi.org/10.13039/100005716",
  "https://ror.org/04nseet23": "https://doi.org/10.13039/100000179",
  "https://ror.org/04k9mqs78": "https://doi.org/10.13039/100000106",
  "https://ror.org/01k638r21": "https://doi.org/10.13039/100000089",
  "https://ror.org/01gmp5538": "https://doi.org/10.13039/100005447",
  "https://ror.org/01vnjbg30": "https://doi.org/10.13039/100005449",
  "https://ror.org/03h7mcc28": "https://doi.org/10.13039/100000088",
  "https://ror.org/05wgkzg12": "https://doi.org/10.13039/100000169",
  "https://ror.org/0445wmv88": "https://doi.org/10.13039/100000170",
  "https://ror.org/02dz2hb46": "https://doi.org/10.13039/100000077",
  "https://ror.org/034m1ez10": "https://doi.org/10.13039/100000107",
  "https://ror.org/02a65dj82": "https://doi.org/10.13039/100005717",
  "https://ror.org/020fhsn68": "https://doi.org/10.13039/100000001",
  "https://ror.org/03z9hh605": "https://doi.org/10.13039/100000174",
  "https://ror.org/04ya3kq71": "https://doi.org/10.13039/100007521",
  "https://ror.org/04evh7y43": "https://doi.org/10.13039/100005443",
  "https://ror.org/04h67aa53": "https://doi.org/10.13039/100000177",
  "https://ror.org/025dabr11": "https://doi.org/10.13039/100005446",
  "https://ror.org/04vw0kz07": "https://doi.org/10.13039/100005448",
  "https://ror.org/054ydxh33": "https://doi.org/10.13039/100005554",
  "https://ror.org/01sharn77": "https://doi.org/10.13039/100006091",
  "https://ror.org/02ch5q898": "https://doi.org/10.13039/100000001",

  # NASA ID Crosswalk
  "https://ror.org/0171mag52": "https://doi.org/10.13039/100006198",
  "https://ror.org/027k65916": "https://doi.org/10.13039/100006196",
  "https://ror.org/027ka1x80": "https://doi.org/10.13039/100000104",
  "https://ror.org/02acart68": "https://doi.org/10.13039/100006195",
  "https://ror.org/059fqnc42": "https://doi.org/10.13039/100006193",
  "https://ror.org/01cyfxe35": "https://doi.org/10.13039/100016595",
  "https://ror.org/04xx4z452": "https://doi.org/10.13039/100006203",
  "https://ror.org/0399mhs52": "https://doi.org/10.13039/100006199",
  "https://ror.org/02epydz83": "https://doi.org/10.13039/100006197",
  "https://ror.org/03j9e2j92": "https://doi.org/10.13039/100006205",
  "https://ror.org/02s42x260": "https://doi.org/10.13039/100000104",
  "https://ror.org/01p7gwa14": "https://doi.org/10.13039/100000104",
  "https://ror.org/01qxmdg18": "https://doi.org/10.13039/100000104",
  "https://ror.org/006ndaj41": "https://doi.org/10.13039/100000104",
  "https://ror.org/03em45j53": "https://doi.org/10.13039/100007346",
  "https://ror.org/045t78n53": "https://doi.org/10.13039/100000104",
  "https://ror.org/00r57r863": "https://doi.org/10.13039/100000104",
  "https://ror.org/0401vze59": "https://doi.org/10.13039/100007726",
  "https://ror.org/04hccab49": "https://doi.org/10.13039/100000104",
  "https://ror.org/04437j066": "https://doi.org/10.13039/100000104",
  "https://ror.org/028b18z22": "https://doi.org/10.13039/100000104",
  "https://ror.org/00ryjtt64": "https://doi.org/10.13039/100000104",

  # DOE ID Crosswalk
  "https://ror.org/01bj3aw27": "https://doi.org/10.13039/100000015",
  "https://ror.org/03q1rgc19": "https://doi.org/10.13039/100006133",
  "https://ror.org/02xznz413": "https://doi.org/10.13039/100006134",
  "https://ror.org/03sk1we31": "https://doi.org/10.13039/100006168",
  "https://ror.org/00f93gc02": "https://doi.org/10.13039/100006177",
  "https://ror.org/05tj7dm33": "https://doi.org/10.13039/100006147",
  "https://ror.org/0012c7r22": "https://doi.org/10.13039/100006192",
  "https://ror.org/00mmn6b08": "https://doi.org/10.13039/100006132",
  "https://ror.org/03ery9d53": "https://doi.org/10.13039/100006120",
  "https://ror.org/033jmdj81": "https://doi.org/10.13039/100000015",
  "https://ror.org/03rd4h240": "https://doi.org/10.13039/100006130",
  "https://ror.org/0054t4769": "https://doi.org/10.13039/100006200",
  "https://ror.org/03eecgp81": "https://doi.org/10.13039/100006174",
  "https://ror.org/00heb4d89": "https://doi.org/10.13039/100006135",
  "https://ror.org/05ek3m339": "https://doi.org/10.13039/100006150",
  "https://ror.org/00km40770": "https://doi.org/10.13039/100006138",
  "https://ror.org/02ah1da87": "https://doi.org/10.13039/100006137",
  "https://ror.org/05hsv7e61": "https://doi.org/10.13039/100000015",
  "https://ror.org/01c9ay627": "https://doi.org/10.13039/100006165",
  "https://ror.org/04z2gev20": "https://doi.org/10.13039/100006183",
  "https://ror.org/02z1qvq09": "https://doi.org/10.13039/100006144",
  "https://ror.org/03jf3w726": "https://doi.org/10.13039/100006186",
  "https://ror.org/04848jz84": "https://doi.org/10.13039/100006142",
  "https://ror.org/04s778r16": "https://doi.org/10.13039/100006171",
  "https://ror.org/04nnxen11": "https://doi.org/10.13039/100000015",
  "https://ror.org/05csy5p27": "https://doi.org/10.13039/100010268",
  "https://ror.org/05efnac71": "https://doi.org/10.13039/100000015"
}.freeze
499
+ end
500
+ end
@@ -23,23 +23,23 @@ module Uc3DmpId
23
23
  raise CreatorError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil?
24
24
 
25
25
  # Validate the incoming JSON first
26
- json = Helper.parse_json(json: json)
27
- errs = Validator.validate(mode: 'author', json: json)
26
+ json = Helper.parse_json(json:)
27
+ errs = Validator.validate(mode: 'author', json:)
28
28
  raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any? && errs.first != Validator::MSG_VALID_JSON
29
29
 
30
30
  # Try to find it by the :dmp_id first and Fail if found
31
31
  dmp_id = Helper.dmp_id_to_pk(json: json.fetch('dmp', {})['dmp_id'])
32
- result = Finder.exists?(p_key: dmp_id, logger: logger) unless dmp_id.nil?
32
+ result = Finder.exists?(p_key: dmp_id, logger:) unless dmp_id.nil?
33
33
  raise CreatorError, Helper::MSG_DMP_EXISTS if result.is_a?(Hash)
34
34
 
35
35
  # raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS unless json['PK'].nil?
36
36
 
37
37
  client = Uc3DmpDynamo::Client.new
38
- p_key = _preregister_dmp_id(client: client, provenance: provenance, json: json, logger: logger)
38
+ p_key = _preregister_dmp_id(client:, provenance:, json:, logger:)
39
39
  raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?
40
40
 
41
41
  # Add the DMPHub specific attributes and then save
42
- annotated = Helper.annotate_dmp_json(provenance: provenance, p_key: p_key, json: json['dmp'])
42
+ annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: json['dmp'])
43
43
  logger.info(message: "Creating DMP ID: #{p_key}") if logger.respond_to?(:debug)
44
44
 
45
45
  # Set the :created and :modified timestamps
@@ -48,10 +48,10 @@ module Uc3DmpId
48
48
  annotated['modified'] = now
49
49
 
50
50
  # Create the item
51
- resp = client.put_item(json: annotated, logger: logger)
51
+ resp = client.put_item(json: annotated, logger:)
52
52
  raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
53
53
 
54
- _post_process(json: annotated, logger: logger)
54
+ _post_process(json: annotated, logger:)
55
55
  Helper.cleanse_dmp_json(json: JSON.parse({ dmp: annotated }.to_json))
56
56
  end
57
57
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
@@ -75,7 +75,7 @@ module Uc3DmpId
75
75
  counter = 0
76
76
  while dmp_id == '' && counter <= 10
77
77
  prefix = "#{ENV.fetch('DMP_ID_SHOULDER', nil)}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
78
- dmp_id = prefix unless Finder.exists?(client: client, p_key: prefix)
78
+ dmp_id = prefix unless Finder.exists?(client:, p_key: prefix)
79
79
  counter += 1
80
80
  end
81
81
  # Something went wrong and it was unable to identify a unique id
@@ -94,7 +94,7 @@ module Uc3DmpId
94
94
 
95
95
  # Publish the change to the EventBridge
96
96
  publisher = Uc3DmpEventBridge::Publisher.new
97
- publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger: logger)
97
+ publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:)
98
98
 
99
99
  # Determine if there are any related identifiers that we should try to fetch a citation for
100
100
  citable_identifiers = Helper.citable_related_identifiers(dmp: json)
@@ -108,7 +108,7 @@ module Uc3DmpId
108
108
  }
109
109
  logger.debug(message: 'Fetching citations', details: citable_identifiers) if logger.respond_to?(:debug)
110
110
  publisher.publish(source: 'DmpCreator', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
111
- logger: logger)
111
+ logger:)
112
112
  true
113
113
  end
114
114
  end
@@ -19,7 +19,7 @@ module Uc3DmpId
19
19
 
20
20
  # Fetch the latest version of the DMP ID by its PK
21
21
  client = Uc3DmpDynamo::Client.new
22
- dmp = Finder.by_pk(p_key: p_key, client: client, cleanse: false, logger: logger)
22
+ dmp = Finder.by_pk(p_key:, client:, cleanse: false, logger:)
23
23
  raise DeleterError, Helper::MSG_DMP_NOT_FOUND unless dmp.is_a?(Hash) && !dmp['dmp'].nil?
24
24
 
25
25
  # Only allow this if the provenance is the owner of the DMP!
@@ -38,16 +38,16 @@ module Uc3DmpId
38
38
  dmp['dmp']['dmphub_tombstoned_at'] = now
39
39
 
40
40
  # Create the Tombstone version
41
- resp = client.put_item(json: dmp['dmp'], logger: logger)
41
+ resp = client.put_item(json: dmp['dmp'], logger:)
42
42
  raise DeleterError, Helper::MSG_DMP_NO_TOMBSTONE if resp.nil?
43
43
 
44
44
  # Delete the Latest version
45
- client.delete_item(p_key: p_key, s_key: Helper::DMP_LATEST_VERSION, logger: logger)
45
+ client.delete_item(p_key:, s_key: Helper::DMP_LATEST_VERSION, logger:)
46
46
 
47
47
  # TODO: We should do a check here to see if it was successful!
48
48
 
49
49
  # Notify EZID about the removal
50
- _post_process(json: dmp, logger: logger)
50
+ _post_process(json: dmp, logger:)
51
51
 
52
52
  # Return the tombstoned record
53
53
  Helper.cleanse_dmp_json(json: dmp)
@@ -66,7 +66,7 @@ module Uc3DmpId
66
66
 
67
67
  # Publish the change to the EventBridge
68
68
  publisher = Uc3DmpEventBridge::Publisher.new
69
- publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger: logger)
69
+ publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:)
70
70
  true
71
71
  end
72
72
  end
@@ -20,15 +20,15 @@ module Uc3DmpId
20
20
  # TODO: Replace this with ElasticSearch
21
21
  def search_dmps(args:, logger: nil)
22
22
  client = Uc3DmpDynamo::Client.new
23
- return _by_owner(owner_org: args['owner_orcid'], client: client, logger: logger) unless args['owner_orcid'].nil?
23
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
24
24
 
25
25
  unless args['owner_org_ror'].nil?
26
- return _by_owner_org(owner_org: args['owner_org_ror'], client: client,
27
- logger: logger)
26
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
27
+ logger:)
28
28
  end
29
29
  unless args['modification_day'].nil?
30
- return _by_mod_day(day: args['modification_day'], client: client,
31
- logger: logger)
30
+ return _by_mod_day(day: args['modification_day'], client:,
31
+ logger:)
32
32
  end
33
33
 
34
34
  []
@@ -38,20 +38,20 @@ module Uc3DmpId
38
38
  # -------------------------------------------------------------------------
39
39
  # rubocop:disable Metrics/AbcSize
40
40
  def by_json(json:, client: nil, cleanse: true, logger: nil)
41
- json = Helper.parse_json(json: json)&.fetch('dmp', {})
41
+ json = Helper.parse_json(json:)&.fetch('dmp', {})
42
42
  raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
43
43
 
44
44
  p_key = json['PK']
45
45
  # Translate the incoming :dmp_id into a PK
46
46
  p_key = Helper.dmp_id_to_pk(json: json.fetch('dmp_id', {})) if p_key.nil?
47
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
47
+ client = Uc3DmpDynamo::Client.new if client.nil?
48
48
 
49
49
  # TODO: Re-enable this once we figure out Dynamo indexes
50
50
  # find_by_dmphub_provenance_id -> if no PK and no dmp_id result
51
51
  # return by_provenance_identifier(json: json, client: client, logger: logger) if p_key.nil?
52
52
 
53
53
  # find_by_PK
54
- p_key.nil? ? nil : by_pk(p_key: p_key, s_key: json['SK'], client: client, cleanse: cleanse, logger: logger)
54
+ p_key.nil? ? nil : by_pk(p_key:, s_key: json['SK'], client:, cleanse:, logger:)
55
55
  end
56
56
  # rubocop:enable Metrics/AbcSize
57
57
 
@@ -62,20 +62,20 @@ module Uc3DmpId
62
62
  raise FinderError, MSG_MISSING_PK if p_key.nil?
63
63
 
64
64
  s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
65
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
65
+ client = Uc3DmpDynamo::Client.new if client.nil?
66
66
  resp = client.get_item(
67
67
  key: {
68
- PK: Helper.append_pk_prefix(p_key: p_key),
69
- SK: Helper.append_sk_prefix(s_key: s_key)
68
+ PK: Helper.append_pk_prefix(p_key:),
69
+ SK: Helper.append_sk_prefix(s_key:)
70
70
  },
71
- logger: logger
71
+ logger:
72
72
  )
73
73
  return resp unless resp.is_a?(Hash)
74
74
 
75
75
  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
76
76
  return nil if dmp['dmp']['PK'].nil?
77
77
 
78
- dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger) if cleanse
78
+ dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp:, client:, logger:) if cleanse
79
79
  cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
80
80
  end
81
81
  # rubocop:enable Metrics/AbcSize
@@ -85,13 +85,13 @@ module Uc3DmpId
85
85
  def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil)
86
86
  raise FinderError, MSG_MISSING_PK if p_key.nil?
87
87
 
88
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
88
+ client = Uc3DmpDynamo::Client.new if client.nil?
89
89
  client.pk_exists?(
90
90
  key: {
91
- PK: Helper.append_pk_prefix(p_key: p_key),
92
- SK: Helper.append_sk_prefix(s_key: s_key)
91
+ PK: Helper.append_pk_prefix(p_key:),
92
+ SK: Helper.append_sk_prefix(s_key:)
93
93
  },
94
- logger: logger
94
+ logger:
95
95
  )
96
96
  end
97
97
 
@@ -115,15 +115,15 @@ module Uc3DmpId
115
115
  filter_expression: 'SK = :version',
116
116
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
117
117
  }
118
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
119
- resp = client.query(args: args, logger: logger)
118
+ client = Uc3DmpDynamo::Client.new if client.nil?
119
+ resp = client.query(args:, logger:)
120
120
  return resp unless resp.is_a?(Hash)
121
121
 
122
122
  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
123
123
  return nil if dmp['dmp']['PK'].nil?
124
124
 
125
125
  # If we got a hit, fetch the DMP and return it.
126
- by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse: cleanse, logger: logger)
126
+ by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse:, logger:)
127
127
  end
128
128
  # rubocop:enable Metrics/AbcSize
129
129
 
@@ -149,8 +149,8 @@ module Uc3DmpId
149
149
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
150
150
  }
151
151
  logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
152
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
153
- _process_search_response(response: client.query(args: args, logger: logger))
152
+ client = Uc3DmpDynamo::Client.new if client.nil?
153
+ _process_search_response(response: client.query(args:, logger:))
154
154
  end
155
155
 
156
156
  # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
@@ -174,8 +174,8 @@ module Uc3DmpId
174
174
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
175
175
  }
176
176
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
177
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
178
- _process_search_response(response: client.query(args: args, logger: logger))
177
+ client = Uc3DmpDynamo::Client.new if client.nil?
178
+ _process_search_response(response: client.query(args:, logger:))
179
179
  end
180
180
 
181
181
  # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
@@ -195,8 +195,8 @@ module Uc3DmpId
195
195
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
196
196
  }
197
197
  logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
198
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
199
- _process_search_response(response: client.query(args: args, logger: logger))
198
+ client = Uc3DmpDynamo::Client.new if client.nil?
199
+ _process_search_response(response: client.query(args:, logger:))
200
200
  end
201
201
 
202
202
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -7,17 +7,17 @@ module Uc3DmpId
7
7
  # Helper functions for working with DMP IDs
8
8
  class Helper
9
9
  PK_DMP_PREFIX = 'DMP#'
10
- PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}.freeze
10
+ PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
11
11
 
12
12
  SK_DMP_PREFIX = 'VERSION#'
13
- SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/.freeze
13
+ SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/
14
14
 
15
15
  # TODO: Verify the assumed structure of the DOI is valid
16
- DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}.freeze
17
- URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}.freeze
16
+ DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
17
+ URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
18
18
 
19
- DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
20
- DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
19
+ DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest".freeze
20
+ DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone".freeze
21
21
 
22
22
  DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
23
23
  DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
@@ -41,7 +41,7 @@ module Uc3DmpId
41
41
  # Append the PK prefix for the object
42
42
  # -------------------------------------------------------------------------------------
43
43
  def append_pk_prefix(p_key:)
44
- p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key: p_key)}" : nil
44
+ p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:)}" : nil
45
45
  end
46
46
 
47
47
  # Strip off the PK prefix
@@ -53,7 +53,7 @@ module Uc3DmpId
53
53
  # Append the SK prefix for the object
54
54
  # -------------------------------------------------------------------------------------
55
55
  def append_sk_prefix(s_key:)
56
- s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key: s_key)}" : nil
56
+ s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:)}" : nil
57
57
  end
58
58
 
59
59
  # Strip off the SK prefix
@@ -82,7 +82,7 @@ module Uc3DmpId
82
82
  return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
83
83
 
84
84
  dmp_id = dmp_id.gsub('doi:', '')
85
- dmp_id = dmp_id.start_with?('/') ? dmp_id[1..dmp_id.length] : dmp_id
85
+ dmp_id = dmp_id[1..dmp_id.length] if dmp_id.start_with?('/')
86
86
  base_domain = with_protocol ? dmp_id_base_url : dmp_id_base_url.gsub(%r{https?://}, '')
87
87
  "#{base_domain}#{dmp_id}"
88
88
  end
@@ -95,7 +95,7 @@ module Uc3DmpId
95
95
  p_key = param if param.start_with?(dmp_id_base_url) || param.start_with?(base_domain)
96
96
  p_key = CGI.unescape(p_key.nil? ? param : p_key)
97
97
  p_key = format_dmp_id(value: p_key)
98
- append_pk_prefix(p_key: p_key)
98
+ append_pk_prefix(p_key:)
99
99
  end
100
100
 
101
101
  # Append the :PK prefix to the :dmp_id
@@ -115,7 +115,7 @@ module Uc3DmpId
115
115
 
116
116
  {
117
117
  type: 'doi',
118
- identifier: format_dmp_id(value: remove_pk_prefix(p_key: p_key), with_protocol: true)
118
+ identifier: format_dmp_id(value: remove_pk_prefix(p_key:), with_protocol: true)
119
119
  }
120
120
  end
121
121
 
@@ -180,7 +180,7 @@ module Uc3DmpId
180
180
  # Add DMPHub specific fields to the DMP ID JSON
181
181
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
182
182
  def annotate_dmp_json(provenance:, p_key:, json:)
183
- json = parse_json(json: json)
183
+ json = parse_json(json:)
184
184
  bool_vals = [1, '1', true, 'true', 'yes']
185
185
  return json if provenance.nil? || p_key.nil? || !json.is_a?(Hash)
186
186
 
@@ -189,14 +189,14 @@ module Uc3DmpId
189
189
  return json if id != p_key && !json['PK'].nil?
190
190
 
191
191
  annotated = deep_copy_dmp(obj: json)
192
- annotated['PK'] = json['PK'] || append_pk_prefix(p_key: p_key)
192
+ annotated['PK'] = json['PK'] || append_pk_prefix(p_key:)
193
193
  annotated['SK'] = DMP_LATEST_VERSION
194
194
 
195
195
  # Ensure that the :dmp_id matches the :PK
196
196
  annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
197
197
 
198
- owner_id = extract_owner_id(json: json)
199
- owner_org = extract_owner_org(json: json)
198
+ owner_id = extract_owner_id(json:)
199
+ owner_org = extract_owner_org(json:)
200
200
 
201
201
  # Set the :dmproadmap_featured flag appropriately
202
202
  featured = annotated.fetch('dmproadmap_featured', 'no')
@@ -219,7 +219,7 @@ module Uc3DmpId
219
219
  annotated['dmphub_provenance_identifier'] = annotated.fetch('dmproadmap_links', {})['get']
220
220
  else
221
221
  annotated['dmphub_provenance_identifier'] = format_provenance_id(
222
- provenance: provenance, value: json.fetch('dmp_id', {})['identifier']
222
+ provenance:, value: json.fetch('dmp_id', {})['identifier']
223
223
  )
224
224
  end
225
225
  annotated
@@ -16,18 +16,18 @@ module Uc3DmpId
16
16
  def update(provenance:, p_key:, json: {}, note: nil, logger: nil)
17
17
  raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
18
18
 
19
- mods = Helper.parse_json(json: json).fetch('dmp', {})
20
- p_key = Helper.append_pk_prefix(p_key: p_key)
19
+ mods = Helper.parse_json(json:).fetch('dmp', {})
20
+ p_key = Helper.append_pk_prefix(p_key:)
21
21
  logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
22
22
 
23
23
  # Fetch the latest version of the DMP ID
24
24
  client = Uc3DmpDynamo::Client.new
25
- latest_version = Finder.by_pk(p_key: p_key, client: client, logger: logger, cleanse: false)
26
- latest_version = latest_version['dmp'].nil? ? latest_version : latest_version.fetch('dmp', {})
25
+ latest_version = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
26
+ latest_version = latest_version.fetch('dmp', {}) unless latest_version['dmp'].nil?
27
27
  logger.debug(message: "Latest version for PK #{p_key}", details: latest_version) if logger.respond_to?(:debug)
28
28
 
29
29
  # Verify that the DMP ID is updateable with the info passed in
30
- errs = _updateable?(provenance: provenance, p_key: p_key, latest_version: latest_version['dmp'],
30
+ errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
31
31
  mods: mods['dmp'])
32
32
  logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
33
33
  raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
@@ -37,35 +37,35 @@ module Uc3DmpId
37
37
  # Version the DMP ID record (if applicable).
38
38
  owner = latest_version['dmphub_provenance_id']
39
39
  updater = provenance['PK']
40
- version = Versioner.generate_version(client: client, latest_version: latest_version, owner: owner,
41
- updater: updater, logger: logger)
40
+ version = Versioner.generate_version(client:, latest_version:, owner:,
41
+ updater:, logger:)
42
42
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
43
43
 
44
44
  # Remove the version info because we don't want to save it on the record
45
45
  version.delete('dmphub_versions')
46
46
 
47
47
  # Splice the assertions
48
- version = _process_modifications(owner: owner, updater: updater, version: version, mods: mods, note: note,
49
- logger: logger)
48
+ version = _process_modifications(owner:, updater:, version:, mods:, note:,
49
+ logger:)
50
50
  # Set the :modified timestamps
51
51
  now = Time.now.utc
52
52
  version['modified'] = now.iso8601
53
53
  version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
54
54
 
55
55
  # Save the changes
56
- resp = client.put_item(json: version, logger: logger)
56
+ resp = client.put_item(json: version, logger:)
57
57
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
58
58
 
59
59
  # Send the updates to EZID
60
- _post_process(provenance: provenance, json: version, logger: logger)
60
+ _post_process(provenance:, json: version, logger:)
61
61
 
62
62
  # Return the new version record
63
63
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
64
64
 
65
65
  # Append the :dmphub_versions Array
66
66
  json = JSON.parse({ dmp: version }.to_json)
67
- json = Versioner.append_versions(p_key: p_key, dmp: json, client: client, logger: logger)
68
- Helper.cleanse_dmp_json(json: json)
67
+ json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
68
+ Helper.cleanse_dmp_json(json:)
69
69
  end
70
70
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
71
71
  # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -77,21 +77,21 @@ module Uc3DmpId
77
77
  raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
78
78
 
79
79
  # fetch the existing latest version of the DMP ID
80
- client = Uc3DmpDynamo::Client.new(logger: logger)
81
- dmp = Finder.by_pk(p_key: p_key, client: client, logger: logger, cleanse: false)
80
+ client = Uc3DmpDynamo::Client.new(logger:)
81
+ dmp = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
82
82
  logger.info(message: 'Existing latest record', details: dmp) if logger.respond_to?(:debug)
83
83
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil? &&
84
84
  provenance['PK'] == dmp['dmp']['dmphub_provenance_id']
85
85
 
86
86
  # Add the download URL for the PDF as a related identifier on the DMP ID record
87
- annotated = Helper.annotate_dmp_json(provenance: provenance, p_key: p_key, json: dmp['dmp'])
87
+ annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: dmp['dmp'])
88
88
  annotated['dmproadmap_related_identifiers'] = [] if annotated['dmproadmap_related_identifiers'].nil?
89
89
  annotated['dmproadmap_related_identifiers'] << JSON.parse({
90
90
  descriptor: 'is_metadata_for', work_type: 'output_management_plan', type: 'url', identifier: url
91
91
  }.to_json)
92
92
 
93
93
  # Save the changes without creating a new version!
94
- resp = client.put_item(json: annotated, logger: logger)
94
+ resp = client.put_item(json: annotated, logger:)
95
95
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
96
96
 
97
97
  logger.info(message: "Added DMP ID narrative for PK: #{p_key}, Narrative: #{url}") if logger.respond_to?(:debug)
@@ -111,8 +111,9 @@ module Uc3DmpId
111
111
  return [Helper::MSG_DMP_FORBIDDEN] unless provenance.is_a?(Hash) && !provenance['PK'].nil?
112
112
  # Verify that the JSON is for the same DMP in the PK
113
113
  return [Helper::MSG_DMP_FORBIDDEN] unless Helper.dmp_id_to_pk(json: mods.fetch('dmp_id', {})) == p_key
114
+
114
115
  # Bail out if the DMP ID could not be found or the PKs do not match for some reason
115
- return [Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
116
+ [Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
116
117
  end
117
118
  # rubocop:enable Metrics/AbcSize
118
119
 
@@ -123,14 +124,14 @@ module Uc3DmpId
123
124
 
124
125
  updated = if owner == updater
125
126
  # Splice together any assertions that may have been made while the user was editing the DMP ID
126
- Asserter.splice(latest_version: version, modified_version: mods, logger: logger)
127
+ Asserter.splice(latest_version: version, modified_version: mods, logger:)
127
128
  else
128
129
  # Attach the incoming changes as an assertion to the DMP ID since the updater is NOT the owner
129
- Asserter.add(updater: updater, latest_version: version, modified_version: mods, note: note,
130
- logger: logger)
130
+ Asserter.add(updater:, latest_version: version, modified_version: mods, note:,
131
+ logger:)
131
132
  end
132
133
 
133
- _merge_versions(latest_version: version, mods: updated, logger: logger)
134
+ _merge_versions(latest_version: version, mods: updated, logger:)
134
135
  end
135
136
  # rubocop:enable Metrics/ParameterLists
136
137
 
@@ -172,7 +173,7 @@ module Uc3DmpId
172
173
  logger.debug(message: 'Sending event for EZID publication',
173
174
  details: json)
174
175
  end
175
- publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger: logger) if publishable
176
+ publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:) if publishable
176
177
 
177
178
  # Determine if there are any related identifiers that we should try to fetch a citation for
178
179
  citable_identifiers = Helper.citable_related_identifiers(dmp: json)
@@ -189,7 +190,7 @@ module Uc3DmpId
189
190
  details: citable_identifiers)
190
191
  end
191
192
  publisher.publish(source: 'DmpUpdater', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
192
- logger: logger)
193
+ logger:)
193
194
  true
194
195
  end
195
196
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
@@ -23,11 +23,11 @@ module Uc3DmpId
23
23
  # Validate the specified DMP's :json against the schema for the specified :mode
24
24
  # rubocop:disable Metrics/AbcSize
25
25
  def validate(mode:, json:)
26
- json = Helper.parse_json(json: json)
26
+ json = Helper.parse_json(json:)
27
27
  return [MSG_EMPTY_JSON] if json.nil? || !VALIDATION_MODES.include?(mode)
28
28
 
29
29
  # Load the appropriate JSON schema for the mode
30
- schema = _load_schema(mode: mode)
30
+ schema = _load_schema(mode:)
31
31
  return [MSG_NO_SCHEMA] if schema.nil?
32
32
 
33
33
  # Validate the JSON
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.2'
5
5
  end
@@ -18,13 +18,13 @@ module Uc3DmpId
18
18
 
19
19
  args = {
20
20
  key_conditions: {
21
- PK: { attribute_value_list: [Helper.append_pk_prefix(p_key: p_key)], comparison_operator: 'EQ' }
21
+ PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:)], comparison_operator: 'EQ' }
22
22
  },
23
23
  projection_expression: 'modified',
24
24
  scan_index_forward: false
25
25
  }
26
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
27
- client.query(args: args, logger: logger)
26
+ client = Uc3DmpDynamo::Client.new if client.nil?
27
+ client.query(args:, logger:)
28
28
  end
29
29
 
30
30
  # Generate a snapshot of the current latest version of the DMP ID using the existing :modified as
@@ -57,8 +57,8 @@ module Uc3DmpId
57
57
  prior['SK'] = "#{Helper::SK_DMP_PREFIX}#{latest_version['modified'] || Time.now.utc.iso8601}"
58
58
 
59
59
  # Create the prior version record ()
60
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
61
- resp = client.put_item(json: prior, logger: logger)
60
+ client = Uc3DmpDynamo::Client.new if client.nil?
61
+ resp = client.put_item(json: prior, logger:)
62
62
  return nil if resp.nil?
63
63
 
64
64
  msg = "#{SOURCE} created version PK: #{prior['PK']} SK: #{prior['SK']}"
@@ -74,7 +74,7 @@ module Uc3DmpId
74
74
  json = Helper.parse_json(json: dmp)
75
75
  return json unless p_key.is_a?(String) && !p_key.strip.empty? && json.is_a?(Hash) && !json['dmp'].nil?
76
76
 
77
- results = get_versions(p_key: p_key, client: client, logger: logger)
77
+ results = get_versions(p_key:, client:, logger:)
78
78
  return json unless results.length > 1
79
79
 
80
80
  # TODO: we may want to include milliseconds in the future if we get increased volume so that
@@ -82,7 +82,7 @@ module Uc3DmpId
82
82
  versions = results.map do |ver|
83
83
  next if ver['modified'].nil?
84
84
 
85
- base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key: p_key)}"
85
+ base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:)}"
86
86
  {
87
87
  timestamp: ver['modified'],
88
88
  url: dmp['dmp']['modified'] == ver['modified'] ? base_url : "#{base_url}?version=#{ver['modified']}"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-25 00:00:00.000000000 Z
11
+ date: 2023-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: text
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: uc3-dmp-dynamo
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -76,6 +90,7 @@ files:
76
90
  - README.md
77
91
  - lib/uc3-dmp-id.rb
78
92
  - lib/uc3-dmp-id/asserter.rb
93
+ - lib/uc3-dmp-id/comparator.rb
79
94
  - lib/uc3-dmp-id/creator.rb
80
95
  - lib/uc3-dmp-id/deleter.rb
81
96
  - lib/uc3-dmp-id/finder.rb
@@ -91,7 +106,7 @@ licenses:
91
106
  - MIT
92
107
  metadata:
93
108
  rubygems_mfa_required: 'false'
94
- post_install_message:
109
+ post_install_message:
95
110
  rdoc_options: []
96
111
  require_paths:
97
112
  - lib
@@ -100,15 +115,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
100
115
  requirements:
101
116
  - - ">="
102
117
  - !ruby/object:Gem::Version
103
- version: '2.7'
118
+ version: '3.2'
104
119
  required_rubygems_version: !ruby/object:Gem::Requirement
105
120
  requirements:
106
121
  - - ">="
107
122
  - !ruby/object:Gem::Version
108
123
  version: '0'
109
124
  requirements: []
110
- rubygems_version: 3.1.6
111
- signing_key:
125
+ rubygems_version: 3.4.10
126
+ signing_key:
112
127
  specification_version: 4
113
128
  summary: DMPTool gem that provides support for DMP ID records
114
129
  test_files: []