uc3-dmp-id 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1bdcb2754d5168bdeedf8583b91054cf62fc019d4479686da168d4ca70d83729
4
- data.tar.gz: b2af99461aa7614212aae435db0db174eeb50125006d0dacc87698ecaf41b7b8
3
+ metadata.gz: 51832c144e5663dc01c805f92e81e50e63f2ce00200a8cad5525b7a34c9d7eb9
4
+ data.tar.gz: e197deb7f608ef478716a8aea113e853ab0d3903f1496f970a73d31d30b7e892
5
5
  SHA512:
6
- metadata.gz: 43cdce10f8bccc41fc979c67a4758fd5726206c9066acefcbc8abe65b9d36f79fc69eedb461a9ce584cf61f85ddd0dca13617d99a39e9c831eb56ea725a00eb0
7
- data.tar.gz: 393d0e083ca8cfbf2039ba11861576a2eff489d48d8c541279ae30a3f9e7215d39175c49d1203456f56bb00a6a7e3da6c0211d4c3fc7e4f64f32f88e3d96accf
6
+ metadata.gz: 31bc5d1bb73176c2afff25715590c50d0612f558880f2bde3a750ac5ea8d49674c5dd9b00eff36960454c972a1a31aca6b4f853a98e995dd65977e1f82903ad2
7
+ data.tar.gz: 86153dfeebc52570ecb83a012f4e7242e9bec607627c2dcefc5beb101a711199bc346c1388045127df89f13e65c83aa473c0fc77e37810d436ea939101b74f2b
data/README.md CHANGED
@@ -1,3 +1,12 @@
1
1
  # Uc3DmpId
2
2
 
3
3
  Helper methods for working with DMP ID JSON records
4
+
5
+ After you have made changes, be sure to increment the version number in `lib/uc3-dmp-id/version.rb`.
6
+
7
+ To build and push this gem to RubyGems:
8
+ - Make sure you are logged into RubyGems in your terminal window (see their docs)
9
+ - Run `gem build uc3-dmp-id.gemspec` to build the gem
10
+ - Run `gem push uc3-dmp-id-[version].gem` to publish to RubyGems
11
+
12
+ After you have pushed a new version to RubyGems, you should rebuild and redeploy the AWS SAM application.
@@ -28,13 +28,13 @@ module Uc3DmpId
28
28
  related_works = modified_version.fetch('dmproadmap_related_identifiers', [])
29
29
 
30
30
  if related_works.any?
31
- latest_version = _add_related_identifier(updater: updater, latest_version: latest_version,
32
- identifiers: related_works, note: note, logger: logger)
31
+ latest_version = _add_related_identifier(updater:, latest_version:,
32
+ identifiers: related_works, note:, logger:)
33
33
  end
34
34
  return latest_version unless !funding.nil? && funding.any?
35
35
 
36
- _add_funding_mod(updater: updater, latest_version: latest_version, funding: funding,
37
- note: note, logger: logger)
36
+ _add_funding_mod(updater:, latest_version:, funding:,
37
+ note:, logger:)
38
38
  end
39
39
  # rubocop:enable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
40
40
 
@@ -106,7 +106,7 @@ module Uc3DmpId
106
106
  end
107
107
 
108
108
  latest_version['dmproadmap_related_identifiers'] = [] if latest_version['dmproadmap_related_identifiers'].nil?
109
- assertion = _generate_assertion(updater: updater, note: note,
109
+ assertion = _generate_assertion(updater:, note:,
110
110
  mods: JSON.parse({ dmproadmap_related_identifiers: additions }.to_json))
111
111
  if logger.respond_to?(:debug)
112
112
  logger.debug(message: 'Adding change to :dmphub_modifications.',
@@ -151,7 +151,7 @@ module Uc3DmpId
151
151
  latest_version['dmphub_modifications'] = [] if latest_version['dmphub_modifications'].nil?
152
152
  mod = JSON.parse({ funding: fund }.to_json)
153
153
  mod['funding']['funding_status'] = 'granted'
154
- assertion = _generate_assertion(updater: updater, mods: mod, note: note)
154
+ assertion = _generate_assertion(updater:, mods: mod, note:)
155
155
  if logger.respond_to?(:debug)
156
156
  logger.debug(message: 'Adding change to :dmphub_modifications.',
157
157
  details: assertion)
@@ -168,7 +168,8 @@ module Uc3DmpId
168
168
  # "id": "ABCD1234",
169
169
  # "provenance": "dmphub",
170
170
  # "timestamp": "2023-07-07T14:50:23+00:00",
171
- # "note": "data received from the NIH API",
171
+ # "note": "Data received from OpenAlex, matched by PI names and title keywords.",
172
+ # "confidence": "Med",
172
173
  # "dmproadmap_related_identifiers": {
173
174
  # "work_type": "article",
174
175
  # "descriptor": "is_cited_by",
@@ -183,7 +184,8 @@ module Uc3DmpId
183
184
  # "id": "ABCD1234",
184
185
  # "provenance": "dmphub",
185
186
  # "timestamp": "2023-07-07T14:50:23+00:00",
186
- # "note": "data received from the NIH API",
187
+ # "note": "Data received from the NIH API, matched by the opportunity number.",
188
+ # "confidence": "High",
187
189
  # "funding": {
188
190
  # "funding_status": "granted",
189
191
  # "grant_id": {
@@ -200,11 +202,19 @@ module Uc3DmpId
200
202
  provenance: updater.gsub('PROVENANCE#', ''),
201
203
  timestamp: Time.now.utc.iso8601,
202
204
  status: 'pending',
203
- note: note
205
+ note:
204
206
  }
205
207
  mods.each_pair { |key, val| assertion[key] = val }
206
208
  JSON.parse(assertion.to_json)
207
209
  end
208
210
  end
211
+
212
+ def _score_related_work(latest_version:, work:)
213
+
214
+ end
215
+
216
+ def _score_funding(latest_version:, funding:)
217
+
218
+ end
209
219
  end
210
220
  end
@@ -0,0 +1,500 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'text'
4
+
5
+ module Uc3DmpId
6
+ class ComparatorError < StandardError; end
7
+
8
+ # Class that compares incoming data from an external source to the DMP
9
+ # It determines if they are likely related and applies a confidence rating
10
+ class Comparator
11
+
12
+ MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
13
+ MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'
14
+
15
+ STOP_WORDS = %w[a an and if of or the then they]
16
+
17
+ # See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
18
+ # Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
19
+
20
+ attr_accessor :augmenter, :dmp, :details_hash, :logger
21
+
22
+ def initialize(**args)
23
+ @logger = args[:logger]
24
+ @details_hash = {}
25
+
26
+ @augmenter = args[:augmenter]
27
+ raise ComparatorError, MSG_MISSING_AUGMENTER if @augmenter.nil? ||
28
+ !@augmenter['PK']&.start_with?('AUGMENTERS#')
29
+
30
+ @dmp = args.fetch(:dmp, {})['dmp'].nil? ? args[:dmp] : args.fetch(:dmp, {})['dmp']
31
+ _extract_dmp_details(dmp:)
32
+ raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
33
+ end
34
+
35
+ # Compare the incoming hash with the DMP details that were gathered during initialization.
36
+ #
37
+ # The Hash should contain:
38
+ # {
39
+ # title: "Example research project",
40
+ # abstract: "Lorem ipsum pseudo abstract",
41
+ # keywords: ["foo", "bar"],
42
+ # people: [
43
+ # {
44
+ # id: "https://orcid.org/blah",
45
+ # last_name: "doe",
46
+ # affiliation: { id: "https://ror.org/blah", name: "Foo" }
47
+ # }
48
+ # ],
49
+ # fundings: [
50
+ # { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
51
+ # ],
52
+ # repositories: [
53
+ # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
54
+ # ]
55
+ # }
56
+ def compare(hash:)
57
+ response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
58
+ return response unless hash.is_a?(Hash) && !hash['title'].nil?
59
+
60
+ # Compare the grant ids. If we have a match return the response immediately since that is
61
+ # a very positive match!
62
+ response = _grants_match?(array: hash['fundings'], response:)
63
+ return response if response[:confidence] != 'None'
64
+
65
+ response = _opportunities_match?(array: hash['fundings'], response:)
66
+ response = _orcids_match?(array: hash['people'], response:)
67
+ response = _last_name_and_affiliation_match?(array: hash['people'], response:)
68
+
69
+ # Only process the following if we had some matching contributors, affiliations or opportuniy nbrs
70
+ response = _repository_match?(array: hash['repositories'], response:) if response[:score] > 0
71
+ response = _keyword_match?(array: hash['repositories'], response:) if response[:score] > 0
72
+ response = _text_match?(type: 'title', text: hash['title'], response:) if response[:score] > 0
73
+ response = _text_match?(type: 'abstract', text: hash['abstract'], response:) if response[:score] > 0
74
+ # If the score is less than 3 then we have no confidence that it is a match
75
+ return response if response[:score] <= 2
76
+
77
+ # Set the confidence level based on the score
78
+ response[:confidence] = response[:score] > 15 ? 'High' : (response[:score] > 10 ? 'Medium' : 'Low')
79
+ response
80
+ end
81
+
82
+ private
83
+
84
+ def _extract_dmp_details(dmp:)
85
+ return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?
86
+
87
+ projects = dmp.fetch('project', [{}])
88
+ fundings = projects.map { |proj| proj.fetch('funding', []) }.flatten.compact.uniq
89
+ hosts = dmp.fetch('dataset', []).map { |dset| dset.fetch('distribution', []).map { |d| d['host'] } }
90
+ people = [dmp['contact']]
91
+ people << dmp.fetch('contributor', [])
92
+
93
+ # Extract all of the important bits about the DMP
94
+ @details_hash = {
95
+ created: dmp.fetch('created', Time.now.iso8601),
96
+ title: _cleanse_text(text: projects&.first&.fetch('title', dmp['title'])),
97
+ abstract: _cleanse_text(text: projects&.first&.fetch('description', dmp['description'])),
98
+ keywords: dmp.fetch('dataset', []).map { |ds| ds.fetch('keyword', []) }.flatten.compact.uniq,
99
+ identifiers: [dmp.fetch('dmp_id', {})['identifier']],
100
+ last_names: [],
101
+ affiliation_ids: [],
102
+ affiliations: [],
103
+ funder_names: [],
104
+ funder_ids: [],
105
+ opportunity_ids: [],
106
+ grant_ids: [],
107
+ repositories: []
108
+ }
109
+ _extract_people(array: people&.flatten&.compact&.uniq)
110
+ _extract_funding(array: fundings)
111
+ _extract_repositories(repos: hosts.flatten.compact.uniq)
112
+
113
+ # Clean up the results by flattening and removing duplicates from the Arrays
114
+ @details_hash.keys.each do |key|
115
+ @details_hash[key] = @details_hash[key].flatten.compact.uniq if @details_hash[key].is_a?(Array)
116
+ end
117
+ @logger&.debug(message: "Extracted the following from the DMP", details: @details_hash)
118
+ end
119
+
120
+ # Extract all of the funding information
121
# Collects funder ids, funder names, opportunity ids and grant ids into @details_hash.
#
# @param array [Array<Hash>] funding entries; entries that are not Hashes are skipped
# @return [Array] the array that was passed in, or [] when it is not an Array
def _extract_funding(array:)
  return [] unless array.is_a?(Array)

  array.each do |funding|
    next unless funding.is_a?(Hash)

    # A key that is present but nil (or not a Hash) is treated like a missing key
    funder_id = funding['funder_id'].is_a?(Hash) ? funding['funder_id'] : {}
    opportunity_id = funding['dmproadmap_funding_opportunity_id']
    grant_id = funding['grant_id']

    identifier = funder_id['identifier']&.downcase&.strip
    ror = identifier if funder_id['type']&.downcase&.strip == 'ror'
    # Look the ROR up in its normalized form so case or padding differences still resolve
    fundref = ror.nil? ? identifier : ROR_FUNDREF_ID_CROSSWALK[ror.to_sym]
    opportunity = opportunity_id['identifier']&.downcase&.strip if opportunity_id.is_a?(Hash)
    grant = grant_id['identifier']&.downcase&.strip if grant_id.is_a?(Hash)
    # The last path segment lets a grant URL match a bare grant number
    grant_suffix = grant&.split('/')&.last&.strip

    @details_hash[:identifiers].push(ror, fundref&.downcase&.strip, grant, grant_suffix, opportunity)

    @details_hash[:funder_names] << funding['name']&.downcase&.strip
    @details_hash[:funder_ids] << fundref
    @details_hash[:opportunity_ids] << opportunity
    @details_hash[:grant_ids] << [grant, grant_suffix]
  end
  array
end
146
+
147
+ # Extract all of the ORCIDs, last names, and affiliation ids and names
148
# Collects person ids, last names, affiliation ids and affiliation names into @details_hash.
#
# @param array [Array<Hash>] contact/contributor entries; entries that are not Hashes are skipped
# @return [Array] the array that was passed in, or [] when it is not an Array
def _extract_people(array:)
  return [] unless array.is_a?(Array)

  array.each do |entry|
    next unless entry.is_a?(Hash)

    # Keys that are present but nil (or not a Hash) are treated like missing keys
    person_id = entry['contributor_id'] || entry['contact_id']
    affil = entry['dmproadmap_affiliation'].is_a?(Hash) ? entry['dmproadmap_affiliation'] : {}
    affil_id = affil['affiliation_id']

    id = person_id['identifier']&.downcase&.strip if person_id.is_a?(Hash)
    ror = affil_id['identifier']&.downcase&.strip if affil_id.is_a?(Hash)
    name = entry['name'].to_s.downcase.strip
    # Names are either "last, first" or "first [middle] last"
    last_name = name.include?(', ') ? name.split(', ').first : name.split.last

    @details_hash[:identifiers] << [id, ror]
    @details_hash[:last_names] << last_name
    @details_hash[:affiliation_ids] << ror
    # Drop a trailing parenthetical such as " (example.edu)" from the affiliation name
    @details_hash[:affiliations] << affil['name'].to_s.split(' (').first&.downcase&.strip
  end
  array
end
167
+
168
+ # Extract all of the re3data ids, URLs and names
169
+ def _extract_repositories(repos:)
170
+ return [] unless repos.is_a?(Array)
171
+
172
+ repos.each do |repo|
173
+ next unless repo.is_a?(Hash)
174
+
175
+ @details_hash[:identifiers] << [
176
+ repo['url']&.downcase&.strip, repo.fetch('dmproadmap_host_id', {})['identifier']&.downcase&.strip
177
+ ]
178
+ @details_hash[:repositories] << repo.fetch('name', '')&.downcase&.strip
179
+ end
180
+ repos
181
+ end
182
+
183
+ # Returns whether or not the incoming grant id(s) match the DMPs grant id. Expecting:
184
+ # [
185
+ # { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
186
+ # ]
187
+ def _grants_match?(array:, response:)
188
+ return response unless array.is_a?(Array) && response.is_a?(Hash)
189
+
190
+ ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
191
+ .map { |funding| funding['grant'].map { |id| id&.downcase&.strip } }
192
+ .flatten.compact.uniq
193
+
194
+ matched = _compare_arrays(array_a: @details_hash.fetch(:grant_ids, []), array_b: ids)
195
+ return response if matched <= 0
196
+
197
+ response[:confidence] = 'Absolute'
198
+ response[:score] = 100
199
+ response[:notes] << 'the grant ID matched'
200
+ response
201
+ end
202
+
203
+ # Returns whether or not the incoming grant id(s) match the DMP's funding opportunity id. Expecting:
204
+ # [
205
+ # { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
206
+ # ]
207
+ def _opportunities_match?(array:, response:)
208
+ return response unless array.is_a?(Array) && response.is_a?(Hash)
209
+
210
+ ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
211
+ .map { |funding| funding['grant'].map { |id| id&.downcase&.strip } }
212
+ .flatten.compact.uniq
213
+
214
+ matched = _compare_arrays(array_a: @details_hash.fetch(:opportunity_ids, []), array_b: ids)
215
+ return response if matched <= 0
216
+
217
+ response[:score] += 5
218
+ response[:notes] << 'the funding opportunity number matched'
219
+ response
220
+ end
221
+
222
+ # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
223
+ # [
224
+ # {
225
+ # id: "https://orcid.org/blah",
226
+ # last_name: "doe",
227
+ # affiliation: { id: "https://ror.org/blah", name: "Foo" }
228
+ # }
229
+ # ]
230
+ def _orcids_match?(array:, response:)
231
+ return response unless array.is_a?(Array) && response.is_a?(Hash)
232
+
233
+ ids = array.select { |repo| repo.is_a?(Hash) }
234
+ .map { |person| person['id']&.downcase&.strip }
235
+ .flatten.compact.uniq
236
+
237
+ matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
238
+ return response if matched <= 0
239
+
240
+ response[:score] += (matched * 2)
241
+ response[:notes] << 'contributor ORCIDs matched'
242
+ response
243
+ end
244
+
245
+ # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
246
+ # [
247
+ # {
248
+ # id: "https://orcid.org/blah",
249
+ # last_name: "doe",
250
+ # affiliation: { id: "https://ror.org/blah", name: "Foo" }
251
+ # }
252
+ # ]
253
+ def _last_name_and_affiliation_match?(array:, response:)
254
+ return response unless array.is_a?(Array) && response.is_a?(Hash)
255
+
256
+ array = array.select { |repo| repo.is_a?(Hash) }
257
+ affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
258
+ last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
259
+ rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
260
+ affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
261
+
262
+ # Check the person last names and affiliation name and RORs
263
+ last_names_matched = _compare_arrays(array_a: @details_hash.fetch(:last_names, []), array_b: last_names)
264
+ rors_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliation_ids, []), array_b: rors)
265
+ affil_names_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliations, []), array_b: affil_names)
266
+ return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
267
+
268
+ response[:score] += last_names_matched + rors_matched + affil_names_matched
269
+ response[:notes] << 'contributor names and affiliations matched'
270
+ response
271
+ end
272
+
273
+ # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
274
+ # [
275
+ # { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
276
+ # ]
277
# Adds one point to the score for every incoming repository id/URL that the DMP also lists.
#
# @param array [Array<Hash>] repositories; each 'id' may be a String, an Array of Strings or absent
# @param response [Hash] running result with :score and :notes
# @return [Hash] the response, updated in place when at least one id matched
def _repository_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # We only care about repositories with ids/urls. Array() copes with a missing or scalar 'id'
  ids = array.select { |repo| repo.is_a?(Hash) }
             .flat_map { |repo| Array(repo['id']) }
             .filter_map { |id| id.downcase.strip if id.is_a?(String) }
             .uniq

  matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
  return response if matched <= 0

  response[:score] += matched
  response[:notes] << 'repositories matched'
  response
end
292
+
293
+ # Returns whether or not the list of keywords exist in the DMP. Expecting:
294
+ # keywords: ["foo", "bar"]
295
# Adds a single point to the score when any incoming keyword is also a DMP keyword.
#
# @param array [Array<String>] incoming keywords; entries that are not Strings are ignored
# @param response [Hash] running result with :score and :notes
# @return [Hash] the response, updated in place when at least one keyword matched
def _keyword_match?(array:, response:)
  return response unless array.is_a?(Array) && response.is_a?(Hash)

  # Normalize both sides the same way so the comparison ignores case and padding
  normalize = lambda do |words|
    Array(words).flatten.filter_map { |word| word.downcase.strip if word.is_a?(String) }.uniq
  end

  matched = _compare_arrays(array_a: normalize.call(@details_hash.fetch(:keywords, [])),
                            array_b: normalize.call(array))
  return response if matched <= 0

  response[:score] += 1
  response[:notes] << 'keywords matched'
  response
end
306
+
307
+ # Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
308
+ def _text_match?(type: 'title', text:, response:, logger: nil)
309
+ return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty? &&
310
+ !@details_hash[type.to_sym].nil?
311
+
312
+ nlp_processor = Text::WhiteSimilarity.new
313
+ cleansed = _cleanse_text(text:)
314
+
315
+ details = {
316
+ "dmp_#{type}": @details_hash[type.to_sym],
317
+ "incoming_#{type}": cleansed,
318
+ nlp_score: nlp_processor.similarity(@details_hash[type.to_sym], cleansed)
319
+ }
320
+ @logger&.debug(message: "Text::WhiteSimilarity score", details:)
321
+ return response if details[:nlp_score] < 0.5
322
+
323
+ response[:score] += details[:nlp_score] >= 0.75 ? 5 : 2
324
+ response[:notes] << "#{type}s are similar"
325
+ response
326
+ end
327
+
328
+ # Change the incoming text to lower case, collapse whitespace and remove STOP_WORDS
329
+ def _cleanse_text(text:)
330
+ return nil unless text.is_a?(String)
331
+
332
+ text.downcase.split.reject { |word| STOP_WORDS.include?(word) }.join(' ').strip
333
+ end
334
+
335
+ # Do an introspection of the 2 arrays and return the number of matches
336
# Counts the distinct values that appear in both arrays.
#
# @param array_a [Array]
# @param array_b [Array]
# @return [Integer] size of the intersection, or 0 when either argument is not an Array
def _compare_arrays(array_a: [], array_b: [])
  return 0 unless array_a.is_a?(Array) && array_b.is_a?(Array)

  # Array#& always returns an Array, so its size is the answer
  (array_a & array_b).size
end
342
+
343
+ # TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
344
# Maps a funder's ROR id (as a Symbol key) to its Crossref funder DOI.
# Every key must be a full "https://ror.org/<id>" URL or the lookup can never succeed.
ROR_FUNDREF_ID_CROSSWALK = {
  # NIH ID Crosswalk
  "https://ror.org/01cwqze88": "https://doi.org/10.13039/100000002",
  "https://ror.org/04mhx6838": "https://doi.org/10.13039/100000055",
  "https://ror.org/012pb6c26": "https://doi.org/10.13039/100000050",
  "https://ror.org/03wkg3b53": "https://doi.org/10.13039/100000053",
  "https://ror.org/0060t0j89": "https://doi.org/10.13039/100000092",
  "https://ror.org/00372qc85": "https://doi.org/10.13039/100000070",
  "https://ror.org/00190t495": "https://doi.org/10.13039/100008460",
  "https://ror.org/00j4k1h63": "https://doi.org/10.13039/100000066",
  "https://ror.org/01y3zfr79": "https://doi.org/10.13039/100000056",
  "https://ror.org/04q48ey07": "https://doi.org/10.13039/100000057",
  "https://ror.org/0493hgw16": "https://doi.org/10.13039/100006545",
  "https://ror.org/04vfsmv21": "https://doi.org/10.13039/100000098",
  "https://ror.org/03jh5a977": "https://doi.org/10.13039/100000093",
  "https://ror.org/04xeg9z08": "https://doi.org/10.13039/100000025",
  "https://ror.org/01s5ya894": "https://doi.org/10.13039/100000065",
  "https://ror.org/02meqm098": "https://doi.org/10.13039/100000002",
  "https://ror.org/049v75w11": "https://doi.org/10.13039/100000049",
  "https://ror.org/004a2wv92": "https://doi.org/10.13039/100000072",
  "https://ror.org/00adh9b73": "https://doi.org/10.13039/100000062",
  "https://ror.org/043z4tv69": "https://doi.org/10.13039/100000060",
  "https://ror.org/00x19de83": "https://doi.org/10.13039/100000002",
  "https://ror.org/02jzrsm59": "https://doi.org/10.13039/100000027",
  "https://ror.org/006zn3t30": "https://doi.org/10.13039/100000069",
  "https://ror.org/04byxyr05": "https://doi.org/10.13039/100000071",
  "https://ror.org/04pw6fb54": "https://doi.org/10.13039/100006108",
  "https://ror.org/05aq6yn88": "https://doi.org/10.13039/100006955",
  "https://ror.org/02xey9a22": "https://doi.org/10.13039/100000061",
  "https://ror.org/00fj8a872": "https://doi.org/10.13039/100000052",
  "https://ror.org/01wtjyf13": "https://doi.org/10.13039/100000063",
  "https://ror.org/04r5s4b52": "https://doi.org/10.13039/100005440",
  "https://ror.org/046zezr58": "https://doi.org/10.13039/100006085",
  "https://ror.org/02e3wq066": "https://doi.org/10.13039/100006086",
  "https://ror.org/031gy6182": "https://doi.org/10.13039/100000002",
  "https://ror.org/054j5yq82": "https://doi.org/10.13039/100000002",
  "https://ror.org/02yrzyf97": "https://doi.org/10.13039/100000002",

  # NSF ID Crosswalk
  "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
  "https://ror.org/04aqat463": "https://doi.org/10.13039/100000001",
  "https://ror.org/01rcfpa16": "https://doi.org/10.13039/100005441",
  "https://ror.org/014eweh95": "https://doi.org/10.13039/100005445",
  "https://ror.org/001xhss06": "https://doi.org/10.13039/100000076",
  "https://ror.org/04qn9mx93": "https://doi.org/10.13039/100000153",
  "https://ror.org/03g87he71": "https://doi.org/10.13039/100000155",
  "https://ror.org/01tnvpc68": "https://doi.org/10.13039/100000156",
  "https://ror.org/01rvays47": "https://doi.org/10.13039/100000154",
  "https://ror.org/002jdaq33": "https://doi.org/10.13039/100000152",
  "https://ror.org/025kzpk63": "https://doi.org/10.13039/100000083",
  "https://ror.org/04nh1dc89": "https://doi.org/10.13039/100007523",
  "https://ror.org/01mng8331": "https://doi.org/10.13039/100000143",
  "https://ror.org/02rdzmk74": "https://doi.org/10.13039/100000144",
  "https://ror.org/053a2cp42": "https://doi.org/10.13039/100000145",
  "https://ror.org/014bj5w56": "https://doi.org/10.13039/100000081",
  "https://ror.org/00whkrf32": "https://doi.org/10.13039/100000082",
  "https://ror.org/05s7cqk18": "https://doi.org/10.13039/100000173",
  "https://ror.org/02kd4km72": "https://doi.org/10.13039/100000172",
  "https://ror.org/03mamvh39": "https://doi.org/10.13039/100000171",
  "https://ror.org/00b6sbb32": "https://doi.org/10.13039/100000084",
  "https://ror.org/0471zv972": "https://doi.org/10.13039/100000146",
  "https://ror.org/028yd4c30": "https://doi.org/10.13039/100000147",
  "https://ror.org/01krpsy48": "https://doi.org/10.13039/100000148",
  "https://ror.org/050rnw378": "https://doi.org/10.13039/100000149",
  "https://ror.org/0388pet74": "https://doi.org/10.13039/100000150",
  "https://ror.org/03xyg3m20": "https://doi.org/10.13039/100000151",
  "https://ror.org/05p847d66": "https://doi.org/10.13039/100000085",
  "https://ror.org/037gd6g64": "https://doi.org/10.13039/100000159",
  "https://ror.org/05v01mk25": "https://doi.org/10.13039/100000160",
  "https://ror.org/05wqqhv83": "https://doi.org/10.13039/100000141",
  "https://ror.org/05nwjp114": "https://doi.org/10.13039/100007352",
  "https://ror.org/05fnzca26": "https://doi.org/10.13039/100000162",
  "https://ror.org/02trddg58": "https://doi.org/10.13039/100000163",
  "https://ror.org/029b7h395": "https://doi.org/10.13039/100000086",
  "https://ror.org/04mg8wm74": "https://doi.org/10.13039/100000164",
  "https://ror.org/01ar8dr59": "https://doi.org/10.13039/100000165",
  "https://ror.org/01pc7k308": "https://doi.org/10.13039/100000078",
  "https://ror.org/051fftw81": "https://doi.org/10.13039/100000121",
  "https://ror.org/04ap5x931": "https://doi.org/10.13039/100000166",
  "https://ror.org/00apvva27": "https://doi.org/10.13039/100005716",
  "https://ror.org/04nseet23": "https://doi.org/10.13039/100000179",
  "https://ror.org/04k9mqs78": "https://doi.org/10.13039/100000106",
  "https://ror.org/01k638r21": "https://doi.org/10.13039/100000089",
  "https://ror.org/01gmp5538": "https://doi.org/10.13039/100005447",
  "https://ror.org/01vnjbg30": "https://doi.org/10.13039/100005449",
  "https://ror.org/03h7mcc28": "https://doi.org/10.13039/100000088",
  "https://ror.org/05wgkzg12": "https://doi.org/10.13039/100000169",
  "https://ror.org/0445wmv88": "https://doi.org/10.13039/100000170",
  "https://ror.org/02dz2hb46": "https://doi.org/10.13039/100000077",
  "https://ror.org/034m1ez10": "https://doi.org/10.13039/100000107",
  "https://ror.org/02a65dj82": "https://doi.org/10.13039/100005717",
  "https://ror.org/020fhsn68": "https://doi.org/10.13039/100000001",
  "https://ror.org/03z9hh605": "https://doi.org/10.13039/100000174",
  "https://ror.org/04ya3kq71": "https://doi.org/10.13039/100007521",
  "https://ror.org/04evh7y43": "https://doi.org/10.13039/100005443",
  "https://ror.org/04h67aa53": "https://doi.org/10.13039/100000177",
  "https://ror.org/025dabr11": "https://doi.org/10.13039/100005446",
  "https://ror.org/04vw0kz07": "https://doi.org/10.13039/100005448",
  "https://ror.org/054ydxh33": "https://doi.org/10.13039/100005554",
  "https://ror.org/01sharn77": "https://doi.org/10.13039/100006091",
  "https://ror.org/02ch5q898": "https://doi.org/10.13039/100000001",

  # NASA ID Crosswalk
  "https://ror.org/0171mag52": "https://doi.org/10.13039/100006198",
  "https://ror.org/027k65916": "https://doi.org/10.13039/100006196",
  "https://ror.org/027ka1x80": "https://doi.org/10.13039/100000104",
  "https://ror.org/02acart68": "https://doi.org/10.13039/100006195",
  "https://ror.org/059fqnc42": "https://doi.org/10.13039/100006193",
  "https://ror.org/01cyfxe35": "https://doi.org/10.13039/100016595",
  "https://ror.org/04xx4z452": "https://doi.org/10.13039/100006203",
  "https://ror.org/0399mhs52": "https://doi.org/10.13039/100006199",
  "https://ror.org/02epydz83": "https://doi.org/10.13039/100006197",
  "https://ror.org/03j9e2j92": "https://doi.org/10.13039/100006205",
  "https://ror.org/02s42x260": "https://doi.org/10.13039/100000104",
  "https://ror.org/01p7gwa14": "https://doi.org/10.13039/100000104",
  "https://ror.org/01qxmdg18": "https://doi.org/10.13039/100000104",
  "https://ror.org/006ndaj41": "https://doi.org/10.13039/100000104",
  "https://ror.org/03em45j53": "https://doi.org/10.13039/100007346",
  "https://ror.org/045t78n53": "https://doi.org/10.13039/100000104",
  "https://ror.org/00r57r863": "https://doi.org/10.13039/100000104",
  "https://ror.org/0401vze59": "https://doi.org/10.13039/100007726",
  "https://ror.org/04hccab49": "https://doi.org/10.13039/100000104",
  "https://ror.org/04437j066": "https://doi.org/10.13039/100000104",
  "https://ror.org/028b18z22": "https://doi.org/10.13039/100000104",
  "https://ror.org/00ryjtt64": "https://doi.org/10.13039/100000104",

  # DOE ID Crosswalk
  "https://ror.org/01bj3aw27": "https://doi.org/10.13039/100000015",
  "https://ror.org/03q1rgc19": "https://doi.org/10.13039/100006133",
  "https://ror.org/02xznz413": "https://doi.org/10.13039/100006134",
  "https://ror.org/03sk1we31": "https://doi.org/10.13039/100006168",
  "https://ror.org/00f93gc02": "https://doi.org/10.13039/100006177",
  "https://ror.org/05tj7dm33": "https://doi.org/10.13039/100006147",
  "https://ror.org/0012c7r22": "https://doi.org/10.13039/100006192",
  "https://ror.org/00mmn6b08": "https://doi.org/10.13039/100006132",
  "https://ror.org/03ery9d53": "https://doi.org/10.13039/100006120",
  "https://ror.org/033jmdj81": "https://doi.org/10.13039/100000015",
  "https://ror.org/03rd4h240": "https://doi.org/10.13039/100006130",
  "https://ror.org/0054t4769": "https://doi.org/10.13039/100006200",
  "https://ror.org/03eecgp81": "https://doi.org/10.13039/100006174",
  "https://ror.org/00heb4d89": "https://doi.org/10.13039/100006135",
  "https://ror.org/05ek3m339": "https://doi.org/10.13039/100006150",
  "https://ror.org/00km40770": "https://doi.org/10.13039/100006138",
  "https://ror.org/02ah1da87": "https://doi.org/10.13039/100006137",
  "https://ror.org/05hsv7e61": "https://doi.org/10.13039/100000015",
  "https://ror.org/01c9ay627": "https://doi.org/10.13039/100006165",
  "https://ror.org/04z2gev20": "https://doi.org/10.13039/100006183",
  "https://ror.org/02z1qvq09": "https://doi.org/10.13039/100006144",
  "https://ror.org/03jf3w726": "https://doi.org/10.13039/100006186",
  "https://ror.org/04848jz84": "https://doi.org/10.13039/100006142",
  "https://ror.org/04s778r16": "https://doi.org/10.13039/100006171",
  "https://ror.org/04nnxen11": "https://doi.org/10.13039/100000015",
  "https://ror.org/05csy5p27": "https://doi.org/10.13039/100010268",
  "https://ror.org/05efnac71": "https://doi.org/10.13039/100000015"
}.freeze
499
+ end
500
+ end
@@ -23,23 +23,23 @@ module Uc3DmpId
23
23
  raise CreatorError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil?
24
24
 
25
25
  # Validate the incoming JSON first
26
- json = Helper.parse_json(json: json)
27
- errs = Validator.validate(mode: 'author', json: json)
26
+ json = Helper.parse_json(json:)
27
+ errs = Validator.validate(mode: 'author', json:)
28
28
  raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any? && errs.first != Validator::MSG_VALID_JSON
29
29
 
30
30
  # Try to find it by the :dmp_id first and Fail if found
31
31
  dmp_id = Helper.dmp_id_to_pk(json: json.fetch('dmp', {})['dmp_id'])
32
- result = Finder.exists?(p_key: dmp_id, logger: logger) unless dmp_id.nil?
32
+ result = Finder.exists?(p_key: dmp_id, logger:) unless dmp_id.nil?
33
33
  raise CreatorError, Helper::MSG_DMP_EXISTS if result.is_a?(Hash)
34
34
 
35
35
  # raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS unless json['PK'].nil?
36
36
 
37
37
  client = Uc3DmpDynamo::Client.new
38
- p_key = _preregister_dmp_id(client: client, provenance: provenance, json: json, logger: logger)
38
+ p_key = _preregister_dmp_id(client:, provenance:, json:, logger:)
39
39
  raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?
40
40
 
41
41
  # Add the DMPHub specific attributes and then save
42
- annotated = Helper.annotate_dmp_json(provenance: provenance, p_key: p_key, json: json['dmp'])
42
+ annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: json['dmp'])
43
43
  logger.info(message: "Creating DMP ID: #{p_key}") if logger.respond_to?(:debug)
44
44
 
45
45
  # Set the :created and :modified timestamps
@@ -48,10 +48,10 @@ module Uc3DmpId
48
48
  annotated['modified'] = now
49
49
 
50
50
  # Create the item
51
- resp = client.put_item(json: annotated, logger: logger)
51
+ resp = client.put_item(json: annotated, logger:)
52
52
  raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
53
53
 
54
- _post_process(json: annotated, logger: logger)
54
+ _post_process(json: annotated, logger:)
55
55
  Helper.cleanse_dmp_json(json: JSON.parse({ dmp: annotated }.to_json))
56
56
  end
57
57
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
@@ -75,7 +75,7 @@ module Uc3DmpId
75
75
  counter = 0
76
76
  while dmp_id == '' && counter <= 10
77
77
  prefix = "#{ENV.fetch('DMP_ID_SHOULDER', nil)}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
78
- dmp_id = prefix unless Finder.exists?(client: client, p_key: prefix)
78
+ dmp_id = prefix unless Finder.exists?(client:, p_key: prefix)
79
79
  counter += 1
80
80
  end
81
81
  # Something went wrong and it was unable to identify a unique id
@@ -94,7 +94,7 @@ module Uc3DmpId
94
94
 
95
95
  # Publish the change to the EventBridge
96
96
  publisher = Uc3DmpEventBridge::Publisher.new
97
- publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger: logger)
97
+ publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:)
98
98
 
99
99
  # Determine if there are any related identifiers that we should try to fetch a citation for
100
100
  citable_identifiers = Helper.citable_related_identifiers(dmp: json)
@@ -108,7 +108,7 @@ module Uc3DmpId
108
108
  }
109
109
  logger.debug(message: 'Fetching citations', details: citable_identifiers) if logger.respond_to?(:debug)
110
110
  publisher.publish(source: 'DmpCreator', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
111
- logger: logger)
111
+ logger:)
112
112
  true
113
113
  end
114
114
  end
@@ -19,7 +19,7 @@ module Uc3DmpId
19
19
 
20
20
  # Fetch the latest version of the DMP ID by it's PK
21
21
  client = Uc3DmpDynamo::Client.new
22
- dmp = Finder.by_pk(p_key: p_key, client: client, cleanse: false, logger: logger)
22
+ dmp = Finder.by_pk(p_key:, client:, cleanse: false, logger:)
23
23
  raise DeleterError, Helper::MSG_DMP_NOT_FOUND unless dmp.is_a?(Hash) && !dmp['dmp'].nil?
24
24
 
25
25
  # Only allow this if the provenance is the owner of the DMP!
@@ -38,16 +38,16 @@ module Uc3DmpId
38
38
  dmp['dmp']['dmphub_tombstoned_at'] = now
39
39
 
40
40
  # Create the Tombstone version
41
- resp = client.put_item(json: dmp['dmp'], logger: logger)
41
+ resp = client.put_item(json: dmp['dmp'], logger:)
42
42
  raise DeleterError, Helper::MSG_DMP_NO_TOMBSTONE if resp.nil?
43
43
 
44
44
  # Delete the Latest version
45
- client.delete_item(p_key: p_key, s_key: Helper::DMP_LATEST_VERSION, logger: logger)
45
+ client.delete_item(p_key:, s_key: Helper::DMP_LATEST_VERSION, logger:)
46
46
 
47
47
  # TODO: We should do a check here to see if it was successful!
48
48
 
49
49
  # Notify EZID about the removal
50
- _post_process(json: dmp, logger: logger)
50
+ _post_process(json: dmp, logger:)
51
51
 
52
52
  # Return the tombstoned record
53
53
  Helper.cleanse_dmp_json(json: dmp)
@@ -66,7 +66,7 @@ module Uc3DmpId
66
66
 
67
67
  # Publish the change to the EventBridge
68
68
  publisher = Uc3DmpEventBridge::Publisher.new
69
- publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger: logger)
69
+ publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:)
70
70
  true
71
71
  end
72
72
  end
@@ -20,15 +20,15 @@ module Uc3DmpId
20
20
  # TODO: Replace this with ElasticSearch
21
21
  def search_dmps(args:, logger: nil)
22
22
  client = Uc3DmpDynamo::Client.new
23
- return _by_owner(owner_org: args['owner_orcid'], client: client, logger: logger) unless args['owner_orcid'].nil?
23
+ return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
24
24
 
25
25
  unless args['owner_org_ror'].nil?
26
- return _by_owner_org(owner_org: args['owner_org_ror'], client: client,
27
- logger: logger)
26
+ return _by_owner_org(owner_org: args['owner_org_ror'], client:,
27
+ logger:)
28
28
  end
29
29
  unless args['modification_day'].nil?
30
- return _by_mod_day(day: args['modification_day'], client: client,
31
- logger: logger)
30
+ return _by_mod_day(day: args['modification_day'], client:,
31
+ logger:)
32
32
  end
33
33
 
34
34
  []
@@ -38,20 +38,20 @@ module Uc3DmpId
38
38
  # -------------------------------------------------------------------------
39
39
  # rubocop:disable Metrics/AbcSize
40
40
  def by_json(json:, client: nil, cleanse: true, logger: nil)
41
- json = Helper.parse_json(json: json)&.fetch('dmp', {})
41
+ json = Helper.parse_json(json:)&.fetch('dmp', {})
42
42
  raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
43
43
 
44
44
  p_key = json['PK']
45
45
  # Translate the incoming :dmp_id into a PK
46
46
  p_key = Helper.dmp_id_to_pk(json: json.fetch('dmp_id', {})) if p_key.nil?
47
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
47
+ client = Uc3DmpDynamo::Client.new if client.nil?
48
48
 
49
49
  # TODO: Re-enable this once we figure out Dynamo indexes
50
50
  # find_by_dmphub_provenance_id -> if no PK and no dmp_id result
51
51
  # return by_provenance_identifier(json: json, client: client, logger: logger) if p_key.nil?
52
52
 
53
53
  # find_by_PK
54
- p_key.nil? ? nil : by_pk(p_key: p_key, s_key: json['SK'], client: client, cleanse: cleanse, logger: logger)
54
+ p_key.nil? ? nil : by_pk(p_key:, s_key: json['SK'], client:, cleanse:, logger:)
55
55
  end
56
56
  # rubocop:enable Metrics/AbcSize
57
57
 
@@ -62,20 +62,20 @@ module Uc3DmpId
62
62
  raise FinderError, MSG_MISSING_PK if p_key.nil?
63
63
 
64
64
  s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
65
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
65
+ client = Uc3DmpDynamo::Client.new if client.nil?
66
66
  resp = client.get_item(
67
67
  key: {
68
- PK: Helper.append_pk_prefix(p_key: p_key),
69
- SK: Helper.append_sk_prefix(s_key: s_key)
68
+ PK: Helper.append_pk_prefix(p_key:),
69
+ SK: Helper.append_sk_prefix(s_key:)
70
70
  },
71
- logger: logger
71
+ logger:
72
72
  )
73
73
  return resp unless resp.is_a?(Hash)
74
74
 
75
75
  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
76
76
  return nil if dmp['dmp']['PK'].nil?
77
77
 
78
- dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger) if cleanse
78
+ dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp:, client:, logger:) if cleanse
79
79
  cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
80
80
  end
81
81
  # rubocop:enable Metrics/AbcSize
@@ -85,13 +85,13 @@ module Uc3DmpId
85
85
  def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil)
86
86
  raise FinderError, MSG_MISSING_PK if p_key.nil?
87
87
 
88
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
88
+ client = Uc3DmpDynamo::Client.new if client.nil?
89
89
  client.pk_exists?(
90
90
  key: {
91
- PK: Helper.append_pk_prefix(p_key: p_key),
92
- SK: Helper.append_sk_prefix(s_key: s_key)
91
+ PK: Helper.append_pk_prefix(p_key:),
92
+ SK: Helper.append_sk_prefix(s_key:)
93
93
  },
94
- logger: logger
94
+ logger:
95
95
  )
96
96
  end
97
97
 
@@ -115,15 +115,15 @@ module Uc3DmpId
115
115
  filter_expression: 'SK = :version',
116
116
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
117
117
  }
118
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
119
- resp = client.query(args: args, logger: logger)
118
+ client = Uc3DmpDynamo::Client.new if client.nil?
119
+ resp = client.query(args:, logger:)
120
120
  return resp unless resp.is_a?(Hash)
121
121
 
122
122
  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
123
123
  return nil if dmp['dmp']['PK'].nil?
124
124
 
125
125
  # If we got a hit, fetch the DMP and return it.
126
- by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse: cleanse, logger: logger)
126
+ by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse:, logger:)
127
127
  end
128
128
  # rubocop:enable Metrics/AbcSize
129
129
 
@@ -149,8 +149,8 @@ module Uc3DmpId
149
149
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
150
150
  }
151
151
  logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
152
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
153
- _process_search_response(response: client.query(args: args, logger: logger))
152
+ client = Uc3DmpDynamo::Client.new if client.nil?
153
+ _process_search_response(response: client.query(args:, logger:))
154
154
  end
155
155
 
156
156
  # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
@@ -174,8 +174,8 @@ module Uc3DmpId
174
174
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
175
175
  }
176
176
  logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
177
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
178
- _process_search_response(response: client.query(args: args, logger: logger))
177
+ client = Uc3DmpDynamo::Client.new if client.nil?
178
+ _process_search_response(response: client.query(args:, logger:))
179
179
  end
180
180
 
181
181
  # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
@@ -195,8 +195,8 @@ module Uc3DmpId
195
195
  expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
196
196
  }
197
197
  logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
198
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
199
- _process_search_response(response: client.query(args: args, logger: logger))
198
+ client = Uc3DmpDynamo::Client.new if client.nil?
199
+ _process_search_response(response: client.query(args:, logger:))
200
200
  end
201
201
 
202
202
  # Transform the search results so that we do not include any of the DMPHub specific metadata
@@ -7,17 +7,17 @@ module Uc3DmpId
7
7
  # Helper functions for working with DMP IDs
8
8
  class Helper
9
9
  PK_DMP_PREFIX = 'DMP#'
10
- PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}.freeze
10
+ PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
11
11
 
12
12
  SK_DMP_PREFIX = 'VERSION#'
13
- SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/.freeze
13
+ SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/
14
14
 
15
15
  # TODO: Verify the assumed structure of the DOI is valid
16
- DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}.freeze
17
- URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}.freeze
16
+ DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
17
+ URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
18
18
 
19
- DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
20
- DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
19
+ DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest".freeze
20
+ DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone".freeze
21
21
 
22
22
  DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
23
23
  DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
@@ -41,7 +41,7 @@ module Uc3DmpId
41
41
  # Append the PK prefix for the object
42
42
  # -------------------------------------------------------------------------------------
43
43
  def append_pk_prefix(p_key:)
44
- p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key: p_key)}" : nil
44
+ p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:)}" : nil
45
45
  end
46
46
 
47
47
  # Strip off the PK prefix
@@ -53,7 +53,7 @@ module Uc3DmpId
53
53
  # Append the SK prefix for the object
54
54
  # -------------------------------------------------------------------------------------
55
55
  def append_sk_prefix(s_key:)
56
- s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key: s_key)}" : nil
56
+ s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:)}" : nil
57
57
  end
58
58
 
59
59
  # Strip off the SK prefix
@@ -82,7 +82,7 @@ module Uc3DmpId
82
82
  return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
83
83
 
84
84
  dmp_id = dmp_id.gsub('doi:', '')
85
- dmp_id = dmp_id.start_with?('/') ? dmp_id[1..dmp_id.length] : dmp_id
85
+ dmp_id = dmp_id[1..dmp_id.length] if dmp_id.start_with?('/')
86
86
  base_domain = with_protocol ? dmp_id_base_url : dmp_id_base_url.gsub(%r{https?://}, '')
87
87
  "#{base_domain}#{dmp_id}"
88
88
  end
@@ -95,7 +95,7 @@ module Uc3DmpId
95
95
  p_key = param if param.start_with?(dmp_id_base_url) || param.start_with?(base_domain)
96
96
  p_key = CGI.unescape(p_key.nil? ? param : p_key)
97
97
  p_key = format_dmp_id(value: p_key)
98
- append_pk_prefix(p_key: p_key)
98
+ append_pk_prefix(p_key:)
99
99
  end
100
100
 
101
101
  # Append the :PK prefix to the :dmp_id
@@ -115,7 +115,7 @@ module Uc3DmpId
115
115
 
116
116
  {
117
117
  type: 'doi',
118
- identifier: format_dmp_id(value: remove_pk_prefix(p_key: p_key), with_protocol: true)
118
+ identifier: format_dmp_id(value: remove_pk_prefix(p_key:), with_protocol: true)
119
119
  }
120
120
  end
121
121
 
@@ -180,7 +180,7 @@ module Uc3DmpId
180
180
  # Add DMPHub specific fields to the DMP ID JSON
181
181
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
182
182
  def annotate_dmp_json(provenance:, p_key:, json:)
183
- json = parse_json(json: json)
183
+ json = parse_json(json:)
184
184
  bool_vals = [1, '1', true, 'true', 'yes']
185
185
  return json if provenance.nil? || p_key.nil? || !json.is_a?(Hash)
186
186
 
@@ -189,14 +189,14 @@ module Uc3DmpId
189
189
  return json if id != p_key && !json['PK'].nil?
190
190
 
191
191
  annotated = deep_copy_dmp(obj: json)
192
- annotated['PK'] = json['PK'] || append_pk_prefix(p_key: p_key)
192
+ annotated['PK'] = json['PK'] || append_pk_prefix(p_key:)
193
193
  annotated['SK'] = DMP_LATEST_VERSION
194
194
 
195
195
  # Ensure that the :dmp_id matches the :PK
196
196
  annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
197
197
 
198
- owner_id = extract_owner_id(json: json)
199
- owner_org = extract_owner_org(json: json)
198
+ owner_id = extract_owner_id(json:)
199
+ owner_org = extract_owner_org(json:)
200
200
 
201
201
  # Set the :dmproadmap_featured flag appropriately
202
202
  featured = annotated.fetch('dmproadmap_featured', 'no')
@@ -219,7 +219,7 @@ module Uc3DmpId
219
219
  annotated['dmphub_provenance_identifier'] = annotated.fetch('dmproadmap_links', {})['get']
220
220
  else
221
221
  annotated['dmphub_provenance_identifier'] = format_provenance_id(
222
- provenance: provenance, value: json.fetch('dmp_id', {})['identifier']
222
+ provenance:, value: json.fetch('dmp_id', {})['identifier']
223
223
  )
224
224
  end
225
225
  annotated
@@ -16,18 +16,18 @@ module Uc3DmpId
16
16
  def update(provenance:, p_key:, json: {}, note: nil, logger: nil)
17
17
  raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
18
18
 
19
- mods = Helper.parse_json(json: json).fetch('dmp', {})
20
- p_key = Helper.append_pk_prefix(p_key: p_key)
19
+ mods = Helper.parse_json(json:).fetch('dmp', {})
20
+ p_key = Helper.append_pk_prefix(p_key:)
21
21
  logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
22
22
 
23
23
  # Fetch the latest version of the DMP ID
24
24
  client = Uc3DmpDynamo::Client.new
25
- latest_version = Finder.by_pk(p_key: p_key, client: client, logger: logger, cleanse: false)
26
- latest_version = latest_version['dmp'].nil? ? latest_version : latest_version.fetch('dmp', {})
25
+ latest_version = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
26
+ latest_version = latest_version.fetch('dmp', {}) unless latest_version['dmp'].nil?
27
27
  logger.debug(message: "Latest version for PK #{p_key}", details: latest_version) if logger.respond_to?(:debug)
28
28
 
29
29
  # Verify that the DMP ID is updateable with the info passed in
30
- errs = _updateable?(provenance: provenance, p_key: p_key, latest_version: latest_version['dmp'],
30
+ errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
31
31
  mods: mods['dmp'])
32
32
  logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
33
33
  raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
@@ -37,35 +37,35 @@ module Uc3DmpId
37
37
  # Version the DMP ID record (if applicable).
38
38
  owner = latest_version['dmphub_provenance_id']
39
39
  updater = provenance['PK']
40
- version = Versioner.generate_version(client: client, latest_version: latest_version, owner: owner,
41
- updater: updater, logger: logger)
40
+ version = Versioner.generate_version(client:, latest_version:, owner:,
41
+ updater:, logger:)
42
42
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
43
43
 
44
44
  # Remove the version info because we don't want to save it on the record
45
45
  version.delete('dmphub_versions')
46
46
 
47
47
  # Splice the assertions
48
- version = _process_modifications(owner: owner, updater: updater, version: version, mods: mods, note: note,
49
- logger: logger)
48
+ version = _process_modifications(owner:, updater:, version:, mods:, note:,
49
+ logger:)
50
50
  # Set the :modified timestamps
51
51
  now = Time.now.utc
52
52
  version['modified'] = now.iso8601
53
53
  version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
54
54
 
55
55
  # Save the changes
56
- resp = client.put_item(json: version, logger: logger)
56
+ resp = client.put_item(json: version, logger:)
57
57
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
58
58
 
59
59
  # Send the updates to EZID
60
- _post_process(provenance: provenance, json: version, logger: logger)
60
+ _post_process(provenance:, json: version, logger:)
61
61
 
62
62
  # Return the new version record
63
63
  logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
64
64
 
65
65
  # Append the :dmphub_versions Array
66
66
  json = JSON.parse({ dmp: version }.to_json)
67
- json = Versioner.append_versions(p_key: p_key, dmp: json, client: client, logger: logger)
68
- Helper.cleanse_dmp_json(json: json)
67
+ json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
68
+ Helper.cleanse_dmp_json(json:)
69
69
  end
70
70
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
71
71
  # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -77,21 +77,21 @@ module Uc3DmpId
77
77
  raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
78
78
 
79
79
  # fetch the existing latest version of the DMP ID
80
- client = Uc3DmpDynamo::Client.new(logger: logger)
81
- dmp = Finder.by_pk(p_key: p_key, client: client, logger: logger, cleanse: false)
80
+ client = Uc3DmpDynamo::Client.new(logger:)
81
+ dmp = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
82
82
  logger.info(message: 'Existing latest record', details: dmp) if logger.respond_to?(:debug)
83
83
  raise UpdaterError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil? &&
84
84
  provenance['PK'] == dmp['dmp']['dmphub_provenance_id']
85
85
 
86
86
  # Add the download URl for the PDF as a related identifier on the DMP ID record
87
- annotated = Helper.annotate_dmp_json(provenance: provenance, p_key: p_key, json: dmp['dmp'])
87
+ annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: dmp['dmp'])
88
88
  annotated['dmproadmap_related_identifiers'] = [] if annotated['dmproadmap_related_identifiers'].nil?
89
89
  annotated['dmproadmap_related_identifiers'] << JSON.parse({
90
90
  descriptor: 'is_metadata_for', work_type: 'output_management_plan', type: 'url', identifier: url
91
91
  }.to_json)
92
92
 
93
93
  # Save the changes without creating a new version!
94
- resp = client.put_item(json: annotated, logger: logger)
94
+ resp = client.put_item(json: annotated, logger:)
95
95
  raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
96
96
 
97
97
  logger.info(message: "Added DMP ID narrative for PK: #{p_key}, Narrative: #{url}") if logger.respond_to?(:debug)
@@ -111,8 +111,9 @@ module Uc3DmpId
111
111
  return [Helper::MSG_DMP_FORBIDDEN] unless provenance.is_a?(Hash) && !provenance['PK'].nil?
112
112
  # Verify that the JSON is for the same DMP in the PK
113
113
  return [Helper::MSG_DMP_FORBIDDEN] unless Helper.dmp_id_to_pk(json: mods.fetch('dmp_id', {})) == p_key
114
+
114
115
  # Bail out if the DMP ID could not be found or the PKs do not match for some reason
115
- return [Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
116
+ [Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
116
117
  end
117
118
  # rubocop:enable Metrics/AbcSize
118
119
 
@@ -123,14 +124,14 @@ module Uc3DmpId
123
124
 
124
125
  updated = if owner == updater
125
126
  # Splice together any assertions that may have been made while the user was editing the DMP ID
126
- Asserter.splice(latest_version: version, modified_version: mods, logger: logger)
127
+ Asserter.splice(latest_version: version, modified_version: mods, logger:)
127
128
  else
128
129
  # Attach the incoming changes as an assertion to the DMP ID since the updater is NOT the owner
129
- Asserter.add(updater: updater, latest_version: version, modified_version: mods, note: note,
130
- logger: logger)
130
+ Asserter.add(updater:, latest_version: version, modified_version: mods, note:,
131
+ logger:)
131
132
  end
132
133
 
133
- _merge_versions(latest_version: version, mods: updated, logger: logger)
134
+ _merge_versions(latest_version: version, mods: updated, logger:)
134
135
  end
135
136
  # rubocop:enable Metrics/ParameterLists
136
137
 
@@ -172,7 +173,7 @@ module Uc3DmpId
172
173
  logger.debug(message: 'Sending event for EZID publication',
173
174
  details: json)
174
175
  end
175
- publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger: logger) if publishable
176
+ publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:) if publishable
176
177
 
177
178
  # Determine if there are any related identifiers that we should try to fetch a citation for
178
179
  citable_identifiers = Helper.citable_related_identifiers(dmp: json)
@@ -189,7 +190,7 @@ module Uc3DmpId
189
190
  details: citable_identifiers)
190
191
  end
191
192
  publisher.publish(source: 'DmpUpdater', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
192
- logger: logger)
193
+ logger:)
193
194
  true
194
195
  end
195
196
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
@@ -23,11 +23,11 @@ module Uc3DmpId
23
23
  # Validate the specified DMP's :json against the schema for the specified :mode
24
24
  # rubocop:disable Metrics/AbcSize
25
25
  def validate(mode:, json:)
26
- json = Helper.parse_json(json: json)
26
+ json = Helper.parse_json(json:)
27
27
  return [MSG_EMPTY_JSON] if json.nil? || !VALIDATION_MODES.include?(mode)
28
28
 
29
29
  # Load the appropriate JSON schema for the mode
30
- schema = _load_schema(mode: mode)
30
+ schema = _load_schema(mode:)
31
31
  return [MSG_NO_SCHEMA] if schema.nil?
32
32
 
33
33
  # Validate the JSON
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uc3DmpId
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.2'
5
5
  end
@@ -18,13 +18,13 @@ module Uc3DmpId
18
18
 
19
19
  args = {
20
20
  key_conditions: {
21
- PK: { attribute_value_list: [Helper.append_pk_prefix(p_key: p_key)], comparison_operator: 'EQ' }
21
+ PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:)], comparison_operator: 'EQ' }
22
22
  },
23
23
  projection_expression: 'modified',
24
24
  scan_index_forward: false
25
25
  }
26
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
27
- client.query(args: args, logger: logger)
26
+ client = Uc3DmpDynamo::Client.new if client.nil?
27
+ client.query(args:, logger:)
28
28
  end
29
29
 
30
30
  # Generate a snapshot of the current latest version of the DMP ID using the existing :modified as
@@ -57,8 +57,8 @@ module Uc3DmpId
57
57
  prior['SK'] = "#{Helper::SK_DMP_PREFIX}#{latest_version['modified'] || Time.now.utc.iso8601}"
58
58
 
59
59
  # Create the prior version record ()
60
- client = client.nil? ? Uc3DmpDynamo::Client.new : client
61
- resp = client.put_item(json: prior, logger: logger)
60
+ client = Uc3DmpDynamo::Client.new if client.nil?
61
+ resp = client.put_item(json: prior, logger:)
62
62
  return nil if resp.nil?
63
63
 
64
64
  msg = "#{SOURCE} created version PK: #{prior['PK']} SK: #{prior['SK']}"
@@ -74,7 +74,7 @@ module Uc3DmpId
74
74
  json = Helper.parse_json(json: dmp)
75
75
  return json unless p_key.is_a?(String) && !p_key.strip.empty? && json.is_a?(Hash) && !json['dmp'].nil?
76
76
 
77
- results = get_versions(p_key: p_key, client: client, logger: logger)
77
+ results = get_versions(p_key:, client:, logger:)
78
78
  return json unless results.length > 1
79
79
 
80
80
  # TODO: we may want to include milliseconds in the future if we get increased volume so that
@@ -82,7 +82,7 @@ module Uc3DmpId
82
82
  versions = results.map do |ver|
83
83
  next if ver['modified'].nil?
84
84
 
85
- base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key: p_key)}"
85
+ base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:)}"
86
86
  {
87
87
  timestamp: ver['modified'],
88
88
  url: dmp['dmp']['modified'] == ver['modified'] ? base_url : "#{base_url}?version=#{ver['modified']}"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uc3-dmp-id
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Riley
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-25 00:00:00.000000000 Z
11
+ date: 2023-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: text
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: uc3-dmp-dynamo
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -76,6 +90,7 @@ files:
76
90
  - README.md
77
91
  - lib/uc3-dmp-id.rb
78
92
  - lib/uc3-dmp-id/asserter.rb
93
+ - lib/uc3-dmp-id/comparator.rb
79
94
  - lib/uc3-dmp-id/creator.rb
80
95
  - lib/uc3-dmp-id/deleter.rb
81
96
  - lib/uc3-dmp-id/finder.rb
@@ -91,7 +106,7 @@ licenses:
91
106
  - MIT
92
107
  metadata:
93
108
  rubygems_mfa_required: 'false'
94
- post_install_message:
109
+ post_install_message:
95
110
  rdoc_options: []
96
111
  require_paths:
97
112
  - lib
@@ -100,15 +115,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
100
115
  requirements:
101
116
  - - ">="
102
117
  - !ruby/object:Gem::Version
103
- version: '2.7'
118
+ version: '3.2'
104
119
  required_rubygems_version: !ruby/object:Gem::Requirement
105
120
  requirements:
106
121
  - - ">="
107
122
  - !ruby/object:Gem::Version
108
123
  version: '0'
109
124
  requirements: []
110
- rubygems_version: 3.1.6
111
- signing_key:
125
+ rubygems_version: 3.4.10
126
+ signing_key:
112
127
  specification_version: 4
113
128
  summary: DMPTool gem that provides support for DMP ID records
114
129
  test_files: []