RubyGems - uc3-dmp-id - Versions diffs - 0.1.0 → 0.1.2 - Mend

uc3-dmp-id 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml +4 -4
data/README.md +9 -0
data/lib/uc3-dmp-id/asserter.rb +19 -9
data/lib/uc3-dmp-id/comparator.rb +500 -0
data/lib/uc3-dmp-id/creator.rb +10 -10
data/lib/uc3-dmp-id/deleter.rb +5 -5
data/lib/uc3-dmp-id/finder.rb +26 -26
data/lib/uc3-dmp-id/helper.rb +16 -16
data/lib/uc3-dmp-id/updater.rb +25 -24
data/lib/uc3-dmp-id/validator.rb +2 -2
data/lib/uc3-dmp-id/version.rb +1 -1
data/lib/uc3-dmp-id/versioner.rb +7 -7
metadata +22 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1bdcb2754d5168bdeedf8583b91054cf62fc019d4479686da168d4ca70d83729
-  data.tar.gz: b2af99461aa7614212aae435db0db174eeb50125006d0dacc87698ecaf41b7b8
+  metadata.gz: 51832c144e5663dc01c805f92e81e50e63f2ce00200a8cad5525b7a34c9d7eb9
+  data.tar.gz: e197deb7f608ef478716a8aea113e853ab0d3903f1496f970a73d31d30b7e892
 SHA512:
-  metadata.gz: 43cdce10f8bccc41fc979c67a4758fd5726206c9066acefcbc8abe65b9d36f79fc69eedb461a9ce584cf61f85ddd0dca13617d99a39e9c831eb56ea725a00eb0
-  data.tar.gz: 393d0e083ca8cfbf2039ba11861576a2eff489d48d8c541279ae30a3f9e7215d39175c49d1203456f56bb00a6a7e3da6c0211d4c3fc7e4f64f32f88e3d96accf
+  metadata.gz: 31bc5d1bb73176c2afff25715590c50d0612f558880f2bde3a750ac5ea8d49674c5dd9b00eff36960454c972a1a31aca6b4f853a98e995dd65977e1f82903ad2
+  data.tar.gz: 86153dfeebc52570ecb83a012f4e7242e9bec607627c2dcefc5beb101a711199bc346c1388045127df89f13e65c83aa473c0fc77e37810d436ea939101b74f2b

data/README.md CHANGED Viewed

@@ -1,3 +1,12 @@
 # Uc3DmpId
 Helper methods for working with DMP ID JSON records
+After you have made changes, be sure to increment the version number in `lib/uc3-dmp-id/version.rb`.
+To build and push this gem to RubyGems:
+- Make sure you are logged into RubyGems in your terminal window (see their docs)
+- Run `gem build uc3-dmp-id.gemspec` to build the gem
+- Run `gem push uc3-dmp-id-[version].gem` to publish to RubyGems
+After you have pushed a new version to RubyGems, you should rebuild and redeploy the AWS SAM application.

data/lib/uc3-dmp-id/asserter.rb CHANGED Viewed

@@ -28,13 +28,13 @@ module Uc3DmpId
         related_works = modified_version.fetch('dmproadmap_related_identifiers', [])
         if related_works.any?
-          latest_version = _add_related_identifier(updater: updater, latest_version: latest_version,
-                                                   identifiers: related_works, note: note, logger: logger)
+          latest_version = _add_related_identifier(updater:, latest_version:,
+                                                   identifiers: related_works, note:, logger:)
         end
         return latest_version unless !funding.nil? && funding.any?
-        _add_funding_mod(updater: updater, latest_version: latest_version, funding: funding,
-                         note: note, logger: logger)
+        _add_funding_mod(updater:, latest_version:, funding:,
+                         note:, logger:)
       end
       # rubocop:enable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
@@ -106,7 +106,7 @@ module Uc3DmpId
         end
         latest_version['dmproadmap_related_identifiers'] = [] if latest_version['dmproadmap_related_identifiers'].nil?
-        assertion = _generate_assertion(updater: updater, note: note,
+        assertion = _generate_assertion(updater:, note:,
                                         mods: JSON.parse({ dmproadmap_related_identifiers: additions }.to_json))
         if logger.respond_to?(:debug)
           logger.debug(message: 'Adding change to :dmphub_modifications.',
@@ -151,7 +151,7 @@ module Uc3DmpId
         latest_version['dmphub_modifications'] = [] if latest_version['dmphub_modifications'].nil?
         mod = JSON.parse({ funding: fund }.to_json)
         mod['funding']['funding_status'] = 'granted'
-        assertion = _generate_assertion(updater: updater, mods: mod, note: note)
+        assertion = _generate_assertion(updater:, mods: mod, note:)
         if logger.respond_to?(:debug)
           logger.debug(message: 'Adding change to :dmphub_modifications.',
                        details: assertion)
@@ -168,7 +168,8 @@ module Uc3DmpId
       #    "id": "ABCD1234",
       #    "provenance": "dmphub",
       #    "timestamp": "2023-07-07T14:50:23+00:00",
-      #    "note": "data received from the NIH API",
+      #    "note": "Data received from OpenAlex, matched by PI names and title keywords.",
+      #    "confidence": "Med",
       #    "dmproadmap_related_identifiers": {
       #      "work_type": "article",
       #      "descriptor": "is_cited_by",
@@ -183,7 +184,8 @@ module Uc3DmpId
       #    "id": "ABCD1234",
       #    "provenance": "dmphub",
       #    "timestamp": "2023-07-07T14:50:23+00:00",
-      #    "note": "data received from the NIH API",
+      #    "note": "Data received from the NIH API, matched by the opportunity number.",
+      #    "confidence": "High",
       #    "funding": {
       #      "funding_status": "granted",
       #      "grant_id": {
@@ -200,11 +202,19 @@ module Uc3DmpId
           provenance: updater.gsub('PROVENANCE#', ''),
           timestamp: Time.now.utc.iso8601,
           status: 'pending',
-          note: note
+          note:
         }
         mods.each_pair { |key, val| assertion[key] = val }
         JSON.parse(assertion.to_json)
       end
     end
+    # Placeholder: the body is empty, so this currently always returns nil.
+    # NOTE(review): presumably meant to score how well :work matches :latest_version - confirm.
+    def _score_related_work(latest_version:, work:)
+    end
+    # Placeholder: the body is empty, so this currently always returns nil.
+    # NOTE(review): presumably meant to score how well :funding matches :latest_version - confirm.
+    def _score_funding(latest_version:, funding:)
+    end
   end
 end

data/lib/uc3-dmp-id/comparator.rb ADDED Viewed

@@ -0,0 +1,500 @@
+# frozen_string_literal: true
+require 'text'
+module Uc3DmpId
+  class ComparatorError < StandardError; end
+  # Class that compares incoming data from an external source to the DMP
+  # It determines if they are likely related and applies a confidence rating
+  class Comparator
+    MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
+    MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'
+    STOP_WORDS = %w[a an and if of or the then they]
+    # See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
+    # Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
+    attr_accessor :augmenter, :dmp, :details_hash, :logger
+    # Builds a Comparator for one DMP and one Augmenter.
+    #
+    # Expected keyword args:
+    #   :augmenter - Hash whose 'PK' starts with 'AUGMENTERS#' (its 'name' is reported by #compare)
+    #   :dmp       - the DMP Hash, either bare or wrapped as { 'dmp' => { ... } }
+    #   :logger    - optional; only used when it responds to :debug
+    #
+    # Raises ComparatorError when the augmenter is missing/invalid or when no usable details
+    # could be extracted from the DMP.
+    def initialize(**args)
+      @logger = args[:logger]
+      @details_hash = {}
+      @augmenter = args[:augmenter]
+      raise ComparatorError, MSG_MISSING_AUGMENTER if @augmenter.nil? ||
+                                                      !@augmenter['PK']&.start_with?('AUGMENTERS#')
+      # Unwrap { 'dmp' => {...} } when present. Checking for a Hash first means a nil or otherwise
+      # unusable :dmp ends in the ComparatorError below rather than a NoMethodError/TypeError here.
+      incoming = args[:dmp]
+      @dmp = incoming.is_a?(Hash) && !incoming['dmp'].nil? ? incoming['dmp'] : incoming
+      _extract_dmp_details(dmp: @dmp)
+      raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
+    end
+    # Compare the incoming hash with the DMP details that were gathered during initialization.
+    #
+    # The Hash should contain:
+    #  {
+    #    title: "Example research project",
+    #    abstract: "Lorem ipsum pseudo abstract",
+    #    keywords: ["foo", "bar"],
+    #    people: [
+    #      {
+    #        id: "https://orcid.org/blah",
+    #        last_name: "doe",
+    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
+    #      }
+    #    ],
+    #    fundings: [
+    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
+    #    ],
+    #    repositories: [
+    #      { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
+    #    ]
+    #  }
+    def compare(hash:)
+      # Returns a Hash with :confidence ('None', 'Low', 'Medium', 'High' or 'Absolute'),
+      # :score (Integer), :notes (Array of Strings) and :source (the augmenter's name).
+      response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
+      return response unless hash.is_a?(Hash) && !hash['title'].nil?
+      # Compare the grant ids. If we have a match return the response immediately since that is
+      # a very positive match!
+      response = _grants_match?(array: hash['fundings'], response:)
+      return response if response[:confidence] != 'None'
+      response = _opportunities_match?(array: hash['fundings'], response:)
+      response = _orcids_match?(array: hash['people'], response:)
+      response = _last_name_and_affiliation_match?(array: hash['people'], response:)
+      # Only process the following if we had some matching contributors, affiliations or
+      # opportunity numbers. These checks can only raise the score, so one guard covers them all.
+      if response[:score].positive?
+        response = _repository_match?(array: hash['repositories'], response:)
+        # The keywords live under hash['keywords'] (an Array of Strings), not hash['repositories']
+        response = _keyword_match?(array: hash['keywords'], response:)
+        response = _text_match?(type: 'title', text: hash['title'], response:)
+        response = _text_match?(type: 'abstract', text: hash['abstract'], response:)
+      end
+      # If the score is less than 3 then we have no confidence that it is a match
+      return response if response[:score] <= 2
+      # Set the confidence level based on the score
+      response[:confidence] = if response[:score] > 15
+                                'High'
+                              elsif response[:score] > 10
+                                'Medium'
+                              else
+                                'Low'
+                              end
+      response
+    end
+    private
+    # Collects the parts of the DMP that #compare needs into @details_hash.
+    # Leaves @details_hash untouched (and returns nil) unless the DMP is a Hash that has both
+    # a 'title' and a 'contact'.
+    def _extract_dmp_details(dmp:)
+      return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?
+      # Accept a missing, nil or empty 'project' (or a single project Hash) so that the title and
+      # description can still fall back to the values on the DMP itself
+      raw_projects = dmp['project']
+      projects = (raw_projects.is_a?(Array) ? raw_projects : [raw_projects]).grep(Hash)
+      project = projects.first || {}
+      fundings = projects.map { |proj| proj.fetch('funding', []) }.flatten.compact.uniq
+      hosts = dmp.fetch('dataset', []).map { |dset| dset.fetch('distribution', []).map { |d| d['host'] } }
+      people = [dmp['contact']]
+      people << dmp.fetch('contributor', [])
+      # Keywords are lower cased and stripped because _keyword_match? normalizes the incoming
+      # keywords the same way before intersecting the two lists
+      keywords = dmp.fetch('dataset', []).map { |ds| ds.fetch('keyword', []) }.flatten.compact
+                    .map { |keyword| keyword.to_s.downcase.strip }.uniq
+      # Extract all of the important bits about the DMP
+      @details_hash = {
+        created: dmp.fetch('created', Time.now.iso8601),
+        title: _cleanse_text(text: project['title'] || dmp['title']),
+        abstract: _cleanse_text(text: project['description'] || dmp['description']),
+        keywords:,
+        identifiers: [dmp.fetch('dmp_id', {})['identifier']],
+        last_names: [],
+        affiliation_ids: [],
+        affiliations: [],
+        funder_names: [],
+        funder_ids: [],
+        opportunity_ids: [],
+        grant_ids: [],
+        repositories: []
+      }
+      _extract_people(array: people.flatten.compact.uniq)
+      _extract_funding(array: fundings)
+      _extract_repositories(repos: hosts.flatten.compact.uniq)
+      # Clean up the results by flattening and removing duplicates from the Arrays
+      @details_hash.transform_values! { |value| value.is_a?(Array) ? value.flatten.compact.uniq : value }
+      @logger&.debug(message: "Extracted the following from the DMP", details: @details_hash)
+    end
+    # Extract all of the funding information
+    def _extract_funding(array:)
+      # Appends funder, opportunity and grant details from each funding Hash to @details_hash.
+      # Returns the Array it was given, or [] when it was not given an Array.
+      return [] unless array.is_a?(Array)
+      array.each do |funding|
+        next unless funding.is_a?(Hash)
+        # `|| {}` (rather than fetch with a default) also covers keys that are present but nil
+        funder_id = funding['funder_id'] || {}
+        identifier = funder_id['identifier']&.downcase&.strip
+        # The ROR is normalized before the lookup because the crosswalk keys are lower case Symbols
+        ror = identifier if funder_id['type']&.downcase&.strip == 'ror'
+        fundref = ror.nil? ? identifier : ROR_FUNDREF_ID_CROSSWALK[ror.to_sym]
+        opportunity = (funding['dmproadmap_funding_opportunity_id'] || {})['identifier']&.downcase&.strip
+        grant = (funding['grant_id'] || {})['identifier']&.downcase&.strip
+        # Grant ids are often URLs, so the last path segment is recorded as well
+        grant_suffix = grant&.split('/')&.last&.strip
+        @details_hash[:identifiers].push(ror, fundref, grant, grant_suffix, opportunity)
+        @details_hash[:funder_names] << funding['name']&.downcase&.strip
+        @details_hash[:funder_ids] << fundref
+        @details_hash[:opportunity_ids] << opportunity
+        @details_hash[:grant_ids] << [grant, grant_suffix]
+      end
+      array
+    end
+    # Extract all of the ORCIDs, last names, and affiliation ids and names
+    def _extract_people(array:)
+      # Appends each person's identifier, last name and affiliation id/name to @details_hash.
+      # Returns the Array it was given, or [] when it was not given an Array.
+      return [] unless array.is_a?(Array)
+      array.each do |entry|
+        next unless entry.is_a?(Hash)
+        # `|| {}` (rather than fetch with a default) also covers keys that are present but nil
+        id = (entry['contributor_id'] || entry['contact_id'] || {})['identifier']&.downcase&.strip
+        affil = entry['dmproadmap_affiliation'] || {}
+        ror = (affil['affiliation_id'] || {})['identifier']&.downcase&.strip
+        # to_s guards against an explicit nil name, which would otherwise raise on #include?
+        name = entry['name'].to_s.downcase.strip
+        # Supports both "last, first" and "first last"; a blank name yields nil
+        last_name = name.include?(', ') ? name.split(', ').first : name.split.last
+        @details_hash[:identifiers] << [id, ror]
+        @details_hash[:last_names] << last_name
+        @details_hash[:affiliation_ids] << ror
+        # Drop any parenthesized suffix from the affiliation name, e.g. "Foo (foo.edu)"
+        @details_hash[:affiliations] << affil['name']&.split(' (')&.first&.downcase&.strip
+      end
+      array
+    end
+    # Extract all of the re3data ids, URLs and names
+    def _extract_repositories(repos:)
+      # Records each host's URL, host id and name in @details_hash (lower cased and stripped).
+      # Returns the Array it was given, or [] when it was not given an Array.
+      return [] unless repos.is_a?(Array)
+      repos.grep(Hash).each do |host|
+        url = host['url']&.downcase&.strip
+        host_id = host.fetch('dmproadmap_host_id', {})['identifier']&.downcase&.strip
+        @details_hash[:identifiers] << [url, host_id]
+        @details_hash[:repositories] << host.fetch('name', '')&.downcase&.strip
+      end
+      repos
+    end
+    # Returns whether or not the incoming grant id(s) match the DMPs grant id. Expecting:
+    #    [
+    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
+    #    ]
+    def _grants_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      # Gather every incoming grant id, normalized the same way as the DMP's grant ids
+      usable = array.select { |entry| entry.is_a?(Hash) && entry['grant'].is_a?(Array) }
+      incoming = usable.flat_map { |entry| entry['grant'].map { |grant| grant&.downcase&.strip } }
+                       .compact.uniq
+      hits = _compare_arrays(array_a: @details_hash.fetch(:grant_ids, []), array_b: incoming)
+      return response unless hits.positive?
+      # A grant id match is treated as conclusive
+      response[:confidence] = 'Absolute'
+      response[:score] = 100
+      response[:notes] << 'the grant ID matched'
+      response
+    end
+    # Returns whether or not the incoming grant id(s) match the DMPs opportunity id. Expecting:
+    #    [
+    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
+    #    ]
+    def _opportunities_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      # The incoming :grant values are checked against the DMP's opportunity ids
+      usable = array.select { |entry| entry.is_a?(Hash) && entry['grant'].is_a?(Array) }
+      incoming = usable.flat_map { |entry| entry['grant'].map { |nbr| nbr&.downcase&.strip } }
+                       .compact.uniq
+      hits = _compare_arrays(array_a: @details_hash.fetch(:opportunity_ids, []), array_b: incoming)
+      return response unless hits.positive?
+      response[:score] += 5
+      response[:notes] << 'the funding opportunity number matched'
+      response
+    end
+    # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
+    #   [
+    #      {
+    #        id: "https://orcid.org/blah",
+    #        last_name: "doe",
+    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
+    #      }
+    #    ]
+    def _orcids_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      # Collect the normalized :id of every incoming person
+      person_ids = array.grep(Hash).map { |person| person['id']&.downcase&.strip }.compact.uniq
+      hits = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: person_ids)
+      return response unless hits.positive?
+      # Each matching id is worth 2 points
+      response[:score] += hits * 2
+      response[:notes] << 'contributor ORCIDs matched'
+      response
+    end
+    # Returns whether or not the incoming list of creators/contributors match those on the DMP. Expecting:
+    #   [
+    #      {
+    #        id: "https://orcid.org/blah",
+    #        last_name: "doe",
+    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
+    #      }
+    #    ]
+    def _last_name_and_affiliation_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      tidy = ->(value) { value&.downcase&.strip }
+      people = array.grep(Hash)
+      affils = people.map { |person| person['affiliation'] }.flatten.compact.uniq
+      surnames = people.map { |person| tidy.call(person['last_name']) }.compact.uniq
+      ror_ids = affils.map { |affil| tidy.call(affil['id']) }.compact.uniq
+      affil_names = affils.map { |affil| tidy.call(affil['name']) }.compact.uniq
+      # Count the overlaps for last names, affiliation RORs and affiliation names
+      counts = [
+        _compare_arrays(array_a: @details_hash.fetch(:last_names, []), array_b: surnames),
+        _compare_arrays(array_a: @details_hash.fetch(:affiliation_ids, []), array_b: ror_ids),
+        _compare_arrays(array_a: @details_hash.fetch(:affiliations, []), array_b: affil_names)
+      ]
+      return response if counts.none?(&:positive?)
+      # One point for every overlapping value
+      response[:score] += counts.sum
+      response[:notes] << 'contributor names and affiliations matched'
+      response
+    end
+    # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
+    #    [
+    #      { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
+    #    ]
+    def _repository_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      # We only care about repositories with ids/urls. Array() accepts a missing :id (nil) and a
+      # single id String as well as the expected Array, none of which should raise here
+      ids = array.grep(Hash)
+                 .flat_map { |repo| Array(repo['id']).map { |id| id&.downcase&.strip } }
+                 .compact.uniq
+      matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
+      return response if matched <= 0
+      # One point for every repository id/url that the DMP also lists
+      response[:score] += matched
+      response[:notes] << 'repositories matched'
+      response
+    end
+    # Returns whether or not the list of keywords exist in the DMP. Expecting:
+    #     keywords: ["foo", "bar"]
+    def _keyword_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      incoming = array.map { |keyword| keyword&.downcase&.strip }.compact.uniq
+      hits = _compare_arrays(array_a: @details_hash.fetch(:keywords, []), array_b: incoming)
+      return response unless hits.positive?
+      # Any number of shared keywords is worth a single point
+      response[:score] += 1
+      response[:notes] << 'keywords matched'
+      response
+    end
+    # Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
+    def _text_match?(type: 'title', text:, response:, logger: nil)
+      # Nothing to compare unless there is usable incoming text and the DMP has a value for :type
+      return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty?
+      dmp_text = @details_hash[type.to_sym]
+      return response if dmp_text.nil?
+      scorer = Text::WhiteSimilarity.new
+      incoming_text = _cleanse_text(text:)
+      similarity = scorer.similarity(dmp_text, incoming_text)
+      details = { "dmp_#{type}": dmp_text, "incoming_#{type}": incoming_text, nlp_score: similarity }
+      @logger&.debug(message: "Text::WhiteSimilarity score", details:)
+      # Below 0.5 is not counted; 0.75 and above is worth 5 points, anything in between 2
+      return response if similarity < 0.5
+      bonus = similarity >= 0.75 ? 5 : 2
+      response[:score] += bonus
+      response[:notes] << "#{type}s are similar"
+      response
+    end
+    # Change the incoming text to lower case, collapse whitespace and remove STOP_WORDS
+    def _cleanse_text(text:)
+      # Anything other than a String yields nil
+      return nil unless text.is_a?(String)
+      # Splitting on whitespace and re-joining with single spaces also collapses runs of whitespace
+      kept_words = text.downcase.split - STOP_WORDS
+      kept_words.join(' ').strip
+    end
+    # Do an intersection of the 2 arrays and return the number of matches
+    def _compare_arrays(array_a: [], array_b: [])
+      # Anything that is not a pair of Arrays counts as zero matches
+      both_arrays = [array_a, array_b].all? { |candidate| candidate.is_a?(Array) }
+      both_arrays ? (array_a & array_b).length : 0
+    end
+    # TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
+    # Maps a funder's ROR URL (as a Symbol key) to its Crossref funder DOI. Every key must be the
+    # full lower case "https://ror.org/..." URL, because lookups are made with the funder's ROR URL.
+    ROR_FUNDREF_ID_CROSSWALK = {
+      # NIH ID Crosswalk
+      "https://ror.org/01cwqze88": "https://doi.org/10.13039/100000002",
+      "https://ror.org/04mhx6838": "https://doi.org/10.13039/100000055",
+      "https://ror.org/012pb6c26": "https://doi.org/10.13039/100000050",
+      "https://ror.org/03wkg3b53": "https://doi.org/10.13039/100000053",
+      "https://ror.org/0060t0j89": "https://doi.org/10.13039/100000092",
+      "https://ror.org/00372qc85": "https://doi.org/10.13039/100000070",
+      "https://ror.org/00190t495": "https://doi.org/10.13039/100008460",
+      "https://ror.org/00j4k1h63": "https://doi.org/10.13039/100000066",
+      "https://ror.org/01y3zfr79": "https://doi.org/10.13039/100000056",
+      "https://ror.org/04q48ey07": "https://doi.org/10.13039/100000057",
+      "https://ror.org/0493hgw16": "https://doi.org/10.13039/100006545",
+      "https://ror.org/04vfsmv21": "https://doi.org/10.13039/100000098",
+      "https://ror.org/03jh5a977": "https://doi.org/10.13039/100000093",
+      "https://ror.org/04xeg9z08": "https://doi.org/10.13039/100000025",
+      "https://ror.org/01s5ya894": "https://doi.org/10.13039/100000065",
+      "https://ror.org/02meqm098": "https://doi.org/10.13039/100000002",
+      "https://ror.org/049v75w11": "https://doi.org/10.13039/100000049",
+      "https://ror.org/004a2wv92": "https://doi.org/10.13039/100000072",
+      "https://ror.org/00adh9b73": "https://doi.org/10.13039/100000062",
+      "https://ror.org/043z4tv69": "https://doi.org/10.13039/100000060",
+      "https://ror.org/00x19de83": "https://doi.org/10.13039/100000002",
+      "https://ror.org/02jzrsm59": "https://doi.org/10.13039/100000027",
+      "https://ror.org/006zn3t30": "https://doi.org/10.13039/100000069",
+      "https://ror.org/04byxyr05": "https://doi.org/10.13039/100000071",
+      "https://ror.org/04pw6fb54": "https://doi.org/10.13039/100006108",
+      "https://ror.org/05aq6yn88": "https://doi.org/10.13039/100006955",
+      "https://ror.org/02xey9a22": "https://doi.org/10.13039/100000061",
+      "https://ror.org/00fj8a872": "https://doi.org/10.13039/100000052",
+      "https://ror.org/01wtjyf13": "https://doi.org/10.13039/100000063",
+      "https://ror.org/04r5s4b52": "https://doi.org/10.13039/100005440",
+      "https://ror.org/046zezr58": "https://doi.org/10.13039/100006085",
+      "https://ror.org/02e3wq066": "https://doi.org/10.13039/100006086",
+      "https://ror.org/031gy6182": "https://doi.org/10.13039/100000002",
+      "https://ror.org/054j5yq82": "https://doi.org/10.13039/100000002",
+      "https://ror.org/02yrzyf97": "https://doi.org/10.13039/100000002",
+      # NSF ID Crosswalk
+      "https://ror.org/021nxhr62": "https://doi.org/10.13039/100000001",
+      "https://ror.org/04aqat463": "https://doi.org/10.13039/100000001",
+      "https://ror.org/01rcfpa16": "https://doi.org/10.13039/100005441",
+      "https://ror.org/014eweh95": "https://doi.org/10.13039/100005445",
+      "https://ror.org/001xhss06": "https://doi.org/10.13039/100000076",
+      "https://ror.org/04qn9mx93": "https://doi.org/10.13039/100000153",
+      "https://ror.org/03g87he71": "https://doi.org/10.13039/100000155",
+      "https://ror.org/01tnvpc68": "https://doi.org/10.13039/100000156",
+      "https://ror.org/01rvays47": "https://doi.org/10.13039/100000154",
+      "https://ror.org/002jdaq33": "https://doi.org/10.13039/100000152",
+      "https://ror.org/025kzpk63": "https://doi.org/10.13039/100000083",
+      "https://ror.org/04nh1dc89": "https://doi.org/10.13039/100007523",
+      "https://ror.org/01mng8331": "https://doi.org/10.13039/100000143",
+      "https://ror.org/02rdzmk74": "https://doi.org/10.13039/100000144",
+      "https://ror.org/053a2cp42": "https://doi.org/10.13039/100000145",
+      "https://ror.org/014bj5w56": "https://doi.org/10.13039/100000081",
+      "https://ror.org/00whkrf32": "https://doi.org/10.13039/100000082",
+      "https://ror.org/05s7cqk18": "https://doi.org/10.13039/100000173",
+      "https://ror.org/02kd4km72": "https://doi.org/10.13039/100000172",
+      "https://ror.org/03mamvh39": "https://doi.org/10.13039/100000171",
+      "https://ror.org/00b6sbb32": "https://doi.org/10.13039/100000084",
+      "https://ror.org/0471zv972": "https://doi.org/10.13039/100000146",
+      "https://ror.org/028yd4c30": "https://doi.org/10.13039/100000147",
+      "https://ror.org/01krpsy48": "https://doi.org/10.13039/100000148",
+      "https://ror.org/050rnw378": "https://doi.org/10.13039/100000149",
+      "https://ror.org/0388pet74": "https://doi.org/10.13039/100000150",
+      "https://ror.org/03xyg3m20": "https://doi.org/10.13039/100000151",
+      "https://ror.org/05p847d66": "https://doi.org/10.13039/100000085",
+      "https://ror.org/037gd6g64": "https://doi.org/10.13039/100000159",
+      "https://ror.org/05v01mk25": "https://doi.org/10.13039/100000160",
+      "https://ror.org/05wqqhv83": "https://doi.org/10.13039/100000141",
+      "https://ror.org/05nwjp114": "https://doi.org/10.13039/100007352",
+      "https://ror.org/05fnzca26": "https://doi.org/10.13039/100000162",
+      "https://ror.org/02trddg58": "https://doi.org/10.13039/100000163",
+      "https://ror.org/029b7h395": "https://doi.org/10.13039/100000086",
+      "https://ror.org/04mg8wm74": "https://doi.org/10.13039/100000164",
+      "https://ror.org/01ar8dr59": "https://doi.org/10.13039/100000165",
+      "https://ror.org/01pc7k308": "https://doi.org/10.13039/100000078",
+      "https://ror.org/051fftw81": "https://doi.org/10.13039/100000121",
+      "https://ror.org/04ap5x931": "https://doi.org/10.13039/100000166",
+      "https://ror.org/00apvva27": "https://doi.org/10.13039/100005716",
+      "https://ror.org/04nseet23": "https://doi.org/10.13039/100000179",
+      "https://ror.org/04k9mqs78": "https://doi.org/10.13039/100000106",
+      "https://ror.org/01k638r21": "https://doi.org/10.13039/100000089",
+      "https://ror.org/01gmp5538": "https://doi.org/10.13039/100005447",
+      "https://ror.org/01vnjbg30": "https://doi.org/10.13039/100005449",
+      "https://ror.org/03h7mcc28": "https://doi.org/10.13039/100000088",
+      "https://ror.org/05wgkzg12": "https://doi.org/10.13039/100000169",
+      "https://ror.org/0445wmv88": "https://doi.org/10.13039/100000170",
+      "https://ror.org/02dz2hb46": "https://doi.org/10.13039/100000077",
+      "https://ror.org/034m1ez10": "https://doi.org/10.13039/100000107",
+      "https://ror.org/02a65dj82": "https://doi.org/10.13039/100005717",
+      "https://ror.org/020fhsn68": "https://doi.org/10.13039/100000001",
+      "https://ror.org/03z9hh605": "https://doi.org/10.13039/100000174",
+      "https://ror.org/04ya3kq71": "https://doi.org/10.13039/100007521",
+      "https://ror.org/04evh7y43": "https://doi.org/10.13039/100005443",
+      "https://ror.org/04h67aa53": "https://doi.org/10.13039/100000177",
+      "https://ror.org/025dabr11": "https://doi.org/10.13039/100005446",
+      "https://ror.org/04vw0kz07": "https://doi.org/10.13039/100005448",
+      "https://ror.org/054ydxh33": "https://doi.org/10.13039/100005554",
+      "https://ror.org/01sharn77": "https://doi.org/10.13039/100006091",
+      "https://ror.org/02ch5q898": "https://doi.org/10.13039/100000001",
+      # NASA ID Crosswalk
+      "https://ror.org/0171mag52": "https://doi.org/10.13039/100006198",
+      "https://ror.org/027k65916": "https://doi.org/10.13039/100006196",
+      "https://ror.org/027ka1x80": "https://doi.org/10.13039/100000104",
+      "https://ror.org/02acart68": "https://doi.org/10.13039/100006195",
+      "https://ror.org/059fqnc42": "https://doi.org/10.13039/100006193",
+      "https://ror.org/01cyfxe35": "https://doi.org/10.13039/100016595",
+      "https://ror.org/04xx4z452": "https://doi.org/10.13039/100006203",
+      "https://ror.org/0399mhs52": "https://doi.org/10.13039/100006199",
+      "https://ror.org/02epydz83": "https://doi.org/10.13039/100006197",
+      "https://ror.org/03j9e2j92": "https://doi.org/10.13039/100006205",
+      "https://ror.org/02s42x260": "https://doi.org/10.13039/100000104",
+      "https://ror.org/01p7gwa14": "https://doi.org/10.13039/100000104",
+      "https://ror.org/01qxmdg18": "https://doi.org/10.13039/100000104",
+      "https://ror.org/006ndaj41": "https://doi.org/10.13039/100000104",
+      "https://ror.org/03em45j53": "https://doi.org/10.13039/100007346",
+      "https://ror.org/045t78n53": "https://doi.org/10.13039/100000104",
+      "https://ror.org/00r57r863": "https://doi.org/10.13039/100000104",
+      "https://ror.org/0401vze59": "https://doi.org/10.13039/100007726",
+      "https://ror.org/04hccab49": "https://doi.org/10.13039/100000104",
+      "https://ror.org/04437j066": "https://doi.org/10.13039/100000104",
+      "https://ror.org/028b18z22": "https://doi.org/10.13039/100000104",
+      "https://ror.org/00ryjtt64": "https://doi.org/10.13039/100000104",
+      # DOE ID Crosswalk
+      "https://ror.org/01bj3aw27": "https://doi.org/10.13039/100000015",
+      "https://ror.org/03q1rgc19": "https://doi.org/10.13039/100006133",
+      "https://ror.org/02xznz413": "https://doi.org/10.13039/100006134",
+      "https://ror.org/03sk1we31": "https://doi.org/10.13039/100006168",
+      "https://ror.org/00f93gc02": "https://doi.org/10.13039/100006177",
+      "https://ror.org/05tj7dm33": "https://doi.org/10.13039/100006147",
+      "https://ror.org/0012c7r22": "https://doi.org/10.13039/100006192",
+      "https://ror.org/00mmn6b08": "https://doi.org/10.13039/100006132",
+      "https://ror.org/03ery9d53": "https://doi.org/10.13039/100006120",
+      "https://ror.org/033jmdj81": "https://doi.org/10.13039/100000015",
+      "https://ror.org/03rd4h240": "https://doi.org/10.13039/100006130",
+      "https://ror.org/0054t4769": "https://doi.org/10.13039/100006200",
+      "https://ror.org/03eecgp81": "https://doi.org/10.13039/100006174",
+      "https://ror.org/00heb4d89": "https://doi.org/10.13039/100006135",
+      "https://ror.org/05ek3m339": "https://doi.org/10.13039/100006150",
+      "https://ror.org/00km40770": "https://doi.org/10.13039/100006138",
+      "https://ror.org/02ah1da87": "https://doi.org/10.13039/100006137",
+      "https://ror.org/05hsv7e61": "https://doi.org/10.13039/100000015",
+      "https://ror.org/01c9ay627": "https://doi.org/10.13039/100006165",
+      "https://ror.org/04z2gev20": "https://doi.org/10.13039/100006183",
+      "https://ror.org/02z1qvq09": "https://doi.org/10.13039/100006144",
+      "https://ror.org/03jf3w726": "https://doi.org/10.13039/100006186",
+      "https://ror.org/04848jz84": "https://doi.org/10.13039/100006142",
+      "https://ror.org/04s778r16": "https://doi.org/10.13039/100006171",
+      "https://ror.org/04nnxen11": "https://doi.org/10.13039/100000015",
+      "https://ror.org/05csy5p27": "https://doi.org/10.13039/100010268",
+      "https://ror.org/05efnac71": "https://doi.org/10.13039/100000015"
+    }.freeze
+  end
+end

data/lib/uc3-dmp-id/creator.rb CHANGED Viewed

@@ -23,23 +23,23 @@ module Uc3DmpId
         raise CreatorError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil?
         # Validate the incoming JSON first
-        json = Helper.parse_json(json: json)
-        errs = Validator.validate(mode: 'author', json: json)
+        json = Helper.parse_json(json:)
+        errs = Validator.validate(mode: 'author', json:)
         raise CreatorError, errs.join(', ') if errs.is_a?(Array) && errs.any? && errs.first != Validator::MSG_VALID_JSON
         # Try to find it by the :dmp_id first and Fail if found
         dmp_id = Helper.dmp_id_to_pk(json: json.fetch('dmp', {})['dmp_id'])
-        result = Finder.exists?(p_key: dmp_id, logger: logger) unless dmp_id.nil?
+        result = Finder.exists?(p_key: dmp_id, logger:) unless dmp_id.nil?
         raise CreatorError, Helper::MSG_DMP_EXISTS if result.is_a?(Hash)
         # raise CreatorError, Uc3DmpId::MSG_DMP_EXISTS unless json['PK'].nil?
         client = Uc3DmpDynamo::Client.new
-        p_key = _preregister_dmp_id(client: client, provenance: provenance, json: json, logger: logger)
+        p_key = _preregister_dmp_id(client:, provenance:, json:, logger:)
         raise CreatorError, MSG_UNABLE_TO_MINT if p_key.nil?
         # Add the DMPHub specific attributes and then save
-        annotated = Helper.annotate_dmp_json(provenance: provenance, p_key: p_key, json: json['dmp'])
+        annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: json['dmp'])
         logger.info(message: "Creating DMP ID: #{p_key}") if logger.respond_to?(:debug)
         # Set the :created and :modified timestamps
@@ -48,10 +48,10 @@ module Uc3DmpId
         annotated['modified'] = now
         # Create the item
-        resp = client.put_item(json: annotated, logger: logger)
+        resp = client.put_item(json: annotated, logger:)
         raise CreatorError, Helper::MSG_DMP_NO_DMP_ID if resp.nil?
-        _post_process(json: annotated, logger: logger)
+        _post_process(json: annotated, logger:)
         Helper.cleanse_dmp_json(json: JSON.parse({ dmp: annotated }.to_json))
       end
       # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
@@ -75,7 +75,7 @@ module Uc3DmpId
         counter = 0
         while dmp_id == '' && counter <= 10
           prefix = "#{ENV.fetch('DMP_ID_SHOULDER', nil)}#{SecureRandom.hex(2).upcase}#{SecureRandom.hex(2)}"
-          dmp_id = prefix unless Finder.exists?(client: client, p_key: prefix)
+          dmp_id = prefix unless Finder.exists?(client:, p_key: prefix)
           counter += 1
         end
         # Something went wrong and it was unable to identify a unique id
@@ -94,7 +94,7 @@ module Uc3DmpId
         # Publish the change to the EventBridge
         publisher = Uc3DmpEventBridge::Publisher.new
-        publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger: logger)
+        publisher.publish(source: 'DmpCreator', event_type: 'EZID update', dmp: json, logger:)
         # Determine if there are any related identifiers that we should try to fetch a citation for
         citable_identifiers = Helper.citable_related_identifiers(dmp: json)
@@ -108,7 +108,7 @@ module Uc3DmpId
         }
         logger.debug(message: 'Fetching citations', details: citable_identifiers) if logger.respond_to?(:debug)
         publisher.publish(source: 'DmpCreator', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
-                          logger: logger)
+                          logger:)
         true
       end
     end

data/lib/uc3-dmp-id/deleter.rb CHANGED Viewed

@@ -19,7 +19,7 @@ module Uc3DmpId
         # Fetch the latest version of the DMP ID by its PK
         client = Uc3DmpDynamo::Client.new
-        dmp = Finder.by_pk(p_key: p_key, client: client, cleanse: false, logger: logger)
+        dmp = Finder.by_pk(p_key:, client:, cleanse: false, logger:)
         raise DeleterError, Helper::MSG_DMP_NOT_FOUND unless dmp.is_a?(Hash) && !dmp['dmp'].nil?
         # Only allow this if the provenance is the owner of the DMP!
@@ -38,16 +38,16 @@ module Uc3DmpId
         dmp['dmp']['dmphub_tombstoned_at'] = now
         # Create the Tombstone version
-        resp = client.put_item(json: dmp['dmp'], logger: logger)
+        resp = client.put_item(json: dmp['dmp'], logger:)
         raise DeleterError, Helper::MSG_DMP_NO_TOMBSTONE if resp.nil?
         # Delete the Latest version
-        client.delete_item(p_key: p_key, s_key: Helper::DMP_LATEST_VERSION, logger: logger)
+        client.delete_item(p_key:, s_key: Helper::DMP_LATEST_VERSION, logger:)
         # TODO: We should do a check here to see if it was successful!
         # Notify EZID about the removal
-        _post_process(json: dmp, logger: logger)
+        _post_process(json: dmp, logger:)
         # Return the tombstoned record
         Helper.cleanse_dmp_json(json: dmp)
@@ -66,7 +66,7 @@ module Uc3DmpId
         # Publish the change to the EventBridge
         publisher = Uc3DmpEventBridge::Publisher.new
-        publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger: logger)
+        publisher.publish(source: 'DmpDeleter', event_type: 'EZID update', dmp: json, logger:)
         true
       end
     end

data/lib/uc3-dmp-id/finder.rb CHANGED Viewed

@@ -20,15 +20,15 @@ module Uc3DmpId
       # TODO: Replace this with ElasticSearch
       def search_dmps(args:, logger: nil)
         client = Uc3DmpDynamo::Client.new
-        return _by_owner(owner_org: args['owner_orcid'], client: client, logger: logger) unless args['owner_orcid'].nil?
+        return _by_owner(owner_org: args['owner_orcid'], client:, logger:) unless args['owner_orcid'].nil?
         unless args['owner_org_ror'].nil?
-          return _by_owner_org(owner_org: args['owner_org_ror'], client: client,
-                               logger: logger)
+          return _by_owner_org(owner_org: args['owner_org_ror'], client:,
+                               logger:)
         end
         unless args['modification_day'].nil?
-          return _by_mod_day(day: args['modification_day'], client: client,
-                             logger: logger)
+          return _by_mod_day(day: args['modification_day'], client:,
+                             logger:)
         end
         []
@@ -38,20 +38,20 @@ module Uc3DmpId
       # -------------------------------------------------------------------------
       # rubocop:disable Metrics/AbcSize
       def by_json(json:, client: nil, cleanse: true, logger: nil)
-        json = Helper.parse_json(json: json)&.fetch('dmp', {})
+        json = Helper.parse_json(json:)&.fetch('dmp', {})
         raise FinderError, MSG_INVALID_ARGS if !json.is_a?(Hash) || (json['PK'].nil? && json['dmp_id'].nil?)
         p_key = json['PK']
         # Translate the incoming :dmp_id into a PK
         p_key = Helper.dmp_id_to_pk(json: json.fetch('dmp_id', {})) if p_key.nil?
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
+        client = Uc3DmpDynamo::Client.new if client.nil?
         # TODO: Re-enable this once we figure out Dynamo indexes
         # find_by_dmphub_provenance_id -> if no PK and no dmp_id result
         # return by_provenance_identifier(json: json, client: client, logger: logger) if p_key.nil?
         # find_by_PK
-        p_key.nil? ? nil : by_pk(p_key: p_key, s_key: json['SK'], client: client, cleanse: cleanse, logger: logger)
+        p_key.nil? ? nil : by_pk(p_key:, s_key: json['SK'], client:, cleanse:, logger:)
       end
       # rubocop:enable Metrics/AbcSize
@@ -62,20 +62,20 @@ module Uc3DmpId
         raise FinderError, MSG_MISSING_PK if p_key.nil?
         s_key = Helper::DMP_LATEST_VERSION if s_key.nil? || s_key.to_s.strip.empty?
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
+        client = Uc3DmpDynamo::Client.new if client.nil?
         resp = client.get_item(
           key: {
-            PK: Helper.append_pk_prefix(p_key: p_key),
-            SK: Helper.append_sk_prefix(s_key: s_key)
+            PK: Helper.append_pk_prefix(p_key:),
+            SK: Helper.append_sk_prefix(s_key:)
           },
-          logger: logger
+          logger:
         )
         return resp unless resp.is_a?(Hash)
         dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
         return nil if dmp['dmp']['PK'].nil?
-        dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp: dmp, client: client, logger: logger) if cleanse
+        dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp:, client:, logger:) if cleanse
         cleanse ? Helper.cleanse_dmp_json(json: dmp) : dmp
       end
       # rubocop:enable Metrics/AbcSize
@@ -85,13 +85,13 @@ module Uc3DmpId
       def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil)
         raise FinderError, MSG_MISSING_PK if p_key.nil?
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
+        client = Uc3DmpDynamo::Client.new if client.nil?
         client.pk_exists?(
           key: {
-            PK: Helper.append_pk_prefix(p_key: p_key),
-            SK: Helper.append_sk_prefix(s_key: s_key)
+            PK: Helper.append_pk_prefix(p_key:),
+            SK: Helper.append_sk_prefix(s_key:)
           },
-          logger: logger
+          logger:
         )
       end
@@ -115,15 +115,15 @@ module Uc3DmpId
           filter_expression: 'SK = :version',
           expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
         }
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
-        resp = client.query(args: args, logger: logger)
+        client = Uc3DmpDynamo::Client.new if client.nil?
+        resp = client.query(args:, logger:)
         return resp unless resp.is_a?(Hash)
         dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
         return nil if dmp['dmp']['PK'].nil?
         # If we got a hit, fetch the DMP and return it.
-        by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse: cleanse, logger: logger)
+        by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], cleanse:, logger:)
       end
       # rubocop:enable Metrics/AbcSize
@@ -149,8 +149,8 @@ module Uc3DmpId
           expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
         }
         logger.info(message: "Querying _by_owner with #{args}") if logger.respond_to?(:info)
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
-        _process_search_response(response: client.query(args: args, logger: logger))
+        client = Uc3DmpDynamo::Client.new if client.nil?
+        _process_search_response(response: client.query(args:, logger:))
       end
       # Fetch the DMP IDs for the specified organization/institution (the org is the :dmphub_owner_org
@@ -174,8 +174,8 @@ module Uc3DmpId
           expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
         }
         logger.info(message: "Querying _by_owner_org with #{args}") if logger.respond_to?(:info)
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
-        _process_search_response(response: client.query(args: args, logger: logger))
+        client = Uc3DmpDynamo::Client.new if client.nil?
+        _process_search_response(response: client.query(args:, logger:))
       end
       # Fetch the DMP IDs modified on the specified date (the date is the :dmphub_modification_day on the DMP ID record)
@@ -195,8 +195,8 @@ module Uc3DmpId
           expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
         }
         logger.info(message: "Querying _by_mod_day with #{args}") if logger.respond_to?(:info)
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
-        _process_search_response(response: client.query(args: args, logger: logger))
+        client = Uc3DmpDynamo::Client.new if client.nil?
+        _process_search_response(response: client.query(args:, logger:))
       end
       # Transform the search results so that we do not include any of the DMPHub specific metadata

data/lib/uc3-dmp-id/helper.rb CHANGED Viewed

@@ -7,17 +7,17 @@ module Uc3DmpId
   # Helper functions for working with DMP IDs
   class Helper
     PK_DMP_PREFIX = 'DMP#'
-    PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}.freeze
+    PK_DMP_REGEX = %r{DMP#[a-zA-Z0-9\-_.]+/[a-zA-Z0-9]{2}\.[a-zA-Z0-9./:]+}
     SK_DMP_PREFIX = 'VERSION#'
-    SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/.freeze
+    SK_DMP_REGEX = /VERSION#\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/
     # TODO: Verify the assumed structure of the DOI is valid
-    DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}.freeze
-    URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}.freeze
+    DOI_REGEX = %r{[0-9]{2}\.[0-9]{4,}/[a-zA-Z0-9/_.-]+}
+    URL_REGEX = %r{(https?://)?([a-zA-Z0-9\-_]\.)+[a-zA-Z0-9\-_]{2,3}(:[0-9]+)?/?}
-    DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest"
-    DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone"
+    DMP_LATEST_VERSION = "#{SK_DMP_PREFIX}latest".freeze
+    DMP_TOMBSTONE_VERSION = "#{SK_DMP_PREFIX}tombstone".freeze
     DEFAULT_API_URL = 'https://api.dmphub.uc3dev.cdlib.net/dmps/'
     DEFAULT_LANDING_PAGE_URL = 'https://dmphub.uc3dev.cdlib.net/dmps/'
@@ -41,7 +41,7 @@ module Uc3DmpId
       # Append the PK prefix for the object
       # -------------------------------------------------------------------------------------
       def append_pk_prefix(p_key:)
-        p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key: p_key)}" : nil
+        p_key.is_a?(String) ? "#{PK_DMP_PREFIX}#{remove_pk_prefix(p_key:)}" : nil
       end
       # Strip off the PK prefix
@@ -53,7 +53,7 @@ module Uc3DmpId
       # Append the SK prefix for the object
       # -------------------------------------------------------------------------------------
       def append_sk_prefix(s_key:)
-        s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key: s_key)}" : nil
+        s_key.is_a?(String) ? "#{SK_DMP_PREFIX}#{remove_sk_prefix(s_key:)}" : nil
       end
       # Strip off the SK prefix
@@ -82,7 +82,7 @@ module Uc3DmpId
         return with_protocol ? value : value.gsub(%r{https?://}, '') if value.start_with?('http')
         dmp_id = dmp_id.gsub('doi:', '')
-        dmp_id = dmp_id.start_with?('/') ? dmp_id[1..dmp_id.length] : dmp_id
+        dmp_id = dmp_id[1..dmp_id.length] if dmp_id.start_with?('/')
         base_domain = with_protocol ? dmp_id_base_url : dmp_id_base_url.gsub(%r{https?://}, '')
         "#{base_domain}#{dmp_id}"
       end
@@ -95,7 +95,7 @@ module Uc3DmpId
         p_key = param if param.start_with?(dmp_id_base_url) || param.start_with?(base_domain)
         p_key = CGI.unescape(p_key.nil? ? param : p_key)
         p_key = format_dmp_id(value: p_key)
-        append_pk_prefix(p_key: p_key)
+        append_pk_prefix(p_key:)
       end
       # Append the :PK prefix to the :dmp_id
@@ -115,7 +115,7 @@ module Uc3DmpId
         {
           type: 'doi',
-          identifier: format_dmp_id(value: remove_pk_prefix(p_key: p_key), with_protocol: true)
+          identifier: format_dmp_id(value: remove_pk_prefix(p_key:), with_protocol: true)
         }
       end
@@ -180,7 +180,7 @@ module Uc3DmpId
       # Add DMPHub specific fields to the DMP ID JSON
       # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
       def annotate_dmp_json(provenance:, p_key:, json:)
-        json = parse_json(json: json)
+        json = parse_json(json:)
         bool_vals = [1, '1', true, 'true', 'yes']
         return json if provenance.nil? || p_key.nil? || !json.is_a?(Hash)
@@ -189,14 +189,14 @@ module Uc3DmpId
         return json if id != p_key && !json['PK'].nil?
         annotated = deep_copy_dmp(obj: json)
-        annotated['PK'] = json['PK'] || append_pk_prefix(p_key: p_key)
+        annotated['PK'] = json['PK'] || append_pk_prefix(p_key:)
         annotated['SK'] = DMP_LATEST_VERSION
         # Ensure that the :dmp_id matches the :PK
         annotated['dmp_id'] = JSON.parse(pk_to_dmp_id(p_key: remove_pk_prefix(p_key: annotated['PK'])).to_json)
-        owner_id = extract_owner_id(json: json)
-        owner_org = extract_owner_org(json: json)
+        owner_id = extract_owner_id(json:)
+        owner_org = extract_owner_org(json:)
         # Set the :dmproadmap_featured flag appropriately
         featured = annotated.fetch('dmproadmap_featured', 'no')
@@ -219,7 +219,7 @@ module Uc3DmpId
           annotated['dmphub_provenance_identifier'] = annotated.fetch('dmproadmap_links', {})['get']
         else
           annotated['dmphub_provenance_identifier'] = format_provenance_id(
-            provenance: provenance, value: json.fetch('dmp_id', {})['identifier']
+            provenance:, value: json.fetch('dmp_id', {})['identifier']
           )
         end
         annotated

data/lib/uc3-dmp-id/updater.rb CHANGED Viewed

@@ -16,18 +16,18 @@ module Uc3DmpId
       def update(provenance:, p_key:, json: {}, note: nil, logger: nil)
         raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
-        mods = Helper.parse_json(json: json).fetch('dmp', {})
-        p_key = Helper.append_pk_prefix(p_key: p_key)
+        mods = Helper.parse_json(json:).fetch('dmp', {})
+        p_key = Helper.append_pk_prefix(p_key:)
         logger.debug(message: "Incoming modifications for PK #{p_key}", details: mods) if logger.respond_to?(:debug)
         # Fetch the latest version of the DMP ID
         client = Uc3DmpDynamo::Client.new
-        latest_version = Finder.by_pk(p_key: p_key, client: client, logger: logger, cleanse: false)
-        latest_version = latest_version['dmp'].nil? ? latest_version : latest_version.fetch('dmp', {})
+        latest_version = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
+        latest_version = latest_version.fetch('dmp', {}) unless latest_version['dmp'].nil?
         logger.debug(message: "Latest version for PK #{p_key}", details: latest_version) if logger.respond_to?(:debug)
         # Verify that the DMP ID is updateable with the info passed in
-        errs = _updateable?(provenance: provenance, p_key: p_key, latest_version: latest_version['dmp'],
+        errs = _updateable?(provenance:, p_key:, latest_version: latest_version['dmp'],
                             mods: mods['dmp'])
         logger.error(message: errs.join(', ')) if logger.respond_to?(:error) && errs.is_a?(Array) && errs.any?
         raise UpdaterError, errs if errs.is_a?(Array) && errs.any?
@@ -37,35 +37,35 @@ module Uc3DmpId
         # Version the DMP ID record (if applicable).
         owner = latest_version['dmphub_provenance_id']
         updater = provenance['PK']
-        version = Versioner.generate_version(client: client, latest_version: latest_version, owner: owner,
-                                             updater: updater, logger: logger)
+        version = Versioner.generate_version(client:, latest_version:, owner:,
+                                             updater:, logger:)
         raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if version.nil?
         # Remove the version info because we don't want to save it on the record
         version.delete('dmphub_versions')
         # Splice the assertions
-        version = _process_modifications(owner: owner, updater: updater, version: version, mods: mods, note: note,
-                                         logger: logger)
+        version = _process_modifications(owner:, updater:, version:, mods:, note:,
+                                         logger:)
         # Set the :modified timestamps
         now = Time.now.utc
         version['modified'] = now.iso8601
         version['dmphub_modification_day'] = now.strftime('%Y-%m-%d')
         # Save the changes
-        resp = client.put_item(json: version, logger: logger)
+        resp = client.put_item(json: version, logger:)
         raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
         # Send the updates to EZID
-        _post_process(provenance: provenance, json: version, logger: logger)
+        _post_process(provenance:, json: version, logger:)
         # Return the new version record
         logger.info(message: "Updated DMP ID: #{p_key}") if logger.respond_to?(:debug)
         # Append the :dmphub_versions Array
         json = JSON.parse({ dmp: version }.to_json)
-        json = Versioner.append_versions(p_key: p_key, dmp: json, client: client, logger: logger)
-        Helper.cleanse_dmp_json(json: json)
+        json = Versioner.append_versions(p_key:, dmp: json, client:, logger:)
+        Helper.cleanse_dmp_json(json:)
       end
       # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
       # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -77,21 +77,21 @@ module Uc3DmpId
         raise UpdaterError, Helper::MSG_DMP_INVALID_DMP_ID unless p_key.is_a?(String) && !p_key.strip.empty?
         # fetch the existing latest version of the DMP ID
-        client = Uc3DmpDynamo::Client.new(logger: logger)
-        dmp = Finder.by_pk(p_key: p_key, client: client, logger: logger, cleanse: false)
+        client = Uc3DmpDynamo::Client.new(logger:)
+        dmp = Finder.by_pk(p_key:, client:, logger:, cleanse: false)
         logger.info(message: 'Existing latest record', details: dmp) if logger.respond_to?(:debug)
         raise UpdaterError, Helper::MSG_DMP_FORBIDDEN unless provenance.is_a?(Hash) && !provenance['PK'].nil? &&
                                                              provenance['PK'] == dmp['dmp']['dmphub_provenance_id']
         # Add the download URL for the PDF as a related identifier on the DMP ID record
-        annotated = Helper.annotate_dmp_json(provenance: provenance, p_key: p_key, json: dmp['dmp'])
+        annotated = Helper.annotate_dmp_json(provenance:, p_key:, json: dmp['dmp'])
         annotated['dmproadmap_related_identifiers'] = [] if annotated['dmproadmap_related_identifiers'].nil?
         annotated['dmproadmap_related_identifiers'] << JSON.parse({
           descriptor: 'is_metadata_for', work_type: 'output_management_plan', type: 'url', identifier: url
         }.to_json)
         # Save the changes without creating a new version!
-        resp = client.put_item(json: annotated, logger: logger)
+        resp = client.put_item(json: annotated, logger:)
         raise UpdaterError, Helper::MSG_DMP_UNABLE_TO_VERSION if resp.nil?
         logger.info(message: "Added DMP ID narrative for PK: #{p_key}, Narrative: #{url}") if logger.respond_to?(:debug)
@@ -111,8 +111,9 @@ module Uc3DmpId
         return [Helper::MSG_DMP_FORBIDDEN] unless provenance.is_a?(Hash) && !provenance['PK'].nil?
         # Verify that the JSON is for the same DMP in the PK
         return [Helper::MSG_DMP_FORBIDDEN] unless Helper.dmp_id_to_pk(json: mods.fetch('dmp_id', {})) == p_key
         # Bail out if the DMP ID could not be found or the PKs do not match for some reason
-        return [Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
+        [Helper::MSG_DMP_UNKNOWN] unless latest_version.is_a?(Hash) && latest_version['PK'] == p_key
       end
       # rubocop:enable Metrics/AbcSize
@@ -123,14 +124,14 @@ module Uc3DmpId
         updated = if owner == updater
                     # Splice together any assertions that may have been made while the user was editing the DMP ID
-                    Asserter.splice(latest_version: version, modified_version: mods, logger: logger)
+                    Asserter.splice(latest_version: version, modified_version: mods, logger:)
                   else
                     # Attach the incoming changes as an assertion to the DMP ID since the updater is NOT the owner
-                    Asserter.add(updater: updater, latest_version: version, modified_version: mods, note: note,
-                                 logger: logger)
+                    Asserter.add(updater:, latest_version: version, modified_version: mods, note:,
+                                 logger:)
                   end
-        _merge_versions(latest_version: version, mods: updated, logger: logger)
+        _merge_versions(latest_version: version, mods: updated, logger:)
       end
       # rubocop:enable Metrics/ParameterLists
@@ -172,7 +173,7 @@ module Uc3DmpId
           logger.debug(message: 'Sending event for EZID publication',
                        details: json)
         end
-        publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger: logger) if publishable
+        publisher.publish(source: 'DmpUpdater', event_type: 'EZID update', dmp: json, logger:) if publishable
         # Determine if there are any related identifiers that we should try to fetch a citation for
         citable_identifiers = Helper.citable_related_identifiers(dmp: json)
@@ -189,7 +190,7 @@ module Uc3DmpId
                        details: citable_identifiers)
         end
         publisher.publish(source: 'DmpUpdater', dmp: json, event_type: 'Citation Fetch', detail: citer_detail,
-                          logger: logger)
+                          logger:)
         true
       end
       # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

data/lib/uc3-dmp-id/validator.rb CHANGED Viewed

@@ -23,11 +23,11 @@ module Uc3DmpId
       # Validate the specified DMP's :json against the schema for the specified :mode
       # rubocop:disable Metrics/AbcSize
       def validate(mode:, json:)
-        json = Helper.parse_json(json: json)
+        json = Helper.parse_json(json:)
         return [MSG_EMPTY_JSON] if json.nil? || !VALIDATION_MODES.include?(mode)
         # Load the appropriate JSON schema for the mode
-        schema = _load_schema(mode: mode)
+        schema = _load_schema(mode:)
         return [MSG_NO_SCHEMA] if schema.nil?
         # Validate the JSON

data/lib/uc3-dmp-id/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Uc3DmpId
-  VERSION = '0.1.0'
+  VERSION = '0.1.2'
 end

data/lib/uc3-dmp-id/versioner.rb CHANGED Viewed

@@ -18,13 +18,13 @@ module Uc3DmpId
         args = {
           key_conditions: {
-            PK: { attribute_value_list: [Helper.append_pk_prefix(p_key: p_key)], comparison_operator: 'EQ' }
+            PK: { attribute_value_list: [Helper.append_pk_prefix(p_key:)], comparison_operator: 'EQ' }
           },
           projection_expression: 'modified',
           scan_index_forward: false
         }
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
-        client.query(args: args, logger: logger)
+        client = Uc3DmpDynamo::Client.new if client.nil?
+        client.query(args:, logger:)
       end
       # Generate a snapshot of the current latest version of the DMP ID using the existing :modified as
@@ -57,8 +57,8 @@ module Uc3DmpId
         prior['SK'] = "#{Helper::SK_DMP_PREFIX}#{latest_version['modified'] || Time.now.utc.iso8601}"
         # Create the prior version record ()
-        client = client.nil? ? Uc3DmpDynamo::Client.new : client
-        resp = client.put_item(json: prior, logger: logger)
+        client = Uc3DmpDynamo::Client.new if client.nil?
+        resp = client.put_item(json: prior, logger:)
         return nil if resp.nil?
         msg = "#{SOURCE} created version PK: #{prior['PK']} SK: #{prior['SK']}"
@@ -74,7 +74,7 @@ module Uc3DmpId
         json = Helper.parse_json(json: dmp)
         return json unless p_key.is_a?(String) && !p_key.strip.empty? && json.is_a?(Hash) && !json['dmp'].nil?
-        results = get_versions(p_key: p_key, client: client, logger: logger)
+        results = get_versions(p_key:, client:, logger:)
         return json unless results.length > 1
         # TODO: we may want to include milliseconds in the future if we get increased volume so that
@@ -82,7 +82,7 @@ module Uc3DmpId
         versions = results.map do |ver|
           next if ver['modified'].nil?
-          base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key: p_key)}"
+          base_url = "#{Helper.landing_page_url}#{Helper.remove_pk_prefix(p_key:)}"
           {
             timestamp: ver['modified'],
             url: dmp['dmp']['modified'] == ver['modified'] ? base_url : "#{base_url}?version=#{ver['modified']}"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: uc3-dmp-id
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.2
 platform: ruby
 authors:
 - Brian Riley
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-08-25 00:00:00.000000000 Z
+date: 2023-10-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: json
@@ -38,6 +38,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: text
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.3'
 - !ruby/object:Gem::Dependency
   name: uc3-dmp-dynamo
   requirement: !ruby/object:Gem::Requirement
@@ -76,6 +90,7 @@ files:
 - README.md
 - lib/uc3-dmp-id.rb
 - lib/uc3-dmp-id/asserter.rb
+- lib/uc3-dmp-id/comparator.rb
 - lib/uc3-dmp-id/creator.rb
 - lib/uc3-dmp-id/deleter.rb
 - lib/uc3-dmp-id/finder.rb
@@ -91,7 +106,7 @@ licenses:
 - MIT
 metadata:
   rubygems_mfa_required: 'false'
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -100,15 +115,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '2.7'
+      version: '3.2'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.6
-signing_key:
+rubygems_version: 3.4.10
+signing_key:
 specification_version: 4
 summary: DMPTool gem that provides support for DMP ID records
 test_files: []