RubyGems - uc3-dmp-id - Versions diffs - 0.1.1 → 0.1.3 - Mend

uc3-dmp-id 0.1.1 → 0.1.3

Files changed (5) hide show

checksums.yaml +4 -4
data/lib/uc3-dmp-id/asserter.rb +8 -2
data/lib/uc3-dmp-id/comparator.rb +531 -0
data/lib/uc3-dmp-id/version.rb +1 -1
metadata +17 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2d78c65551789e0b3e96488bb2c3689999a301c76a8434f945f7b855dd62d57d
-  data.tar.gz: 73fe134b92ebe24bf9595610f3ef9e7f8dc744a3494152d9e7a6e7e73675b075
+  metadata.gz: 10840ba39949ec387dd3016a717d75cf3ba5d259fe164fb54e9c2d436e74a24b
+  data.tar.gz: 398c7de1d549a738bfc574cdf56c71b1bff8400402aee5bc8b356fdcb9f01ecb
 SHA512:
-  metadata.gz: 9fb32754f0e36c6292860e35327a9af48b566bae061eb0d08a8e4f94b9976144acae3b948837f7b1835e918d20d06971085091db81d5483ba494c51fd5279d68
-  data.tar.gz: 889bd0b3cbe91b95a534930b267625b38fe988381cf7b76e6fd73e53d4d73796b5abe3af8bd81ae322b1c7f1e17c9b9f3ea0f59ba00f3b9deeaecb3a15a131ed
+  metadata.gz: 793eeaf24f53e8a77e36596d91c56501f125f8d9272bd6b432dd05bc2d170d907d25fe213c276cf614ed8118884ac0be796cfa53e3d02c88ed91bf8660ab2f53
+  data.tar.gz: 8214ca8f136883170f45b025bc80e73e047ad2a6172837e08ab8f1b3dc48049d3038a95f4df278958c7b3d6a5b5b938a988ec88c3bb556bb0791c998c2603200

data/lib/uc3-dmp-id/asserter.rb CHANGED Viewed

@@ -168,7 +168,8 @@ module Uc3DmpId
       #    "id": "ABCD1234",
       #    "provenance": "dmphub",
       #    "timestamp": "2023-07-07T14:50:23+00:00",
-      #    "note": "data received from the NIH API",
+      #    "note": "Data received from OpenAlex, matched by PI names and title keywords.",
+      #    "confidence": "Med",
       #    "dmproadmap_related_identifiers": {
       #      "work_type": "article",
       #      "descriptor": "is_cited_by",
@@ -183,7 +184,8 @@ module Uc3DmpId
       #    "id": "ABCD1234",
       #    "provenance": "dmphub",
       #    "timestamp": "2023-07-07T14:50:23+00:00",
-      #    "note": "data received from the NIH API",
+      #    "note": "Data received from the NIH API, matched by the opportunity number.",
+      #    "confidence": "High",
       #    "funding": {
       #      "funding_status": "granted",
       #      "grant_id": {
@@ -206,5 +208,9 @@ module Uc3DmpId
         JSON.parse(assertion.to_json)
       end
     end
+    def _score_related_work(latest_version:, work:); end
+    def _score_funding(latest_version:, funding:); end
   end
 end

data/lib/uc3-dmp-id/comparator.rb ADDED Viewed

@@ -0,0 +1,531 @@
+# frozen_string_literal: true
+require 'text'
+# rubocop:disable Metrics/ClassLength
+module Uc3DmpId
+  class ComparatorError < StandardError; end
+  # Class that compares incoming data from an external source to the DMP
+  # It determines if they are likely related and applies a confidence rating
+  class Comparator
+    MSG_MISSING_AUGMENTER = 'No Augmenter specified!'
+    MSG_MISSING_DMP = 'No DMP or the DMP did not contain enough information to use.'
+    STOP_WORDS = %w[a an and if of or the then they].freeze
+    # See the bottom of this file for a hard-coded crosswalk between Crossref funder ids and ROR ids
+    # Some APIs do not support ROR fully for funder ids, so we need to be able to reference both
+    attr_accessor :augmenter, :dmp, :details_hash, :logger
+    # rubocop:disable Metrics/AbcSize
+    def initialize(**args)
+      @logger = args[:logger]
+      @details_hash = {}
+      @augmenter = args[:augmenter]
+      raise ComparatorError, MSG_MISSING_AUGMENTER if @augmenter.nil? ||
+                                                      !@augmenter['PK']&.start_with?('AUGMENTERS#')
+      @dmp = args.fetch(:dmp, {})['dmp'].nil? ? args[:dmp] : args.fetch(:dmp, {})['dmp']
+      _extract_dmp_details(dmp:)
+      raise ComparatorError, MSG_MISSING_DMP if @details_hash.empty?
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Compare the incoming hash with the DMP details that were gathered during initialization.
+    #
+    # The Hash should contain:
+    #  {
+    #    title: "Example research project",
+    #    abstract: "Lorem ipsum psuedo abstract",
+    #    keywords: ["foo", "bar"],z
+    #    people: [
+    #      {
+    #        id: "https://orcid.org/blah",
+    #        last_name: "doe",
+    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
+    #      }
+    #    ],
+    #    fundings: [
+    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
+    #    ],
+    #    repositories: [
+    #      { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
+    #    ]
+    #  }
+    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def compare(hash:)
+      response = { confidence: 'None', score: 0, notes: [], source: @augmenter['name'] }
+      return response unless hash.is_a?(Hash) && !hash['title'].nil?
+      # Compare the grant ids. If we have a match return the response immediately since that is
+      # a very positive match!
+      response = _grants_match?(array: hash['fundings'], response:)
+      return response if response[:confidence] != 'None'
+      response = _opportunities_match?(array: hash['fundings'], response:)
+      response = _orcids_match?(array: hash['people'], response:)
+      response = _last_name_and_affiliation_match?(array: hash['people'], response:)
+      # Only process the following if we had some matching contributors, affiliations or opportuniy nbrs
+      response = _repository_match?(array: hash['repositories'], response:) if response[:score].positive?
+      response = _keyword_match?(array: hash['repositories'], response:) if response[:score].positive?
+      response = _text_match?(type: 'title', text: hash['title'], response:) if response[:score].positive?
+      response = _text_match?(type: 'abstract', text: hash['abstract'], response:) if response[:score].positive?
+      # If the score is less than 3 then we have no confidence that it is a match
+      return response if response[:score] <= 2
+      # Set the confidence level based on the score
+      response[:confidence] = if response[:score] > 15
+                                'High'
+                              else
+                                (response[:score] > 10 ? 'Medium' : 'Low')
+                              end
+      response
+    end
+    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    private
+    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def _extract_dmp_details(dmp:)
+      return nil unless dmp.is_a?(Hash) && !dmp['title'].nil? && !dmp['contact'].nil?
+      projects = dmp.fetch('project', [{}])
+      fundings = projects.map { |proj| proj.fetch('funding', []) }.flatten.compact.uniq
+      hosts = dmp.fetch('dataset', []).map { |dset| dset.fetch('distribution', []).map { |d| d['host'] } }
+      people = [dmp['contact']]
+      people << dmp.fetch('contributor', [])
+      # Extract all of the important bits about the DMP
+      @details_hash = {
+        created: dmp.fetch('created', Time.now.iso8601),
+        title: _cleanse_text(text: projects&.first&.fetch('title', dmp['title'])),
+        abstract: _cleanse_text(text: projects&.first&.fetch('description', dmp['description'])),
+        keywords: dmp.fetch('dataset', []).map { |ds| ds.fetch('keyword', []) }.flatten.compact.uniq,
+        identifiers: [dmp.fetch('dmp_id', {})['identifier']],
+        last_names: [],
+        affiliation_ids: [],
+        affiliations: [],
+        funder_names: [],
+        funder_ids: [],
+        opportunity_ids: [],
+        grant_ids: [],
+        repositories: []
+      }
+      _extract_people(array: people&.flatten&.compact&.uniq)
+      _extract_funding(array: fundings)
+      _extract_repositories(repos: hosts.flatten.compact.uniq)
+      # Clean up the results by flattening and removing duplicates from the Arrays
+      @details_hash.each_key do |key|
+        @details_hash[key] = @details_hash[key].flatten.compact.uniq if @details_hash[key].is_a?(Array)
+      end
+      @logger&.debug(message: 'Extracted the following from the DMP', details: @details_hash)
+    end
+    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    # Extract all of the funding information
+    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def _extract_funding(array:)
+      return [] unless array.is_a?(Array)
+      array.each do |funding|
+        next unless funding.is_a?(Hash)
+        funder_id = funding.fetch('funder_id', {})
+        ror = funder_id['identifier'] if funder_id['type']&.downcase&.strip == 'ror'
+        fundref = ror.nil? ? funder_id['identifier']&.downcase&.strip : ROR_FUNDREF_ID_CROSSWALK[:"#{ror}"]
+        opportunity = funding.fetch('dmproadmap_funding_opportunity_id', {})['identifier']
+        grant = funding.fetch('grant_id', {})['identifier']
+        @details_hash[:identifiers] << ror&.downcase&.strip
+        @details_hash[:identifiers] << fundref&.downcase&.strip
+        @details_hash[:identifiers] << grant&.downcase&.strip
+        @details_hash[:identifiers] << grant&.split('/')&.last&.downcase&.strip
+        @details_hash[:identifiers] << opportunity&.downcase&.strip
+        @details_hash[:funder_names] << funding['name']&.downcase&.strip
+        @details_hash[:funder_ids] << fundref
+        @details_hash[:opportunity_ids] << opportunity&.downcase&.strip
+        @details_hash[:grant_ids] << [grant&.downcase&.strip, grant&.split('/')&.last&.downcase&.strip]
+      end
+      array
+    end
+    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    # Extract all of the ORCIDs, last names, and affiliation ids and names
+    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def _extract_people(array:)
+      return [] unless array.is_a?(Array)
+      array.each do |entry|
+        next unless entry.is_a?(Hash)
+        id = entry.fetch('contributor_id', entry.fetch('contact_id', {}))['identifier']&.downcase&.strip
+        affil = entry.fetch('dmproadmap_affiliation', {})
+        ror = affil.fetch('affiliation_id', {})['identifier']&.downcase&.strip
+        name = entry.fetch('name', '')&.downcase&.strip
+        last_name = name.include?(', ') ? name.split(', ').first : name.split.last
+        @details_hash[:identifiers] << [id, ror&.downcase&.strip]
+        @details_hash[:last_names] << last_name
+        @details_hash[:affiliation_ids] << ror
+        @details_hash[:affiliations] << affil.fetch('name', '')&.split(' (')&.first&.downcase&.strip
+      end
+      array
+    end
+    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    # Extract all of the re3data ids, URLs and names
+    # rubocop:disable Metrics/AbcSize
+    def _extract_repositories(repos:)
+      return [] unless repos.is_a?(Array)
+      repos.each do |repo|
+        next unless repo.is_a?(Hash)
+        @details_hash[:identifiers] << [
+          repo['url']&.downcase&.strip, repo.fetch('dmproadmap_host_id', {})['identifier']&.downcase&.strip
+        ]
+        @details_hash[:repositories] << repo.fetch('name', '')&.downcase&.strip
+      end
+      repos
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Returns whether or not the incoming grant id(s) match the DMPs grant id. Expecting:
+    #    [
+    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
+    #    ]
+    # rubocop:disable Metrics/AbcSize
+    def _grants_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
+                 .map { |funding| funding['grant'].map { |id| id&.downcase&.strip } }
+                 .flatten.compact.uniq
+      matched = _compare_arrays(array_a: @details_hash.fetch(:grant_ids, []), array_b: ids)
+      return response if matched <= 0
+      response[:confidence] = 'Absolute'
+      response[:score] = 100
+      response[:notes] << 'the grant ID matched'
+      response
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Returns whether or not the incoming grant id(s) match the DMPs opportunity id. Expecting:
+    #    [
+    #      { id: "https://doi.org/crossref123", name: "Bar", grant: ["1234", "http://foo.bar/543"] }
+    #    ]
+    # rubocop:disable Metrics/AbcSize
+    def _opportunities_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      ids = array.select { |funding| funding.is_a?(Hash) && funding['grant'].is_a?(Array) }
+                 .map { |funding| funding['grant'].map { |id| id&.downcase&.strip } }
+                 .flatten.compact.uniq
+      matched = _compare_arrays(array_a: @details_hash.fetch(:opportunity_ids, []), array_b: ids)
+      return response if matched <= 0
+      response[:score] += 5
+      response[:notes] << 'the funding opportunity number matched'
+      response
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Returns whether or not the inciming list of creators/contributors match those on the DMP. Expecting:
+    #   [
+    #      {
+    #        id: "https://orcid.org/blah",
+    #        last_name: "doe",
+    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
+    #      }
+    #    ]
+    # rubocop:disable Metrics/AbcSize
+    def _orcids_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      ids = array.select { |repo| repo.is_a?(Hash) }
+                 .map { |person| person['id']&.downcase&.strip }
+                 .flatten.compact.uniq
+      matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
+      return response if matched <= 0
+      response[:score] += (matched * 2)
+      response[:notes] << 'contributor ORCIDs matched'
+      response
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Returns whether or not the inciming list of creators/contributors match those on the DMP. Expecting:
+    #   [
+    #      {
+    #        id: "https://orcid.org/blah",
+    #        last_name: "doe",
+    #        affiliation: { id: "https://ror.org/blah", name: "Foo" }
+    #      }
+    #    ]
+    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def _last_name_and_affiliation_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      array = array.select { |repo| repo.is_a?(Hash) }
+      affiliations = array.map { |person| person['affiliation'] }&.flatten&.compact&.uniq
+      last_names = array.map { |person| person['last_name']&.downcase&.strip }&.flatten&.compact&.uniq
+      rors = affiliations.map { |affil| affil['id']&.downcase&.strip }&.flatten&.compact&.uniq
+      affil_names = affiliations.map { |affil| affil['name']&.downcase&.strip }&.flatten&.compact&.uniq
+      # Check the person last names and affiliation name and RORs
+      last_names_matched = _compare_arrays(array_a: @details_hash.fetch(:last_names, []), array_b: last_names)
+      rors_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliation_ids, []), array_b: rors)
+      affil_names_matched = _compare_arrays(array_a: @details_hash.fetch(:affiliations, []), array_b: affil_names)
+      return response if last_names_matched <= 0 && rors_matched <= 0 && affil_names_matched <= 0
+      response[:score] += last_names_matched + rors_matched + affil_names_matched
+      response[:notes] << 'contributor names and affiliations matched'
+      response
+    end
+    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    # Returns whether or not the incoming list of repositories match those defined in the DMP. Expecting:
+    #    [
+    #      { id: ["http://some.repo.org", "https://doi.org/re3data123"], name: "Repo" }
+    #    ]
+    # rubocop:disable Metrics/AbcSize
+    def _repository_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      # We only care about repositories with ids/urls
+      ids = array.select { |repo| repo.is_a?(Hash) }
+                 .map { |repo| repo['id'].map { |id| id&.downcase&.strip } }
+                 .flatten.compact.uniq
+      matched = _compare_arrays(array_a: @details_hash.fetch(:identifiers, []), array_b: ids)
+      return response if matched <= 0
+      response[:score] += matched
+      response[:notes] << 'repositories matched'
+      response
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Returns whether or not the list of keywords exist in the DMP. Expecting:
+    #     keywords: ["foo", "bar"]
+    def _keyword_match?(array:, response:)
+      return response unless array.is_a?(Array) && response.is_a?(Hash)
+      keywords = array.map { |word| word&.downcase&.strip }&.flatten&.compact&.uniq
+      matched = _compare_arrays(array_a: @details_hash.fetch(:keywords, []), array_b: keywords)
+      return response if matched <= 0
+      response[:score] += 1
+      response[:notes] << 'keywords matched'
+      response
+    end
+    # Uses an NLP library to determine if the :text matches the DMP/Project :title or :description
+    # rubocop:disable Metrics/AbcSize
+    def _text_match?(text:, response:, type: 'title')
+      return response unless response.is_a?(Hash) && text.is_a?(String) && !text.strip.empty? &&
+                             !@details_hash[type.to_sym].nil?
+      nlp_processor = Text::WhiteSimilarity.new
+      cleansed = _cleanse_text(text:)
+      details = {
+        "dmp_#{type}": @details_hash[type.to_sym],
+        "incoming_#{type}": cleansed,
+        nlp_score: nlp_processor.similarity(@details_hash[type.to_sym], cleansed)
+      }
+      @logger&.debug(message: 'Text::WhiteSimilarity score', details:)
+      return response if details[:nlp_score] < 0.5
+      response[:score] += details[:nlp_score] >= 0.75 ? 5 : 2
+      response[:notes] << "#{type}s are similar"
+      response
+    end
+    # rubocop:enable Metrics/AbcSize
+    # Change the incoming text to lower case, remove spaces and STOP_WORDS
+    def _cleanse_text(text:)
+      return nil unless text.is_a?(String)
+      text.downcase.split.reject { |word| STOP_WORDS.include?(word) }.join(' ').strip
+    end
+    # Do an introspection of the 2 arrays and return the number of matches
+    def _compare_arrays(array_a: [], array_b: [])
+      return 0 unless array_a.is_a?(Array) && array_b.is_a?(Array)
+      intersection = array_a & array_b
+      intersection.nil? || intersection.size <= 0 ? 0 : intersection.size
+    end
+    # TODO: Remove this hard-coded crosswalk once the community has broader support for using ROR for funder ids
+    ROR_FUNDREF_ID_CROSSWALK = {
+      # NIH ID Crosswalk
+      'https://ror.org/01cwqze88': 'https://doi.org/10.13039/100000002',
+      'https://ror.org/04mhx6838': 'https://doi.org/10.13039/100000055',
+      'https://ror.org/012pb6c26': 'https://doi.org/10.13039/100000050',
+      'https://ror.org/03wkg3b53': 'https://doi.org/10.13039/100000053',
+      'https://ror.org/0060t0j89': 'https://doi.org/10.13039/100000092',
+      'https://ror.org/00372qc85': 'https://doi.org/10.13039/100000070',
+      'https://ror.org/00190t495': 'https://doi.org/10.13039/100008460',
+      'https://ror.org/00j4k1h63': 'https://doi.org/10.13039/100000066',
+      'https://ror.org/01y3zfr79': 'https://doi.org/10.13039/100000056',
+      'https://ror.org/04q48ey07': 'https://doi.org/10.13039/100000057',
+      'https://ror.org/0493hgw16': 'https://doi.org/10.13039/100006545',
+      'https://ror.org/04vfsmv21': 'https://doi.org/10.13039/100000098',
+      'https://ror.org/03jh5a977': 'https://doi.org/10.13039/100000093',
+      'https://ror.org/04xeg9z08': 'https://doi.org/10.13039/100000025',
+      'https://ror.org/01s5ya894': 'https://doi.org/10.13039/100000065',
+      'https://ror.org/02meqm098': 'https://doi.org/10.13039/100000002',
+      'https://ror.org/049v75w11': 'https://doi.org/10.13039/100000049',
+      'https://ror.org/004a2wv92': 'https://doi.org/10.13039/100000072',
+      'https://ror.org/00adh9b73': 'https://doi.org/10.13039/100000062',
+      'https://ror.org/043z4tv69': 'https://doi.org/10.13039/100000060',
+      'https://ror.org/00x19de83': 'https://doi.org/10.13039/100000002',
+      'https://ror.org/02jzrsm59': 'https://doi.org/10.13039/100000027',
+      'https://ror.org/006zn3t30': 'https://doi.org/10.13039/100000069',
+      'https://ror.org/04byxyr05': 'https://doi.org/10.13039/100000071',
+      'https://ror.org/04pw6fb54': 'https://doi.org/10.13039/100006108',
+      'https://ror.org/05aq6yn88': 'https://doi.org/10.13039/100006955',
+      'https://ror.org/02xey9a22': 'https://doi.org/10.13039/100000061',
+      'https://ror.org/00fj8a872': 'https://doi.org/10.13039/100000052',
+      'https://ror.org/01wtjyf13': 'https://doi.org/10.13039/100000063',
+      'https://ror.org/04r5s4b52': 'https://doi.org/10.13039/100005440',
+      'https://ror.org/046zezr58': 'https://doi.org/10.13039/100006085',
+      'https://ror.org/02e3wq066': 'https://doi.org/10.13039/100006086',
+      'https://ror.org/031gy6182': 'https://doi.org/10.13039/100000002',
+      'https://ror.org/054j5yq82': 'https://doi.org/10.13039/100000002',
+      'https://ror.org/02yrzyf97': 'https://doi.org/10.13039/100000002',
+      # NSF ID Crosswalk
+      'https://.org/021nxhr62': 'https://doi.org/10.13039/100000001',
+      'https://.org/04aqat463': 'https://doi.org/10.13039/100000001',
+      'https://.org/01rcfpa16': 'https://doi.org/10.13039/100005441',
+      'https://.org/014eweh95': 'https://doi.org/10.13039/100005445',
+      'https://.org/001xhss06': 'https://doi.org/10.13039/100000076',
+      'https://.org/04qn9mx93': 'https://doi.org/10.13039/100000153',
+      'https://.org/03g87he71': 'https://doi.org/10.13039/100000155',
+      'https://.org/01tnvpc68': 'https://doi.org/10.13039/100000156',
+      'https://.org/01rvays47': 'https://doi.org/10.13039/100000154',
+      'https://.org/002jdaq33': 'https://doi.org/10.13039/100000152',
+      'https://.org/025kzpk63': 'https://doi.org/10.13039/100000083',
+      'https://.org/04nh1dc89': 'https://doi.org/10.13039/100007523',
+      'https://.org/01mng8331': 'https://doi.org/10.13039/100000143',
+      'https://.org/02rdzmk74': 'https://doi.org/10.13039/100000144',
+      'https://.org/053a2cp42': 'https://doi.org/10.13039/100000145',
+      'https://.org/014bj5w56': 'https://doi.org/10.13039/100000081',
+      'https://.org/00whkrf32': 'https://doi.org/10.13039/100000082',
+      'https://.org/05s7cqk18': 'https://doi.org/10.13039/100000173',
+      'https://.org/02kd4km72': 'https://doi.org/10.13039/100000172',
+      'https://.org/03mamvh39': 'https://doi.org/10.13039/100000171',
+      'https://.org/00b6sbb32': 'https://doi.org/10.13039/100000084',
+      'https://.org/0471zv972': 'https://doi.org/10.13039/100000146',
+      'https://.org/028yd4c30': 'https://doi.org/10.13039/100000147',
+      'https://.org/01krpsy48': 'https://doi.org/10.13039/100000148',
+      'https://.org/050rnw378': 'https://doi.org/10.13039/100000149',
+      'https://.org/0388pet74': 'https://doi.org/10.13039/100000150',
+      'https://.org/03xyg3m20': 'https://doi.org/10.13039/100000151',
+      'https://.org/05p847d66': 'https://doi.org/10.13039/100000085',
+      'https://.org/037gd6g64': 'https://doi.org/10.13039/100000159',
+      'https://.org/05v01mk25': 'https://doi.org/10.13039/100000160',
+      'https://.org/05wqqhv83': 'https://doi.org/10.13039/100000141',
+      'https://.org/05nwjp114': 'https://doi.org/10.13039/100007352',
+      'https://.org/05fnzca26': 'https://doi.org/10.13039/100000162',
+      'https://.org/02trddg58': 'https://doi.org/10.13039/100000163',
+      'https://.org/029b7h395': 'https://doi.org/10.13039/100000086',
+      'https://.org/04mg8wm74': 'https://doi.org/10.13039/100000164',
+      'https://.org/01ar8dr59': 'https://doi.org/10.13039/100000165',
+      'https://.org/01pc7k308': 'https://doi.org/10.13039/100000078',
+      'https://.org/051fftw81': 'https://doi.org/10.13039/100000121',
+      'https://.org/04ap5x931': 'https://doi.org/10.13039/100000166',
+      'https://.org/00apvva27': 'https://doi.org/10.13039/100005716',
+      'https://.org/04nseet23': 'https://doi.org/10.13039/100000179',
+      'https://.org/04k9mqs78': 'https://doi.org/10.13039/100000106',
+      'https://.org/01k638r21': 'https://doi.org/10.13039/100000089',
+      'https://.org/01gmp5538': 'https://doi.org/10.13039/100005447',
+      'https://.org/01vnjbg30': 'https://doi.org/10.13039/100005449',
+      'https://.org/03h7mcc28': 'https://doi.org/10.13039/100000088',
+      'https://.org/05wgkzg12': 'https://doi.org/10.13039/100000169',
+      'https://.org/0445wmv88': 'https://doi.org/10.13039/100000170',
+      'https://.org/02dz2hb46': 'https://doi.org/10.13039/100000077',
+      'https://.org/034m1ez10': 'https://doi.org/10.13039/100000107',
+      'https://.org/02a65dj82': 'https://doi.org/10.13039/100005717',
+      'https://.org/020fhsn68': 'https://doi.org/10.13039/100000001',
+      'https://.org/03z9hh605': 'https://doi.org/10.13039/100000174',
+      'https://.org/04ya3kq71': 'https://doi.org/10.13039/100007521',
+      'https://.org/04evh7y43': 'https://doi.org/10.13039/100005443',
+      'https://.org/04h67aa53': 'https://doi.org/10.13039/100000177',
+      'https://.org/025dabr11': 'https://doi.org/10.13039/100005446',
+      'https://.org/04vw0kz07': 'https://doi.org/10.13039/100005448',
+      'https://.org/054ydxh33': 'https://doi.org/10.13039/100005554',
+      'https://.org/01sharn77': 'https://doi.org/10.13039/100006091',
+      'https://.org/02ch5q898': 'https://doi.org/10.13039/100000001',
+      # NASA ID Crosswalk
+      'https://.org/0171mag52': 'https://doi.org/10.13039/100006198',
+      'https://.org/027k65916': 'https://doi.org/10.13039/100006196',
+      'https://.org/027ka1x80': 'https://doi.org/10.13039/100000104',
+      'https://.org/02acart68': 'https://doi.org/10.13039/100006195',
+      'https://.org/059fqnc42': 'https://doi.org/10.13039/100006193',
+      'https://.org/01cyfxe35': 'https://doi.org/10.13039/100016595',
+      'https://.org/04xx4z452': 'https://doi.org/10.13039/100006203',
+      'https://.org/0399mhs52': 'https://doi.org/10.13039/100006199',
+      'https://.org/02epydz83': 'https://doi.org/10.13039/100006197',
+      'https://.org/03j9e2j92': 'https://doi.org/10.13039/100006205',
+      'https://.org/02s42x260': 'https://doi.org/10.13039/100000104',
+      'https://.org/01p7gwa14': 'https://doi.org/10.13039/100000104',
+      'https://.org/01qxmdg18': 'https://doi.org/10.13039/100000104',
+      'https://.org/006ndaj41': 'https://doi.org/10.13039/100000104',
+      'https://.org/03em45j53': 'https://doi.org/10.13039/100007346',
+      'https://.org/045t78n53': 'https://doi.org/10.13039/100000104',
+      'https://.org/00r57r863': 'https://doi.org/10.13039/100000104',
+      'https://.org/0401vze59': 'https://doi.org/10.13039/100007726',
+      'https://.org/04hccab49': 'https://doi.org/10.13039/100000104',
+      'https://.org/04437j066': 'https://doi.org/10.13039/100000104',
+      'https://.org/028b18z22': 'https://doi.org/10.13039/100000104',
+      'https://.org/00ryjtt64': 'https://doi.org/10.13039/100000104',
+      # DOE ID Crosswalk
+      'https://ror.org/01bj3aw27': 'https://doi.org/10.13039/100000015',
+      'https://ror.org/03q1rgc19': 'https://doi.org/10.13039/100006133',
+      'https://ror.org/02xznz413': 'https://doi.org/10.13039/100006134',
+      'https://ror.org/03sk1we31': 'https://doi.org/10.13039/100006168',
+      'https://ror.org/00f93gc02': 'https://doi.org/10.13039/100006177',
+      'https://ror.org/05tj7dm33': 'https://doi.org/10.13039/100006147',
+      'https://ror.org/0012c7r22': 'https://doi.org/10.13039/100006192',
+      'https://ror.org/00mmn6b08': 'https://doi.org/10.13039/100006132',
+      'https://ror.org/03ery9d53': 'https://doi.org/10.13039/100006120',
+      'https://ror.org/033jmdj81': 'https://doi.org/10.13039/100000015',
+      'https://ror.org/03rd4h240': 'https://doi.org/10.13039/100006130',
+      'https://ror.org/0054t4769': 'https://doi.org/10.13039/100006200',
+      'https://ror.org/03eecgp81': 'https://doi.org/10.13039/100006174',
+      'https://ror.org/00heb4d89': 'https://doi.org/10.13039/100006135',
+      'https://ror.org/05ek3m339': 'https://doi.org/10.13039/100006150',
+      'https://ror.org/00km40770': 'https://doi.org/10.13039/100006138',
+      'https://ror.org/02ah1da87': 'https://doi.org/10.13039/100006137',
+      'https://ror.org/05hsv7e61': 'https://doi.org/10.13039/100000015',
+      'https://ror.org/01c9ay627': 'https://doi.org/10.13039/100006165',
+      'https://ror.org/04z2gev20': 'https://doi.org/10.13039/100006183',
+      'https://ror.org/02z1qvq09': 'https://doi.org/10.13039/100006144',
+      'https://ror.org/03jf3w726': 'https://doi.org/10.13039/100006186',
+      'https://ror.org/04848jz84': 'https://doi.org/10.13039/100006142',
+      'https://ror.org/04s778r16': 'https://doi.org/10.13039/100006171',
+      'https://ror.org/04nnxen11': 'https://doi.org/10.13039/100000015',
+      'https://ror.org/05csy5p27': 'https://doi.org/10.13039/100010268',
+      'https://ror.org/05efnac71': 'https://doi.org/10.13039/100000015'
+    }.freeze
+  end
+end
+# rubocop:enable Metrics/ClassLength

data/lib/uc3-dmp-id/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Uc3DmpId
-  VERSION = '0.1.1'
+  VERSION = '0.1.3'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: uc3-dmp-id
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.3
 platform: ruby
 authors:
 - Brian Riley
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-10-04 00:00:00.000000000 Z
+date: 2023-10-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: json
@@ -38,6 +38,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: text
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.3'
 - !ruby/object:Gem::Dependency
   name: uc3-dmp-dynamo
   requirement: !ruby/object:Gem::Requirement
@@ -76,6 +90,7 @@ files:
 - README.md
 - lib/uc3-dmp-id.rb
 - lib/uc3-dmp-id/asserter.rb
+- lib/uc3-dmp-id/comparator.rb
 - lib/uc3-dmp-id/creator.rb
 - lib/uc3-dmp-id/deleter.rb
 - lib/uc3-dmp-id/finder.rb