RubyGems - commonmeta-ruby - Versions diffs - 3.11.0 → 3.12.1 - Mend

commonmeta-ruby 3.11.0 → 3.12.1

Files changed (16) hide show

checksums.yaml +4 -4
data/Gemfile.lock +5 -4
data/lib/commonmeta/author_utils.rb +9 -6
data/lib/commonmeta/readers/datacite_reader.rb +117 -111
data/lib/commonmeta/schema_utils.rb +1 -1
data/lib/commonmeta/version.rb +1 -1
data/lib/commonmeta/writers/commonmeta_writer.rb +1 -1
data/resources/{commonmeta_v0.10.6.json → commonmeta_v0.10.7.json} +11 -3
data/spec/author_utils_spec.rb +15 -0
data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml +317 -0
data/spec/readers/datacite_reader_spec.rb +36 -7
data/spec/readers/json_feed_reader_spec.rb +25 -0
data/spec/writers/commonmeta_writer_spec.rb +30 -3
data/spec/writers/csl_writer_spec.rb +1 -0
data/spec/writers/csv_writer_spec.rb +1 -0
metadata +4 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7bc4df0a364fc1ff53342960355c5fc534a0c479cf825cc0d856e282e42e7d49
-  data.tar.gz: 57dcd210498057807f46ffb851eb40b4db189636d915152a77de1fd1191012c9
+  metadata.gz: b48e44936ddd71a38a9d019c33972d02cf66af57c02542f48d37a022017c8208
+  data.tar.gz: deb5ca7a1b1ec9387583e0039edc45ab382d8989c31012dacd81560c8cbaeaa3
 SHA512:
-  metadata.gz: 2a4265c7a8d17ab99b963459015232400bcd7541e4c827e2bf0863e9809fbe64d70d0ce6ced7dbcf2a9186beca2075be73617bd3d3cace2e4b9c67e39d8e756a
-  data.tar.gz: 765b9d3d29683badac4ed6b77ac6f86f6b939f5081d23016c1484a9f8b7e0d26039559a9f6d5c2e2b8fe5c06bc1d70d5e09e17f16a5a24c6c9bb84291a8c1548
+  metadata.gz: e72bf66b0e72b62640d6f528c2279b119499a225acbe26498efe2afc7c5679b018175097144f6fef3a592186ae518e4073b9f209a31a56bda82e697fc3287408
+  data.tar.gz: '099621bbc5109437cf592a34bb8810d6d7c6a26c68bec0e13c43061410066218bf38fc0a3f809a84dc9f2861075a2b6a53d41836da9c2e909e5096529b7f87cd'

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    commonmeta-ruby (3.11.0)
+    commonmeta-ruby (3.12.1)
       activesupport (>= 4.2.5, < 8.0)
       addressable (~> 2.8.1, < 2.8.2)
       base32-url (>= 0.7.0, < 1)
@@ -58,7 +58,8 @@ GEM
       rubocop (~> 1.0)
     concurrent-ruby (1.2.3)
     connection_pool (2.4.1)
-    crack (0.4.5)
+    crack (0.4.6)
+      bigdecimal
       rexml
     crass (1.0.6)
     csl (2.0.0)
@@ -66,7 +67,7 @@ GEM
       rexml
     csl-styles (2.0.1)
       csl (~> 2.0)
-    diff-lcs (1.5.0)
+    diff-lcs (1.5.1)
     docile (1.4.0)
     domain_name (0.6.20240107)
     drb (2.2.0)
@@ -154,7 +155,7 @@ GEM
       iniparser (>= 0.1.0)
     public_suffix (4.0.7)
     racc (1.7.3)
-    rack (3.0.8)
+    rack (3.0.9)
     rack-test (2.1.0)
       rack (>= 1.3)
     rainbow (3.1.1)

data/lib/commonmeta/author_utils.rb CHANGED Viewed

@@ -25,8 +25,8 @@ module Commonmeta
       "Researcher" => "Other",
       "Sponsor" => "Other",
       "Supervisor" => "Supervision",
-      "WorkPackageLeader" => "Other"
-    }
+      "WorkPackageLeader" => "Other",
+    }
     def get_one_author(author)
       # basic sanity checks
@@ -55,20 +55,20 @@ module Commonmeta
            parse_attributes(author.fetch("identifier", nil), first: true) ||
            parse_attributes(author.fetch("sameAs", nil), first: true)
       id = normalize_orcid(id) || normalize_ror(id) if id.present?
       # DataCite metadata
       if id.nil? && author["nameIdentifiers"].present?
         id = Array.wrap(author.dig("nameIdentifiers")).find do |ni|
           normalize_name_identifier(ni).present?
         end
         id = normalize_name_identifier(id) if id.present?
-      # Crossref metadata
+        # Crossref metadata
       elsif id.nil? && author["ORCID"].present?
         id = author.fetch("ORCID")
         id = normalize_orcid(id)
-      # JSON Feed metadata
+        # JSON Feed metadata
       elsif id.nil? && author["url"].present?
-        id = author.fetch("url")
+        id = author.fetch("url")
       end
       # parse author type, i.e. "Person", "Organization" or not specified
@@ -168,6 +168,9 @@ module Commonmeta
       # check if a name has only one word, e.g. "FamousOrganization", not including commas
       return false if name.to_s.split(" ").size == 1 && name.to_s.exclude?(",")
+      # check if name contains words known to be used in organization names
+      return false if %w[University College Institute School Center Department Laboratory Library Museum Foundation Society Association Company Corporation Collaboration Consortium Incorporated Inc. Institut Research Science Team].any? { |word| name.to_s.include?(word) }
       # check for suffixes, e.g. "John Smith, MD"
       return true if name && %w[MD PhD].include?(name.split(", ").last)

data/lib/commonmeta/readers/datacite_reader.rb CHANGED Viewed

@@ -4,29 +4,29 @@ module Commonmeta
   module Readers
     module DataciteReader
       def get_datacite(id: nil, **options)
-        return { 'string' => nil, 'state' => 'not_found' } unless id.present?
+        return { "string" => nil, "state" => "not_found" } unless id.present?
         api_url = datacite_api_url(id, options)
         response = HTTP.get(api_url)
-        return { 'string' => nil, 'state' => 'not_found' } unless response.status.success?
+        return { "string" => nil, "state" => "not_found" } unless response.status.success?
         body = JSON.parse(response.body)
-        client = Array.wrap(body.fetch('included', nil)).find do |m|
-          m['type'] == 'clients'
+        client = Array.wrap(body.fetch("included", nil)).find do |m|
+          m["type"] == "clients"
         end
-        client_id = client.to_h.fetch('id', nil)
-        provider_id = Array.wrap(client.to_h.fetch('relationships', nil)).find do |m|
-          m['provider'].present?
-        end.to_h.dig('provider', 'data', 'id')
-        { 'string' => response.body.to_s,
-          'provider_id' => provider_id,
-          'client_id' => client_id }
+        client_id = client.to_h.fetch("id", nil)
+        provider_id = Array.wrap(client.to_h.fetch("relationships", nil)).find do |m|
+          m["provider"].present?
+        end.to_h.dig("provider", "data", "id")
+        { "string" => response.body.to_s,
+          "provider_id" => provider_id,
+          "client_id" => client_id }
       end
       def read_datacite(string: nil, **_options)
         errors = jsonlint(string)
-        return { 'errors' => errors } if errors.present?
+        return { "errors" => errors } if errors.present?
         read_options = ActiveSupport::HashWithIndifferentAccess.new(_options.except(:doi, :id, :url,
                                                                                     :sandbox, :validate, :ra))
@@ -34,140 +34,146 @@ module Commonmeta
         meta = string.present? ? JSON.parse(string) : {}
         # optionally strip out the message wrapper from API
-        meta = meta.dig('data', 'attributes') if meta.dig('data').present?
+        meta = meta.dig("data", "attributes") if meta.dig("data").present?
         meta.transform_keys!(&:underscore)
-        id = normalize_doi(meta.fetch('doi', nil))
+        id = normalize_doi(meta.fetch("doi", nil))
-        resource_type_general = meta.dig('types', 'resourceTypeGeneral')
-        resource_type = meta.dig('types', 'resourceType')
+        resource_type_general = meta.dig("types", "resourceTypeGeneral")
+        resource_type = meta.dig("types", "resourceType")
         # if resource_type is one of the new resource_type_general types introduced in schema 4.3, use it
         type = Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type, nil) ||
-               Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, 'Other')
+               Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, "Other")
-        alternate_identifiers = Array.wrap(meta.fetch('alternate_identifiers', nil)).map do |i|
+        alternate_identifiers = Array.wrap(meta.fetch("alternate_identifiers", nil)).map do |i|
           i.transform_keys! { |k| k.camelize(:lower) }
         end
-        url = meta.fetch('url', nil)
-        titles = Array.wrap(meta.fetch('titles', nil)).map do |title|
-          title.compact
+        url = meta.fetch("url", nil)
+        titles = Array.wrap(meta.fetch("titles", nil)).map do |title|
+          { "title" => title.fetch("title", nil),
+            "type" => title.fetch("titleType", nil),
+            "language" => title.fetch("lang", nil) }.compact
         end
-        contributors = get_authors(from_datacite(meta.fetch('creators', nil)))
-        contributors += get_authors(from_datacite(meta.fetch('contributors', nil)))
-        if meta.fetch('publisher', nil).is_a?(Hash)
-          publisher = { 'name' => meta.fetch('publisher', nil).fetch('name', nil) }
-        elsif meta.fetch('publisher', nil).is_a?(String)
-          publisher = { 'name' => meta.fetch('publisher', nil) }
+        contributors = get_authors(from_datacite(meta.fetch("creators", nil)))
+        contributors += get_authors(from_datacite(meta.fetch("contributors", nil)))
+        if meta.fetch("publisher", nil).is_a?(Hash)
+          publisher = { "name" => meta.fetch("publisher", nil).fetch("name", nil) }
+        elsif meta.fetch("publisher", nil).is_a?(String)
+          publisher = { "name" => meta.fetch("publisher", nil) }
         else
           publisher = nil
         end
-        container = meta.fetch('container', nil)
-        funding_references = meta.fetch('funding_references', nil)
+        container = meta.fetch("container", nil)
+        funding_references = meta.fetch("funding_references", nil)
         date = {}
-        date['created'] =
-          get_iso8601_date(meta.dig('created')) || get_date(meta.dig('dates'), 'Created')
-        date['published'] =
-          get_iso8601_date(meta.dig('published')) || get_date(meta.dig('dates'),
-                                                              'Issued') || get_iso8601_date(meta.dig('publication_year'))
-        date['registered'] = get_iso8601_date(meta.dig('registered'))
-        date['updated'] =
-          get_iso8601_date(meta.dig('updated')) || get_date(meta.dig('dates'), 'Updated')
-        descriptions = Array.wrap(meta.fetch('descriptions', nil)).map do |description|
-          description.compact
+        date["created"] =
+          get_iso8601_date(meta.dig("created")) || get_date(meta.dig("dates"), "Created")
+        date["published"] =
+          get_iso8601_date(meta.dig("published")) || get_date(meta.dig("dates"),
+                                                              "Issued") || get_iso8601_date(meta.dig("publication_year"))
+        date["registered"] = get_iso8601_date(meta.dig("registered"))
+        date["updated"] =
+          get_iso8601_date(meta.dig("updated")) || get_date(meta.dig("dates"), "Updated")
+        descriptions = Array.wrap(meta.fetch("descriptions", nil)).map do |description|
+          description_type = description.fetch("descriptionType", nil)
+          description_type = "Other" unless %w[Abstract Methods TechnicalInfo].include?(description_type)
+          { "description" => description.fetch("description", nil),
+            "type" => description_type,
+            "language" => description.fetch("lang", nil) }.compact
         end
-        license = Array.wrap(meta.fetch('rights_list', nil)).find do |r|
-          r['rightsUri'].present?
+        license = Array.wrap(meta.fetch("rights_list", nil)).find do |r|
+          r["rightsUri"].present?
         end
-        license = hsh_to_spdx('rightsURI' => license['rightsUri']) if license.present?
-        version = meta.fetch('version', nil)
-        subjects = meta.fetch('subjects', nil)
-        language = meta.fetch('language', nil)
-        geo_locations = meta.fetch('geo_locations', nil)
-        references = (Array.wrap(meta.fetch('related_identifiers',
-                                            nil)) + Array.wrap(meta.fetch('related_items',
+        license = hsh_to_spdx("rightsURI" => license["rightsUri"]) if license.present?
+        version = meta.fetch("version", nil)
+        subjects = meta.fetch("subjects", nil)
+        language = meta.fetch("language", nil)
+        geo_locations = meta.fetch("geo_locations", nil)
+        references = (Array.wrap(meta.fetch("related_identifiers",
+                                            nil)) + Array.wrap(meta.fetch("related_items",
                                                                           nil))).select do |r|
-                       %w[References Cites IsSupplementedBy].include?(r['relationType'])
-                     end.map do |reference|
+          %w[References Cites IsSupplementedBy].include?(r["relationType"])
+        end.map do |reference|
           get_datacite_reference(reference)
         end
-        files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
-        formats = meta.fetch('formats', nil)
-        sizes = meta.fetch('sizes', nil)
-        schema_version = meta.fetch('schema_version', nil) || 'http://datacite.org/schema/kernel-4'
-        state = id.present? || read_options.present? ? 'findable' : 'not_found'
-        { 'id' => id,
-          'type' => type,
-          'additional_type' => resource_type == type ? nil : resource_type,
-          'url' => url,
-          'titles' => titles,
-          'contributors' => contributors,
-          'container' => container,
-          'publisher' => publisher,
-          'provider' => 'DataCite',
-          'alternate_identifiers' => alternate_identifiers.presence,
-          'references' => references,
-          'funding_references' => funding_references,
-          'files' => files.presence,
-          'date' => date.compact,
-          'descriptions' => descriptions,
-          'license' => license,
-          'version' => version,
-          'subjects' => subjects,
-          'language' => language,
-          'geo_locations' => geo_locations,
-          'formats' => formats,
-          'sizes' => sizes,
-          'state' => state }.compact # .merge(read_options)
+        files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
+        formats = meta.fetch("formats", nil)
+        sizes = meta.fetch("sizes", nil)
+        schema_version = meta.fetch("schema_version", nil) || "http://datacite.org/schema/kernel-4"
+        state = id.present? || read_options.present? ? "findable" : "not_found"
+        { "id" => id,
+          "type" => type,
+          "additional_type" => resource_type == type ? nil : resource_type,
+          "url" => url,
+          "titles" => titles,
+          "contributors" => contributors,
+          "container" => container,
+          "publisher" => publisher,
+          "provider" => "DataCite",
+          "alternate_identifiers" => alternate_identifiers.presence,
+          "references" => references,
+          "funding_references" => funding_references,
+          "files" => files.presence,
+          "date" => date.compact,
+          "descriptions" => descriptions,
+          "license" => license,
+          "version" => version,
+          "subjects" => subjects,
+          "language" => language,
+          "geo_locations" => geo_locations,
+          "formats" => formats,
+          "sizes" => sizes,
+          "state" => state }.compact # .merge(read_options)
       end
       def format_contributor(contributor)
-        type = contributor.fetch('nameType', nil)
-        { 'name' => type == 'Person' ? nil : contributor.fetch('name', nil),
-          'type' => type,
-          'givenName' => contributor.fetch('givenName', nil),
-          'familyName' => contributor.fetch('familyName', nil),
-          'nameIdentifiers' => contributor.fetch('nameIdentifiers', nil).presence,
-          'affiliations' => contributor.fetch('affiliations', nil).presence,
-          'contributorType' => contributor.fetch('contributorType', nil) }.compact
+        type = contributor.fetch("nameType", nil)
+        { "name" => type == "Person" ? nil : contributor.fetch("name", nil),
+          "type" => type,
+          "givenName" => contributor.fetch("givenName", nil),
+          "familyName" => contributor.fetch("familyName", nil),
+          "nameIdentifiers" => contributor.fetch("nameIdentifiers", nil).presence,
+          "affiliations" => contributor.fetch("affiliations", nil).presence,
+          "contributorType" => contributor.fetch("contributorType", nil) }.compact
       end
       def get_datacite_reference(reference)
         return nil unless reference.present? || !reference.is_a?(Hash)
-        key = reference['relatedIdentifier']
+        key = reference["relatedIdentifier"]
         doi = nil
         url = nil
-        case reference['relatedIdentifierType']
-        when 'DOI'
-          doi = normalize_doi(reference['relatedIdentifier'])
-        when 'URL'
-          url = reference['relatedIdentifier']
+        case reference["relatedIdentifierType"]
+        when "DOI"
+          doi = normalize_doi(reference["relatedIdentifier"])
+        when "URL"
+          url = reference["relatedIdentifier"]
         else
-          url = reference['relatedIdentifier']
+          url = reference["relatedIdentifier"]
         end
         {
-          'key' => key,
-          'doi' => doi,
-          'url' => url,
-          'contributor' => reference.dig('author'),
-          'title' => reference.dig('article-title'),
-          'publisher' => reference.dig('publisher'),
-          'publicationYear' => reference.dig('year'),
-          'volume' => reference.dig('volume'),
-          'issue' => reference.dig('issue'),
-          'firstPage' => reference.dig('first-page'),
-          'lastPage' => reference.dig('last-page'),
-          'containerTitle' => reference.dig('journal-title'),
-          'edition' => nil,
-          'unstructured' => doi.nil? ? reference.dig('unstructured') : nil
+          "key" => key,
+          "doi" => doi,
+          "url" => url,
+          "contributor" => reference.dig("author"),
+          "title" => reference.dig("article-title"),
+          "publisher" => reference.dig("publisher"),
+          "publicationYear" => reference.dig("year"),
+          "volume" => reference.dig("volume"),
+          "issue" => reference.dig("issue"),
+          "firstPage" => reference.dig("first-page"),
+          "lastPage" => reference.dig("last-page"),
+          "containerTitle" => reference.dig("journal-title"),
+          "edition" => nil,
+          "unstructured" => doi.nil? ? reference.dig("unstructured") : nil,
         }.compact
       end
     end

data/lib/commonmeta/schema_utils.rb CHANGED Viewed

@@ -5,7 +5,7 @@ require "pathname"
 module Commonmeta
   module SchemaUtils
-    COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.6.json",
+    COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.7.json",
                                             __dir__))
     def json_schema_errors

data/lib/commonmeta/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Commonmeta
-  VERSION = '3.11.0'
+  VERSION = '3.12.1'
 end

data/lib/commonmeta/writers/commonmeta_writer.rb CHANGED Viewed

@@ -8,4 +8,4 @@ module Commonmeta
       end
     end
   end
-end
+end

data/resources/{commonmeta_v0.10.6.json → commonmeta_v0.10.7.json} RENAMED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "http://json-schema.org/draft-07/schema#",
-  "$id": "https://commonmeta.org/commonmeta_v0.10.6.json",
-  "title": "Commonmeta v0.10.6",
+  "$id": "https://commonmeta.org/commonmeta_v0.10.7.json",
+  "title": "Commonmeta v0.10.7",
   "description": "JSON representation of the Commonmeta schema.",
   "additionalProperties": false,
   "definitions": {
@@ -253,6 +253,10 @@
             "description": "The type of the title.",
             "type": "string",
             "enum": ["AlternativeTitle", "Subtitle", "TranslatedTitle"]
+          },
+          "language": {
+            "description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
+            "type": "string"
           }
         },
         "required": ["title"]
@@ -424,7 +428,11 @@
           "type": {
             "description": "The type of the description.",
             "type": "string",
-            "enum": ["Abstract", "Description", "Summary"]
+            "enum": ["Abstract", "Summary", "Methods", "TechnicalInfo", "Other"]
+          },
+          "language": {
+            "description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
+            "type": "string"
           }
         },
         "required": ["description"]

data/spec/author_utils_spec.rb CHANGED Viewed

@@ -58,6 +58,21 @@ describe Commonmeta::Metadata, vcr: true do
       author = { "name" => "Tejas S. Sathe, MD" }
       expect(subject.is_personal_name?(name: author["name"])).to be true
     end
+    it "name with organization string" do
+      author = { "name" => "University of California, Santa Barbara" }
+      expect(subject.is_personal_name?(name: author["name"])).to be false
+    end
+    it "name with another organization string" do
+      author = { "name" => "Research Graph" }
+      expect(subject.is_personal_name?(name: author["name"])).to be false
+    end
+    it "name with ye another organization string" do
+      author = { "name" => "Team OA Brandenburg" }
+      expect(subject.is_personal_name?(name: author["name"])).to be false
+    end
   end
   context "cleanup_author" do

data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml ADDED Viewed

@@ -0,0 +1,317 @@
+---
+http_interactions:
+- request:
+    method: get
+    uri: https://api.rogue-scholar.org/posts/05f01f68-ef81-47d7-a3c1-40aba91d358f
+    body:
+      encoding: ASCII-8BIT
+      string: ''
+    headers:
+      Connection:
+      - close
+      Host:
+      - api.rogue-scholar.org
+      User-Agent:
+      - http.rb/5.1.1
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json
+      Content-Length:
+      - '23886'
+      Ratelimit-Limit:
+      - '15'
+      Ratelimit-Remaining:
+      - '14'
+      Ratelimit-Reset:
+      - '3'
+      Date:
+      - Wed, 31 Jan 2024 19:50:01 GMT
+      Server:
+      - Fly/ba9e227a (2024-01-26)
+      Via:
+      - 1.1 fly.io
+      Fly-Request-Id:
+      - 01HNGH4EZV3XQF20H1PZ6X5N07-fra
+    body:
+      encoding: UTF-8
+      string: '{"abstract":null,"archive_url":null,"authors":[{"name":"Research Graph"}],"blog":{"api":false,"archive_prefix":null,"authors":null,"backlog":0,"canonical_url":null,"category":"computerAndInformationSciences","created_at":1706685423,"current_feed_url":null,"description":"Stories
+        by Research Graph on Medium","favicon":"https://cdn-images-1.medium.com/fit/c/150/150/1*laJi0jBkVoGhXid7gD_DmQ.png","feed_format":"application/rss+xml","feed_url":"https://medium.com/@researchgraph/feed","filter":null,"funding":null,"generator":"Medium","generator_raw":"Medium","home_page_url":"https://medium.com/@researchgraph","id":"30da2ca9-8258-4ab5-acca-3919d9a5d98d","indexed":true,"issn":null,"language":"en","license":"https://creativecommons.org/licenses/by/4.0/legalcode","mastodon":"","plan":"Starter","prefix":"10.59350","relative_url":null,"ror":null,"secure":true,"slug":"researchgraph","status":"active","title":"Research
+        Graph","updated_at":1706151454,"use_api":null,"use_mastodon":false,"user_id":"a7e16958-1175-437c-b839-d4b8a47ec811","version":"https://jsonfeed.org/version/1.1"},"blog_name":"Research
+        Graph","blog_slug":"researchgraph","content_text":"**Tools and Platform for
+        Integration of Knowledge Graph with RAG\npipelines.**\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/1*bJ3eWZ7301vYDzBomwdLfQ.png\"\nalt=\"Complex
+        network connected to books and showing information from magespace\" />\n<figcaption>Image
+        Created in <a\nhref=\"https://www.mage.space/\">https://www.mage.space/</a></figcaption>\n</figure>\n\nAuthors:
+        [Aland\nAstudillo](https://www.linkedin.com/in/aland-astudillo/), [Aishwarya\nNambissan](https://www.linkedin.com/in/aishwarya-nambissan-127229200/)\n\nMany
+        users of chatbots such as ChatGPT, have encountered the problem of\nreceiving
+        inappropriate or incompatible responses. There are several\nreasons why this
+        might\u00a0happen.\n\nOne reason is the lack of appropriate training data,
+        as chatbots are\nusually trained on large amounts of text and code. If the
+        data is\ninsufficient or of poor quality, the chatbot may misunderstand queries\nand
+        provide inaccurate responses. Another reason is that some chatbots\nare designed
+        for specific tasks or domains, which limits their ability\nto handle broader
+        queries or understand subtle nuances in conversation.\nAdditionally, chatbots
+        may struggle with natural language, which is\ncomplex and often ambiguous.
+        This can cause them to misunderstand a\nuser''s query and provide irrelevant
+        or off-topic responses. Finally,\nthere are technical limitations, such as
+        the chatbot''s inability to\nreason or make inferences.\n\nThis article explores
+        a potential solution by combining two influential\napproaches in the field
+        of Natural Language Processing\u200a---\u200aRetrieval\nAugmented Generation
+        (**RAG**) and Knowledge Graphs(**KGs**). We will\ndelve into the partnership
+        between these two entities, discuss the\nnotable technologies and software
+        used in their processes, and highlight\nvarious options for utilizing their
+        combined potential.\n\n### **RAG**\n\nRetrieval-Augmented Generation is the
+        process of optimizing the output\nof a large language model using a knowledge
+        base outside of its training\ndata sources before generating a response. It
+        takes an input and\nretrieves a set of relevant/supporting documents given
+        a source (e.g.,\nWikipedia). This can be thought of as a Large Language Model
+        (LLM) not\njust putting words together, but carefully selecting relevant\ninformation
+        from external sources and Knowledge Graphs to create\nwell-informed and detailed
+        responses.\n\n### RAG Retrieval Techniques\n\nThe following are some crucial
+        technologies that enable RAG''s impressive\nability to retrieve and incorporate
+        relevant information:\n\n**Vector Search**: It transforms text into numerical
+        vectors, capturing\ntheir meaning and nuances in a mathematical space, creating
+        a map of\nrelationships. Similar texts, like those discussing shared topics
+        or\nusing similar language, end up positioned close together in this space,\nallowing
+        vector search to quickly identify them as related. This allows\nlightning-fast
+        comparisons, finding similar texts based on meaning, not\njust keywords.\n\nAlgorithms
+        like [**Faiss**](https://github.com/facebookresearch/faiss)\nand [**Annoy**](https://github.com/spotify/annoy)
+        map text into dense\nvectors, enabling fast comparisons and retrieval of relevant
+        passages\nbased on semantic similarity.\n\n**Passage Ranking**: It is an internal
+        algorithm that scores candidate\ntext passages based on their relevance to
+        a query. It considers factors\nlike keyword frequency, keyword overlap, and
+        document structure to act\nlike a judge, sifting through information to select
+        the most fitting and\ninformative passages.\n\nKeyword overlap measures how
+        often the same keywords appear in **both**\nthe query and the candidate passage,
+        emphasizing shared vocabulary and\npotential relevance. It differs from keyword
+        frequency, which simply\ncounts how often individual keywords appear within
+        a passage, regardless\nof their presence in the\u00a0query.\n\nTechniques
+        like [**BM25**](https://github.com/getalp/wikIR) and\n[**TF-IDF**](https://github.com/marcocor/wikipedia-idf)
+        score candidate\npassages based on keyword overlap and frequency, ensuring
+        retrieved\ninformation truly fits the\u00a0context.\n\n**Graph Neural Networks**
+        (**GNNs**): They are neural networks designed\nto explore and learn from interconnected
+        data like maps, social\nnetworks, and other complex relationships. Unlike
+        traditional processing\nmethods that go through data in a linear fashion,
+        GNNs are capable of\nrecognizing hidden patterns and understanding relationships
+        like \"who\nknows who\" and \"what connects to what\" by \"hopping\" across
+        connections\nin\u00a0data.\n\nConsider a graph as a network of dots(nodes)
+        connected by lines (edges).\nEach dot represents some information, like a
+        person, object, or concept.\nThe lines tell you how these things relate to
+        each\u00a0other.\n\nGNNs work in rounds. In each\u00a0round:\n\n1.  Message
+        Passing: Each node \"talks\" to its neighbors, sending\n    messages along
+        the edges. These messages contain information about\n    the node itself and
+        its features.\n2.  Node Update: Each node receives messages from all its neighbors
+        and\n    combines them with its own information. This update can involve\n    calculations
+        and applying a special function.\n3.  Output Calculation: Based on the updated
+        information, the network\n    calculates an output for each node. This output
+        could be a\n    prediction about the node''s category, its relationship to
+        another\n    node, or some other relevant information.\n\nThis process repeats
+        for multiple rounds, allowing nodes to incorporate\ninformation from their
+        entire neighborhood, not just their direct\nneighbors. As the rounds progress,
+        the network learns to understand the\nrelationships between nodes and the
+        overall structure of the\u00a0graph.\n\nWhen dealing with Knowledge Graphs,
+        frameworks like\n[**PyTorch-Geometric**](https://readthedocs.org/projects/pytorch-geometric/)\nand
+        [**DeepMind''s\nGNN**](https://github.com/deepmind/deepmind-research/blob/master/learning_to_simulate/graph_network.py)\nlibrary
+        come into play. These frameworks allow GNNs to traverse\ninterconnected entities
+        and relationships within the graph, retrieve\nrelevant knowledge fragments,
+        and understand complex connections.\n\n### **Knowledge Graphs: The Structured
+        Wisdom\u00a0Library**\n\nA knowledge graph, also referred to as a semantic
+        network, is a\nstructure that represents a network of real-world entities
+        such as\nobjects, events, situations, or concepts. It helps to illustrate
+        the\nconstantly changing representations of the world, connecting entities\n(such
+        as \"Marie Curie\") and relationships (such as \"won Nobel Prize\") to\nform
+        a complex network of information. This information is typically\nstored in
+        a graph database and visualized as a graph structure, thus the\nterm knowledge
+        \"graph\".\n\nKGs go beyond simply finding relevant facts and delve deeper
+        into\nunderstanding the relationships and insights hidden within using these\nprocesses:\n\n**Entity
+        Linking**: Imagine a vast network of information, like a big\npuzzle of dots.
+        Now imagine trying to connect specific names, places,\nand concepts to their
+        corresponding dots in the puzzle. That is what\nentity linking does with text
+        and knowledge graphs, connecting the\nspecific components of the text to the
+        corresponding nodes in the graph.\nThey help systems understand the exact
+        meaning of entities, and find\nrelevant information from the\u00a0graph.\n\nLibraries
+        like [**DGL-KeLP**](https://github.com/awslabs/dgl-ke)\nleverage GNNs to identify
+        and link named entities (like \"Marie Curie\")\nto their respective nodes
+        within the Knowledge Graphs, enabling RAG to\nretrieve information that is
+        directly relevant to the core subject of a\nsearch\u00a0query\n\n**Path Mining**:
+        Path mining is a process of uncovering hidden\nrelationships and patterns
+        that are not easily noticeable. It involves\nexploring complicated networks
+        of information and identifying and\ntracing connections between entities that
+        may seem unrelated. By doing\nso, path mining reveals surprising insights
+        and useful knowledge,\nimproving our understanding of the complex structures
+        within knowledge\ngraphs.\n\nTools like [**Neo4j**](https://neo4j.com/) and\n[**Stanza**](https://github.com/stanfordnlp/stanza)
+        allow traversing\npaths between entities, uncovering hidden relationships,
+        and generating\ninsightful responses based on this deeper understanding.\n\n**Reasoning
+        and Inference**: In the context of knowledge graphs,\nreasoning and inference
+        are not just limited to discovering facts; they\nare also concerned with utilizing
+        them effectively. This involves\nintegrating data, drawing meaningful connections,
+        and using logical\nreasoning to resolve issues, foresee future occurrences,
+        or even\nconstruct narratives leveraging the insights provided by the knowledge\ngraph.\n\nConsider
+        the scenario of trying to find an organization that works in\nspecific sectors
+        with the help of a knowledge graph. This analogy\neffectively highlights the
+        active role of reasoning and inference in\nknowledge graphs:\n\n1.  Gathering
+        Facts: Knowledge graphs collect and organize information\n    from various
+        sources, such as websites, databases, academic papers,\n    and social media
+        platforms. These facts are represented as\n    structured data, with entities
+        (e.g., organizations) and their\n    attributes (e.g., sectors in which they
+        operate) forming nodes and\n    edges in the graph. By combining data about
+        organizations and\n    sectors, knowledge graphs enable the gathering of relevant
+        facts for\n    analysis.\n2.  Integrating information: By connecting an organization''s\n    relationships
+        with specific sectors, such as partnerships,\n    investments, or certifications,
+        knowledge graphs reveal the scope\n    and relevance of their work within
+        those sectors. Links to related\n    entities like employees, board members,
+        or projects can further\n    contribute to understanding an organization''s
+        involvement in\n    specific\u00a0sectors.\n3.  Predicting and Creating: Knowledge
+        graphs can leverage machine\n    learning and predictive models to infer missing
+        or hidden\n    information. By analyzing the available facts and connections
+        within\n    the graph, these models can predict an organization''s potential\n    involvement
+        in sectors that have common attributes with their known\n    areas of operation.
+        For example, if an organization has expertise in\n    renewable energy, predictive
+        models could suggest their likely\n    involvement in related sectors like
+        clean transportation or\n    sustainable infrastructure. Additionally, knowledge
+        graphs\n    facilitate the creation of new information and insights by combining\n    existing
+        facts with external data sources. For instance, by\n    integrating real-time
+        data on industry trends, market analysis, or\n    news articles, knowledge
+        graphs enable the discovery of emerging\n    sectors or upcoming organizations
+        that might align with the given\n    parameters.\n\nA framework like [**Atomspace**](https://github.com/opencog/atomspace)\nfrom
+        [**OpenCog**](https://opencog.org/) empowers RAG to reason and\ninfer new
+        knowledge. By traversing paths and combining information from\ninterconnected
+        entities, the system can generate informed predictions or\nanswer hypothetical
+        questions.\n\n### Purpose\n\nThe combination of Retrieval-Augmented Generation
+        (RAG) and Knowledge\nGraphs (KG) is beneficial for several\u00a0reasons:\n\n1.  **Enhanced
+        information retrieval**: Knowledge graphs provide\n    structured and interconnected
+        information that can significantly\n    improve the effectiveness of information
+        retrieval. By using KGs,\n    RAG models can retrieve more accurate and relevant
+        information,\n    leading to better generation and response\u00a0quality.\n2.  **Reliable
+        and diverse information:** KGs are constructed from\n    authoritative sources,
+        making them reliable and trustworthy sources\n    of information. RAG models
+        can leverage this reliable information to\n    generate more accurate responses.
+        Additionally, KGs help in\n    diversifying the generated responses by providing
+        a broader pool of\n    related facts and entities.\n3.  **Context-aware understanding**:
+        KGs enable RAG models to understand\n    and reason over the contextual information.
+        By leveraging the\n    relationships and semantic connections encoded in KGs,
+        RAG models\n    can better grasp the context of user queries or conversations,\n    resulting
+        in more coherent and appropriate responses.\n4.  **Handling complex queries**:
+        KGs allow RAG models to tackle complex\n    queries by breaking them down
+        into smaller sub-queries, retrieving\n    relevant pieces of information from
+        the KG, and then generating a\n    response based on the retrieved knowledge.
+        This enables RAG models\n    to handle a wide range of user queries effectively.\n5.  **Explainability
+        and transparency**: KGs provide a transparent and\n    interpretable representation
+        of knowledge. By integrating KG-based\n    retrieval into RAG models, the
+        reasoning behind the generated\n    responses becomes more explainable. Users
+        can have a clear\n    understanding of the knowledge sources and connections
+        used to\n    produce the response.\n6.  **Scalability**: Knowledge graphs
+        act as large-scale repositories of\n    information. RAG models can leverage
+        KGs to generate responses to\n    various queries or conversations without
+        requiring additional\n    supervised training data. This makes the RAG+KG
+        approach scalable to\n    handle an extensive range of knowledge domains and
+        user\u00a0queries.\n\n### **Pipeline Possibilities: Orchestrating RAG and\u00a0KGs:**\n\nLet''s
+        explore some exciting pipeline options for harnessing the combined\npower
+        of RAG and Knowledge Graphs. There are two options in which either\nthe LLM
+        is prioritized or the Knowledge Graph is prioritized:\n\n**Option 1: LLM-Centric
+        Pipeline:**\n\nThe LLM-Centric pipeline is a RAG and Knowledge Graph combination
+        that\nempowers LLMs to craft well-informed responses. Here''s how it\u00a0works:\n\n1.  Start
+        with the user''s question or statement\n2.  The LLM (like GPT-3) generates
+        an initial draft response based on\n    its internal knowledge. This draft
+        may lack specific factual details\n    or nuances that a knowledge graph can\u00a0provide.\n3.  RAG
+        kicks in, searching the text corpus or the Knowledge Graph for\n    relevant
+        passages that enrich the draft. During the retrieval\n    process, RAG retrieval
+        techniques are used to search not only text\n    corpora but also knowledge
+        graphs to find relevant information. This\n    means that RAG can directly
+        tap into the structured knowledge within\n    the graph to retrieve facts,
+        relationships, and entities that align\n    with the user''s query and the
+        LLM''s generated draft.\n4.  The retrieved information is carefully fused
+        with the LLM''s output,\n    creating a more factually accurate and insightful
+        response\n5.  A final polishing step ensures the response is fluent, grammatically\n    correct,
+        and ready to\u00a0show.\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/0*3pd9MOIflkbS07wI\"
+        />\n<figcaption>RAG LLM-centric generic\u00a0scheme.</figcaption>\n</figure>\n\nThe
+        basic steps to perform this\u00a0are:\n\n1.  **Pre-processing**: Clean and
+        tokenize user input to prepare for\n    processing.\n2.  **LLM Generation**:
+        Generate an initial draft response using an LLM\n    like [**GPT-3**](https://openai.com/product)
+        or [**Jurassic-1\n    Jumbo**](https://www.livescience.com/google-sentient-ai-lamda-lemoine).\n3.  **Retrieval**:
+        Employ RAG techniques to retrieve relevant passages\n    from a text corpus
+        or Knowledge Graphs.\n4.  **Fusion**: Integrate retrieved information into
+        the LLM-generated\n    draft, creating a more informed and factually-grounded
+        response.\n5.  **Post-processing**: Refine the final response for fluency,\n    grammatical
+        correctness, and overall coherence.\n\n**Option 2: Knowledge Graphs-Centric
+        Pipeline:**\n\nIn this approach, knowledge graphs take center stage. In essence,
+        this\npipeline prioritizes the structured knowledge within knowledge graphs,\nusing
+        RAG retrieval techniques to translate those insights into\ncompelling and
+        informative language. Here''s how it\u00a0unfolds:\n\n1.  User input: The
+        process begins with the user''s question or statement\n2.  Graph exploration:
+        The knowledge graph is meticulously explored to\n    identify relevant entities,
+        relationships, and paths that align with\n    the user''s input. This stage
+        involves techniques like entity\n    linking, path mining, and reasoning to
+        uncover valuable information\n    within the\u00a0graph\n3.  Response planning:
+        The insights extracted from the graph are used to\n    create a structured
+        response plan. This plan outlines the key\n    points, facts, and logical
+        flow that the final response\n    should\u00a0embody\n4.  Language generation:
+        This is where RAG steps in. Its purpose is to\n    create human-like text
+        that follows the response plan. It uses LLMs\n    to produce well-written
+        sentences and paragraphs, combining the\n    relevant information from the
+        knowledge graph while maintaining\n    cohesiveness and readability.\n5.  Post-processing:
+        The generated response undergoes a final refinement\n    process to ensure
+        grammatical correctness, clarity, and\n    overall\u00a0quality\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/0*mZ83esKBjbPmCq_C\"
+        />\n<figcaption>RAG Knowledge Graph-centric generic\u00a0scheme.</figcaption>\n</figure>\n\nThe
+        basic steps\u00a0are:\n\n1.  **Query Formulation**: Transform the user input
+        into a query\n    suitable for Knowledge Graph''s exploration.\n2.  **Knowledge
+        Graphs:** You can use either Neo4j or\n    [NebulaGraph](https://www.nebula-graph.io/)
+        to implement a retrieval\n    enhancement technique. This technique involves
+        utilizing a knowledge\n    graph to illustrate the connections between entities
+        and\n    relationships. Additionally, it incorporates a powerful language\n    model
+        to improve the retrieval process.\n3.  **Fact Selection**: Employ entity linking
+        and reasoning algorithms\n    to select and prioritize the most relevant facts
+        based on the query\n    and\u00a0context.\n4.  **Natural Language Generation**
+        (**NLG**): Utilise specialized NLG\n    models like\n    [BART](https://research.facebook.com/publications/controllable-abstractive-summarization/)\n    to
+        translate the extracted facts into a natural language response.\n5.  **Refinement**:
+        Enhance the generated response for clarity and\n    coherence.\n\n### **Unveiling
+        a Future of Intelligent Interaction**\n\nThe combination of RAG and Knowledge
+        Graphs goes beyond just being a\ntechnological fusion. It paves the way for
+        a future where the\ninteraction between humans and computers goes beyond simple
+        words and\nbecomes a more informed and refined form of communication. As these\ntechnologies
+        continue to develop, we can expect to witness a significant\ntransformation
+        in:\n\n- AI-powered assistants that answer your questions with the confidence\n  of
+        a well-read friend, seamlessly combining relevant facts and\n  insights gleaned
+        from Knowledge Graphs.\n- Next-generation search engines that go beyond keyword
+        matching,\n  understanding the deeper meaning behind your queries and delivering\n  comprehensive,
+        contextual results enriched with information from\n  Knowledge Graphs.\n-
+        Creative writing tools that utilize RAG and Knowledge Graphs to\n  generate
+        stories that are both factually accurate and full of\n  unexpected plot twists
+        and character development, moving beyond\n  clich\u00e9d patterns.\n\n###
+        **Conclusion**\n\nThe convergence of Retrieval Augmented Generation (RAG)
+        and Knowledge\nGraphs (KGs) brings about an exciting synergy in the world
+        of Natural\nLanguage Processing (NLP). RAG enhances the output of large language\nmodels
+        by carefully selecting relevant information from external sources\nand KGs,
+        allowing for well-informed and detailed responses. KGs, on the\nother hand,
+        provide a structured representation of real-world entities\nand their relationships,
+        enabling the exploration of hidden insights and\nthe discovery of complex
+        connections.\n\nThe integration of RAG and KGs opens up two pipeline possibilities.
+        The\nLLM-centric pipeline prioritizes the language model''s output, which
+        is\nthen enriched with information retrieved from KGs. The Knowledge\nGraphs-centric
+        pipeline, on the other hand, places KGs at the center,\nutilizing RAG techniques
+        to translate the structured insights into\ncompelling and informative language.\n\nWhile
+        integrating LLMs and a knowledge graph for content retrieval\nrequires careful
+        planning, the reward is significant. You can gain\naccess to hidden relationships
+        within information, ultimately leading to\nhigher-quality output information.\n\nTools
+        like **OpenAI**, **Langchain**, and **LlamaIndex** provide\nready-made pipelines
+        to integrate knowledge graphs (like **Neo4j**)\neasily. Meanwhile, open-source
+        LLMs like **Mistral**, **Llama**, and\n**Dolphin** are catching up to proprietary
+        models in performance, making\nthem attractive choices for building custom
+        architectures. This\nopen-source scenario allows for the exploration and examination
+        of\nvarious methods before fully committing to a particular technological\nframework.
+        So, it is crucial to evaluate your needs and choose the\napproach that best
+        fits your use\u00a0case.\n\n![](https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=fc0a6900f7eb){width=\"1\"\nheight=\"1\"}\n","doi":"https://doi.org/10.59350/jhrs4-22440","guid":"https://medium.com/p/fc0a6900f7eb","id":"05f01f68-ef81-47d7-a3c1-40aba91d358f","image":"https://cdn-images-1.medium.com/max/1024/1*bJ3eWZ7301vYDzBomwdLfQ.png","indexed_at":1706690571,"language":"en","published_at":1705557796,"reference":[],"relationships":[],"summary":"<strong>\n
+        Tools and Platform for Integration of Knowledge Graph with RAG pipelines.\n</strong>\nAuthors:
+        Aland Astudillo, Aishwarya Nambissan Many users of chatbots such as ChatGPT,
+        have encountered the problem of receiving inappropriate or incompatible responses.
+        There are several reasons why this might\u00a0happen. One reason is the lack
+        of appropriate training data, as chatbots are usually trained on large amounts
+        of text and code.","tags":["Artificial-intelligence","Machine-learning","Retrieval-augmented","Knowledge-graph"],"title":"Unveiling
+        the Synergy: Retrieval Augmented Generation (RAG) Meets Knowledge Graphs","updated_at":1705557796,"url":"https://medium.com/@researchgraph/unveiling-the-synergy-retrieval-augmented-generation-rag-meets-knowledge-graphs-fc0a6900f7eb"}
+        '
+  recorded_at: Wed, 31 Jan 2024 19:50:01 GMT
+recorded_with: VCR 6.2.0

data/spec/readers/datacite_reader_spec.rb CHANGED Viewed

@@ -90,7 +90,7 @@ describe Commonmeta::Metadata, vcr: true do
         "affiliation" => [{ "name" => "Тверская государственная сельскохозяйственная академия" }], "familyName" => "Ганичева", "givenName" => "А.В.", "type" => "Person", "contributorRoles" => ["Author"],
       )
       expect(subject.titles.last).to eq("title" => "MODEL OF SYSTEM DYNAMICS OF PROCESS OF TRAINING",
-                                        "titleType" => "TranslatedTitle")
+                                        "type" => "TranslatedTitle")
       expect(subject.date).to eq("created" => "2019-02-12", "published" => "2019",
                                  "registered" => "2019-02-12", "updated" => "2022-08-23")
       expect(subject.publisher).to eq("name" => "МОДЕЛИРОВАНИЕ, ОПТИМИЗАЦИЯ И ИНФОРМАЦИОННЫЕ ТЕХНОЛОГИИ")
@@ -114,10 +114,14 @@ describe Commonmeta::Metadata, vcr: true do
       expect(subject.contributors.first).to eq(
         "name" => "Europäische Kommission", "contributorRoles" => ["Author"], "type" => "Organization",
       )
-      expect(subject.titles).to eq([
-                                     { "lang" => "de",
-                                       "title" => "Flash Eurobarometer 54 (Madrid Summit)" }, { "lang" => "en", "title" => "Flash Eurobarometer 54 (Madrid Summit)" }, { "titleType" => "Subtitle", "lang" => "de", "title" => "The Common European Currency" }, { "titleType" => "Subtitle", "lang" => "en", "title" => "The Common European Currency" },
-                                   ])
+      expect(subject.titles).to eq([{ "language" => "de", "title" => "Flash Eurobarometer 54 (Madrid Summit)" },
+                                    { "language" => "en", "title" => "Flash Eurobarometer 54 (Madrid Summit)" },
+                                    { "language" => "de",
+                                      "title" => "The Common European Currency",
+                                      "type" => "Subtitle" },
+                                    { "language" => "en",
+                                      "title" => "The Common European Currency",
+                                      "type" => "Subtitle" }])
       expect(subject.subjects).to eq([{ "lang" => "en",
                                         "subject" => "KAT12 International Institutions, Relations, Conditions",
                                         "subjectScheme" => "ZA" },
@@ -163,14 +167,39 @@ describe Commonmeta::Metadata, vcr: true do
       expect(subject.contributors.length).to eq(23)
       expect(subject.contributors[0]).to eq("contributorRoles" => ["Author"], "familyName" => "ExampleFamilyName", "givenName" => "ExampleGivenName", "type" => "Person")
       expect(subject.contributors[2]).to eq("contributorRoles" => ["ContactPerson"], "familyName" => "ExampleFamilyName", "givenName" => "ExampleGivenName", "type" => "Person")
-      expect(subject.date).to eq("created"=>"2022-10-27", "published"=>"2022", "registered"=>"2022-10-27", "updated"=>"2024-01-02")
+      expect(subject.date).to eq("created" => "2022-10-27", "published" => "2022", "registered" => "2022-10-27", "updated" => "2024-01-02")
       expect(subject.publisher).to eq("name" => "Example Publisher")
-      expect(subject.license).to eq("id"=>"CC-PDDC", "url"=>"https://creativecommons.org/licenses/publicdomain/")
+      expect(subject.titles).to eq([{ "language" => "en", "title" => "Example Title" },
+                                    { "language" => "en", "title" => "Example Subtitle", "type" => "Subtitle" },
+                                    { "language" => "fr",
+                                      "title" => "Example TranslatedTitle",
+                                      "type" => "TranslatedTitle" },
+                                    { "language" => "en",
+                                      "title" => "Example AlternativeTitle",
+                                      "type" => "AlternativeTitle" }])
+      expect(subject.descriptions).to eq([{ "description" => "Example Abstract",
+                                            "type" => "Abstract",
+                                            "language" => "en" },
+                                          { "description" => "Example Methods",
+                                            "type" => "Methods",
+                                            "language" => "en" },
+                                          { "description" => "Example SeriesInformation",
+                                            "type" => "Other",
+                                            "language" => "en" },
+                                          { "description" => "Example TableOfContents",
+                                            "type" => "Other",
+                                            "language" => "en" },
+                                          { "description" => "Example TechnicalInfo",
+                                            "type" => "TechnicalInfo",
+                                            "language" => "en" },
+                                          { "description" => "Example Other", "type" => "Other", "language" => "en" }])
+      expect(subject.license).to eq("id" => "CC-PDDC", "url" => "https://creativecommons.org/licenses/publicdomain/")
     end
     it "instrument" do
       input = "#{fixture_path}datacite-instrument.json"
       subject = described_class.new(input: input)
+      puts subject.errors unless subject.valid?
       expect(subject.valid?).to be true
       expect(subject.id).to eq("https://doi.org/10.82433/08qf-ee96")
       expect(subject.type).to eq("Instrument")

data/spec/readers/json_feed_reader_spec.rb CHANGED Viewed

@@ -189,6 +189,31 @@ describe Commonmeta::Metadata, vcr: true do
       expect(subject.references).to be_nil
     end
+    it "medium post with institutional author" do
+      input = "https://api.rogue-scholar.org/posts/05f01f68-ef81-47d7-a3c1-40aba91d358f"
+      subject = described_class.new(input: input)
+      # expect(subject.valid?).to be true
+      expect(subject.id).to eq("https://doi.org/10.59350/jhrs4-22440")
+      expect(subject.url).to eq("https://medium.com/@researchgraph/unveiling-the-synergy-retrieval-augmented-generation-rag-meets-knowledge-graphs-fc0a6900f7eb")
+      expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "05f01f68-ef81-47d7-a3c1-40aba91d358f", "alternateIdentifierType" => "UUID" }])
+      expect(subject.type).to eq("Article")
+      expect(subject.contributors.length).to eq(1)
+      expect(subject.contributors.first).to eq("contributorRoles"=>["Author"], "name"=>"Research Graph", "type"=>"Organization")
+      expect(subject.titles).to eq([{ "title" => "Unveiling the Synergy: Retrieval Augmented Generation (RAG) Meets Knowledge Graphs" }])
+      expect(subject.license).to eq("id" => "CC-BY-4.0",
+                                    "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
+      expect(subject.date).to eq("published"=>"2024-01-18", "updated"=>"2024-01-18")
+      expect(subject.descriptions.first["description"]).to start_with("<strong> Tools and Platform for Integration of Knowledge Graph with RAG pipelines.")
+      expect(subject.publisher).to eq("name" => "Research Graph")
+      expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" },
+                                      { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
+                                        "subject" => "FOS: Computer and information sciences",
+                                        "subjectScheme" => "Fields of Science and Technology (FOS)" }])
+      expect(subject.language).to eq("en")
+      expect(subject.container).to eq("identifier" => "https://medium.com/@researchgraph", "identifierType" => "URL", "title" => "Research Graph", "type" => "Periodical")
+      expect(subject.references).to be_nil
+    end
     it "syldavia gazette post with references" do
       input = "https://api.rogue-scholar.org/posts/0022b9ef-525a-4a79-81ad-13411697f58a"
       subject = described_class.new(input: input)

data/spec/writers/commonmeta_writer_spec.rb CHANGED Viewed

@@ -33,11 +33,38 @@ describe Commonmeta::Metadata, vcr: true do
                                              "volume" => "426",
                                              "firstPage" => "181",
                                              "containerTitle" => "Nature")
-      expect(json["date"]).to eq("published"=>"2014-02-11", "updated"=>"2022-03-26")
+      expect(json["date"]).to eq("published" => "2014-02-11", "updated" => "2022-03-26")
       expect(json["descriptions"].first["description"]).to start_with("Among various advantages,")
-      expect(json["license"]).to eq("id"=>"CC-BY-3.0", "url"=>"https://creativecommons.org/licenses/by/3.0/legalcode")
+      expect(json["license"]).to eq("id" => "CC-BY-3.0", "url" => "https://creativecommons.org/licenses/by/3.0/legalcode")
       expect(json["provider"]).to eq("Crossref")
-      expect(json["files"].first).to eq("mimeType"=>"application/pdf", "url"=>"https://cdn.elifesciences.org/articles/01567/elife-01567-v1.pdf")
+      expect(json["files"].first).to eq("mimeType" => "application/pdf", "url" => "https://cdn.elifesciences.org/articles/01567/elife-01567-v1.pdf")
+    end
+    it "dataset schema v4.5" do
+      input = "#{fixture_path}datacite-dataset_v4.5.json"
+      subject = described_class.new(input: input)
+      expect(subject.id).to eq("https://doi.org/10.82433/b09z-4k37")
+      json = JSON.parse(subject.commonmeta)
+      expect(json["id"]).to eq("https://doi.org/10.82433/b09z-4k37")
+      expect(json["type"]).to eq("Dataset")
+      expect(json["titles"]).to eq([{ "language" => "en", "title" => "Example Title" },
+                                    { "language" => "en", "title" => "Example Subtitle", "type" => "Subtitle" },
+                                    { "language" => "fr",
+                                      "title" => "Example TranslatedTitle",
+                                      "type" => "TranslatedTitle" },
+                                    { "language" => "en",
+                                      "title" => "Example AlternativeTitle",
+                                      "type" => "AlternativeTitle" }])
+      expect(json["descriptions"]).to eq([{ "description" => "Example Abstract", "language" => "en", "type" => "Abstract" },
+                                          { "description" => "Example Methods", "language" => "en", "type" => "Methods" },
+                                          { "description" => "Example SeriesInformation",
+                                            "language" => "en",
+                                            "type" => "Other" },
+                                          { "description" => "Example TableOfContents", "language" => "en", "type" => "Other" },
+                                          { "description" => "Example TechnicalInfo",
+                                            "language" => "en",
+                                            "type" => "TechnicalInfo" },
+                                          { "description" => "Example Other", "language" => "en", "type" => "Other" }])
     end
   end
 end

data/spec/writers/csl_writer_spec.rb CHANGED Viewed

@@ -7,6 +7,7 @@ describe Commonmeta::Metadata, vcr: true do
     it 'Dataset' do
       input = 'https://doi.org/10.5061/DRYAD.8515'
       subject = described_class.new(input: input, from: 'datacite')
+      puts subject.errors unless subject.valid?
       expect(subject.valid?).to be true
       json = JSON.parse(subject.csl)
       expect(json['type']).to eq('dataset')

data/spec/writers/csv_writer_spec.rb CHANGED Viewed

@@ -37,6 +37,7 @@ describe Commonmeta::Metadata, vcr: true do
     it 'text' do
       input = 'https://doi.org/10.3204/desy-2014-01645'
       subject = described_class.new(input: input, from: 'datacite')
+      puts subject.errors unless subject.valid?
       expect(subject.valid?).to be true
       csv = subject.csv.parse_csv

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: commonmeta-ruby
 version: !ruby/object:Gem::Version
-  version: 3.11.0
+  version: 3.12.1
 platform: ruby
 authors:
 - Martin Fenner
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-01-26 00:00:00.000000000 Z
+date: 2024-02-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -694,7 +694,7 @@ files:
 - lib/commonmeta/xml_converter.rb
 - resources/2008/09/xsd.xsl
 - resources/cff.json
-- resources/commonmeta_v0.10.6.json
+- resources/commonmeta_v0.10.7.json
 - resources/crossref/AccessIndicators.xsd
 - resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd
 - resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd
@@ -921,6 +921,7 @@ files:
 - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_without_doi.yml
 - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post.yml
 - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post_with_anonymous_author.yml
+- spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml
 - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml
 - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/syldavia_gazette_post_with_references.yml
 - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/upstream_post_with_references.yml