commonmeta-ruby 3.11.0 → 3.12.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7bc4df0a364fc1ff53342960355c5fc534a0c479cf825cc0d856e282e42e7d49
4
- data.tar.gz: 57dcd210498057807f46ffb851eb40b4db189636d915152a77de1fd1191012c9
3
+ metadata.gz: b48e44936ddd71a38a9d019c33972d02cf66af57c02542f48d37a022017c8208
4
+ data.tar.gz: deb5ca7a1b1ec9387583e0039edc45ab382d8989c31012dacd81560c8cbaeaa3
5
5
  SHA512:
6
- metadata.gz: 2a4265c7a8d17ab99b963459015232400bcd7541e4c827e2bf0863e9809fbe64d70d0ce6ced7dbcf2a9186beca2075be73617bd3d3cace2e4b9c67e39d8e756a
7
- data.tar.gz: 765b9d3d29683badac4ed6b77ac6f86f6b939f5081d23016c1484a9f8b7e0d26039559a9f6d5c2e2b8fe5c06bc1d70d5e09e17f16a5a24c6c9bb84291a8c1548
6
+ metadata.gz: e72bf66b0e72b62640d6f528c2279b119499a225acbe26498efe2afc7c5679b018175097144f6fef3a592186ae518e4073b9f209a31a56bda82e697fc3287408
7
+ data.tar.gz: '099621bbc5109437cf592a34bb8810d6d7c6a26c68bec0e13c43061410066218bf38fc0a3f809a84dc9f2861075a2b6a53d41836da9c2e909e5096529b7f87cd'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- commonmeta-ruby (3.11.0)
4
+ commonmeta-ruby (3.12.1)
5
5
  activesupport (>= 4.2.5, < 8.0)
6
6
  addressable (~> 2.8.1, < 2.8.2)
7
7
  base32-url (>= 0.7.0, < 1)
@@ -58,7 +58,8 @@ GEM
58
58
  rubocop (~> 1.0)
59
59
  concurrent-ruby (1.2.3)
60
60
  connection_pool (2.4.1)
61
- crack (0.4.5)
61
+ crack (0.4.6)
62
+ bigdecimal
62
63
  rexml
63
64
  crass (1.0.6)
64
65
  csl (2.0.0)
@@ -66,7 +67,7 @@ GEM
66
67
  rexml
67
68
  csl-styles (2.0.1)
68
69
  csl (~> 2.0)
69
- diff-lcs (1.5.0)
70
+ diff-lcs (1.5.1)
70
71
  docile (1.4.0)
71
72
  domain_name (0.6.20240107)
72
73
  drb (2.2.0)
@@ -154,7 +155,7 @@ GEM
154
155
  iniparser (>= 0.1.0)
155
156
  public_suffix (4.0.7)
156
157
  racc (1.7.3)
157
- rack (3.0.8)
158
+ rack (3.0.9)
158
159
  rack-test (2.1.0)
159
160
  rack (>= 1.3)
160
161
  rainbow (3.1.1)
@@ -25,8 +25,8 @@ module Commonmeta
25
25
  "Researcher" => "Other",
26
26
  "Sponsor" => "Other",
27
27
  "Supervisor" => "Supervision",
28
- "WorkPackageLeader" => "Other"
29
- }
28
+ "WorkPackageLeader" => "Other",
29
+ }
30
30
 
31
31
  def get_one_author(author)
32
32
  # basic sanity checks
@@ -55,20 +55,20 @@ module Commonmeta
55
55
  parse_attributes(author.fetch("identifier", nil), first: true) ||
56
56
  parse_attributes(author.fetch("sameAs", nil), first: true)
57
57
  id = normalize_orcid(id) || normalize_ror(id) if id.present?
58
-
58
+
59
59
  # DataCite metadata
60
60
  if id.nil? && author["nameIdentifiers"].present?
61
61
  id = Array.wrap(author.dig("nameIdentifiers")).find do |ni|
62
62
  normalize_name_identifier(ni).present?
63
63
  end
64
64
  id = normalize_name_identifier(id) if id.present?
65
- # Crossref metadata
65
+ # Crossref metadata
66
66
  elsif id.nil? && author["ORCID"].present?
67
67
  id = author.fetch("ORCID")
68
68
  id = normalize_orcid(id)
69
- # JSON Feed metadata
69
+ # JSON Feed metadata
70
70
  elsif id.nil? && author["url"].present?
71
- id = author.fetch("url")
71
+ id = author.fetch("url")
72
72
  end
73
73
 
74
74
  # parse author type, i.e. "Person", "Organization" or not specified
@@ -168,6 +168,9 @@ module Commonmeta
168
168
  # check if a name has only one word, e.g. "FamousOrganization", not including commas
169
169
  return false if name.to_s.split(" ").size == 1 && name.to_s.exclude?(",")
170
170
 
171
+ # check if name contains words known to be used in organization names
172
+ return false if %w[University College Institute School Center Department Laboratory Library Museum Foundation Society Association Company Corporation Collaboration Consortium Incorporated Inc. Institut Research Science Team].any? { |word| name.to_s.include?(word) }
173
+
171
174
  # check for suffixes, e.g. "John Smith, MD"
172
175
  return true if name && %w[MD PhD].include?(name.split(", ").last)
173
176
 
@@ -4,29 +4,29 @@ module Commonmeta
4
4
  module Readers
5
5
  module DataciteReader
6
6
  def get_datacite(id: nil, **options)
7
- return { 'string' => nil, 'state' => 'not_found' } unless id.present?
7
+ return { "string" => nil, "state" => "not_found" } unless id.present?
8
8
 
9
9
  api_url = datacite_api_url(id, options)
10
10
  response = HTTP.get(api_url)
11
- return { 'string' => nil, 'state' => 'not_found' } unless response.status.success?
11
+ return { "string" => nil, "state" => "not_found" } unless response.status.success?
12
12
 
13
13
  body = JSON.parse(response.body)
14
- client = Array.wrap(body.fetch('included', nil)).find do |m|
15
- m['type'] == 'clients'
14
+ client = Array.wrap(body.fetch("included", nil)).find do |m|
15
+ m["type"] == "clients"
16
16
  end
17
- client_id = client.to_h.fetch('id', nil)
18
- provider_id = Array.wrap(client.to_h.fetch('relationships', nil)).find do |m|
19
- m['provider'].present?
20
- end.to_h.dig('provider', 'data', 'id')
21
-
22
- { 'string' => response.body.to_s,
23
- 'provider_id' => provider_id,
24
- 'client_id' => client_id }
17
+ client_id = client.to_h.fetch("id", nil)
18
+ provider_id = Array.wrap(client.to_h.fetch("relationships", nil)).find do |m|
19
+ m["provider"].present?
20
+ end.to_h.dig("provider", "data", "id")
21
+
22
+ { "string" => response.body.to_s,
23
+ "provider_id" => provider_id,
24
+ "client_id" => client_id }
25
25
  end
26
26
 
27
27
  def read_datacite(string: nil, **_options)
28
28
  errors = jsonlint(string)
29
- return { 'errors' => errors } if errors.present?
29
+ return { "errors" => errors } if errors.present?
30
30
 
31
31
  read_options = ActiveSupport::HashWithIndifferentAccess.new(_options.except(:doi, :id, :url,
32
32
  :sandbox, :validate, :ra))
@@ -34,140 +34,146 @@ module Commonmeta
34
34
  meta = string.present? ? JSON.parse(string) : {}
35
35
 
36
36
  # optionally strip out the message wrapper from API
37
- meta = meta.dig('data', 'attributes') if meta.dig('data').present?
37
+ meta = meta.dig("data", "attributes") if meta.dig("data").present?
38
38
 
39
39
  meta.transform_keys!(&:underscore)
40
40
 
41
- id = normalize_doi(meta.fetch('doi', nil))
41
+ id = normalize_doi(meta.fetch("doi", nil))
42
42
 
43
- resource_type_general = meta.dig('types', 'resourceTypeGeneral')
44
- resource_type = meta.dig('types', 'resourceType')
43
+ resource_type_general = meta.dig("types", "resourceTypeGeneral")
44
+ resource_type = meta.dig("types", "resourceType")
45
45
  # if resource_type is one of the new resource_type_general types introduced in schema 4.3, use it
46
46
  type = Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type, nil) ||
47
- Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, 'Other')
47
+ Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, "Other")
48
48
 
49
- alternate_identifiers = Array.wrap(meta.fetch('alternate_identifiers', nil)).map do |i|
49
+ alternate_identifiers = Array.wrap(meta.fetch("alternate_identifiers", nil)).map do |i|
50
50
  i.transform_keys! { |k| k.camelize(:lower) }
51
51
  end
52
- url = meta.fetch('url', nil)
53
- titles = Array.wrap(meta.fetch('titles', nil)).map do |title|
54
- title.compact
52
+ url = meta.fetch("url", nil)
53
+ titles = Array.wrap(meta.fetch("titles", nil)).map do |title|
54
+ { "title" => title.fetch("title", nil),
55
+ "type" => title.fetch("titleType", nil),
56
+ "language" => title.fetch("lang", nil) }.compact
55
57
  end
56
- contributors = get_authors(from_datacite(meta.fetch('creators', nil)))
57
- contributors += get_authors(from_datacite(meta.fetch('contributors', nil)))
58
- if meta.fetch('publisher', nil).is_a?(Hash)
59
- publisher = { 'name' => meta.fetch('publisher', nil).fetch('name', nil) }
60
- elsif meta.fetch('publisher', nil).is_a?(String)
61
- publisher = { 'name' => meta.fetch('publisher', nil) }
58
+ contributors = get_authors(from_datacite(meta.fetch("creators", nil)))
59
+ contributors += get_authors(from_datacite(meta.fetch("contributors", nil)))
60
+ if meta.fetch("publisher", nil).is_a?(Hash)
61
+ publisher = { "name" => meta.fetch("publisher", nil).fetch("name", nil) }
62
+ elsif meta.fetch("publisher", nil).is_a?(String)
63
+ publisher = { "name" => meta.fetch("publisher", nil) }
62
64
  else
63
65
  publisher = nil
64
66
  end
65
67
 
66
- container = meta.fetch('container', nil)
67
- funding_references = meta.fetch('funding_references', nil)
68
+ container = meta.fetch("container", nil)
69
+ funding_references = meta.fetch("funding_references", nil)
68
70
 
69
71
  date = {}
70
- date['created'] =
71
- get_iso8601_date(meta.dig('created')) || get_date(meta.dig('dates'), 'Created')
72
- date['published'] =
73
- get_iso8601_date(meta.dig('published')) || get_date(meta.dig('dates'),
74
- 'Issued') || get_iso8601_date(meta.dig('publication_year'))
75
- date['registered'] = get_iso8601_date(meta.dig('registered'))
76
- date['updated'] =
77
- get_iso8601_date(meta.dig('updated')) || get_date(meta.dig('dates'), 'Updated')
78
-
79
- descriptions = Array.wrap(meta.fetch('descriptions', nil)).map do |description|
80
- description.compact
72
+ date["created"] =
73
+ get_iso8601_date(meta.dig("created")) || get_date(meta.dig("dates"), "Created")
74
+ date["published"] =
75
+ get_iso8601_date(meta.dig("published")) || get_date(meta.dig("dates"),
76
+ "Issued") || get_iso8601_date(meta.dig("publication_year"))
77
+ date["registered"] = get_iso8601_date(meta.dig("registered"))
78
+ date["updated"] =
79
+ get_iso8601_date(meta.dig("updated")) || get_date(meta.dig("dates"), "Updated")
80
+
81
+ descriptions = Array.wrap(meta.fetch("descriptions", nil)).map do |description|
82
+ description_type = description.fetch("descriptionType", nil)
83
+ description_type = "Other" unless %w[Abstract Methods TechnicalInfo].include?(description_type)
84
+ { "description" => description.fetch("description", nil),
85
+ "type" => description_type,
86
+ "language" => description.fetch("lang", nil) }.compact
81
87
  end
82
- license = Array.wrap(meta.fetch('rights_list', nil)).find do |r|
83
- r['rightsUri'].present?
88
+ license = Array.wrap(meta.fetch("rights_list", nil)).find do |r|
89
+ r["rightsUri"].present?
84
90
  end
85
- license = hsh_to_spdx('rightsURI' => license['rightsUri']) if license.present?
86
- version = meta.fetch('version', nil)
87
- subjects = meta.fetch('subjects', nil)
88
- language = meta.fetch('language', nil)
89
- geo_locations = meta.fetch('geo_locations', nil)
90
- references = (Array.wrap(meta.fetch('related_identifiers',
91
- nil)) + Array.wrap(meta.fetch('related_items',
91
+ license = hsh_to_spdx("rightsURI" => license["rightsUri"]) if license.present?
92
+ version = meta.fetch("version", nil)
93
+ subjects = meta.fetch("subjects", nil)
94
+ language = meta.fetch("language", nil)
95
+ geo_locations = meta.fetch("geo_locations", nil)
96
+ references = (Array.wrap(meta.fetch("related_identifiers",
97
+ nil)) + Array.wrap(meta.fetch("related_items",
92
98
  nil))).select do |r|
93
- %w[References Cites IsSupplementedBy].include?(r['relationType'])
94
- end.map do |reference|
99
+ %w[References Cites IsSupplementedBy].include?(r["relationType"])
100
+ end.map do |reference|
95
101
  get_datacite_reference(reference)
96
102
  end
97
- files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
98
- formats = meta.fetch('formats', nil)
99
- sizes = meta.fetch('sizes', nil)
100
- schema_version = meta.fetch('schema_version', nil) || 'http://datacite.org/schema/kernel-4'
101
- state = id.present? || read_options.present? ? 'findable' : 'not_found'
102
-
103
- { 'id' => id,
104
- 'type' => type,
105
- 'additional_type' => resource_type == type ? nil : resource_type,
106
- 'url' => url,
107
- 'titles' => titles,
108
- 'contributors' => contributors,
109
- 'container' => container,
110
- 'publisher' => publisher,
111
- 'provider' => 'DataCite',
112
- 'alternate_identifiers' => alternate_identifiers.presence,
113
- 'references' => references,
114
- 'funding_references' => funding_references,
115
- 'files' => files.presence,
116
- 'date' => date.compact,
117
- 'descriptions' => descriptions,
118
- 'license' => license,
119
- 'version' => version,
120
- 'subjects' => subjects,
121
- 'language' => language,
122
- 'geo_locations' => geo_locations,
123
- 'formats' => formats,
124
- 'sizes' => sizes,
125
- 'state' => state }.compact # .merge(read_options)
103
+ files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
104
+ formats = meta.fetch("formats", nil)
105
+ sizes = meta.fetch("sizes", nil)
106
+ schema_version = meta.fetch("schema_version", nil) || "http://datacite.org/schema/kernel-4"
107
+ state = id.present? || read_options.present? ? "findable" : "not_found"
108
+
109
+ { "id" => id,
110
+ "type" => type,
111
+ "additional_type" => resource_type == type ? nil : resource_type,
112
+ "url" => url,
113
+ "titles" => titles,
114
+ "contributors" => contributors,
115
+ "container" => container,
116
+ "publisher" => publisher,
117
+ "provider" => "DataCite",
118
+ "alternate_identifiers" => alternate_identifiers.presence,
119
+ "references" => references,
120
+ "funding_references" => funding_references,
121
+ "files" => files.presence,
122
+ "date" => date.compact,
123
+ "descriptions" => descriptions,
124
+ "license" => license,
125
+ "version" => version,
126
+ "subjects" => subjects,
127
+ "language" => language,
128
+ "geo_locations" => geo_locations,
129
+ "formats" => formats,
130
+ "sizes" => sizes,
131
+ "state" => state }.compact # .merge(read_options)
126
132
  end
127
133
 
128
134
  def format_contributor(contributor)
129
- type = contributor.fetch('nameType', nil)
130
-
131
- { 'name' => type == 'Person' ? nil : contributor.fetch('name', nil),
132
- 'type' => type,
133
- 'givenName' => contributor.fetch('givenName', nil),
134
- 'familyName' => contributor.fetch('familyName', nil),
135
- 'nameIdentifiers' => contributor.fetch('nameIdentifiers', nil).presence,
136
- 'affiliations' => contributor.fetch('affiliations', nil).presence,
137
- 'contributorType' => contributor.fetch('contributorType', nil) }.compact
135
+ type = contributor.fetch("nameType", nil)
136
+
137
+ { "name" => type == "Person" ? nil : contributor.fetch("name", nil),
138
+ "type" => type,
139
+ "givenName" => contributor.fetch("givenName", nil),
140
+ "familyName" => contributor.fetch("familyName", nil),
141
+ "nameIdentifiers" => contributor.fetch("nameIdentifiers", nil).presence,
142
+ "affiliations" => contributor.fetch("affiliations", nil).presence,
143
+ "contributorType" => contributor.fetch("contributorType", nil) }.compact
138
144
  end
139
145
 
140
146
  def get_datacite_reference(reference)
141
147
  return nil unless reference.present? || !reference.is_a?(Hash)
142
148
 
143
- key = reference['relatedIdentifier']
149
+ key = reference["relatedIdentifier"]
144
150
  doi = nil
145
151
  url = nil
146
152
 
147
- case reference['relatedIdentifierType']
148
- when 'DOI'
149
- doi = normalize_doi(reference['relatedIdentifier'])
150
- when 'URL'
151
- url = reference['relatedIdentifier']
153
+ case reference["relatedIdentifierType"]
154
+ when "DOI"
155
+ doi = normalize_doi(reference["relatedIdentifier"])
156
+ when "URL"
157
+ url = reference["relatedIdentifier"]
152
158
  else
153
- url = reference['relatedIdentifier']
159
+ url = reference["relatedIdentifier"]
154
160
  end
155
161
 
156
162
  {
157
- 'key' => key,
158
- 'doi' => doi,
159
- 'url' => url,
160
- 'contributor' => reference.dig('author'),
161
- 'title' => reference.dig('article-title'),
162
- 'publisher' => reference.dig('publisher'),
163
- 'publicationYear' => reference.dig('year'),
164
- 'volume' => reference.dig('volume'),
165
- 'issue' => reference.dig('issue'),
166
- 'firstPage' => reference.dig('first-page'),
167
- 'lastPage' => reference.dig('last-page'),
168
- 'containerTitle' => reference.dig('journal-title'),
169
- 'edition' => nil,
170
- 'unstructured' => doi.nil? ? reference.dig('unstructured') : nil
163
+ "key" => key,
164
+ "doi" => doi,
165
+ "url" => url,
166
+ "contributor" => reference.dig("author"),
167
+ "title" => reference.dig("article-title"),
168
+ "publisher" => reference.dig("publisher"),
169
+ "publicationYear" => reference.dig("year"),
170
+ "volume" => reference.dig("volume"),
171
+ "issue" => reference.dig("issue"),
172
+ "firstPage" => reference.dig("first-page"),
173
+ "lastPage" => reference.dig("last-page"),
174
+ "containerTitle" => reference.dig("journal-title"),
175
+ "edition" => nil,
176
+ "unstructured" => doi.nil? ? reference.dig("unstructured") : nil,
171
177
  }.compact
172
178
  end
173
179
  end
@@ -5,7 +5,7 @@ require "pathname"
5
5
 
6
6
  module Commonmeta
7
7
  module SchemaUtils
8
- COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.6.json",
8
+ COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.7.json",
9
9
  __dir__))
10
10
 
11
11
  def json_schema_errors
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Commonmeta
4
- VERSION = '3.11.0'
4
+ VERSION = '3.12.1'
5
5
  end
@@ -8,4 +8,4 @@ module Commonmeta
8
8
  end
9
9
  end
10
10
  end
11
- end
11
+ end
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "http://json-schema.org/draft-07/schema#",
3
- "$id": "https://commonmeta.org/commonmeta_v0.10.6.json",
4
- "title": "Commonmeta v0.10.6",
3
+ "$id": "https://commonmeta.org/commonmeta_v0.10.7.json",
4
+ "title": "Commonmeta v0.10.7",
5
5
  "description": "JSON representation of the Commonmeta schema.",
6
6
  "additionalProperties": false,
7
7
  "definitions": {
@@ -253,6 +253,10 @@
253
253
  "description": "The type of the title.",
254
254
  "type": "string",
255
255
  "enum": ["AlternativeTitle", "Subtitle", "TranslatedTitle"]
256
+ },
257
+ "language": {
258
+ "description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
259
+ "type": "string"
256
260
  }
257
261
  },
258
262
  "required": ["title"]
@@ -424,7 +428,11 @@
424
428
  "type": {
425
429
  "description": "The type of the description.",
426
430
  "type": "string",
427
- "enum": ["Abstract", "Description", "Summary"]
431
+ "enum": ["Abstract", "Summary", "Methods", "TechnicalInfo", "Other"]
432
+ },
433
+ "language": {
434
+ "description": "The language of the title. Use one of the language codes from the IETF BCP 47 standard.",
435
+ "type": "string"
428
436
  }
429
437
  },
430
438
  "required": ["description"]
@@ -58,6 +58,21 @@ describe Commonmeta::Metadata, vcr: true do
58
58
  author = { "name" => "Tejas S. Sathe, MD" }
59
59
  expect(subject.is_personal_name?(name: author["name"])).to be true
60
60
  end
61
+
62
+ it "name with organization string" do
63
+ author = { "name" => "University of California, Santa Barbara" }
64
+ expect(subject.is_personal_name?(name: author["name"])).to be false
65
+ end
66
+
67
+ it "name with another organization string" do
68
+ author = { "name" => "Research Graph" }
69
+ expect(subject.is_personal_name?(name: author["name"])).to be false
70
+ end
71
+
72
+ it "name with yet another organization string" do
73
+ author = { "name" => "Team OA Brandenburg" }
74
+ expect(subject.is_personal_name?(name: author["name"])).to be false
75
+ end
61
76
  end
62
77
 
63
78
  context "cleanup_author" do
@@ -0,0 +1,317 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://api.rogue-scholar.org/posts/05f01f68-ef81-47d7-a3c1-40aba91d358f
6
+ body:
7
+ encoding: ASCII-8BIT
8
+ string: ''
9
+ headers:
10
+ Connection:
11
+ - close
12
+ Host:
13
+ - api.rogue-scholar.org
14
+ User-Agent:
15
+ - http.rb/5.1.1
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Content-Type:
22
+ - application/json
23
+ Content-Length:
24
+ - '23886'
25
+ Ratelimit-Limit:
26
+ - '15'
27
+ Ratelimit-Remaining:
28
+ - '14'
29
+ Ratelimit-Reset:
30
+ - '3'
31
+ Date:
32
+ - Wed, 31 Jan 2024 19:50:01 GMT
33
+ Server:
34
+ - Fly/ba9e227a (2024-01-26)
35
+ Via:
36
+ - 1.1 fly.io
37
+ Fly-Request-Id:
38
+ - 01HNGH4EZV3XQF20H1PZ6X5N07-fra
39
+ body:
40
+ encoding: UTF-8
41
+ string: '{"abstract":null,"archive_url":null,"authors":[{"name":"Research Graph"}],"blog":{"api":false,"archive_prefix":null,"authors":null,"backlog":0,"canonical_url":null,"category":"computerAndInformationSciences","created_at":1706685423,"current_feed_url":null,"description":"Stories
42
+ by Research Graph on Medium","favicon":"https://cdn-images-1.medium.com/fit/c/150/150/1*laJi0jBkVoGhXid7gD_DmQ.png","feed_format":"application/rss+xml","feed_url":"https://medium.com/@researchgraph/feed","filter":null,"funding":null,"generator":"Medium","generator_raw":"Medium","home_page_url":"https://medium.com/@researchgraph","id":"30da2ca9-8258-4ab5-acca-3919d9a5d98d","indexed":true,"issn":null,"language":"en","license":"https://creativecommons.org/licenses/by/4.0/legalcode","mastodon":"","plan":"Starter","prefix":"10.59350","relative_url":null,"ror":null,"secure":true,"slug":"researchgraph","status":"active","title":"Research
43
+ Graph","updated_at":1706151454,"use_api":null,"use_mastodon":false,"user_id":"a7e16958-1175-437c-b839-d4b8a47ec811","version":"https://jsonfeed.org/version/1.1"},"blog_name":"Research
44
+ Graph","blog_slug":"researchgraph","content_text":"**Tools and Platform for
45
+ Integration of Knowledge Graph with RAG\npipelines.**\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/1*bJ3eWZ7301vYDzBomwdLfQ.png\"\nalt=\"Complex
46
+ network connected to books and showing information from magespace\" />\n<figcaption>Image
47
+ Created in <a\nhref=\"https://www.mage.space/\">https://www.mage.space/</a></figcaption>\n</figure>\n\nAuthors:
48
+ [Aland\nAstudillo](https://www.linkedin.com/in/aland-astudillo/), [Aishwarya\nNambissan](https://www.linkedin.com/in/aishwarya-nambissan-127229200/)\n\nMany
49
+ users of chatbots such as ChatGPT, have encountered the problem of\nreceiving
50
+ inappropriate or incompatible responses. There are several\nreasons why this
51
+ might\u00a0happen.\n\nOne reason is the lack of appropriate training data,
52
+ as chatbots are\nusually trained on large amounts of text and code. If the
53
+ data is\ninsufficient or of poor quality, the chatbot may misunderstand queries\nand
54
+ provide inaccurate responses. Another reason is that some chatbots\nare designed
55
+ for specific tasks or domains, which limits their ability\nto handle broader
56
+ queries or understand subtle nuances in conversation.\nAdditionally, chatbots
57
+ may struggle with natural language, which is\ncomplex and often ambiguous.
58
+ This can cause them to misunderstand a\nuser''s query and provide irrelevant
59
+ or off-topic responses. Finally,\nthere are technical limitations, such as
60
+ the chatbot''s inability to\nreason or make inferences.\n\nThis article explores
61
+ a potential solution by combining two influential\napproaches in the field
62
+ of Natural Language Processing\u200a---\u200aRetrieval\nAugmented Generation
63
+ (**RAG**) and Knowledge Graphs(**KGs**). We will\ndelve into the partnership
64
+ between these two entities, discuss the\nnotable technologies and software
65
+ used in their processes, and highlight\nvarious options for utilizing their
66
+ combined potential.\n\n### **RAG**\n\nRetrieval-Augmented Generation is the
67
+ process of optimizing the output\nof a large language model using a knowledge
68
+ base outside of its training\ndata sources before generating a response. It
69
+ takes an input and\nretrieves a set of relevant/supporting documents given
70
+ a source (e.g.,\nWikipedia). This can be thought of as a Large Language Model
71
+ (LLM) not\njust putting words together, but carefully selecting relevant\ninformation
72
+ from external sources and Knowledge Graphs to create\nwell-informed and detailed
73
+ responses.\n\n### RAG Retrieval Techniques\n\nThe following are some crucial
74
+ technologies that enable RAG''s impressive\nability to retrieve and incorporate
75
+ relevant information:\n\n**Vector Search**: It transforms text into numerical
76
+ vectors, capturing\ntheir meaning and nuances in a mathematical space, creating
77
+ a map of\nrelationships. Similar texts, like those discussing shared topics
78
+ or\nusing similar language, end up positioned close together in this space,\nallowing
79
+ vector search to quickly identify them as related. This allows\nlightning-fast
80
+ comparisons, finding similar texts based on meaning, not\njust keywords.\n\nAlgorithms
81
+ like [**Faiss**](https://github.com/facebookresearch/faiss)\nand [**Annoy**](https://github.com/spotify/annoy)
82
+ map text into dense\nvectors, enabling fast comparisons and retrieval of relevant
83
+ passages\nbased on semantic similarity.\n\n**Passage Ranking**: It is an internal
84
+ algorithm that scores candidate\ntext passages based on their relevance to
85
+ a query. It considers factors\nlike keyword frequency, keyword overlap, and
86
+ document structure to act\nlike a judge, sifting through information to select
87
+ the most fitting and\ninformative passages.\n\nKeyword overlap measures how
88
+ often the same keywords appear in **both**\nthe query and the candidate passage,
89
+ emphasizing shared vocabulary and\npotential relevance. It differs from keyword
90
+ frequency, which simply\ncounts how often individual keywords appear within
91
+ a passage, regardless\nof their presence in the\u00a0query.\n\nTechniques
92
+ like [**BM25**](https://github.com/getalp/wikIR) and\n[**TF-IDF**](https://github.com/marcocor/wikipedia-idf)
93
+ score candidate\npassages based on keyword overlap and frequency, ensuring
94
+ retrieved\ninformation truly fits the\u00a0context.\n\n**Graph Neural Networks**
95
+ (**GNNs**): They are neural networks designed\nto explore and learn from interconnected
96
+ data like maps, social\nnetworks, and other complex relationships. Unlike
97
+ traditional processing\nmethods that go through data in a linear fashion,
98
+ GNNs are capable of\nrecognizing hidden patterns and understanding relationships
99
+ like \"who\nknows who\" and \"what connects to what\" by \"hopping\" across
100
+ connections\nin\u00a0data.\n\nConsider a graph as a network of dots(nodes)
101
+ connected by lines (edges).\nEach dot represents some information, like a
102
+ person, object, or concept.\nThe lines tell you how these things relate to
103
+ each\u00a0other.\n\nGNNs work in rounds. In each\u00a0round:\n\n1. Message
104
+ Passing: Each node \"talks\" to its neighbors, sending\n messages along
105
+ the edges. These messages contain information about\n the node itself and
106
+ its features.\n2. Node Update: Each node receives messages from all its neighbors
107
+ and\n combines them with its own information. This update can involve\n calculations
108
+ and applying a special function.\n3. Output Calculation: Based on the updated
109
+ information, the network\n calculates an output for each node. This output
110
+ could be a\n prediction about the node''s category, its relationship to
111
+ another\n node, or some other relevant information.\n\nThis process repeats
112
+ for multiple rounds, allowing nodes to incorporate\ninformation from their
113
+ entire neighborhood, not just their direct\nneighbors. As the rounds progress,
114
+ the network learns to understand the\nrelationships between nodes and the
115
+ overall structure of the\u00a0graph.\n\nWhen dealing with Knowledge Graphs,
116
+ frameworks like\n[**PyTorch-Geometric**](https://readthedocs.org/projects/pytorch-geometric/)\nand
117
+ [**DeepMind''s\nGNN**](https://github.com/deepmind/deepmind-research/blob/master/learning_to_simulate/graph_network.py)\nlibrary
118
+ come into play. These frameworks allow GNNs to traverse\ninterconnected entities
119
+ and relationships within the graph, retrieve\nrelevant knowledge fragments,
120
+ and understand complex connections.\n\n### **Knowledge Graphs: The Structured
121
+ Wisdom\u00a0Library**\n\nA knowledge graph, also referred to as a semantic
122
+ network, is a\nstructure that represents a network of real-world entities
123
+ such as\nobjects, events, situations, or concepts. It helps to illustrate
124
+ the\nconstantly changing representations of the world, connecting entities\n(such
125
+ as \"Marie Curie\") and relationships (such as \"won Nobel Prize\") to\nform
126
+ a complex network of information. This information is typically\nstored in
127
+ a graph database and visualized as a graph structure, thus the\nterm knowledge
128
+ \"graph\".\n\nKGs go beyond simply finding relevant facts and delve deeper
129
+ into\nunderstanding the relationships and insights hidden within using these\nprocesses:\n\n**Entity
130
+ Linking**: Imagine a vast network of information, like a big\npuzzle of dots.
131
+ Now imagine trying to connect specific names, places,\nand concepts to their
132
+ corresponding dots in the puzzle. That is what\nentity linking does with text
133
+ and knowledge graphs, connecting the\nspecific components of the text to the
134
+ corresponding nodes in the graph.\nThey help systems understand the exact
135
+ meaning of entities, and find\nrelevant information from the\u00a0graph.\n\nLibraries
136
+ like [**DGL-KeLP**](https://github.com/awslabs/dgl-ke)\nleverage GNNs to identify
137
+ and link named entities (like \"Marie Curie\")\nto their respective nodes
138
+ within the Knowledge Graphs, enabling RAG to\nretrieve information that is
139
+ directly relevant to the core subject of a\nsearch\u00a0query\n\n**Path Mining**:
140
+ Path mining is a process of uncovering hidden\nrelationships and patterns
141
+ that are not easily noticeable. It involves\nexploring complicated networks
142
+ of information and identifying and\ntracing connections between entities that
143
+ may seem unrelated. By doing\nso, path mining reveals surprising insights
144
+ and useful knowledge,\nimproving our understanding of the complex structures
145
+ within knowledge\ngraphs.\n\nTools like [**Neo4j**](https://neo4j.com/) and\n[**Stanza**](https://github.com/stanfordnlp/stanza)
146
+ allow traversing\npaths between entities, uncovering hidden relationships,
147
+ and generating\ninsightful responses based on this deeper understanding.\n\n**Reasoning
148
+ and Inference**: In the context of knowledge graphs,\nreasoning and inference
149
+ are not just limited to discovering facts; they\nare also concerned with utilizing
150
+ them effectively. This involves\nintegrating data, drawing meaningful connections,
151
+ and using logical\nreasoning to resolve issues, foresee future occurrences,
152
+ or even\nconstruct narratives leveraging the insights provided by the knowledge\ngraph.\n\nConsider
153
+ the scenario of trying to find an organization that works in\nspecific sectors
154
+ with the help of a knowledge graph. This analogy\neffectively highlights the
155
+ active role of reasoning and inference in\nknowledge graphs:\n\n1. Gathering
156
+ Facts: Knowledge graphs collect and organize information\n from various
157
+ sources, such as websites, databases, academic papers,\n and social media
158
+ platforms. These facts are represented as\n structured data, with entities
159
+ (e.g., organizations) and their\n attributes (e.g., sectors in which they
160
+ operate) forming nodes and\n edges in the graph. By combining data about
161
+ organizations and\n sectors, knowledge graphs enable the gathering of relevant
162
+ facts for\n analysis.\n2. Integrating information: By connecting an organization''s\n relationships
163
+ with specific sectors, such as partnerships,\n investments, or certifications,
164
+ knowledge graphs reveal the scope\n and relevance of their work within
165
+ those sectors. Links to related\n entities like employees, board members,
166
+ or projects can further\n contribute to understanding an organization''s
167
+ involvement in\n specific\u00a0sectors.\n3. Predicting and Creating: Knowledge
168
+ graphs can leverage machine\n learning and predictive models to infer missing
169
+ or hidden\n information. By analyzing the available facts and connections
170
+ within\n the graph, these models can predict an organization''s potential\n involvement
171
+ in sectors that have common attributes with their known\n areas of operation.
172
+ For example, if an organization has expertise in\n renewable energy, predictive
173
+ models could suggest their likely\n involvement in related sectors like
174
+ clean transportation or\n sustainable infrastructure. Additionally, knowledge
175
+ graphs\n facilitate the creation of new information and insights by combining\n existing
176
+ facts with external data sources. For instance, by\n integrating real-time
177
+ data on industry trends, market analysis, or\n news articles, knowledge
178
+ graphs enable the discovery of emerging\n sectors or upcoming organizations
179
+ that might align with the given\n parameters.\n\nA framework like [**Atomspace**](https://github.com/opencog/atomspace)\nfrom
180
+ [**OpenCog**](https://opencog.org/) empowers RAG to reason and\ninfer new
181
+ knowledge. By traversing paths and combining information from\ninterconnected
182
+ entities, the system can generate informed predictions or\nanswer hypothetical
183
+ questions.\n\n### Purpose\n\nThe combination of Retrieval-Augmented Generation
184
+ (RAG) and Knowledge\nGraphs (KG) is beneficial for several\u00a0reasons:\n\n1. **Enhanced
185
+ information retrieval**: Knowledge graphs provide\n structured and interconnected
186
+ information that can significantly\n improve the effectiveness of information
187
+ retrieval. By using KGs,\n RAG models can retrieve more accurate and relevant
188
+ information,\n leading to better generation and response\u00a0quality.\n2. **Reliable
189
+ and diverse information:** KGs are constructed from\n authoritative sources,
190
+ making them reliable and trustworthy sources\n of information. RAG models
191
+ can leverage this reliable information to\n generate more accurate responses.
192
+ Additionally, KGs help in\n diversifying the generated responses by providing
193
+ a broader pool of\n related facts and entities.\n3. **Context-aware understanding**:
194
+ KGs enable RAG models to understand\n and reason over the contextual information.
195
+ By leveraging the\n relationships and semantic connections encoded in KGs,
196
+ RAG models\n can better grasp the context of user queries or conversations,\n resulting
197
+ in more coherent and appropriate responses.\n4. **Handling complex queries**:
198
+ KGs allow RAG models to tackle complex\n queries by breaking them down
199
+ into smaller sub-queries, retrieving\n relevant pieces of information from
200
+ the KG, and then generating a\n response based on the retrieved knowledge.
201
+ This enables RAG models\n to handle a wide range of user queries effectively.\n5. **Explainability
202
+ and transparency**: KGs provide a transparent and\n interpretable representation
203
+ of knowledge. By integrating KG-based\n retrieval into RAG models, the
204
+ reasoning behind the generated\n responses becomes more explainable. Users
205
+ can have a clear\n understanding of the knowledge sources and connections
206
+ used to\n produce the response.\n6. **Scalability**: Knowledge graphs
207
+ act as large-scale repositories of\n information. RAG models can leverage
208
+ KGs to generate responses to\n various queries or conversations without
209
+ requiring additional\n supervised training data. This makes the RAG+KG
210
+ approach scalable to\n handle an extensive range of knowledge domains and
211
+ user\u00a0queries.\n\n### **Pipeline Possibilities: Orchestrating RAG and\u00a0KGs:**\n\nLet''s
212
+ explore some exciting pipeline options for harnessing the combined\npower
213
+ of RAG and Knowledge Graphs. There are two options in which either\nthe LLM
214
+ is prioritized or the Knowledge Graph is prioritized:\n\n**Option 1: LLM-Centric
215
+ Pipeline:**\n\nThe LLM-Centric pipeline is a RAG and Knowledge Graph combination
216
+ that\nempowers LLMs to craft well-informed responses. Here''s how it\u00a0works:\n\n1. Start
217
+ with the user''s question or statement\n2. The LLM (like GPT-3) generates
218
+ an initial draft response based on\n its internal knowledge. This draft
219
+ may lack specific factual details\n or nuances that a knowledge graph can\u00a0provide.\n3. RAG
220
+ kicks in, searching the text corpus or the Knowledge Graph for\n relevant
221
+ passages that enrich the draft. During the retrieval\n process, RAG retrieval
222
+ techniques are used to search not only text\n corpora but also knowledge
223
+ graphs to find relevant information. This\n means that RAG can directly
224
+ tap into the structured knowledge within\n the graph to retrieve facts,
225
+ relationships, and entities that align\n with the user''s query and the
226
+ LLM''s generated draft.\n4. The retrieved information is carefully fused
227
+ with the LLM''s output,\n creating a more factually accurate and insightful
228
+ response\n5. A final polishing step ensures the response is fluent, grammatically\n correct,
229
+ and ready to\u00a0show.\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/0*3pd9MOIflkbS07wI\"
230
+ />\n<figcaption>RAG LLM-centric generic\u00a0scheme.</figcaption>\n</figure>\n\nThe
231
+ basic steps to perform this\u00a0are:\n\n1. **Pre-processing**: Clean and
232
+ tokenize user input to prepare for\n processing.\n2. **LLM Generation**:
233
+ Generate an initial draft response using an LLM\n like [**GPT-3**](https://openai.com/product)
234
+ or [**Jurassic-1\n Jumbo**](https://www.livescience.com/google-sentient-ai-lamda-lemoine).\n3. **Retrieval**:
235
+ Employ RAG techniques to retrieve relevant passages\n from a text corpus
236
+ or Knowledge Graphs.\n4. **Fusion**: Integrate retrieved information into
237
+ the LLM-generated\n draft, creating a more informed and factually-grounded
238
+ response.\n5. **Post-processing**: Refine the final response for fluency,\n grammatical
239
+ correctness, and overall coherence.\n\n**Option 2: Knowledge Graphs-Centric
240
+ Pipeline:**\n\nIn this approach, knowledge graphs take center stage. In essence,
241
+ this\npipeline prioritizes the structured knowledge within knowledge graphs,\nusing
242
+ RAG retrieval techniques to translate those insights into\ncompelling and
243
+ informative language. Here''s how it\u00a0unfolds:\n\n1. User input: The
244
+ process begins with the user''s question or statement\n2. Graph exploration:
245
+ The knowledge graph is meticulously explored to\n identify relevant entities,
246
+ relationships, and paths that align with\n the user''s input. This stage
247
+ involves techniques like entity\n linking, path mining, and reasoning to
248
+ uncover valuable information\n within the\u00a0graph\n3. Response planning:
249
+ The insights extracted from the graph are used to\n create a structured
250
+ response plan. This plan outlines the key\n points, facts, and logical
251
+ flow that the final response\n should\u00a0embody\n4. Language generation:
252
+ This is where RAG steps in. Its purpose is to\n create human-like text
253
+ that follows the response plan. It uses LLMs\n to produce well-written
254
+ sentences and paragraphs, combining the\n relevant information from the
255
+ knowledge graph while maintaining\n cohesiveness and readability.\n5. Post-processing:
256
+ The generated response undergoes a final refinement\n process to ensure
257
+ grammatical correctness, clarity, and\n overall\u00a0quality\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/0*mZ83esKBjbPmCq_C\"
258
+ />\n<figcaption>RAG Knowledge Graph-centric generic\u00a0scheme.</figcaption>\n</figure>\n\nThe
259
+ basic steps\u00a0are:\n\n1. **Query Formulation**: Transform the user input
260
+ into a query\n suitable for Knowledge Graph''s exploration.\n2. **Knowledge
261
+ Graphs:** You can use either Neo4j or\n [NebulaGraph](https://www.nebula-graph.io/)
262
+ to implement a retrieval\n enhancement technique. This technique involves
263
+ utilizing a knowledge\n graph to illustrate the connections between entities
264
+ and\n relationships. Additionally, it incorporates a powerful language\n model
265
+ to improve the retrieval process.\n3. **Fact Selection**: Employ entity linking
266
+ and reasoning algorithms\n to select and prioritize the most relevant facts
267
+ based on the query\n and\u00a0context.\n4. **Natural Language Generation**
268
+ (**NLG**): Utilise specialized NLG\n models like\n [BART](https://research.facebook.com/publications/controllable-abstractive-summarization/)\n to
269
+ translate the extracted facts into a natural language response.\n5. **Refinement**:
270
+ Enhance the generated response for clarity and\n coherence.\n\n### **Unveiling
271
+ a Future of Intelligent Interaction**\n\nThe combination of RAG and Knowledge
272
+ Graphs goes beyond just being a\ntechnological fusion. It paves the way for
273
+ a future where the\ninteraction between humans and computers goes beyond simple
274
+ words and\nbecomes a more informed and refined form of communication. As these\ntechnologies
275
+ continue to develop, we can expect to witness a significant\ntransformation
276
+ in:\n\n- AI-powered assistants that answer your questions with the confidence\n of
277
+ a well-read friend, seamlessly combining relevant facts and\n insights gleaned
278
+ from Knowledge Graphs.\n- Next-generation search engines that go beyond keyword
279
+ matching,\n understanding the deeper meaning behind your queries and delivering\n comprehensive,
280
+ contextual results enriched with information from\n Knowledge Graphs.\n-
281
+ Creative writing tools that utilize RAG and Knowledge Graphs to\n generate
282
+ stories that are both factually accurate and full of\n unexpected plot twists
283
+ and character development, moving beyond\n clich\u00e9d patterns.\n\n###
284
+ **Conclusion**\n\nThe convergence of Retrieval Augmented Generation (RAG)
285
+ and Knowledge\nGraphs (KGs) brings about an exciting synergy in the world
286
+ of Natural\nLanguage Processing (NLP). RAG enhances the output of large language\nmodels
287
+ by carefully selecting relevant information from external sources\nand KGs,
288
+ allowing for well-informed and detailed responses. KGs, on the\nother hand,
289
+ provide a structured representation of real-world entities\nand their relationships,
290
+ enabling the exploration of hidden insights and\nthe discovery of complex
291
+ connections.\n\nThe integration of RAG and KGs opens up two pipeline possibilities.
292
+ The\nLLM-centric pipeline prioritizes the language model''s output, which
293
+ is\nthen enriched with information retrieved from KGs. The Knowledge\nGraphs-centric
294
+ pipeline, on the other hand, places KGs at the center,\nutilizing RAG techniques
295
+ to translate the structured insights into\ncompelling and informative language.\n\nWhile
296
+ integrating LLMs and a knowledge graph for content retrieval\nrequires careful
297
+ planning, the reward is significant. You can gain\naccess to hidden relationships
298
+ within information, ultimately leading to\nhigher-quality output information.\n\nTools
299
+ like **OpenAI**, **Langchain**, and **LlamaIndex** provide\nready-made pipelines
300
+ to integrate knowledge graphs (like **Neo4j**)\neasily. Meanwhile, open-source
301
+ LLMs like **Mistral**, **Llama**, and\n**Dolphin** are catching up to proprietary
302
+ models in performance, making\nthem attractive choices for building custom
303
+ architectures. This\nopen-source scenario allows for the exploration and examination
304
+ of\nvarious methods before fully committing to a particular technological\nframework.
305
+ So, it is crucial to evaluate your needs and choose the\napproach that best
306
+ fits your use\u00a0case.\n\n![](https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=fc0a6900f7eb){width=\"1\"\nheight=\"1\"}\n","doi":"https://doi.org/10.59350/jhrs4-22440","guid":"https://medium.com/p/fc0a6900f7eb","id":"05f01f68-ef81-47d7-a3c1-40aba91d358f","image":"https://cdn-images-1.medium.com/max/1024/1*bJ3eWZ7301vYDzBomwdLfQ.png","indexed_at":1706690571,"language":"en","published_at":1705557796,"reference":[],"relationships":[],"summary":"<strong>\n
307
+ Tools and Platform for Integration of Knowledge Graph with RAG pipelines.\n</strong>\nAuthors:
308
+ Aland Astudillo, Aishwarya Nambissan Many users of chatbots such as ChatGPT,
309
+ have encountered the problem of receiving inappropriate or incompatible responses.
310
+ There are several reasons why this might\u00a0happen. One reason is the lack
311
+ of appropriate training data, as chatbots are usually trained on large amounts
312
+ of text and code.","tags":["Artificial-intelligence","Machine-learning","Retrieval-augmented","Knowledge-graph"],"title":"Unveiling
313
+ the Synergy: Retrieval Augmented Generation (RAG) Meets Knowledge Graphs","updated_at":1705557796,"url":"https://medium.com/@researchgraph/unveiling-the-synergy-retrieval-augmented-generation-rag-meets-knowledge-graphs-fc0a6900f7eb"}
314
+
315
+ '
316
+ recorded_at: Wed, 31 Jan 2024 19:50:01 GMT
317
+ recorded_with: VCR 6.2.0
@@ -90,7 +90,7 @@ describe Commonmeta::Metadata, vcr: true do
90
90
  "affiliation" => [{ "name" => "Тверская государственная сельскохозяйственная академия" }], "familyName" => "Ганичева", "givenName" => "А.В.", "type" => "Person", "contributorRoles" => ["Author"],
91
91
  )
92
92
  expect(subject.titles.last).to eq("title" => "MODEL OF SYSTEM DYNAMICS OF PROCESS OF TRAINING",
93
- "titleType" => "TranslatedTitle")
93
+ "type" => "TranslatedTitle")
94
94
  expect(subject.date).to eq("created" => "2019-02-12", "published" => "2019",
95
95
  "registered" => "2019-02-12", "updated" => "2022-08-23")
96
96
  expect(subject.publisher).to eq("name" => "МОДЕЛИРОВАНИЕ, ОПТИМИЗАЦИЯ И ИНФОРМАЦИОННЫЕ ТЕХНОЛОГИИ")
@@ -114,10 +114,14 @@ describe Commonmeta::Metadata, vcr: true do
114
114
  expect(subject.contributors.first).to eq(
115
115
  "name" => "Europäische Kommission", "contributorRoles" => ["Author"], "type" => "Organization",
116
116
  )
117
- expect(subject.titles).to eq([
118
- { "lang" => "de",
119
- "title" => "Flash Eurobarometer 54 (Madrid Summit)" }, { "lang" => "en", "title" => "Flash Eurobarometer 54 (Madrid Summit)" }, { "titleType" => "Subtitle", "lang" => "de", "title" => "The Common European Currency" }, { "titleType" => "Subtitle", "lang" => "en", "title" => "The Common European Currency" },
120
- ])
117
+ expect(subject.titles).to eq([{ "language" => "de", "title" => "Flash Eurobarometer 54 (Madrid Summit)" },
118
+ { "language" => "en", "title" => "Flash Eurobarometer 54 (Madrid Summit)" },
119
+ { "language" => "de",
120
+ "title" => "The Common European Currency",
121
+ "type" => "Subtitle" },
122
+ { "language" => "en",
123
+ "title" => "The Common European Currency",
124
+ "type" => "Subtitle" }])
121
125
  expect(subject.subjects).to eq([{ "lang" => "en",
122
126
  "subject" => "KAT12 International Institutions, Relations, Conditions",
123
127
  "subjectScheme" => "ZA" },
@@ -163,14 +167,39 @@ describe Commonmeta::Metadata, vcr: true do
163
167
  expect(subject.contributors.length).to eq(23)
164
168
  expect(subject.contributors[0]).to eq("contributorRoles" => ["Author"], "familyName" => "ExampleFamilyName", "givenName" => "ExampleGivenName", "type" => "Person")
165
169
  expect(subject.contributors[2]).to eq("contributorRoles" => ["ContactPerson"], "familyName" => "ExampleFamilyName", "givenName" => "ExampleGivenName", "type" => "Person")
166
- expect(subject.date).to eq("created"=>"2022-10-27", "published"=>"2022", "registered"=>"2022-10-27", "updated"=>"2024-01-02")
170
+ expect(subject.date).to eq("created" => "2022-10-27", "published" => "2022", "registered" => "2022-10-27", "updated" => "2024-01-02")
167
171
  expect(subject.publisher).to eq("name" => "Example Publisher")
168
- expect(subject.license).to eq("id"=>"CC-PDDC", "url"=>"https://creativecommons.org/licenses/publicdomain/")
172
+ expect(subject.titles).to eq([{ "language" => "en", "title" => "Example Title" },
173
+ { "language" => "en", "title" => "Example Subtitle", "type" => "Subtitle" },
174
+ { "language" => "fr",
175
+ "title" => "Example TranslatedTitle",
176
+ "type" => "TranslatedTitle" },
177
+ { "language" => "en",
178
+ "title" => "Example AlternativeTitle",
179
+ "type" => "AlternativeTitle" }])
180
+ expect(subject.descriptions).to eq([{ "description" => "Example Abstract",
181
+ "type" => "Abstract",
182
+ "language" => "en" },
183
+ { "description" => "Example Methods",
184
+ "type" => "Methods",
185
+ "language" => "en" },
186
+ { "description" => "Example SeriesInformation",
187
+ "type" => "Other",
188
+ "language" => "en" },
189
+ { "description" => "Example TableOfContents",
190
+ "type" => "Other",
191
+ "language" => "en" },
192
+ { "description" => "Example TechnicalInfo",
193
+ "type" => "TechnicalInfo",
194
+ "language" => "en" },
195
+ { "description" => "Example Other", "type" => "Other", "language" => "en" }])
196
+ expect(subject.license).to eq("id" => "CC-PDDC", "url" => "https://creativecommons.org/licenses/publicdomain/")
169
197
  end
170
198
 
171
199
  it "instrument" do
172
200
  input = "#{fixture_path}datacite-instrument.json"
173
201
  subject = described_class.new(input: input)
202
+ puts subject.errors unless subject.valid?
174
203
  expect(subject.valid?).to be true
175
204
  expect(subject.id).to eq("https://doi.org/10.82433/08qf-ee96")
176
205
  expect(subject.type).to eq("Instrument")
@@ -189,6 +189,31 @@ describe Commonmeta::Metadata, vcr: true do
189
189
  expect(subject.references).to be_nil
190
190
  end
191
191
 
192
+ it "medium post with institutional author" do
193
+ input = "https://api.rogue-scholar.org/posts/05f01f68-ef81-47d7-a3c1-40aba91d358f"
194
+ subject = described_class.new(input: input)
195
+ # expect(subject.valid?).to be true
196
+ expect(subject.id).to eq("https://doi.org/10.59350/jhrs4-22440")
197
+ expect(subject.url).to eq("https://medium.com/@researchgraph/unveiling-the-synergy-retrieval-augmented-generation-rag-meets-knowledge-graphs-fc0a6900f7eb")
198
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "05f01f68-ef81-47d7-a3c1-40aba91d358f", "alternateIdentifierType" => "UUID" }])
199
+ expect(subject.type).to eq("Article")
200
+ expect(subject.contributors.length).to eq(1)
201
+ expect(subject.contributors.first).to eq("contributorRoles"=>["Author"], "name"=>"Research Graph", "type"=>"Organization")
202
+ expect(subject.titles).to eq([{ "title" => "Unveiling the Synergy: Retrieval Augmented Generation (RAG) Meets Knowledge Graphs" }])
203
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
204
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
205
+ expect(subject.date).to eq("published"=>"2024-01-18", "updated"=>"2024-01-18")
206
+ expect(subject.descriptions.first["description"]).to start_with("<strong> Tools and Platform for Integration of Knowledge Graph with RAG pipelines.")
207
+ expect(subject.publisher).to eq("name" => "Research Graph")
208
+ expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" },
209
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
210
+ "subject" => "FOS: Computer and information sciences",
211
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
212
+ expect(subject.language).to eq("en")
213
+ expect(subject.container).to eq("identifier" => "https://medium.com/@researchgraph", "identifierType" => "URL", "title" => "Research Graph", "type" => "Periodical")
214
+ expect(subject.references).to be_nil
215
+ end
216
+
192
217
  it "syldavia gazette post with references" do
193
218
  input = "https://api.rogue-scholar.org/posts/0022b9ef-525a-4a79-81ad-13411697f58a"
194
219
  subject = described_class.new(input: input)
@@ -33,11 +33,38 @@ describe Commonmeta::Metadata, vcr: true do
33
33
  "volume" => "426",
34
34
  "firstPage" => "181",
35
35
  "containerTitle" => "Nature")
36
- expect(json["date"]).to eq("published"=>"2014-02-11", "updated"=>"2022-03-26")
36
+ expect(json["date"]).to eq("published" => "2014-02-11", "updated" => "2022-03-26")
37
37
  expect(json["descriptions"].first["description"]).to start_with("Among various advantages,")
38
- expect(json["license"]).to eq("id"=>"CC-BY-3.0", "url"=>"https://creativecommons.org/licenses/by/3.0/legalcode")
38
+ expect(json["license"]).to eq("id" => "CC-BY-3.0", "url" => "https://creativecommons.org/licenses/by/3.0/legalcode")
39
39
  expect(json["provider"]).to eq("Crossref")
40
- expect(json["files"].first).to eq("mimeType"=>"application/pdf", "url"=>"https://cdn.elifesciences.org/articles/01567/elife-01567-v1.pdf")
40
+ expect(json["files"].first).to eq("mimeType" => "application/pdf", "url" => "https://cdn.elifesciences.org/articles/01567/elife-01567-v1.pdf")
41
+ end
42
+
43
+ it "dataset schema v4.5" do
44
+ input = "#{fixture_path}datacite-dataset_v4.5.json"
45
+ subject = described_class.new(input: input)
46
+ expect(subject.id).to eq("https://doi.org/10.82433/b09z-4k37")
47
+ json = JSON.parse(subject.commonmeta)
48
+ expect(json["id"]).to eq("https://doi.org/10.82433/b09z-4k37")
49
+ expect(json["type"]).to eq("Dataset")
50
+ expect(json["titles"]).to eq([{ "language" => "en", "title" => "Example Title" },
51
+ { "language" => "en", "title" => "Example Subtitle", "type" => "Subtitle" },
52
+ { "language" => "fr",
53
+ "title" => "Example TranslatedTitle",
54
+ "type" => "TranslatedTitle" },
55
+ { "language" => "en",
56
+ "title" => "Example AlternativeTitle",
57
+ "type" => "AlternativeTitle" }])
58
+ expect(json["descriptions"]).to eq([{ "description" => "Example Abstract", "language" => "en", "type" => "Abstract" },
59
+ { "description" => "Example Methods", "language" => "en", "type" => "Methods" },
60
+ { "description" => "Example SeriesInformation",
61
+ "language" => "en",
62
+ "type" => "Other" },
63
+ { "description" => "Example TableOfContents", "language" => "en", "type" => "Other" },
64
+ { "description" => "Example TechnicalInfo",
65
+ "language" => "en",
66
+ "type" => "TechnicalInfo" },
67
+ { "description" => "Example Other", "language" => "en", "type" => "Other" }])
41
68
  end
42
69
  end
43
70
  end
@@ -7,6 +7,7 @@ describe Commonmeta::Metadata, vcr: true do
7
7
  it 'Dataset' do
8
8
  input = 'https://doi.org/10.5061/DRYAD.8515'
9
9
  subject = described_class.new(input: input, from: 'datacite')
10
+ puts subject.errors unless subject.valid?
10
11
  expect(subject.valid?).to be true
11
12
  json = JSON.parse(subject.csl)
12
13
  expect(json['type']).to eq('dataset')
@@ -37,6 +37,7 @@ describe Commonmeta::Metadata, vcr: true do
37
37
  it 'text' do
38
38
  input = 'https://doi.org/10.3204/desy-2014-01645'
39
39
  subject = described_class.new(input: input, from: 'datacite')
40
+ puts subject.errors unless subject.valid?
40
41
  expect(subject.valid?).to be true
41
42
  csv = subject.csv.parse_csv
42
43
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commonmeta-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.11.0
4
+ version: 3.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-26 00:00:00.000000000 Z
11
+ date: 2024-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -694,7 +694,7 @@ files:
694
694
  - lib/commonmeta/xml_converter.rb
695
695
  - resources/2008/09/xsd.xsl
696
696
  - resources/cff.json
697
- - resources/commonmeta_v0.10.6.json
697
+ - resources/commonmeta_v0.10.7.json
698
698
  - resources/crossref/AccessIndicators.xsd
699
699
  - resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd
700
700
  - resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd
@@ -921,6 +921,7 @@ files:
921
921
  - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_without_doi.yml
922
922
  - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post.yml
923
923
  - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post_with_anonymous_author.yml
924
+ - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml
924
925
  - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml
925
926
  - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/syldavia_gazette_post_with_references.yml
926
927
  - spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/upstream_post_with_references.yml