commonmeta-ruby 3.2.14 → 3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/bin/commonmeta +1 -1
  4. data/lib/commonmeta/author_utils.rb +1 -1
  5. data/lib/commonmeta/cli.rb +14 -0
  6. data/lib/commonmeta/crossref_utils.rb +56 -14
  7. data/lib/commonmeta/readers/json_feed_reader.rb +30 -2
  8. data/lib/commonmeta/utils.rb +34 -0
  9. data/lib/commonmeta/version.rb +1 -1
  10. data/spec/cli_spec.rb +12 -3
  11. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_blog.yml +997 -0
  12. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_uuid.yml +256 -0
  13. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_blog.yml +997 -0
  14. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_uuid.yml +256 -0
  15. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_id.yml +997 -0
  16. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid.yml +389 -0
  17. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid_specific_prefix.yml +389 -0
  18. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item/by_uuid.yml +136 -0
  19. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blog_post_with_non-url_id.yml +136 -0
  20. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_organizational_author.yml +91 -0
  21. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml +1316 -0
  22. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_organizational_author.yml +91 -0
  23. data/spec/readers/json_feed_reader_spec.rb +280 -186
  24. data/spec/utils_spec.rb +8 -0
  25. data/spec/writers/crossref_xml_writer_spec.rb +28 -0
  26. metadata +14 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b549fc46c70c5962ee3c971968771a6e5f13124eb89b03aab36228afc94c01fe
4
- data.tar.gz: 1545d0de4e821265cc19bb744456d025f6bc14ea52c19cbae2585a1d501501a4
3
+ metadata.gz: dde90e2b65c0eabd771cda34b2b5054fbe6bf80b573444fe0bcdde14c8392429
4
+ data.tar.gz: 487ffa495c4ffae32184c0a6e11ef68a5f9e82ea18bfe4183d851cb4558d5f81
5
5
  SHA512:
6
- metadata.gz: 3d7a785d1fdbf3bd226a64f489ef12e37cb474fe0334c894e98c0a86b90fba23d02d937eed3bc4f4e710791209556ab2ece950e50491d48874e128e95f62c44e
7
- data.tar.gz: cad955e4ba4066a42ff821f03437cfe09fb926c283785050d34682cc95e676d89d608f03ca63c18574d012e5fcad168c5f289cd7d465f525846c877e91368635
6
+ metadata.gz: 468b8f5ffc67f878cf97a39b0733e01ee9a56c1a2fd82874feba18fb9de6876b2d835ab11b2dd40021139dff988152d66fe25af0adde1e5884884017435dc711
7
+ data.tar.gz: 1126af3e7ae9b0cbb25d9d2f4497e894ae33e2a34bb9cc975924233fcf48f8cd0527b67f7983aeee375d4e98f7d58005a913e5301a848aca2da92ce051b93732
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- commonmeta-ruby (3.2.14)
4
+ commonmeta-ruby (3.3)
5
5
  activesupport (>= 4.2.5, < 8.0)
6
6
  addressable (~> 2.8.1, < 2.8.2)
7
7
  base32-url (>= 0.7.0, < 1)
@@ -121,7 +121,7 @@ GEM
121
121
  crass (~> 1.0.2)
122
122
  nokogiri (>= 1.12.0)
123
123
  matrix (0.4.2)
124
- minitest (5.18.0)
124
+ minitest (5.18.1)
125
125
  multi_json (1.15.0)
126
126
  namae (1.1.1)
127
127
  nokogiri (1.15.2-arm64-darwin)
data/bin/commonmeta CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  require File.expand_path("../../lib/commonmeta", __FILE__)
4
4
 
5
- if (ARGV & %w(--version -v help --help encode decode encode_id decode_id json_feed_not_indexed json_feed_unregistered json_feed_by_blog)).empty?
5
+ if (ARGV & %w(--version -v help --help encode decode encode_id decode_id encode_by_blog encode_by_uuid json_feed_not_indexed json_feed_unregistered json_feed_by_blog)).empty?
6
6
  Commonmeta::CLI.start(ARGV.dup.unshift("convert"))
7
7
  else
8
8
  Commonmeta::CLI.start
data/lib/commonmeta/author_utils.rb CHANGED
@@ -41,7 +41,7 @@ module Commonmeta
41
41
  elsif id.nil? && author['ORCID'].present?
42
42
  id = author.fetch('ORCID')
43
43
  end
44
- id = normalize_orcid(id)
44
+ id = normalize_orcid(id) || normalize_ror(id)
45
45
 
46
46
  # parse author type, i.e. "Person", "Organization" or not specified
47
47
  type = author.fetch('type', nil)
data/lib/commonmeta/cli.rb CHANGED
@@ -68,6 +68,20 @@ module Commonmeta
68
68
  puts encode_container_id
69
69
  end
70
70
 
71
+ desc "", "encode_by_blog"
72
+
73
+ def encode_by_blog(blog_id)
74
+ prefix = get_doi_prefix_by_blog_id(blog_id)
75
+ puts encode_doi(prefix)
76
+ end
77
+
78
+ desc "", "encode_by_uuid"
79
+
80
+ def encode_by_uuid(uuid)
81
+ prefix = get_doi_prefix_by_json_feed_item_uuid(uuid)
82
+ puts encode_doi(prefix)
83
+ end
84
+
71
85
  desc "", "decode"
72
86
 
73
87
  def decode(doi)
data/lib/commonmeta/crossref_utils.rb CHANGED
@@ -94,25 +94,67 @@ module Commonmeta
94
94
 
95
95
  def insert_crossref_creators(xml)
96
96
  xml.contributors do
97
- Array.wrap(creators).each_with_index do |person, index|
98
- xml.person_name("contributor_role" => "author",
99
- "sequence" => index.zero? ? "first" : "additional") do
100
- insert_crossref_person(xml, person)
97
+ Array.wrap(creators).each_with_index do |creator, index|
98
+ if creator["type"] == "Organization"
99
+ xml.organization("contributor_role" => "author",
100
+ "sequence" => index.zero? ? "first" : "additional") do
101
+ insert_crossref_organization(xml, creator)
102
+ end
103
+ elsif creator["givenName"].present? || creator["familyName"].present?
104
+ xml.person_name("contributor_role" => "author",
105
+ "sequence" => index.zero? ? "first" : "additional") do
106
+ insert_crossref_person(xml, creator)
107
+ end
108
+ else
109
+ xml.unknown("contributor_role" => "author",
110
+ "sequence" => index.zero? ? "first" : "additional") do
111
+ insert_crossref_anonymous(xml, creator)
112
+ end
113
+ end
114
+ end
115
+ end
116
+ end
117
+
118
+ def insert_crossref_person(xml, creator)
119
+ xml.given_name(creator["givenName"]) if creator["givenName"].present?
120
+ xml.surname(creator["familyName"]) if creator["familyName"].present?
121
+ if creator.dig("id") && URI.parse(creator.dig("id")).host == "orcid.org"
122
+ xml.ORCID(creator.dig("id"))
123
+ end
124
+ if creator["affiliation"].present?
125
+ xml.affiliations do
126
+ xml.institution do
127
+ xml.institution_name(creator.dig("affiliation", 0, "name")) if creator.dig("affiliation", 0, "name").present?
128
+ xml.institution_id(creator.dig("affiliation", 0, "affiliationIdentifier"), "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme")) if creator.dig("affiliation", 0, "affiliationIdentifier").present?
101
129
  end
102
130
  end
103
131
  end
104
132
  end
105
133
 
106
- def insert_crossref_person(xml, person)
107
- xml.given_name(person["givenName"]) if person["givenName"].present?
108
- xml.surname(person["familyName"]) if person["familyName"].present?
109
- if person.dig("id") && URI.parse(person.dig("id")).host == "orcid.org"
110
- xml.ORCID(person.dig("id"))
134
+ def insert_crossref_organization(xml, creator)
135
+ xml.name(creator["name"]) if creator["name"].present?
136
+ if creator["affiliation"].present?
137
+ xml.affiliations do
138
+ xml.institution do
139
+ xml.institution_name(creator.dig("affiliation", 0, "name")) if creator.dig("affiliation", 0, "name").present?
140
+ xml.institution_id(creator.dig("affiliation", 0, "affiliationIdentifier"), "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme")) if creator.dig("affiliation", 0, "affiliationIdentifier").present?
141
+ end
142
+ end
111
143
  end
112
- Array.wrap(person["affiliation"]).each do |affiliation|
113
- attributes = { "affiliationIdentifier" => affiliation["affiliationIdentifier"],
114
- "affiliationIdentifierScheme" => affiliation["affiliationIdentifierScheme"], "schemeURI" => affiliation["schemeUri"] }.compact
115
- xml.affiliation(affiliation["name"], attributes)
144
+ end
145
+
146
+ def insert_crossref_anonymous(xml, creator)
147
+ if person["affiliation"].present?
148
+ xml.anonymous do
149
+ xml.affiliations do
150
+ xml.institution do
151
+ xml.institution_name(creator.dig("affiliation", 0, "name")) if creator.dig("affiliation", 0, "name").present?
152
+ xml.institution_id(creator.dig("affiliation", 0, "affiliationIdentifier"), "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme")) if creator.dig("affiliation", 0, "affiliationIdentifier").present?
153
+ end
154
+ end
155
+ end
156
+ else
157
+ xml.anonymous
116
158
  end
117
159
  end
118
160
 
@@ -265,7 +307,7 @@ module Commonmeta
265
307
  }.compact
266
308
 
267
309
  # strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
268
- alternate_identifier["alternateIdentifier"] = alternate_identifier["alternateIdentifier"].gsub('-','') if alternate_identifier["alternateIdentifierType"] == "UUID"
310
+ alternate_identifier["alternateIdentifier"] = alternate_identifier["alternateIdentifier"].gsub("-", "") if alternate_identifier["alternateIdentifierType"] == "UUID"
269
311
 
270
312
  xml.item_number(alternate_identifier["alternateIdentifier"], attributes)
271
313
  end
data/lib/commonmeta/readers/json_feed_reader.rb CHANGED
@@ -20,8 +20,10 @@ module Commonmeta
20
20
 
21
21
  meta = string.present? ? JSON.parse(string) : {}
22
22
 
23
- id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("id", nil))
24
23
  url = normalize_url(meta.fetch("url", nil))
24
+ id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("id", nil))
25
+ id = url if id.blank? && url.present?
26
+
25
27
  type = "Article"
26
28
  creators = if meta.fetch("authors", nil).present?
27
29
  get_authors(from_json_feed(Array.wrap(meta.fetch("authors"))))
@@ -82,7 +84,11 @@ module Commonmeta
82
84
  def get_references(meta)
83
85
  # check that references resolve
84
86
  Array.wrap(meta["references"]).reduce([]) do |sum, reference|
85
- sum << reference if [200, 301, 302].include? HTTP.head(reference["doi"] || reference["url"]).status
87
+ if reference["doi"] && validate_doi(reference["doi"])
88
+ sum << reference if [200, 301, 302].include? HTTP.head(reference["doi"]).status
89
+ elsif reference["url"] && validate_url(reference["url"]) == "URL"
90
+ sum << reference if [200, 301, 302].include? HTTP.head(reference["url"]).status
91
+ end
86
92
 
87
93
  sum
88
94
  end
@@ -120,6 +126,28 @@ module Commonmeta
120
126
  blog = JSON.parse(response.body.to_s)
121
127
  blog["items"].map { |item| item["uuid"] }.first
122
128
  end
129
+
130
+ def get_doi_prefix_by_blog_id(blog_id)
131
+ # for generating a random DOI.
132
+
133
+ url = json_feed_by_blog_url(blog_id)
134
+ response = HTTP.get(url)
135
+ return { "string" => nil, "state" => "not_found" } unless response.status.success?
136
+
137
+ post = JSON.parse(response.body.to_s)
138
+ post.dig('prefix')
139
+ end
140
+
141
+ def get_doi_prefix_by_json_feed_item_uuid(uuid)
142
+ # for generating a random DOI. Prefix is based on the blog id.
143
+
144
+ url = json_feed_item_by_uuid_url(uuid)
145
+ response = HTTP.get(url)
146
+ return { "string" => nil, "state" => "not_found" } unless response.status.success?
147
+
148
+ post = JSON.parse(response.body.to_s)
149
+ post.dig('blog', 'prefix')
150
+ end
123
151
  end
124
152
  end
125
153
  end
data/lib/commonmeta/utils.rb CHANGED
@@ -543,6 +543,11 @@ module Commonmeta
543
543
  orcid.gsub(/[[:space:]]/, "-") if orcid.present?
544
544
  end
545
545
 
546
+ def validate_ror(ror)
547
+ ror = Array(%r{\A(?:(?:http|https)://ror\.org/)?([0-9a-z]{7}\d{2})\z}.match(ror)).last
548
+ ror.gsub(/[[:space:]]/, "-") if ror.present?
549
+ end
550
+
546
551
  def validate_orcid_scheme(orcid_scheme)
547
552
  Array(%r{\A(http|https)://(www\.)?(orcid\.org)}.match(orcid_scheme)).last
548
553
  end
@@ -634,6 +639,14 @@ module Commonmeta
634
639
  "https://orcid.org/" + Addressable::URI.encode(orcid)
635
640
  end
636
641
 
642
+ def normalize_ror(ror)
643
+ ror = validate_ror(ror)
644
+ return nil unless ror.present?
645
+
646
+ # turn ROR ID into URL
647
+ "https://ror.org/" + Addressable::URI.encode(ror)
648
+ end
649
+
637
650
  # pick electronic issn if there are multiple
638
651
  # format issn as xxxx-xxxx
639
652
  def normalize_issn(input, options = {})
@@ -1384,6 +1397,23 @@ module Commonmeta
1384
1397
  "https://doi.org/#{prefix}/#{str}"
1385
1398
  end
1386
1399
 
1400
+ def encode_doi_for_uuid(uuid, options = {})
1401
+ # look up prefix for rogue scholar blog associated with uuid
1402
+ # returns nil if unknown uuid or doi registration is not enabled for blog
1403
+ json_feed_by_uuid(uuid)
1404
+ # DOI suffix is a generated from a random number, encoded in base32
1405
+ # suffix has 8 digits plus two checksum digits. With base32 there are
1406
+ # 32 possible digits, so 8 digits gives 32^8 possible combinations
1407
+ if options[:uuid]
1408
+ str = Base32::URL.encode_uuid(options[:uuid], split: 7, checksum: true)
1409
+ else
1410
+ random_int = SecureRandom.random_number(32 ** 7..(32 ** 8) - 1)
1411
+ suffix = Base32::URL.encode(random_int, checksum: true)
1412
+ str = "#{suffix[0, 5]}-#{suffix[5, 10]}"
1413
+ end
1414
+ "https://doi.org/#{prefix}/#{str}"
1415
+ end
1416
+
1387
1417
  def decode_doi(doi, options = {})
1388
1418
  suffix = doi.split("/", 5).last
1389
1419
  if options[:uuid]
@@ -1415,5 +1445,9 @@ module Commonmeta
1415
1445
  def json_feed_by_blog_url(blog_id)
1416
1446
  "https://rogue-scholar.org/api/blogs/#{blog_id}"
1417
1447
  end
1448
+
1449
+ def json_feed_item_by_uuid_url(uuid)
1450
+ "https://rogue-scholar.org/api/posts/#{uuid}"
1451
+ end
1418
1452
  end
1419
1453
  end
data/lib/commonmeta/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Commonmeta
4
- VERSION = '3.2.14'
4
+ VERSION = '3.3'
5
5
  end
data/spec/cli_spec.rb CHANGED
@@ -311,12 +311,21 @@ describe Commonmeta::CLI do
311
311
  # end
312
312
  end
313
313
 
314
- describe "encode" do
315
- let(:input) { "10.53731" }
316
-
314
+ describe "encode", vcr: true do
317
315
  it "blog prefix" do
316
+ input = "10.53731"
318
317
  expect { subject.encode input }.to output(/https:\/\/doi.org\/10.53731/).to_stdout
319
318
  end
319
+
320
+ it "by_blog" do
321
+ input = "tyfqw20"
322
+ expect { subject.encode_by_blog input }.to output(/https:\/\/doi.org\/10.59350/).to_stdout
323
+ end
324
+
325
+ it "by_uuid" do
326
+ input = "2b22bbba-bcba-4072-94cc-3f88442fff88"
327
+ expect { subject.encode_by_uuid input }.to output(/https:\/\/doi.org\/10.54900/).to_stdout
328
+ end
320
329
  end
321
330
 
322
331
  describe "decode" do