commonmeta-ruby 3.2.14 → 3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/bin/commonmeta +1 -1
- data/lib/commonmeta/author_utils.rb +1 -1
- data/lib/commonmeta/cli.rb +14 -0
- data/lib/commonmeta/crossref_utils.rb +56 -14
- data/lib/commonmeta/readers/json_feed_reader.rb +30 -2
- data/lib/commonmeta/utils.rb +34 -0
- data/lib/commonmeta/version.rb +1 -1
- data/spec/cli_spec.rb +12 -3
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_blog.yml +997 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_uuid.yml +256 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_blog.yml +997 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_uuid.yml +256 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_id.yml +997 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid.yml +389 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid_specific_prefix.yml +389 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item/by_uuid.yml +136 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blog_post_with_non-url_id.yml +136 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_organizational_author.yml +91 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml +1316 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_organizational_author.yml +91 -0
- data/spec/readers/json_feed_reader_spec.rb +280 -186
- data/spec/utils_spec.rb +8 -0
- data/spec/writers/crossref_xml_writer_spec.rb +28 -0
- metadata +14 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dde90e2b65c0eabd771cda34b2b5054fbe6bf80b573444fe0bcdde14c8392429
|
4
|
+
data.tar.gz: 487ffa495c4ffae32184c0a6e11ef68a5f9e82ea18bfe4183d851cb4558d5f81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 468b8f5ffc67f878cf97a39b0733e01ee9a56c1a2fd82874feba18fb9de6876b2d835ab11b2dd40021139dff988152d66fe25af0adde1e5884884017435dc711
|
7
|
+
data.tar.gz: 1126af3e7ae9b0cbb25d9d2f4497e894ae33e2a34bb9cc975924233fcf48f8cd0527b67f7983aeee375d4e98f7d58005a913e5301a848aca2da92ce051b93732
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
commonmeta-ruby (3.2.14)
|
4
|
+
commonmeta-ruby (3.3)
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
6
6
|
addressable (~> 2.8.1, < 2.8.2)
|
7
7
|
base32-url (>= 0.7.0, < 1)
|
@@ -121,7 +121,7 @@ GEM
|
|
121
121
|
crass (~> 1.0.2)
|
122
122
|
nokogiri (>= 1.12.0)
|
123
123
|
matrix (0.4.2)
|
124
|
-
minitest (5.18.0)
|
124
|
+
minitest (5.18.1)
|
125
125
|
multi_json (1.15.0)
|
126
126
|
namae (1.1.1)
|
127
127
|
nokogiri (1.15.2-arm64-darwin)
|
data/bin/commonmeta
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require File.expand_path("../../lib/commonmeta", __FILE__)
|
4
4
|
|
5
|
-
if (ARGV & %w(--version -v help --help encode decode encode_id decode_id json_feed_not_indexed json_feed_unregistered json_feed_by_blog)).empty?
|
5
|
+
if (ARGV & %w(--version -v help --help encode decode encode_id decode_id encode_by_blog encode_by_uuid json_feed_not_indexed json_feed_unregistered json_feed_by_blog)).empty?
|
6
6
|
Commonmeta::CLI.start(ARGV.dup.unshift("convert"))
|
7
7
|
else
|
8
8
|
Commonmeta::CLI.start
|
data/lib/commonmeta/author_utils.rb
CHANGED
@@ -41,7 +41,7 @@ module Commonmeta
|
|
41
41
|
elsif id.nil? && author['ORCID'].present?
|
42
42
|
id = author.fetch('ORCID')
|
43
43
|
end
|
44
|
-
id = normalize_orcid(id)
|
44
|
+
id = normalize_orcid(id) || normalize_ror(id)
|
45
45
|
|
46
46
|
# parse author type, i.e. "Person", "Organization" or not specified
|
47
47
|
type = author.fetch('type', nil)
|
data/lib/commonmeta/cli.rb
CHANGED
@@ -68,6 +68,20 @@ module Commonmeta
|
|
68
68
|
puts encode_container_id
|
69
69
|
end
|
70
70
|
|
71
|
+
desc "", "encode_by_blog"
|
72
|
+
|
73
|
+
def encode_by_blog(blog_id)
|
74
|
+
prefix = get_doi_prefix_by_blog_id(blog_id)
|
75
|
+
puts encode_doi(prefix)
|
76
|
+
end
|
77
|
+
|
78
|
+
desc "", "encode_by_uuid"
|
79
|
+
|
80
|
+
def encode_by_uuid(uuid)
|
81
|
+
prefix = get_doi_prefix_by_json_feed_item_uuid(uuid)
|
82
|
+
puts encode_doi(prefix)
|
83
|
+
end
|
84
|
+
|
71
85
|
desc "", "decode"
|
72
86
|
|
73
87
|
def decode(doi)
|
data/lib/commonmeta/crossref_utils.rb
CHANGED
@@ -94,25 +94,67 @@ module Commonmeta
|
|
94
94
|
|
95
95
|
def insert_crossref_creators(xml)
|
96
96
|
xml.contributors do
|
97
|
-
Array.wrap(creators).each_with_index do |
|
98
|
-
|
99
|
-
|
100
|
-
|
97
|
+
Array.wrap(creators).each_with_index do |creator, index|
|
98
|
+
if creator["type"] == "Organization"
|
99
|
+
xml.organization("contributor_role" => "author",
|
100
|
+
"sequence" => index.zero? ? "first" : "additional") do
|
101
|
+
insert_crossref_organization(xml, creator)
|
102
|
+
end
|
103
|
+
elsif creator["givenName"].present? || creator["familyName"].present?
|
104
|
+
xml.person_name("contributor_role" => "author",
|
105
|
+
"sequence" => index.zero? ? "first" : "additional") do
|
106
|
+
insert_crossref_person(xml, creator)
|
107
|
+
end
|
108
|
+
else
|
109
|
+
xml.unknown("contributor_role" => "author",
|
110
|
+
"sequence" => index.zero? ? "first" : "additional") do
|
111
|
+
insert_crossref_anonymous(xml, creator)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def insert_crossref_person(xml, creator)
|
119
|
+
xml.given_name(creator["givenName"]) if creator["givenName"].present?
|
120
|
+
xml.surname(creator["familyName"]) if creator["familyName"].present?
|
121
|
+
if creator.dig("id") && URI.parse(creator.dig("id")).host == "orcid.org"
|
122
|
+
xml.ORCID(creator.dig("id"))
|
123
|
+
end
|
124
|
+
if creator["affiliation"].present?
|
125
|
+
xml.affiliations do
|
126
|
+
xml.institution do
|
127
|
+
xml.institution_name(creator.dig("affiliation", 0, "name")) if creator.dig("affiliation", 0, "name").present?
|
128
|
+
xml.institution_id(creator.dig("affiliation", 0, "affiliationIdentifier"), "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme")) if creator.dig("affiliation", 0, "affiliationIdentifier").present?
|
101
129
|
end
|
102
130
|
end
|
103
131
|
end
|
104
132
|
end
|
105
133
|
|
106
|
-
def
|
107
|
-
xml.
|
108
|
-
|
109
|
-
|
110
|
-
|
134
|
+
def insert_crossref_organization(xml, creator)
|
135
|
+
xml.name(creator["name"]) if creator["name"].present?
|
136
|
+
if creator["affiliation"].present?
|
137
|
+
xml.affiliations do
|
138
|
+
xml.institution do
|
139
|
+
xml.institution_name(creator.dig("affiliation", 0, "name")) if creator.dig("affiliation", 0, "name").present?
|
140
|
+
xml.institution_id(creator.dig("affiliation", 0, "affiliationIdentifier"), "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme")) if creator.dig("affiliation", 0, "affiliationIdentifier").present?
|
141
|
+
end
|
142
|
+
end
|
111
143
|
end
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
144
|
+
end
|
145
|
+
|
146
|
+
def insert_crossref_anonymous(xml, creator)
|
147
|
+
if person["affiliation"].present?
|
148
|
+
xml.anonymous do
|
149
|
+
xml.affiliations do
|
150
|
+
xml.institution do
|
151
|
+
xml.institution_name(creator.dig("affiliation", 0, "name")) if creator.dig("affiliation", 0, "name").present?
|
152
|
+
xml.institution_id(creator.dig("affiliation", 0, "affiliationIdentifier"), "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme")) if creator.dig("affiliation", 0, "affiliationIdentifier").present?
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
else
|
157
|
+
xml.anonymous
|
116
158
|
end
|
117
159
|
end
|
118
160
|
|
@@ -265,7 +307,7 @@ module Commonmeta
|
|
265
307
|
}.compact
|
266
308
|
|
267
309
|
# strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
|
268
|
-
alternate_identifier["alternateIdentifier"] = alternate_identifier["alternateIdentifier"].gsub(
|
310
|
+
alternate_identifier["alternateIdentifier"] = alternate_identifier["alternateIdentifier"].gsub("-", "") if alternate_identifier["alternateIdentifierType"] == "UUID"
|
269
311
|
|
270
312
|
xml.item_number(alternate_identifier["alternateIdentifier"], attributes)
|
271
313
|
end
|
data/lib/commonmeta/readers/json_feed_reader.rb
CHANGED
@@ -20,8 +20,10 @@ module Commonmeta
|
|
20
20
|
|
21
21
|
meta = string.present? ? JSON.parse(string) : {}
|
22
22
|
|
23
|
-
id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("id", nil))
|
24
23
|
url = normalize_url(meta.fetch("url", nil))
|
24
|
+
id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("id", nil))
|
25
|
+
id = url if id.blank? && url.present?
|
26
|
+
|
25
27
|
type = "Article"
|
26
28
|
creators = if meta.fetch("authors", nil).present?
|
27
29
|
get_authors(from_json_feed(Array.wrap(meta.fetch("authors"))))
|
@@ -82,7 +84,11 @@ module Commonmeta
|
|
82
84
|
def get_references(meta)
|
83
85
|
# check that references resolve
|
84
86
|
Array.wrap(meta["references"]).reduce([]) do |sum, reference|
|
85
|
-
|
87
|
+
if reference["doi"] && validate_doi(reference["doi"])
|
88
|
+
sum << reference if [200, 301, 302].include? HTTP.head(reference["doi"]).status
|
89
|
+
elsif reference["url"] && validate_url(reference["url"]) == "URL"
|
90
|
+
sum << reference if [200, 301, 302].include? HTTP.head(reference["url"]).status
|
91
|
+
end
|
86
92
|
|
87
93
|
sum
|
88
94
|
end
|
@@ -120,6 +126,28 @@ module Commonmeta
|
|
120
126
|
blog = JSON.parse(response.body.to_s)
|
121
127
|
blog["items"].map { |item| item["uuid"] }.first
|
122
128
|
end
|
129
|
+
|
130
|
+
def get_doi_prefix_by_blog_id(blog_id)
|
131
|
+
# for generating a random DOI.
|
132
|
+
|
133
|
+
url = json_feed_by_blog_url(blog_id)
|
134
|
+
response = HTTP.get(url)
|
135
|
+
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
136
|
+
|
137
|
+
post = JSON.parse(response.body.to_s)
|
138
|
+
post.dig('prefix')
|
139
|
+
end
|
140
|
+
|
141
|
+
def get_doi_prefix_by_json_feed_item_uuid(uuid)
|
142
|
+
# for generating a random DOI. Prefix is based on the blog id.
|
143
|
+
|
144
|
+
url = json_feed_item_by_uuid_url(uuid)
|
145
|
+
response = HTTP.get(url)
|
146
|
+
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
147
|
+
|
148
|
+
post = JSON.parse(response.body.to_s)
|
149
|
+
post.dig('blog', 'prefix')
|
150
|
+
end
|
123
151
|
end
|
124
152
|
end
|
125
153
|
end
|
data/lib/commonmeta/utils.rb
CHANGED
@@ -543,6 +543,11 @@ module Commonmeta
|
|
543
543
|
orcid.gsub(/[[:space:]]/, "-") if orcid.present?
|
544
544
|
end
|
545
545
|
|
546
|
+
def validate_ror(ror)
|
547
|
+
ror = Array(%r{\A(?:(?:http|https)://ror\.org/)?([0-9a-z]{7}\d{2})\z}.match(ror)).last
|
548
|
+
ror.gsub(/[[:space:]]/, "-") if ror.present?
|
549
|
+
end
|
550
|
+
|
546
551
|
def validate_orcid_scheme(orcid_scheme)
|
547
552
|
Array(%r{\A(http|https)://(www\.)?(orcid\.org)}.match(orcid_scheme)).last
|
548
553
|
end
|
@@ -634,6 +639,14 @@ module Commonmeta
|
|
634
639
|
"https://orcid.org/" + Addressable::URI.encode(orcid)
|
635
640
|
end
|
636
641
|
|
642
|
+
def normalize_ror(ror)
|
643
|
+
ror = validate_ror(ror)
|
644
|
+
return nil unless ror.present?
|
645
|
+
|
646
|
+
# turn ROR ID into URL
|
647
|
+
"https://ror.org/" + Addressable::URI.encode(ror)
|
648
|
+
end
|
649
|
+
|
637
650
|
# pick electronic issn if there are multiple
|
638
651
|
# format issn as xxxx-xxxx
|
639
652
|
def normalize_issn(input, options = {})
|
@@ -1384,6 +1397,23 @@ module Commonmeta
|
|
1384
1397
|
"https://doi.org/#{prefix}/#{str}"
|
1385
1398
|
end
|
1386
1399
|
|
1400
|
+
def encode_doi_for_uuid(uuid, options = {})
|
1401
|
+
# look up prefix for rogue scholar blog associated with uuid
|
1402
|
+
# returns nil if unknown uuid or doi registration is not enabled for blog
|
1403
|
+
json_feed_by_uuid(uuid)
|
1404
|
+
# DOI suffix is a generated from a random number, encoded in base32
|
1405
|
+
# suffix has 8 digits plus two checksum digits. With base32 there are
|
1406
|
+
# 32 possible digits, so 8 digits gives 32^8 possible combinations
|
1407
|
+
if options[:uuid]
|
1408
|
+
str = Base32::URL.encode_uuid(options[:uuid], split: 7, checksum: true)
|
1409
|
+
else
|
1410
|
+
random_int = SecureRandom.random_number(32 ** 7..(32 ** 8) - 1)
|
1411
|
+
suffix = Base32::URL.encode(random_int, checksum: true)
|
1412
|
+
str = "#{suffix[0, 5]}-#{suffix[5, 10]}"
|
1413
|
+
end
|
1414
|
+
"https://doi.org/#{prefix}/#{str}"
|
1415
|
+
end
|
1416
|
+
|
1387
1417
|
def decode_doi(doi, options = {})
|
1388
1418
|
suffix = doi.split("/", 5).last
|
1389
1419
|
if options[:uuid]
|
@@ -1415,5 +1445,9 @@ module Commonmeta
|
|
1415
1445
|
def json_feed_by_blog_url(blog_id)
|
1416
1446
|
"https://rogue-scholar.org/api/blogs/#{blog_id}"
|
1417
1447
|
end
|
1448
|
+
|
1449
|
+
def json_feed_item_by_uuid_url(uuid)
|
1450
|
+
"https://rogue-scholar.org/api/posts/#{uuid}"
|
1451
|
+
end
|
1418
1452
|
end
|
1419
1453
|
end
|
data/lib/commonmeta/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -311,12 +311,21 @@ describe Commonmeta::CLI do
|
|
311
311
|
# end
|
312
312
|
end
|
313
313
|
|
314
|
-
describe "encode" do
|
315
|
-
let(:input) { "10.53731" }
|
316
|
-
|
314
|
+
describe "encode", vcr: true do
|
317
315
|
it "blog prefix" do
|
316
|
+
input = "10.53731"
|
318
317
|
expect { subject.encode input }.to output(/https:\/\/doi.org\/10.53731/).to_stdout
|
319
318
|
end
|
319
|
+
|
320
|
+
it "by_blog" do
|
321
|
+
input = "tyfqw20"
|
322
|
+
expect { subject.encode_by_blog input }.to output(/https:\/\/doi.org\/10.59350/).to_stdout
|
323
|
+
end
|
324
|
+
|
325
|
+
it "by_uuid" do
|
326
|
+
input = "2b22bbba-bcba-4072-94cc-3f88442fff88"
|
327
|
+
expect { subject.encode_by_uuid input }.to output(/https:\/\/doi.org\/10.54900/).to_stdout
|
328
|
+
end
|
320
329
|
end
|
321
330
|
|
322
331
|
describe "decode" do
|