commonmeta-ruby 3.2.14 → 3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/bin/commonmeta +1 -1
- data/lib/commonmeta/author_utils.rb +1 -1
- data/lib/commonmeta/cli.rb +14 -0
- data/lib/commonmeta/crossref_utils.rb +56 -14
- data/lib/commonmeta/readers/json_feed_reader.rb +30 -2
- data/lib/commonmeta/utils.rb +34 -0
- data/lib/commonmeta/version.rb +1 -1
- data/spec/cli_spec.rb +12 -3
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_blog.yml +997 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_uuid.yml +256 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_blog.yml +997 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_uuid.yml +256 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_id.yml +997 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid.yml +389 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid_specific_prefix.yml +389 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item/by_uuid.yml +136 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blog_post_with_non-url_id.yml +136 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_organizational_author.yml +91 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml +1316 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_organizational_author.yml +91 -0
- data/spec/readers/json_feed_reader_spec.rb +280 -186
- data/spec/utils_spec.rb +8 -0
- data/spec/writers/crossref_xml_writer_spec.rb +28 -0
- metadata +14 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dde90e2b65c0eabd771cda34b2b5054fbe6bf80b573444fe0bcdde14c8392429
|
4
|
+
data.tar.gz: 487ffa495c4ffae32184c0a6e11ef68a5f9e82ea18bfe4183d851cb4558d5f81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 468b8f5ffc67f878cf97a39b0733e01ee9a56c1a2fd82874feba18fb9de6876b2d835ab11b2dd40021139dff988152d66fe25af0adde1e5884884017435dc711
|
7
|
+
data.tar.gz: 1126af3e7ae9b0cbb25d9d2f4497e894ae33e2a34bb9cc975924233fcf48f8cd0527b67f7983aeee375d4e98f7d58005a913e5301a848aca2da92ce051b93732
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
commonmeta-ruby (3.2.14)
|
4
|
+
commonmeta-ruby (3.3)
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
6
6
|
addressable (~> 2.8.1, < 2.8.2)
|
7
7
|
base32-url (>= 0.7.0, < 1)
|
@@ -121,7 +121,7 @@ GEM
|
|
121
121
|
crass (~> 1.0.2)
|
122
122
|
nokogiri (>= 1.12.0)
|
123
123
|
matrix (0.4.2)
|
124
|
-
minitest (5.18.
|
124
|
+
minitest (5.18.1)
|
125
125
|
multi_json (1.15.0)
|
126
126
|
namae (1.1.1)
|
127
127
|
nokogiri (1.15.2-arm64-darwin)
|
data/bin/commonmeta
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require File.expand_path("../../lib/commonmeta", __FILE__)
|
4
4
|
|
5
|
-
if (ARGV & %w(--version -v help --help encode decode encode_id decode_id json_feed_not_indexed json_feed_unregistered json_feed_by_blog)).empty?
|
5
|
+
if (ARGV & %w(--version -v help --help encode decode encode_id decode_id encode_by_blog encode_by_uuid json_feed_not_indexed json_feed_unregistered json_feed_by_blog)).empty?
|
6
6
|
Commonmeta::CLI.start(ARGV.dup.unshift("convert"))
|
7
7
|
else
|
8
8
|
Commonmeta::CLI.start
|
@@ -41,7 +41,7 @@ module Commonmeta
|
|
41
41
|
elsif id.nil? && author['ORCID'].present?
|
42
42
|
id = author.fetch('ORCID')
|
43
43
|
end
|
44
|
-
id = normalize_orcid(id)
|
44
|
+
id = normalize_orcid(id) || normalize_ror(id)
|
45
45
|
|
46
46
|
# parse author type, i.e. "Person", "Organization" or not specified
|
47
47
|
type = author.fetch('type', nil)
|
data/lib/commonmeta/cli.rb
CHANGED
@@ -68,6 +68,20 @@ module Commonmeta
|
|
68
68
|
puts encode_container_id
|
69
69
|
end
|
70
70
|
|
71
|
+
desc "", "encode_by_blog"
|
72
|
+
|
73
|
+
# CLI command: look up the DOI prefix for the given blog id and print the
# DOI that encode_doi builds from that prefix.
#
# NOTE(review): get_doi_prefix_by_blog_id returns a Hash rather than a
# prefix string when the HTTP lookup is not successful; that value is passed
# to encode_doi unchanged here — confirm this is acceptable.
def encode_by_blog(blog_id)
  puts encode_doi(get_doi_prefix_by_blog_id(blog_id))
end
|
77
|
+
|
78
|
+
desc "", "encode_by_uuid"
|
79
|
+
|
80
|
+
# CLI command: look up the DOI prefix for the JSON Feed item with the given
# uuid and print the DOI that encode_doi builds from that prefix.
#
# NOTE(review): get_doi_prefix_by_json_feed_item_uuid returns a Hash rather
# than a prefix string when the HTTP lookup is not successful; that value is
# passed to encode_doi unchanged here — confirm this is acceptable.
def encode_by_uuid(uuid)
  puts encode_doi(get_doi_prefix_by_json_feed_item_uuid(uuid))
end
|
84
|
+
|
71
85
|
desc "", "decode"
|
72
86
|
|
73
87
|
def decode(doi)
|
@@ -94,25 +94,67 @@ module Commonmeta
|
|
94
94
|
|
95
95
|
# Writes a <contributors> element with one child per creator.
#
# Each creator becomes an <organization>, <person_name> or <unknown> element,
# chosen by its "type" and by whether a given or family name is present.
# All of them carry contributor_role="author"; the first creator gets
# sequence="first", every later one sequence="additional".
#
# xml is presumably a Nokogiri::XML::Builder — TODO confirm.
def insert_crossref_creators(xml)
  xml.contributors do
    Array.wrap(creators).each_with_index do |creator, index|
      attributes = {
        "contributor_role" => "author",
        "sequence" => index.zero? ? "first" : "additional",
      }

      if creator["type"] == "Organization"
        xml.organization(attributes) { insert_crossref_organization(xml, creator) }
      elsif creator["givenName"].present? || creator["familyName"].present?
        xml.person_name(attributes) { insert_crossref_person(xml, creator) }
      else
        xml.unknown(attributes) { insert_crossref_anonymous(xml, creator) }
      end
    end
  end
end
|
117
|
+
|
118
|
+
# Writes the child elements for a personal creator: given name, surname,
# an ORCID element when the creator id is hosted on orcid.org, and the
# creator's first affiliation (name and/or identifier) when one is present.
#
# Only the affiliation at index 0 is written.
# xml is presumably a Nokogiri::XML::Builder — TODO confirm.
def insert_crossref_person(xml, creator)
  xml.given_name(creator["givenName"]) if creator["givenName"].present?
  xml.surname(creator["familyName"]) if creator["familyName"].present?

  creator_id = creator.dig("id")
  xml.ORCID(creator_id) if creator_id && URI.parse(creator_id).host == "orcid.org"

  return unless creator["affiliation"].present?

  xml.affiliations do
    xml.institution do
      affiliation_name = creator.dig("affiliation", 0, "name")
      xml.institution_name(affiliation_name) if affiliation_name.present?

      affiliation_id = creator.dig("affiliation", 0, "affiliationIdentifier")
      if affiliation_id.present?
        xml.institution_id(affiliation_id,
                           "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme"))
      end
    end
  end
end
|
105
133
|
|
106
|
-
def
|
107
|
-
xml.
|
108
|
-
|
109
|
-
|
110
|
-
|
134
|
+
# Writes the child elements for an organizational creator: a name element
# when the creator has a name, and the creator's first affiliation (name
# and/or identifier) when one is present.
#
# Only the affiliation at index 0 is written.
# xml is presumably a Nokogiri::XML::Builder — TODO confirm.
def insert_crossref_organization(xml, creator)
  xml.name(creator["name"]) if creator["name"].present?

  return unless creator["affiliation"].present?

  xml.affiliations do
    xml.institution do
      affiliation_name = creator.dig("affiliation", 0, "name")
      xml.institution_name(affiliation_name) if affiliation_name.present?

      affiliation_id = creator.dig("affiliation", 0, "affiliationIdentifier")
      if affiliation_id.present?
        xml.institution_id(affiliation_id,
                           "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme"))
      end
    end
  end
end
|
145
|
+
|
146
|
+
# Writes an <anonymous> element for a creator that is neither an
# organization nor a named person.
#
# When the creator has an affiliation, the first affiliation (name and/or
# identifier) is nested inside <anonymous>; otherwise an empty <anonymous>
# element is written.
#
# xml is presumably a Nokogiri::XML::Builder — TODO confirm.
def insert_crossref_anonymous(xml, creator)
  # The affiliation must be read from the `creator` argument; there is no
  # `person` variable in this method.
  return xml.anonymous unless creator["affiliation"].present?

  xml.anonymous do
    xml.affiliations do
      xml.institution do
        affiliation_name = creator.dig("affiliation", 0, "name")
        xml.institution_name(affiliation_name) if affiliation_name.present?

        affiliation_id = creator.dig("affiliation", 0, "affiliationIdentifier")
        if affiliation_id.present?
          xml.institution_id(affiliation_id,
                             "type" => creator.dig("affiliation", 0, "affiliationIdentifierScheme"))
        end
      end
    end
  end
end
|
118
160
|
|
@@ -265,7 +307,7 @@ module Commonmeta
|
|
265
307
|
}.compact
|
266
308
|
|
267
309
|
# strip hyphen from UUIDs, as item_number can only be 32 characters long (UUIDv4 is 36 characters long)
|
268
|
-
alternate_identifier["alternateIdentifier"] = alternate_identifier["alternateIdentifier"].gsub(
|
310
|
+
alternate_identifier["alternateIdentifier"] = alternate_identifier["alternateIdentifier"].gsub("-", "") if alternate_identifier["alternateIdentifierType"] == "UUID"
|
269
311
|
|
270
312
|
xml.item_number(alternate_identifier["alternateIdentifier"], attributes)
|
271
313
|
end
|
@@ -20,8 +20,10 @@ module Commonmeta
|
|
20
20
|
|
21
21
|
meta = string.present? ? JSON.parse(string) : {}
|
22
22
|
|
23
|
-
id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("id", nil))
|
24
23
|
url = normalize_url(meta.fetch("url", nil))
|
24
|
+
id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("id", nil))
|
25
|
+
id = url if id.blank? && url.present?
|
26
|
+
|
25
27
|
type = "Article"
|
26
28
|
creators = if meta.fetch("authors", nil).present?
|
27
29
|
get_authors(from_json_feed(Array.wrap(meta.fetch("authors"))))
|
@@ -82,7 +84,11 @@ module Commonmeta
|
|
82
84
|
# Returns the entries of meta["references"] that resolve.
#
# For each reference one target is chosen: its "doi" when that passes
# validate_doi, otherwise its "url" when validate_url reports "URL".
# The reference is kept only if an HTTP HEAD request to that target answers
# with status 200, 301 or 302. References with no usable target are dropped.
def get_references(meta)
  # check that references resolve
  Array.wrap(meta["references"]).select do |reference|
    target = if reference["doi"] && validate_doi(reference["doi"])
        reference["doi"]
      elsif reference["url"] && validate_url(reference["url"]) == "URL"
        reference["url"]
      end

    target && [200, 301, 302].include?(HTTP.head(target).status)
  end
end
|
@@ -120,6 +126,28 @@ module Commonmeta
|
|
120
126
|
blog = JSON.parse(response.body.to_s)
|
121
127
|
blog["items"].map { |item| item["uuid"] }.first
|
122
128
|
end
|
129
|
+
|
130
|
+
# Fetches the blog record for blog_id and returns the value stored under
# its top-level "prefix" key (used for generating a random DOI).
#
# When the HTTP response is not successful, returns
# { "string" => nil, "state" => "not_found" } instead.
def get_doi_prefix_by_blog_id(blog_id)
  response = HTTP.get(json_feed_by_blog_url(blog_id))
  unless response.status.success?
    return { "string" => nil, "state" => "not_found" }
  end

  JSON.parse(response.body.to_s).dig("prefix")
end
|
140
|
+
|
141
|
+
# Fetches the JSON Feed item for uuid and returns the value stored under
# "blog" -> "prefix" in the response (used for generating a random DOI;
# the prefix is based on the blog the item belongs to).
#
# When the HTTP response is not successful, returns
# { "string" => nil, "state" => "not_found" } instead.
def get_doi_prefix_by_json_feed_item_uuid(uuid)
  response = HTTP.get(json_feed_item_by_uuid_url(uuid))
  unless response.status.success?
    return { "string" => nil, "state" => "not_found" }
  end

  JSON.parse(response.body.to_s).dig("blog", "prefix")
end
|
123
151
|
end
|
124
152
|
end
|
125
153
|
end
|
data/lib/commonmeta/utils.rb
CHANGED
@@ -543,6 +543,11 @@ module Commonmeta
|
|
543
543
|
orcid.gsub(/[[:space:]]/, "-") if orcid.present?
|
544
544
|
end
|
545
545
|
|
546
|
+
# Extracts the ROR identifier from a bare ROR id or a ror.org URL.
#
# Accepts either the 9-character id (seven characters from 0-9/a-z followed
# by two digits) or that id prefixed with http://ror.org/ or
# https://ror.org/. Returns the bare id, or nil when the input is nil or
# does not match.
def validate_ror(ror)
  # The pattern is anchored and admits no whitespace, so a successful match
  # needs no further clean-up.
  match = %r{\A(?:(?:http|https)://ror\.org/)?([0-9a-z]{7}\d{2})\z}.match(ror)
  match && match[1]
end
|
550
|
+
|
546
551
|
def validate_orcid_scheme(orcid_scheme)
|
547
552
|
Array(%r{\A(http|https)://(www\.)?(orcid\.org)}.match(orcid_scheme)).last
|
548
553
|
end
|
@@ -634,6 +639,14 @@ module Commonmeta
|
|
634
639
|
"https://orcid.org/" + Addressable::URI.encode(orcid)
|
635
640
|
end
|
636
641
|
|
642
|
+
# Turns a ROR id or ror.org URL into the canonical https://ror.org/ URL.
#
# Returns nil when validate_ror does not yield an id.
def normalize_ror(ror)
  ror_id = validate_ror(ror)
  return nil unless ror_id.present?

  # turn ROR ID into URL
  "https://ror.org/#{Addressable::URI.encode(ror_id)}"
end
|
649
|
+
|
637
650
|
# pick electronic issn if there are multiple
|
638
651
|
# format issn as xxxx-xxxx
|
639
652
|
def normalize_issn(input, options = {})
|
@@ -1384,6 +1397,23 @@ module Commonmeta
|
|
1384
1397
|
"https://doi.org/#{prefix}/#{str}"
|
1385
1398
|
end
|
1386
1399
|
|
1400
|
+
# Builds a DOI URL whose prefix belongs to the Rogue Scholar blog that the
# JSON Feed item with the given uuid is part of.
#
# uuid    - uuid of the JSON Feed item used to look up the prefix.
# options - :uuid, when given, is encoded into the DOI suffix instead of a
#           random number.
#
# Returns the DOI as an https://doi.org/ URL, or nil if the uuid is unknown
# or DOI registration is not enabled for the blog (no prefix found).
def encode_doi_for_uuid(uuid, options = {})
  # look up prefix for rogue scholar blog associated with uuid
  # NOTE(review): assumes get_doi_prefix_by_json_feed_item_uuid (defined in
  # the JSON Feed reader) is available on the object this module is mixed
  # into — confirm.
  prefix = get_doi_prefix_by_json_feed_item_uuid(uuid)

  # the lookup yields a Hash on a failed request and nil when the blog has
  # no prefix; neither can be used to build a DOI
  return nil unless prefix.is_a?(String) && prefix.present?

  # DOI suffix is a generated from a random number, encoded in base32
  # suffix has 8 digits plus two checksum digits. With base32 there are
  # 32 possible digits, so 8 digits gives 32^8 possible combinations
  str = if options[:uuid]
      Base32::URL.encode_uuid(options[:uuid], split: 7, checksum: true)
    else
      random_int = SecureRandom.random_number(32 ** 7..(32 ** 8) - 1)
      suffix = Base32::URL.encode(random_int, checksum: true)
      "#{suffix[0, 5]}-#{suffix[5, 10]}"
    end

  "https://doi.org/#{prefix}/#{str}"
end
|
1416
|
+
|
1387
1417
|
def decode_doi(doi, options = {})
|
1388
1418
|
suffix = doi.split("/", 5).last
|
1389
1419
|
if options[:uuid]
|
@@ -1415,5 +1445,9 @@ module Commonmeta
|
|
1415
1445
|
def json_feed_by_blog_url(blog_id)
|
1416
1446
|
"https://rogue-scholar.org/api/blogs/#{blog_id}"
|
1417
1447
|
end
|
1448
|
+
|
1449
|
+
# Returns the Rogue Scholar API URL for the post with the given uuid.
def json_feed_item_by_uuid_url(uuid)
  "https://rogue-scholar.org/api/posts/" + uuid.to_s
end
|
1418
1452
|
end
|
1419
1453
|
end
|
data/lib/commonmeta/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -311,12 +311,21 @@ describe Commonmeta::CLI do
|
|
311
311
|
# end
|
312
312
|
end
|
313
313
|
|
314
|
-
describe "encode" do
|
315
|
-
let(:input) { "10.53731" }
|
316
|
-
|
314
|
+
# The encode commands print a DOI URL to stdout. The random suffix cannot be
# predicted, so each example only pins the literal "https://doi.org/<prefix>"
# part; dots are escaped so they match a literal "." and nothing else.
describe "encode", vcr: true do
  it "blog prefix" do
    input = "10.53731"
    expect { subject.encode input }.to output(%r{https://doi\.org/10\.53731}).to_stdout
  end

  it "by_blog" do
    input = "tyfqw20"
    expect { subject.encode_by_blog input }.to output(%r{https://doi\.org/10\.59350}).to_stdout
  end

  it "by_uuid" do
    input = "2b22bbba-bcba-4072-94cc-3f88442fff88"
    expect { subject.encode_by_uuid input }.to output(%r{https://doi\.org/10\.54900}).to_stdout
  end
end
|
321
330
|
|
322
331
|
describe "decode" do
|