commonmeta-ruby 3.2.14 → 3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (26) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/bin/commonmeta +1 -1
  4. data/lib/commonmeta/author_utils.rb +1 -1
  5. data/lib/commonmeta/cli.rb +14 -0
  6. data/lib/commonmeta/crossref_utils.rb +56 -14
  7. data/lib/commonmeta/readers/json_feed_reader.rb +30 -2
  8. data/lib/commonmeta/utils.rb +34 -0
  9. data/lib/commonmeta/version.rb +1 -1
  10. data/spec/cli_spec.rb +12 -3
  11. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_blog.yml +997 -0
  12. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_uuid.yml +256 -0
  13. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_blog.yml +997 -0
  14. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_uuid.yml +256 -0
  15. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_id.yml +997 -0
  16. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid.yml +389 -0
  17. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid_specific_prefix.yml +389 -0
  18. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item/by_uuid.yml +136 -0
  19. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blog_post_with_non-url_id.yml +136 -0
  20. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_organizational_author.yml +91 -0
  21. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml +1316 -0
  22. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_organizational_author.yml +91 -0
  23. data/spec/readers/json_feed_reader_spec.rb +280 -186
  24. data/spec/utils_spec.rb +8 -0
  25. data/spec/writers/crossref_xml_writer_spec.rb +28 -0
  26. metadata +14 -2
@@ -1,245 +1,339 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'spec_helper'
3
+ require "spec_helper"
4
4
 
5
5
  describe Commonmeta::Metadata, vcr: true do
6
6
  subject { described_class.new }
7
7
 
8
- context 'get json_feed_item metadata' do
9
- it 'blogger post' do
10
- input = 'https://rogue-scholar.org/api/posts/f3629c86-06e0-42c0-844a-266b03a91ef1'
8
+ context "get json_feed_item metadata" do
9
+ it "blogger post" do
10
+ input = "https://rogue-scholar.org/api/posts/f3629c86-06e0-42c0-844a-266b03a91ef1"
11
11
  subject = described_class.new(input: input)
12
12
  expect(subject.valid?).to be true
13
- expect(subject.id).to eq('https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html')
14
- expect(subject.url).to eq('https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html')
15
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"f3629c86-06e0-42c0-844a-266b03a91ef1", "alternateIdentifierType"=>"UUID"}])
16
- expect(subject.type).to eq('Article')
13
+ expect(subject.id).to eq("https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html")
14
+ expect(subject.url).to eq("https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html")
15
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "f3629c86-06e0-42c0-844a-266b03a91ef1", "alternateIdentifierType" => "UUID" }])
16
+ expect(subject.type).to eq("Article")
17
17
  expect(subject.creators.length).to eq(1)
18
- expect(subject.creators.first).to eq("familyName"=>"Page", "givenName"=>"Roderic", "type"=>"Person")
19
- expect(subject.titles).to eq([{"title"=>"Ten years and a million links"}])
20
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
21
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
22
- expect(subject.date).to eq('published' => '2023-05-31')
23
- expect(subject.descriptions.first['description']).to start_with("As trailed on a Twitter thread last week I’ve been working on a manuscript describing the efforts to map taxonomic names to their original descriptions in the taxonomic literature.")
24
- expect(subject.publisher).to eq('name' => 'iPhylo')
25
- expect(subject.subjects).to eq([{"subject"=>"Natural sciences"},
26
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
27
- "subject"=>"FOS: Natural sciences",
28
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
29
- expect(subject.language).to eq('en')
30
- expect(subject.container).to eq("identifier"=>"https://iphylo.blogspot.com/", "identifierType"=>"URL", "title"=>"iPhylo", "type"=>"Periodical")
31
- end
32
-
33
- it 'ghost post with doi' do
34
- input = 'https://rogue-scholar.org/api/posts/5bb66e92-5cb9-4659-8aca-20e486b695c9'
18
+ expect(subject.creators.first).to eq("familyName" => "Page", "givenName" => "Roderic", "type" => "Person")
19
+ expect(subject.titles).to eq([{ "title" => "Ten years and a million links" }])
20
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
21
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
22
+ expect(subject.date).to eq("published" => "2023-05-31")
23
+ expect(subject.descriptions.first["description"]).to start_with("As trailed on a Twitter thread last week I’ve been working on a manuscript describing the efforts to map taxonomic names to their original descriptions in the taxonomic literature.")
24
+ expect(subject.publisher).to eq("name" => "iPhylo")
25
+ expect(subject.subjects).to eq([{ "subject" => "Natural sciences" },
26
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
27
+ "subject" => "FOS: Natural sciences",
28
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
29
+ expect(subject.language).to eq("en")
30
+ expect(subject.container).to eq("identifier" => "https://iphylo.blogspot.com/", "identifierType" => "URL", "title" => "iPhylo", "type" => "Periodical")
31
+ end
32
+
33
+ it "ghost post with doi" do
34
+ input = "https://rogue-scholar.org/api/posts/5bb66e92-5cb9-4659-8aca-20e486b695c9"
35
35
  subject = described_class.new(input: input)
36
36
  expect(subject.valid?).to be true
37
- expect(subject.id).to eq('https://doi.org/10.53731/4nwxn-frt36')
38
- expect(subject.url).to eq('https://blog.front-matter.io/posts/does-it-compose')
39
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"5bb66e92-5cb9-4659-8aca-20e486b695c9", "alternateIdentifierType"=>"UUID"}])
40
- expect(subject.type).to eq('Article')
37
+ expect(subject.id).to eq("https://doi.org/10.53731/4nwxn-frt36")
38
+ expect(subject.url).to eq("https://blog.front-matter.io/posts/does-it-compose")
39
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "5bb66e92-5cb9-4659-8aca-20e486b695c9", "alternateIdentifierType" => "UUID" }])
40
+ expect(subject.type).to eq("Article")
41
41
  expect(subject.creators.length).to eq(1)
42
- expect(subject.creators.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName"=>"Fenner", "givenName"=>"Martin", "type"=>"Person")
43
- expect(subject.titles).to eq([{"title"=>"Does it compose?"}])
44
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
45
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
46
- expect(subject.date).to eq('published' => '2023-05-16', 'updated' => '2023-05-16')
47
- expect(subject.descriptions.first['description']).to start_with("One question I have increasingly asked myself in the past few years. Meaning Can I run this open source software using Docker containers and a Docker Compose file?")
48
- expect(subject.publisher).to eq('name' => 'Front Matter')
49
- expect(subject.subjects).to eq([{"subject"=>"Engineering and technology"},
50
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
51
- "subject"=>"FOS: Engineering and technology",
52
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
53
- expect(subject.language).to eq('en')
54
- expect(subject.container).to eq("identifier"=>"https://blog.front-matter.io", "identifierType"=>"URL", "title"=>"Front Matter", "type"=>"Periodical")
55
- end
56
-
57
- it 'ghost post without doi' do
58
- input = 'https://rogue-scholar.org/api/posts/c3095752-2af0-40a4-a229-3ceb7424bce2'
42
+ expect(subject.creators.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName" => "Fenner", "givenName" => "Martin", "type" => "Person")
43
+ expect(subject.titles).to eq([{ "title" => "Does it compose?" }])
44
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
45
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
46
+ expect(subject.date).to eq("published" => "2023-05-16", "updated" => "2023-05-16")
47
+ expect(subject.descriptions.first["description"]).to start_with("One question I have increasingly asked myself in the past few years. Meaning Can I run this open source software using Docker containers and a Docker Compose file?")
48
+ expect(subject.publisher).to eq("name" => "Front Matter")
49
+ expect(subject.subjects).to eq([{ "subject" => "Engineering and technology" },
50
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
51
+ "subject" => "FOS: Engineering and technology",
52
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
53
+ expect(subject.language).to eq("en")
54
+ expect(subject.container).to eq("identifier" => "https://blog.front-matter.io", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical")
55
+ end
56
+
57
+ it "ghost post without doi" do
58
+ input = "https://rogue-scholar.org/api/posts/c3095752-2af0-40a4-a229-3ceb7424bce2"
59
59
  subject = described_class.new(input: input)
60
60
  expect(subject.valid?).to be true
61
- expect(subject.id).to eq('https://www.ideasurg.pub/residency-visual-abstract')
62
- expect(subject.url).to eq('https://www.ideasurg.pub/residency-visual-abstract')
63
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"c3095752-2af0-40a4-a229-3ceb7424bce2", "alternateIdentifierType"=>"UUID"}])
64
- expect(subject.type).to eq('Article')
61
+ expect(subject.id).to eq("https://www.ideasurg.pub/residency-visual-abstract")
62
+ expect(subject.url).to eq("https://www.ideasurg.pub/residency-visual-abstract")
63
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "c3095752-2af0-40a4-a229-3ceb7424bce2", "alternateIdentifierType" => "UUID" }])
64
+ expect(subject.type).to eq("Article")
65
65
  expect(subject.creators.length).to eq(1)
66
- expect(subject.creators.first).to eq("familyName"=>"Sathe", "givenName"=>"Tejas S.", "type"=>"Person")
67
- expect(subject.titles).to eq([{"title"=>"The Residency Visual Abstract"}])
68
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
69
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
70
- expect(subject.date).to eq('published' => '2023-04-08')
71
- expect(subject.descriptions.first['description']).to start_with("A graphical, user-friendly tool for programs to highlight important data to prospective applicants")
72
- expect(subject.publisher).to eq('name' => 'I.D.E.A.S.')
73
- expect(subject.subjects).to eq([{"subject"=>"Medical and health sciences"},
74
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
75
- "subject"=>"FOS: Medical and health sciences",
76
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
77
- expect(subject.language).to eq('en')
78
- expect(subject.container).to eq("identifier"=>"https://www.ideasurg.pub/", "identifierType"=>"URL", "title"=>"I.D.E.A.S.", "type"=>"Periodical")
79
- end
80
-
81
- it 'ghost post with author name suffix' do
82
- input = 'https://rogue-scholar.org/api/posts/6179ad80-cc7f-4904-9260-0ecb3c3a90ba'
66
+ expect(subject.creators.first).to eq("familyName" => "Sathe", "givenName" => "Tejas S.", "type" => "Person")
67
+ expect(subject.titles).to eq([{ "title" => "The Residency Visual Abstract" }])
68
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
69
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
70
+ expect(subject.date).to eq("published" => "2023-04-08")
71
+ expect(subject.descriptions.first["description"]).to start_with("A graphical, user-friendly tool for programs to highlight important data to prospective applicants")
72
+ expect(subject.publisher).to eq("name" => "I.D.E.A.S.")
73
+ expect(subject.subjects).to eq([{ "subject" => "Medical and health sciences" },
74
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
75
+ "subject" => "FOS: Medical and health sciences",
76
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
77
+ expect(subject.language).to eq("en")
78
+ expect(subject.container).to eq("identifier" => "https://www.ideasurg.pub/", "identifierType" => "URL", "title" => "I.D.E.A.S.", "type" => "Periodical")
79
+ end
80
+
81
+ it "ghost post with author name suffix" do
82
+ input = "https://rogue-scholar.org/api/posts/6179ad80-cc7f-4904-9260-0ecb3c3a90ba"
83
83
  subject = described_class.new(input: input)
84
84
  expect(subject.valid?).to be true
85
- expect(subject.id).to eq('https://www.ideasurg.pub/academic-powerhouse')
86
- expect(subject.url).to eq('https://www.ideasurg.pub/academic-powerhouse')
87
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"6179ad80-cc7f-4904-9260-0ecb3c3a90ba", "alternateIdentifierType"=>"UUID"}])
88
- expect(subject.type).to eq('Article')
85
+ expect(subject.id).to eq("https://www.ideasurg.pub/academic-powerhouse")
86
+ expect(subject.url).to eq("https://www.ideasurg.pub/academic-powerhouse")
87
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "6179ad80-cc7f-4904-9260-0ecb3c3a90ba", "alternateIdentifierType" => "UUID" }])
88
+ expect(subject.type).to eq("Article")
89
89
  expect(subject.creators.length).to eq(1)
90
- expect(subject.creators.first).to eq("familyName"=>"Sathe", "givenName"=>"Tejas S.", "type"=>"Person")
91
- expect(subject.titles).to eq([{"title"=>"How to Build an Academic Powerhouse: Let's Study Who's Doing it"}])
92
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
93
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
94
- expect(subject.date).to eq('published' => '2023-06-03')
95
- expect(subject.descriptions.first['description']).to start_with("A Data Exploration with Public Data from the Academic Surgical Congress")
96
- expect(subject.publisher).to eq('name' => 'I.D.E.A.S.')
97
- expect(subject.subjects).to eq([{"subject"=>"Medical and health sciences"},
98
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
99
- "subject"=>"FOS: Medical and health sciences",
100
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
101
- expect(subject.language).to eq('en')
102
- expect(subject.container).to eq("identifier"=>"https://www.ideasurg.pub/", "identifierType"=>"URL", "title"=>"I.D.E.A.S.", "type"=>"Periodical")
90
+ expect(subject.creators.first).to eq("familyName" => "Sathe", "givenName" => "Tejas S.", "type" => "Person")
91
+ expect(subject.titles).to eq([{ "title" => "How to Build an Academic Powerhouse: Let's Study Who's Doing it" }])
92
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
93
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
94
+ expect(subject.date).to eq("published" => "2023-06-03")
95
+ expect(subject.descriptions.first["description"]).to start_with("A Data Exploration with Public Data from the Academic Surgical Congress")
96
+ expect(subject.publisher).to eq("name" => "I.D.E.A.S.")
97
+ expect(subject.subjects).to eq([{ "subject" => "Medical and health sciences" },
98
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
99
+ "subject" => "FOS: Medical and health sciences",
100
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
101
+ expect(subject.language).to eq("en")
102
+ expect(subject.container).to eq("identifier" => "https://www.ideasurg.pub/", "identifierType" => "URL", "title" => "I.D.E.A.S.", "type" => "Periodical")
103
103
  expect(subject.references).to be_nil
104
104
  end
105
105
 
106
- it 'syldavia gazette post with references' do
107
- input = 'https://rogue-scholar.org/api/posts/0022b9ef-525a-4a79-81ad-13411697f58a'
106
+ it "syldavia gazette post with references" do
107
+ input = "https://rogue-scholar.org/api/posts/0022b9ef-525a-4a79-81ad-13411697f58a"
108
108
  subject = described_class.new(input: input)
109
109
  expect(subject.valid?).to be true
110
- expect(subject.id).to eq('https://doi.org/10.53731/ffbx660-083tnag')
111
- expect(subject.url).to eq('https://syldavia-gazette.org/guinea-worms-chatgpt-neanderthals')
112
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"0022b9ef-525a-4a79-81ad-13411697f58a", "alternateIdentifierType"=>"UUID"}])
113
- expect(subject.type).to eq('Article')
110
+ expect(subject.id).to eq("https://doi.org/10.53731/ffbx660-083tnag")
111
+ expect(subject.url).to eq("https://syldavia-gazette.org/guinea-worms-chatgpt-neanderthals")
112
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "0022b9ef-525a-4a79-81ad-13411697f58a", "alternateIdentifierType" => "UUID" }])
113
+ expect(subject.type).to eq("Article")
114
114
  expect(subject.creators.length).to eq(1)
115
- expect(subject.creators.first).to eq("familyName"=>"Fenner", "givenName"=>"Martin", "id"=>"https://orcid.org/0000-0003-1419-2405", "type"=>"Person")
116
- expect(subject.titles).to eq([{"title"=>"Guinea Worms, ChatGPT, Neanderthals, Plagiarism, Tidyverse"}])
117
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
118
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
119
- expect(subject.date).to eq('published' => '2023-02-01', 'updated' => '2023-04-13')
120
- expect(subject.descriptions.first['description']).to start_with("Guinea worm disease reaches all-time low: only 13* human cases reported in 2022")
121
- expect(subject.publisher).to eq('name' => 'Syldavia Gazette')
122
- expect(subject.subjects).to eq([{"subject"=>"Humanities"}, {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf", "subject"=>"FOS: Humanities", "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
123
- expect(subject.language).to eq('en')
124
- expect(subject.container).to eq("identifier"=>"https://syldavia-gazette.org", "identifierType"=>"URL", "title"=>"Syldavia Gazette", "type"=>"Periodical")
115
+ expect(subject.creators.first).to eq("familyName" => "Fenner", "givenName" => "Martin", "id" => "https://orcid.org/0000-0003-1419-2405", "type" => "Person")
116
+ expect(subject.titles).to eq([{ "title" => "Guinea Worms, ChatGPT, Neanderthals, Plagiarism, Tidyverse" }])
117
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
118
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
119
+ expect(subject.date).to eq("published" => "2023-02-01", "updated" => "2023-04-13")
120
+ expect(subject.descriptions.first["description"]).to start_with("Guinea worm disease reaches all-time low: only 13* human cases reported in 2022")
121
+ expect(subject.publisher).to eq("name" => "Syldavia Gazette")
122
+ expect(subject.subjects).to eq([{ "subject" => "Humanities" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Humanities", "subjectScheme" => "Fields of Science and Technology (FOS)" }])
123
+ expect(subject.language).to eq("en")
124
+ expect(subject.container).to eq("identifier" => "https://syldavia-gazette.org", "identifierType" => "URL", "title" => "Syldavia Gazette", "type" => "Periodical")
125
125
  expect(subject.references.length).to eq(5)
126
- expect(subject.references.first).to eq("key"=>"ref1", "url"=>"https://cartercenter.org/news/pr/2023/2022-guinea-worm-worldwide-cases-announcement.html")
126
+ expect(subject.references.first).to eq("key" => "ref1", "url" => "https://cartercenter.org/news/pr/2023/2022-guinea-worm-worldwide-cases-announcement.html")
127
127
  end
128
128
 
129
- it 'wordpress post' do
130
- input = 'https://rogue-scholar.org/api/posts/1c578558-1324-4493-b8af-84c49eabc52f'
129
+ it "wordpress post" do
130
+ input = "https://rogue-scholar.org/api/posts/1c578558-1324-4493-b8af-84c49eabc52f"
131
131
  subject = described_class.new(input: input)
132
132
  expect(subject.valid?).to be true
133
- expect(subject.id).to eq('http://wisspub.net/?p=20455')
134
- expect(subject.url).to eq('https://wisspub.net/2023/05/23/eu-mitgliedstaaten-betonen-die-rolle-von-wissenschaftsgeleiteten-open-access-modellen-jenseits-von-apcs')
135
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"1c578558-1324-4493-b8af-84c49eabc52f", "alternateIdentifierType"=>"UUID"}])
136
- expect(subject.type).to eq('Article')
133
+ expect(subject.id).to eq("http://wisspub.net/?p=20455")
134
+ expect(subject.url).to eq("https://wisspub.net/2023/05/23/eu-mitgliedstaaten-betonen-die-rolle-von-wissenschaftsgeleiteten-open-access-modellen-jenseits-von-apcs")
135
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "1c578558-1324-4493-b8af-84c49eabc52f", "alternateIdentifierType" => "UUID" }])
136
+ expect(subject.type).to eq("Article")
137
137
  expect(subject.creators.length).to eq(1)
138
- expect(subject.creators.first).to eq("familyName"=>"Pampel", "givenName"=>"Heinz", "id"=>"https://orcid.org/0000-0003-3334-2771", "type"=>"Person")
139
- expect(subject.titles).to eq([{"title"=>"EU-Mitgliedstaaten betonen die Rolle von wissenschaftsgeleiteten Open-Access-Modellen jenseits von APCs"}])
140
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
141
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
142
- expect(subject.date).to eq('published' => '2023-05-23', 'updated' => '2023-05-23')
143
- expect(subject.descriptions.first['description']).to start_with("Die EU-Wissenschaftsministerien haben sich auf ihrer heutigen Sitzung in Brüssel unter dem Titel “Council conclusions on high-quality, transparent, open, trustworthy and equitable scholarly publishing”")
144
- expect(subject.publisher).to eq('name' => 'wisspub.net')
145
- expect(subject.subjects).to eq([{"subject"=>"Engineering and technology"},
146
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
147
- "subject"=>"FOS: Engineering and technology",
148
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
149
- expect(subject.language).to eq('de')
150
- expect(subject.container).to eq("identifier"=>"https://wisspub.net", "identifierType"=>"URL", "title"=>"wisspub.net", "type"=>"Periodical")
151
- end
152
-
153
- it 'wordpress post with references' do
154
- input = 'https://rogue-scholar.org/api/posts/4e4bf150-751f-4245-b4ca-fe69e3c3bb24'
138
+ expect(subject.creators.first).to eq("familyName" => "Pampel", "givenName" => "Heinz", "id" => "https://orcid.org/0000-0003-3334-2771", "type" => "Person")
139
+ expect(subject.titles).to eq([{ "title" => "EU-Mitgliedstaaten betonen die Rolle von wissenschaftsgeleiteten Open-Access-Modellen jenseits von APCs" }])
140
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
141
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
142
+ expect(subject.date).to eq("published" => "2023-05-23", "updated" => "2023-05-23")
143
+ expect(subject.descriptions.first["description"]).to start_with("Die EU-Wissenschaftsministerien haben sich auf ihrer heutigen Sitzung in Brüssel unter dem Titel “Council conclusions on high-quality, transparent, open, trustworthy and equitable scholarly publishing”")
144
+ expect(subject.publisher).to eq("name" => "wisspub.net")
145
+ expect(subject.subjects).to eq([{ "subject" => "Engineering and technology" },
146
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
147
+ "subject" => "FOS: Engineering and technology",
148
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
149
+ expect(subject.language).to eq("de")
150
+ expect(subject.container).to eq("identifier" => "https://wisspub.net", "identifierType" => "URL", "title" => "wisspub.net", "type" => "Periodical")
151
+ end
152
+
153
+ it "wordpress post with references" do
154
+ input = "https://rogue-scholar.org/api/posts/4e4bf150-751f-4245-b4ca-fe69e3c3bb24"
155
155
  subject = described_class.new(input: input)
156
156
  expect(subject.valid?).to be true
157
- expect(subject.id).to eq('http://svpow.com/?p=20992')
158
- expect(subject.url).to eq('https://svpow.com/2023/06/09/new-paper-curtice-et-al-2023-on-the-first-haplocanthosaurus-from-dry-mesa')
159
- expect(subject.type).to eq('Article')
157
+ expect(subject.id).to eq("http://svpow.com/?p=20992")
158
+ expect(subject.url).to eq("https://svpow.com/2023/06/09/new-paper-curtice-et-al-2023-on-the-first-haplocanthosaurus-from-dry-mesa")
159
+ expect(subject.type).to eq("Article")
160
160
  expect(subject.creators.length).to eq(1)
161
- expect(subject.creators.first).to eq("familyName"=>"Wedel", "givenName"=>"Matt", "type"=>"Person")
162
- expect(subject.titles).to eq([{"title"=>"New paper: Curtice et al. (2023) on the first Haplocanthosaurus from Dry Mesa"}])
163
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
164
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
165
- expect(subject.date).to eq('published' => '2023-06-09', 'updated' => '2023-06-09')
166
- expect(subject.descriptions.first['description']).to start_with("Haplocanthosaurus tibiae and dorsal vertebrae.")
167
- expect(subject.publisher).to eq('name' => 'Sauropod Vertebra Picture of the Week')
168
- expect(subject.subjects).to eq([{"subject"=>"Natural sciences"},
169
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
170
- "subject"=>"FOS: Natural sciences",
171
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
172
- expect(subject.language).to eq('en')
173
- expect(subject.container).to eq("identifier"=>"https://svpow.com", "identifierType"=>"URL", "title"=>"Sauropod Vertebra Picture of the Week", "type"=>"Periodical")
161
+ expect(subject.creators.first).to eq("familyName" => "Wedel", "givenName" => "Matt", "type" => "Person")
162
+ expect(subject.titles).to eq([{ "title" => "New paper: Curtice et al. (2023) on the first Haplocanthosaurus from Dry Mesa" }])
163
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
164
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
165
+ expect(subject.date).to eq("published" => "2023-06-09", "updated" => "2023-06-09")
166
+ expect(subject.descriptions.first["description"]).to start_with("Haplocanthosaurus tibiae and dorsal vertebrae.")
167
+ expect(subject.publisher).to eq("name" => "Sauropod Vertebra Picture of the Week")
168
+ expect(subject.subjects).to eq([{ "subject" => "Natural sciences" },
169
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
170
+ "subject" => "FOS: Natural sciences",
171
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
172
+ expect(subject.language).to eq("en")
173
+ expect(subject.container).to eq("identifier" => "https://svpow.com", "identifierType" => "URL", "title" => "Sauropod Vertebra Picture of the Week", "type" => "Periodical")
174
174
  expect(subject.references.length).to eq(3)
175
- expect(subject.references.first).to eq("key"=>"ref1", "url"=>"https://sauroposeidon.files.wordpress.com/2010/04/foster-and-wedel-2014-haplocanthosaurus-from-snowmass-colorado.pdf")
175
+ expect(subject.references.first).to eq("key" => "ref1", "url" => "https://sauroposeidon.files.wordpress.com/2010/04/foster-and-wedel-2014-haplocanthosaurus-from-snowmass-colorado.pdf")
176
176
  end
177
177
 
178
- it 'upstream post with references' do
179
- input = 'https://rogue-scholar.org/api/posts/954f8138-0ecd-4090-87c5-cef1297f1470'
178
+ it "upstream post with references" do
179
+ input = "https://rogue-scholar.org/api/posts/954f8138-0ecd-4090-87c5-cef1297f1470"
180
180
  subject = described_class.new(input: input)
181
181
  expect(subject.valid?).to be true
182
- expect(subject.id).to eq('https://doi.org/10.54900/zwm7q-vet94')
183
- expect(subject.url).to eq('https://upstream.force11.org/the-research-software-alliance-resa')
184
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"954f8138-0ecd-4090-87c5-cef1297f1470", "alternateIdentifierType"=>"UUID"}])
185
- expect(subject.type).to eq('Article')
182
+ expect(subject.id).to eq("https://doi.org/10.54900/zwm7q-vet94")
183
+ expect(subject.url).to eq("https://upstream.force11.org/the-research-software-alliance-resa")
184
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "954f8138-0ecd-4090-87c5-cef1297f1470", "alternateIdentifierType" => "UUID" }])
185
+ expect(subject.type).to eq("Article")
186
186
  expect(subject.creators.length).to eq(2)
187
- expect(subject.creators.first).to eq("familyName"=>"Katz", "givenName"=>"Daniel S.", "id"=>"https://orcid.org/0000-0001-5934-7525", "type"=>"Person")
188
- expect(subject.titles).to eq([{"title"=>"The Research Software Alliance (ReSA)"}])
189
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
190
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
191
- expect(subject.date).to eq('published' => '2023-04-18', 'updated' => '2023-04-18')
192
- expect(subject.descriptions.first['description']).to start_with("Research software is a key part of most research today. As University of Manchester Professor Carole Goble has said, \"software is the ubiquitous instrument of science.\"")
193
- expect(subject.publisher).to eq('name' => 'Upstream')
194
- expect(subject.subjects).to eq([{"subject"=>"Humanities"},
195
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
196
- "subject"=>"FOS: Humanities",
197
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
198
- expect(subject.language).to eq('en')
199
- expect(subject.container).to eq("identifier"=>"https://upstream.force11.org", "identifierType"=>"URL", "title"=>"Upstream", "type"=>"Periodical")
187
+ expect(subject.creators.first).to eq("familyName" => "Katz", "givenName" => "Daniel S.", "id" => "https://orcid.org/0000-0001-5934-7525", "type" => "Person")
188
+ expect(subject.titles).to eq([{ "title" => "The Research Software Alliance (ReSA)" }])
189
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
190
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
191
+ expect(subject.date).to eq("published" => "2023-04-18", "updated" => "2023-04-18")
192
+ expect(subject.descriptions.first["description"]).to start_with("Research software is a key part of most research today. As University of Manchester Professor Carole Goble has said, \"software is the ubiquitous instrument of science.\"")
193
+ expect(subject.publisher).to eq("name" => "Upstream")
194
+ expect(subject.subjects).to eq([{ "subject" => "Humanities" },
195
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
196
+ "subject" => "FOS: Humanities",
197
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
198
+ expect(subject.language).to eq("en")
199
+ expect(subject.container).to eq("identifier" => "https://upstream.force11.org", "identifierType" => "URL", "title" => "Upstream", "type" => "Periodical")
200
200
  expect(subject.references.length).to eq(11)
201
- expect(subject.references.first).to eq("key"=>"ref1", "url"=>"https://software.ac.uk/blog/2014-12-04-its-impossible-conduct-research-without-software-say-7-out-10-uk-researchers")
201
+ expect(subject.references.first).to eq("key" => "ref1", "url" => "https://software.ac.uk/blog/2014-12-04-its-impossible-conduct-research-without-software-say-7-out-10-uk-researchers")
202
202
  end
203
203
 
204
- it 'jekyll post' do
205
- input = 'https://rogue-scholar.org/api/posts/efdacb04-bcec-49d7-b689-ab3eab0634bf'
204
+ it "jekyll post" do
205
+ input = "https://rogue-scholar.org/api/posts/efdacb04-bcec-49d7-b689-ab3eab0634bf"
206
206
  subject = described_class.new(input: input)
207
207
  expect(subject.valid?).to be true
208
- expect(subject.id).to eq('https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2')
209
- expect(subject.url).to eq('https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2')
210
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"efdacb04-bcec-49d7-b689-ab3eab0634bf", "alternateIdentifierType"=>"UUID"}])
211
- expect(subject.type).to eq('Article')
208
+ expect(subject.id).to eq("https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2")
209
+ expect(subject.url).to eq("https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2")
210
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "efdacb04-bcec-49d7-b689-ab3eab0634bf", "alternateIdentifierType" => "UUID" }])
211
+ expect(subject.type).to eq("Article")
212
212
  expect(subject.creators.length).to eq(1)
213
- expect(subject.creators.first).to eq("familyName"=>"Karcher", "givenName"=>"Sebastian", "type"=>"Person")
214
- expect(subject.titles).to eq([{"title"=>"Seeking Public Comment on CSL 1.0.2 Release"}])
215
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
216
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
217
- expect(subject.date).to eq('published' => '2020-07-11', 'updated' => '2020-07-11')
218
- expect(subject.descriptions.first['description']).to start_with("Over the past few months, Citation Style Language developers have worked to address a backlog of feature requests. This work will be reflected in two upcoming releases.")
219
- expect(subject.publisher).to eq('name' => 'Citation Style Language')
220
- expect(subject.subjects).to eq([{"subject"=>"Engineering and technology"},
221
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
222
- "subject"=>"FOS: Engineering and technology",
223
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
224
- expect(subject.language).to eq('en')
225
- expect(subject.container).to eq("identifier"=>"https://citationstyles.org/", "identifierType"=>"URL", "title"=>"Citation Style Language", "type"=>"Periodical")
213
+ expect(subject.creators.first).to eq("familyName" => "Karcher", "givenName" => "Sebastian", "type" => "Person")
214
+ expect(subject.titles).to eq([{ "title" => "Seeking Public Comment on CSL 1.0.2 Release" }])
215
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
216
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
217
+ expect(subject.date).to eq("published" => "2020-07-11", "updated" => "2020-07-11")
218
+ expect(subject.descriptions.first["description"]).to start_with("Over the past few months, Citation Style Language developers have worked to address a backlog of feature requests. This work will be reflected in two upcoming releases.")
219
+ expect(subject.publisher).to eq("name" => "Citation Style Language")
220
+ expect(subject.subjects).to eq([{ "subject" => "Engineering and technology" },
221
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
222
+ "subject" => "FOS: Engineering and technology",
223
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
224
+ expect(subject.language).to eq("en")
225
+ expect(subject.container).to eq("identifier" => "https://citationstyles.org/", "identifierType" => "URL", "title" => "Citation Style Language", "type" => "Periodical")
226
+ end
227
+
228
+ it "ghost post with organizational author" do
229
+ input = "https://rogue-scholar.org/api/posts/5561f8e4-2ff1-4186-a8d5-8dacb3afe414"
230
+ subject = described_class.new(input: input)
231
+ puts subject.errors
232
+ # expect(subject.valid?).to be true
233
+ expect(subject.id).to eq("https://libscie.org/ku-leuven-supports-researchequals")
234
+ expect(subject.url).to eq("https://libscie.org/ku-leuven-supports-researchequals")
235
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "5561f8e4-2ff1-4186-a8d5-8dacb3afe414", "alternateIdentifierType" => "UUID" }])
236
+ expect(subject.type).to eq("Article")
237
+ expect(subject.creators.length).to eq(1)
238
+ expect(subject.creators.first).to eq("id"=>"https://ror.org/0342dzm54", "name"=>"Liberate Science", "type"=>"Organization")
239
+ expect(subject.titles).to eq([{ "title" => "KU Leuven supports ResearchEquals" }])
240
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
241
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
242
+ expect(subject.date).to eq("published" => "2023-05-09")
243
+ expect(subject.descriptions.first["description"]).to start_with("KU Leuven is now an inaugural supporting member of ResearchEquals")
244
+ expect(subject.publisher).to eq("name" => "Liberate Science")
245
+ expect(subject.subjects).to eq([{ "subject" => "Social sciences" },
246
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
247
+ "subject" => "FOS: Social sciences",
248
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
249
+ expect(subject.language).to eq("en")
250
+ expect(subject.container).to eq("identifier" => "https://libscie.org/", "identifierType" => "URL", "title" => "Liberate Science", "type" => "Periodical")
251
+ expect(subject.references).to be_nil
252
+ end
253
+
254
+ it "blog post with non-url id" do
255
+ input = "https://rogue-scholar.org/api/posts/1898d2d7-4d87-4487-96c4-3073cf99e9a5"
256
+ subject = described_class.new(input: input)
257
+ expect(subject.valid?).to be true
258
+ expect(subject.id).to eq("http://sfmatheson.blogspot.com/2023/01/quintessence-of-dust-2023-restart-why.html")
259
+ expect(subject.url).to eq("http://sfmatheson.blogspot.com/2023/01/quintessence-of-dust-2023-restart-why.html")
260
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "1898d2d7-4d87-4487-96c4-3073cf99e9a5", "alternateIdentifierType" => "UUID" }])
261
+ expect(subject.type).to eq("Article")
262
+ expect(subject.creators.length).to eq(1)
263
+ expect(subject.creators.first).to eq("familyName"=>"Matheson", "givenName"=>"Stephen", "type"=>"Person")
264
+ expect(subject.titles).to eq([{ "title" => "Quintessence of Dust 2023 restart: the why" }])
265
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
266
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
267
+ expect(subject.date).to eq("published"=>"2023-01-09", "updated"=>"2023-04-02")
268
+ expect(subject.descriptions.first["description"]).to start_with("It's early January 2023, a little before sunset in Tucson.")
269
+ expect(subject.publisher).to eq("name" => "Quintessence of Dust")
270
+ expect(subject.subjects).to eq([{ "subject" => "Social sciences" },
271
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
272
+ "subject" => "FOS: Social sciences",
273
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
274
+ expect(subject.language).to eq("en")
275
+ expect(subject.container).to eq("identifier" => "http://sfmatheson.blogspot.com/", "identifierType" => "URL", "title" => "Quintessence of Dust", "type" => "Periodical")
276
+ expect(subject.references).to be_nil
277
+ end
278
+
279
+ it "substack post with broken reference" do
280
+ input = "https://rogue-scholar.org/api/posts/2b105b29-acbc-4eae-9ff1-368803f36a4d"
281
+ subject = described_class.new(input: input)
282
+ expect(subject.valid?).to be true
283
+ expect(subject.id).to eq("https://doi.org/10.59350/bbcsr-r4b59")
284
+ expect(subject.url).to eq("https://markrubin.substack.com/p/the-preregistration-prescriptiveness")
285
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "2b105b29-acbc-4eae-9ff1-368803f36a4d", "alternateIdentifierType" => "UUID" }])
286
+ expect(subject.type).to eq("Article")
287
+ expect(subject.creators.length).to eq(1)
288
+ expect(subject.creators.first).to eq("familyName" => "Rubin", "givenName" => "Mark", "type" => "Person")
289
+ expect(subject.titles).to eq([{ "title" => "The Preregistration Prescriptiveness Trade-Off and Unknown Unknowns in Science" }])
290
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
291
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
292
+ expect(subject.date).to eq("published" => "2023-06-06")
293
+ expect(subject.descriptions.first["description"]).to start_with("Comments on Van Drimmelen (2023)")
294
+ expect(subject.publisher).to eq("name" => "Critical Metascience")
295
+ expect(subject.subjects).to eq([{ "subject" => "Social sciences" },
296
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
297
+ "subject" => "FOS: Social sciences",
298
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
299
+ expect(subject.language).to eq("en")
300
+ expect(subject.container).to eq("identifier" => "https://markrubin.substack.com", "identifierType" => "URL", "title" => "Critical Metascience", "type" => "Periodical")
301
+ expect(subject.references.length).to eq(16)
302
+ expect(subject.references.first).to eq("key" => "ref1", "doi" => "https://doi.org/10.3386/w27250")
226
303
  end
227
304
  end
228
305
 
229
- context 'get json_feed' do
230
- it 'unregistered posts' do
306
+ context "get json_feed" do
307
+ it "unregistered posts" do
231
308
  response = subject.get_json_feed_unregistered
232
309
  expect(response).to eq("ca2a7df4-f3b9-487c-82e9-27f54de75ea8")
233
310
  end
234
311
 
235
- it 'not indexed posts' do
236
- response = subject.get_json_feed_not_indexed('2023-01-01')
312
+ it "not indexed posts" do
313
+ response = subject.get_json_feed_not_indexed("2023-01-01")
237
314
  expect(response).to eq("ab58e412-06eb-42b7-b81a-d340825b9d48")
238
315
  end
239
316
 
240
- it 'by blog_id' do
241
- response = subject.get_json_feed_by_blog('tyfqw20')
317
+ it "by blog_id" do
318
+ response = subject.get_json_feed_by_blog("tyfqw20")
242
319
  expect(response).to eq("3e1278f6-e7c0-43e1-bb54-6829e1344c0d")
243
320
  end
244
321
  end
322
+
323
+ context "get doi_prefix for blog" do
324
+ it "by blog_id" do
325
+ response = subject.get_doi_prefix_by_blog_id("tyfqw20")
326
+ expect(response).to eq("10.59350")
327
+ end
328
+
329
+ it "by blog post uuid" do
330
+ response = subject.get_doi_prefix_by_json_feed_item_uuid("1898d2d7-4d87-4487-96c4-3073cf99e9a5")
331
+ expect(response).to eq("10.59350")
332
+ end
333
+
334
+ it "by blog post uuid specific prefix" do
335
+ response = subject.get_doi_prefix_by_json_feed_item_uuid("2b22bbba-bcba-4072-94cc-3f88442fff88")
336
+ expect(response).to eq("10.54900")
337
+ end
338
+ end
245
339
  end
data/spec/utils_spec.rb CHANGED
@@ -101,6 +101,14 @@ describe Commonmeta::Metadata, vcr: true do
101
101
  end
102
102
  end
103
103
 
104
+ context "validate_ror" do
105
+ it "validate_ror" do
106
+ ror = "https://ror.org/0342dzm54"
107
+ response = subject.validate_ror(ror)
108
+ expect(response).to eq("0342dzm54")
109
+ end
110
+ end
111
+
104
112
  context "validate_orcid_scheme" do
105
113
  it "validate_orcid_scheme" do
106
114
  orcid = "http://orcid.org"