commonmeta-ruby 3.2.14 → 3.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/bin/commonmeta +1 -1
  4. data/lib/commonmeta/author_utils.rb +1 -1
  5. data/lib/commonmeta/cli.rb +14 -0
  6. data/lib/commonmeta/crossref_utils.rb +56 -14
  7. data/lib/commonmeta/readers/json_feed_reader.rb +30 -2
  8. data/lib/commonmeta/utils.rb +34 -0
  9. data/lib/commonmeta/version.rb +1 -1
  10. data/spec/cli_spec.rb +12 -3
  11. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_blog.yml +997 -0
  12. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/doi_prefix/doi_prefix_by_uuid.yml +256 -0
  13. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_blog.yml +997 -0
  14. data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/encode/by_uuid.yml +256 -0
  15. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_id.yml +997 -0
  16. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid.yml +389 -0
  17. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_doi_prefix_for_blog/by_blog_post_uuid_specific_prefix.yml +389 -0
  18. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item/by_uuid.yml +136 -0
  19. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blog_post_with_non-url_id.yml +136 -0
  20. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_organizational_author.yml +91 -0
  21. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml +1316 -0
  22. data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_organizational_author.yml +91 -0
  23. data/spec/readers/json_feed_reader_spec.rb +280 -186
  24. data/spec/utils_spec.rb +8 -0
  25. data/spec/writers/crossref_xml_writer_spec.rb +28 -0
  26. metadata +14 -2
@@ -1,245 +1,339 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'spec_helper'
3
+ require "spec_helper"
4
4
 
5
5
  describe Commonmeta::Metadata, vcr: true do
6
6
  subject { described_class.new }
7
7
 
8
- context 'get json_feed_item metadata' do
9
- it 'blogger post' do
10
- input = 'https://rogue-scholar.org/api/posts/f3629c86-06e0-42c0-844a-266b03a91ef1'
8
+ context "get json_feed_item metadata" do
9
+ it "blogger post" do
10
+ input = "https://rogue-scholar.org/api/posts/f3629c86-06e0-42c0-844a-266b03a91ef1"
11
11
  subject = described_class.new(input: input)
12
12
  expect(subject.valid?).to be true
13
- expect(subject.id).to eq('https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html')
14
- expect(subject.url).to eq('https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html')
15
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"f3629c86-06e0-42c0-844a-266b03a91ef1", "alternateIdentifierType"=>"UUID"}])
16
- expect(subject.type).to eq('Article')
13
+ expect(subject.id).to eq("https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html")
14
+ expect(subject.url).to eq("https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html")
15
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "f3629c86-06e0-42c0-844a-266b03a91ef1", "alternateIdentifierType" => "UUID" }])
16
+ expect(subject.type).to eq("Article")
17
17
  expect(subject.creators.length).to eq(1)
18
- expect(subject.creators.first).to eq("familyName"=>"Page", "givenName"=>"Roderic", "type"=>"Person")
19
- expect(subject.titles).to eq([{"title"=>"Ten years and a million links"}])
20
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
21
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
22
- expect(subject.date).to eq('published' => '2023-05-31')
23
- expect(subject.descriptions.first['description']).to start_with("As trailed on a Twitter thread last week I’ve been working on a manuscript describing the efforts to map taxonomic names to their original descriptions in the taxonomic literature.")
24
- expect(subject.publisher).to eq('name' => 'iPhylo')
25
- expect(subject.subjects).to eq([{"subject"=>"Natural sciences"},
26
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
27
- "subject"=>"FOS: Natural sciences",
28
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
29
- expect(subject.language).to eq('en')
30
- expect(subject.container).to eq("identifier"=>"https://iphylo.blogspot.com/", "identifierType"=>"URL", "title"=>"iPhylo", "type"=>"Periodical")
31
- end
32
-
33
- it 'ghost post with doi' do
34
- input = 'https://rogue-scholar.org/api/posts/5bb66e92-5cb9-4659-8aca-20e486b695c9'
18
+ expect(subject.creators.first).to eq("familyName" => "Page", "givenName" => "Roderic", "type" => "Person")
19
+ expect(subject.titles).to eq([{ "title" => "Ten years and a million links" }])
20
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
21
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
22
+ expect(subject.date).to eq("published" => "2023-05-31")
23
+ expect(subject.descriptions.first["description"]).to start_with("As trailed on a Twitter thread last week I’ve been working on a manuscript describing the efforts to map taxonomic names to their original descriptions in the taxonomic literature.")
24
+ expect(subject.publisher).to eq("name" => "iPhylo")
25
+ expect(subject.subjects).to eq([{ "subject" => "Natural sciences" },
26
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
27
+ "subject" => "FOS: Natural sciences",
28
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
29
+ expect(subject.language).to eq("en")
30
+ expect(subject.container).to eq("identifier" => "https://iphylo.blogspot.com/", "identifierType" => "URL", "title" => "iPhylo", "type" => "Periodical")
31
+ end
32
+
33
+ it "ghost post with doi" do
34
+ input = "https://rogue-scholar.org/api/posts/5bb66e92-5cb9-4659-8aca-20e486b695c9"
35
35
  subject = described_class.new(input: input)
36
36
  expect(subject.valid?).to be true
37
- expect(subject.id).to eq('https://doi.org/10.53731/4nwxn-frt36')
38
- expect(subject.url).to eq('https://blog.front-matter.io/posts/does-it-compose')
39
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"5bb66e92-5cb9-4659-8aca-20e486b695c9", "alternateIdentifierType"=>"UUID"}])
40
- expect(subject.type).to eq('Article')
37
+ expect(subject.id).to eq("https://doi.org/10.53731/4nwxn-frt36")
38
+ expect(subject.url).to eq("https://blog.front-matter.io/posts/does-it-compose")
39
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "5bb66e92-5cb9-4659-8aca-20e486b695c9", "alternateIdentifierType" => "UUID" }])
40
+ expect(subject.type).to eq("Article")
41
41
  expect(subject.creators.length).to eq(1)
42
- expect(subject.creators.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName"=>"Fenner", "givenName"=>"Martin", "type"=>"Person")
43
- expect(subject.titles).to eq([{"title"=>"Does it compose?"}])
44
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
45
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
46
- expect(subject.date).to eq('published' => '2023-05-16', 'updated' => '2023-05-16')
47
- expect(subject.descriptions.first['description']).to start_with("One question I have increasingly asked myself in the past few years. Meaning Can I run this open source software using Docker containers and a Docker Compose file?")
48
- expect(subject.publisher).to eq('name' => 'Front Matter')
49
- expect(subject.subjects).to eq([{"subject"=>"Engineering and technology"},
50
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
51
- "subject"=>"FOS: Engineering and technology",
52
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
53
- expect(subject.language).to eq('en')
54
- expect(subject.container).to eq("identifier"=>"https://blog.front-matter.io", "identifierType"=>"URL", "title"=>"Front Matter", "type"=>"Periodical")
55
- end
56
-
57
- it 'ghost post without doi' do
58
- input = 'https://rogue-scholar.org/api/posts/c3095752-2af0-40a4-a229-3ceb7424bce2'
42
+ expect(subject.creators.first).to eq("id" => "https://orcid.org/0000-0003-1419-2405", "familyName" => "Fenner", "givenName" => "Martin", "type" => "Person")
43
+ expect(subject.titles).to eq([{ "title" => "Does it compose?" }])
44
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
45
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
46
+ expect(subject.date).to eq("published" => "2023-05-16", "updated" => "2023-05-16")
47
+ expect(subject.descriptions.first["description"]).to start_with("One question I have increasingly asked myself in the past few years. Meaning Can I run this open source software using Docker containers and a Docker Compose file?")
48
+ expect(subject.publisher).to eq("name" => "Front Matter")
49
+ expect(subject.subjects).to eq([{ "subject" => "Engineering and technology" },
50
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
51
+ "subject" => "FOS: Engineering and technology",
52
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
53
+ expect(subject.language).to eq("en")
54
+ expect(subject.container).to eq("identifier" => "https://blog.front-matter.io", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical")
55
+ end
56
+
57
+ it "ghost post without doi" do
58
+ input = "https://rogue-scholar.org/api/posts/c3095752-2af0-40a4-a229-3ceb7424bce2"
59
59
  subject = described_class.new(input: input)
60
60
  expect(subject.valid?).to be true
61
- expect(subject.id).to eq('https://www.ideasurg.pub/residency-visual-abstract')
62
- expect(subject.url).to eq('https://www.ideasurg.pub/residency-visual-abstract')
63
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"c3095752-2af0-40a4-a229-3ceb7424bce2", "alternateIdentifierType"=>"UUID"}])
64
- expect(subject.type).to eq('Article')
61
+ expect(subject.id).to eq("https://www.ideasurg.pub/residency-visual-abstract")
62
+ expect(subject.url).to eq("https://www.ideasurg.pub/residency-visual-abstract")
63
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "c3095752-2af0-40a4-a229-3ceb7424bce2", "alternateIdentifierType" => "UUID" }])
64
+ expect(subject.type).to eq("Article")
65
65
  expect(subject.creators.length).to eq(1)
66
- expect(subject.creators.first).to eq("familyName"=>"Sathe", "givenName"=>"Tejas S.", "type"=>"Person")
67
- expect(subject.titles).to eq([{"title"=>"The Residency Visual Abstract"}])
68
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
69
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
70
- expect(subject.date).to eq('published' => '2023-04-08')
71
- expect(subject.descriptions.first['description']).to start_with("A graphical, user-friendly tool for programs to highlight important data to prospective applicants")
72
- expect(subject.publisher).to eq('name' => 'I.D.E.A.S.')
73
- expect(subject.subjects).to eq([{"subject"=>"Medical and health sciences"},
74
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
75
- "subject"=>"FOS: Medical and health sciences",
76
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
77
- expect(subject.language).to eq('en')
78
- expect(subject.container).to eq("identifier"=>"https://www.ideasurg.pub/", "identifierType"=>"URL", "title"=>"I.D.E.A.S.", "type"=>"Periodical")
79
- end
80
-
81
- it 'ghost post with author name suffix' do
82
- input = 'https://rogue-scholar.org/api/posts/6179ad80-cc7f-4904-9260-0ecb3c3a90ba'
66
+ expect(subject.creators.first).to eq("familyName" => "Sathe", "givenName" => "Tejas S.", "type" => "Person")
67
+ expect(subject.titles).to eq([{ "title" => "The Residency Visual Abstract" }])
68
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
69
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
70
+ expect(subject.date).to eq("published" => "2023-04-08")
71
+ expect(subject.descriptions.first["description"]).to start_with("A graphical, user-friendly tool for programs to highlight important data to prospective applicants")
72
+ expect(subject.publisher).to eq("name" => "I.D.E.A.S.")
73
+ expect(subject.subjects).to eq([{ "subject" => "Medical and health sciences" },
74
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
75
+ "subject" => "FOS: Medical and health sciences",
76
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
77
+ expect(subject.language).to eq("en")
78
+ expect(subject.container).to eq("identifier" => "https://www.ideasurg.pub/", "identifierType" => "URL", "title" => "I.D.E.A.S.", "type" => "Periodical")
79
+ end
80
+
81
+ it "ghost post with author name suffix" do
82
+ input = "https://rogue-scholar.org/api/posts/6179ad80-cc7f-4904-9260-0ecb3c3a90ba"
83
83
  subject = described_class.new(input: input)
84
84
  expect(subject.valid?).to be true
85
- expect(subject.id).to eq('https://www.ideasurg.pub/academic-powerhouse')
86
- expect(subject.url).to eq('https://www.ideasurg.pub/academic-powerhouse')
87
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"6179ad80-cc7f-4904-9260-0ecb3c3a90ba", "alternateIdentifierType"=>"UUID"}])
88
- expect(subject.type).to eq('Article')
85
+ expect(subject.id).to eq("https://www.ideasurg.pub/academic-powerhouse")
86
+ expect(subject.url).to eq("https://www.ideasurg.pub/academic-powerhouse")
87
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "6179ad80-cc7f-4904-9260-0ecb3c3a90ba", "alternateIdentifierType" => "UUID" }])
88
+ expect(subject.type).to eq("Article")
89
89
  expect(subject.creators.length).to eq(1)
90
- expect(subject.creators.first).to eq("familyName"=>"Sathe", "givenName"=>"Tejas S.", "type"=>"Person")
91
- expect(subject.titles).to eq([{"title"=>"How to Build an Academic Powerhouse: Let's Study Who's Doing it"}])
92
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
93
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
94
- expect(subject.date).to eq('published' => '2023-06-03')
95
- expect(subject.descriptions.first['description']).to start_with("A Data Exploration with Public Data from the Academic Surgical Congress")
96
- expect(subject.publisher).to eq('name' => 'I.D.E.A.S.')
97
- expect(subject.subjects).to eq([{"subject"=>"Medical and health sciences"},
98
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
99
- "subject"=>"FOS: Medical and health sciences",
100
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
101
- expect(subject.language).to eq('en')
102
- expect(subject.container).to eq("identifier"=>"https://www.ideasurg.pub/", "identifierType"=>"URL", "title"=>"I.D.E.A.S.", "type"=>"Periodical")
90
+ expect(subject.creators.first).to eq("familyName" => "Sathe", "givenName" => "Tejas S.", "type" => "Person")
91
+ expect(subject.titles).to eq([{ "title" => "How to Build an Academic Powerhouse: Let's Study Who's Doing it" }])
92
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
93
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
94
+ expect(subject.date).to eq("published" => "2023-06-03")
95
+ expect(subject.descriptions.first["description"]).to start_with("A Data Exploration with Public Data from the Academic Surgical Congress")
96
+ expect(subject.publisher).to eq("name" => "I.D.E.A.S.")
97
+ expect(subject.subjects).to eq([{ "subject" => "Medical and health sciences" },
98
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
99
+ "subject" => "FOS: Medical and health sciences",
100
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
101
+ expect(subject.language).to eq("en")
102
+ expect(subject.container).to eq("identifier" => "https://www.ideasurg.pub/", "identifierType" => "URL", "title" => "I.D.E.A.S.", "type" => "Periodical")
103
103
  expect(subject.references).to be_nil
104
104
  end
105
105
 
106
- it 'syldavia gazette post with references' do
107
- input = 'https://rogue-scholar.org/api/posts/0022b9ef-525a-4a79-81ad-13411697f58a'
106
+ it "syldavia gazette post with references" do
107
+ input = "https://rogue-scholar.org/api/posts/0022b9ef-525a-4a79-81ad-13411697f58a"
108
108
  subject = described_class.new(input: input)
109
109
  expect(subject.valid?).to be true
110
- expect(subject.id).to eq('https://doi.org/10.53731/ffbx660-083tnag')
111
- expect(subject.url).to eq('https://syldavia-gazette.org/guinea-worms-chatgpt-neanderthals')
112
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"0022b9ef-525a-4a79-81ad-13411697f58a", "alternateIdentifierType"=>"UUID"}])
113
- expect(subject.type).to eq('Article')
110
+ expect(subject.id).to eq("https://doi.org/10.53731/ffbx660-083tnag")
111
+ expect(subject.url).to eq("https://syldavia-gazette.org/guinea-worms-chatgpt-neanderthals")
112
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "0022b9ef-525a-4a79-81ad-13411697f58a", "alternateIdentifierType" => "UUID" }])
113
+ expect(subject.type).to eq("Article")
114
114
  expect(subject.creators.length).to eq(1)
115
- expect(subject.creators.first).to eq("familyName"=>"Fenner", "givenName"=>"Martin", "id"=>"https://orcid.org/0000-0003-1419-2405", "type"=>"Person")
116
- expect(subject.titles).to eq([{"title"=>"Guinea Worms, ChatGPT, Neanderthals, Plagiarism, Tidyverse"}])
117
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
118
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
119
- expect(subject.date).to eq('published' => '2023-02-01', 'updated' => '2023-04-13')
120
- expect(subject.descriptions.first['description']).to start_with("Guinea worm disease reaches all-time low: only 13* human cases reported in 2022")
121
- expect(subject.publisher).to eq('name' => 'Syldavia Gazette')
122
- expect(subject.subjects).to eq([{"subject"=>"Humanities"}, {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf", "subject"=>"FOS: Humanities", "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
123
- expect(subject.language).to eq('en')
124
- expect(subject.container).to eq("identifier"=>"https://syldavia-gazette.org", "identifierType"=>"URL", "title"=>"Syldavia Gazette", "type"=>"Periodical")
115
+ expect(subject.creators.first).to eq("familyName" => "Fenner", "givenName" => "Martin", "id" => "https://orcid.org/0000-0003-1419-2405", "type" => "Person")
116
+ expect(subject.titles).to eq([{ "title" => "Guinea Worms, ChatGPT, Neanderthals, Plagiarism, Tidyverse" }])
117
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
118
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
119
+ expect(subject.date).to eq("published" => "2023-02-01", "updated" => "2023-04-13")
120
+ expect(subject.descriptions.first["description"]).to start_with("Guinea worm disease reaches all-time low: only 13* human cases reported in 2022")
121
+ expect(subject.publisher).to eq("name" => "Syldavia Gazette")
122
+ expect(subject.subjects).to eq([{ "subject" => "Humanities" }, { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf", "subject" => "FOS: Humanities", "subjectScheme" => "Fields of Science and Technology (FOS)" }])
123
+ expect(subject.language).to eq("en")
124
+ expect(subject.container).to eq("identifier" => "https://syldavia-gazette.org", "identifierType" => "URL", "title" => "Syldavia Gazette", "type" => "Periodical")
125
125
  expect(subject.references.length).to eq(5)
126
- expect(subject.references.first).to eq("key"=>"ref1", "url"=>"https://cartercenter.org/news/pr/2023/2022-guinea-worm-worldwide-cases-announcement.html")
126
+ expect(subject.references.first).to eq("key" => "ref1", "url" => "https://cartercenter.org/news/pr/2023/2022-guinea-worm-worldwide-cases-announcement.html")
127
127
  end
128
128
 
129
- it 'wordpress post' do
130
- input = 'https://rogue-scholar.org/api/posts/1c578558-1324-4493-b8af-84c49eabc52f'
129
+ it "wordpress post" do
130
+ input = "https://rogue-scholar.org/api/posts/1c578558-1324-4493-b8af-84c49eabc52f"
131
131
  subject = described_class.new(input: input)
132
132
  expect(subject.valid?).to be true
133
- expect(subject.id).to eq('http://wisspub.net/?p=20455')
134
- expect(subject.url).to eq('https://wisspub.net/2023/05/23/eu-mitgliedstaaten-betonen-die-rolle-von-wissenschaftsgeleiteten-open-access-modellen-jenseits-von-apcs')
135
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"1c578558-1324-4493-b8af-84c49eabc52f", "alternateIdentifierType"=>"UUID"}])
136
- expect(subject.type).to eq('Article')
133
+ expect(subject.id).to eq("http://wisspub.net/?p=20455")
134
+ expect(subject.url).to eq("https://wisspub.net/2023/05/23/eu-mitgliedstaaten-betonen-die-rolle-von-wissenschaftsgeleiteten-open-access-modellen-jenseits-von-apcs")
135
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "1c578558-1324-4493-b8af-84c49eabc52f", "alternateIdentifierType" => "UUID" }])
136
+ expect(subject.type).to eq("Article")
137
137
  expect(subject.creators.length).to eq(1)
138
- expect(subject.creators.first).to eq("familyName"=>"Pampel", "givenName"=>"Heinz", "id"=>"https://orcid.org/0000-0003-3334-2771", "type"=>"Person")
139
- expect(subject.titles).to eq([{"title"=>"EU-Mitgliedstaaten betonen die Rolle von wissenschaftsgeleiteten Open-Access-Modellen jenseits von APCs"}])
140
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
141
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
142
- expect(subject.date).to eq('published' => '2023-05-23', 'updated' => '2023-05-23')
143
- expect(subject.descriptions.first['description']).to start_with("Die EU-Wissenschaftsministerien haben sich auf ihrer heutigen Sitzung in Brüssel unter dem Titel “Council conclusions on high-quality, transparent, open, trustworthy and equitable scholarly publishing”")
144
- expect(subject.publisher).to eq('name' => 'wisspub.net')
145
- expect(subject.subjects).to eq([{"subject"=>"Engineering and technology"},
146
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
147
- "subject"=>"FOS: Engineering and technology",
148
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
149
- expect(subject.language).to eq('de')
150
- expect(subject.container).to eq("identifier"=>"https://wisspub.net", "identifierType"=>"URL", "title"=>"wisspub.net", "type"=>"Periodical")
151
- end
152
-
153
- it 'wordpress post with references' do
154
- input = 'https://rogue-scholar.org/api/posts/4e4bf150-751f-4245-b4ca-fe69e3c3bb24'
138
+ expect(subject.creators.first).to eq("familyName" => "Pampel", "givenName" => "Heinz", "id" => "https://orcid.org/0000-0003-3334-2771", "type" => "Person")
139
+ expect(subject.titles).to eq([{ "title" => "EU-Mitgliedstaaten betonen die Rolle von wissenschaftsgeleiteten Open-Access-Modellen jenseits von APCs" }])
140
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
141
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
142
+ expect(subject.date).to eq("published" => "2023-05-23", "updated" => "2023-05-23")
143
+ expect(subject.descriptions.first["description"]).to start_with("Die EU-Wissenschaftsministerien haben sich auf ihrer heutigen Sitzung in Brüssel unter dem Titel “Council conclusions on high-quality, transparent, open, trustworthy and equitable scholarly publishing”")
144
+ expect(subject.publisher).to eq("name" => "wisspub.net")
145
+ expect(subject.subjects).to eq([{ "subject" => "Engineering and technology" },
146
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
147
+ "subject" => "FOS: Engineering and technology",
148
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
149
+ expect(subject.language).to eq("de")
150
+ expect(subject.container).to eq("identifier" => "https://wisspub.net", "identifierType" => "URL", "title" => "wisspub.net", "type" => "Periodical")
151
+ end
152
+
153
+ it "wordpress post with references" do
154
+ input = "https://rogue-scholar.org/api/posts/4e4bf150-751f-4245-b4ca-fe69e3c3bb24"
155
155
  subject = described_class.new(input: input)
156
156
  expect(subject.valid?).to be true
157
- expect(subject.id).to eq('http://svpow.com/?p=20992')
158
- expect(subject.url).to eq('https://svpow.com/2023/06/09/new-paper-curtice-et-al-2023-on-the-first-haplocanthosaurus-from-dry-mesa')
159
- expect(subject.type).to eq('Article')
157
+ expect(subject.id).to eq("http://svpow.com/?p=20992")
158
+ expect(subject.url).to eq("https://svpow.com/2023/06/09/new-paper-curtice-et-al-2023-on-the-first-haplocanthosaurus-from-dry-mesa")
159
+ expect(subject.type).to eq("Article")
160
160
  expect(subject.creators.length).to eq(1)
161
- expect(subject.creators.first).to eq("familyName"=>"Wedel", "givenName"=>"Matt", "type"=>"Person")
162
- expect(subject.titles).to eq([{"title"=>"New paper: Curtice et al. (2023) on the first Haplocanthosaurus from Dry Mesa"}])
163
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
164
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
165
- expect(subject.date).to eq('published' => '2023-06-09', 'updated' => '2023-06-09')
166
- expect(subject.descriptions.first['description']).to start_with("Haplocanthosaurus tibiae and dorsal vertebrae.")
167
- expect(subject.publisher).to eq('name' => 'Sauropod Vertebra Picture of the Week')
168
- expect(subject.subjects).to eq([{"subject"=>"Natural sciences"},
169
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
170
- "subject"=>"FOS: Natural sciences",
171
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
172
- expect(subject.language).to eq('en')
173
- expect(subject.container).to eq("identifier"=>"https://svpow.com", "identifierType"=>"URL", "title"=>"Sauropod Vertebra Picture of the Week", "type"=>"Periodical")
161
+ expect(subject.creators.first).to eq("familyName" => "Wedel", "givenName" => "Matt", "type" => "Person")
162
+ expect(subject.titles).to eq([{ "title" => "New paper: Curtice et al. (2023) on the first Haplocanthosaurus from Dry Mesa" }])
163
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
164
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
165
+ expect(subject.date).to eq("published" => "2023-06-09", "updated" => "2023-06-09")
166
+ expect(subject.descriptions.first["description"]).to start_with("Haplocanthosaurus tibiae and dorsal vertebrae.")
167
+ expect(subject.publisher).to eq("name" => "Sauropod Vertebra Picture of the Week")
168
+ expect(subject.subjects).to eq([{ "subject" => "Natural sciences" },
169
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
170
+ "subject" => "FOS: Natural sciences",
171
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
172
+ expect(subject.language).to eq("en")
173
+ expect(subject.container).to eq("identifier" => "https://svpow.com", "identifierType" => "URL", "title" => "Sauropod Vertebra Picture of the Week", "type" => "Periodical")
174
174
  expect(subject.references.length).to eq(3)
175
- expect(subject.references.first).to eq("key"=>"ref1", "url"=>"https://sauroposeidon.files.wordpress.com/2010/04/foster-and-wedel-2014-haplocanthosaurus-from-snowmass-colorado.pdf")
175
+ expect(subject.references.first).to eq("key" => "ref1", "url" => "https://sauroposeidon.files.wordpress.com/2010/04/foster-and-wedel-2014-haplocanthosaurus-from-snowmass-colorado.pdf")
176
176
  end
177
177
 
178
- it 'upstream post with references' do
179
- input = 'https://rogue-scholar.org/api/posts/954f8138-0ecd-4090-87c5-cef1297f1470'
178
+ it "upstream post with references" do
179
+ input = "https://rogue-scholar.org/api/posts/954f8138-0ecd-4090-87c5-cef1297f1470"
180
180
  subject = described_class.new(input: input)
181
181
  expect(subject.valid?).to be true
182
- expect(subject.id).to eq('https://doi.org/10.54900/zwm7q-vet94')
183
- expect(subject.url).to eq('https://upstream.force11.org/the-research-software-alliance-resa')
184
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"954f8138-0ecd-4090-87c5-cef1297f1470", "alternateIdentifierType"=>"UUID"}])
185
- expect(subject.type).to eq('Article')
182
+ expect(subject.id).to eq("https://doi.org/10.54900/zwm7q-vet94")
183
+ expect(subject.url).to eq("https://upstream.force11.org/the-research-software-alliance-resa")
184
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "954f8138-0ecd-4090-87c5-cef1297f1470", "alternateIdentifierType" => "UUID" }])
185
+ expect(subject.type).to eq("Article")
186
186
  expect(subject.creators.length).to eq(2)
187
- expect(subject.creators.first).to eq("familyName"=>"Katz", "givenName"=>"Daniel S.", "id"=>"https://orcid.org/0000-0001-5934-7525", "type"=>"Person")
188
- expect(subject.titles).to eq([{"title"=>"The Research Software Alliance (ReSA)"}])
189
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
190
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
191
- expect(subject.date).to eq('published' => '2023-04-18', 'updated' => '2023-04-18')
192
- expect(subject.descriptions.first['description']).to start_with("Research software is a key part of most research today. As University of Manchester Professor Carole Goble has said, \"software is the ubiquitous instrument of science.\"")
193
- expect(subject.publisher).to eq('name' => 'Upstream')
194
- expect(subject.subjects).to eq([{"subject"=>"Humanities"},
195
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
196
- "subject"=>"FOS: Humanities",
197
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
198
- expect(subject.language).to eq('en')
199
- expect(subject.container).to eq("identifier"=>"https://upstream.force11.org", "identifierType"=>"URL", "title"=>"Upstream", "type"=>"Periodical")
187
+ expect(subject.creators.first).to eq("familyName" => "Katz", "givenName" => "Daniel S.", "id" => "https://orcid.org/0000-0001-5934-7525", "type" => "Person")
188
+ expect(subject.titles).to eq([{ "title" => "The Research Software Alliance (ReSA)" }])
189
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
190
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
191
+ expect(subject.date).to eq("published" => "2023-04-18", "updated" => "2023-04-18")
192
+ expect(subject.descriptions.first["description"]).to start_with("Research software is a key part of most research today. As University of Manchester Professor Carole Goble has said, \"software is the ubiquitous instrument of science.\"")
193
+ expect(subject.publisher).to eq("name" => "Upstream")
194
+ expect(subject.subjects).to eq([{ "subject" => "Humanities" },
195
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
196
+ "subject" => "FOS: Humanities",
197
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
198
+ expect(subject.language).to eq("en")
199
+ expect(subject.container).to eq("identifier" => "https://upstream.force11.org", "identifierType" => "URL", "title" => "Upstream", "type" => "Periodical")
200
200
  expect(subject.references.length).to eq(11)
201
- expect(subject.references.first).to eq("key"=>"ref1", "url"=>"https://software.ac.uk/blog/2014-12-04-its-impossible-conduct-research-without-software-say-7-out-10-uk-researchers")
201
+ expect(subject.references.first).to eq("key" => "ref1", "url" => "https://software.ac.uk/blog/2014-12-04-its-impossible-conduct-research-without-software-say-7-out-10-uk-researchers")
202
202
  end
203
203
 
204
- it 'jekyll post' do
205
- input = 'https://rogue-scholar.org/api/posts/efdacb04-bcec-49d7-b689-ab3eab0634bf'
204
+ it "jekyll post" do
205
+ input = "https://rogue-scholar.org/api/posts/efdacb04-bcec-49d7-b689-ab3eab0634bf"
206
206
  subject = described_class.new(input: input)
207
207
  expect(subject.valid?).to be true
208
- expect(subject.id).to eq('https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2')
209
- expect(subject.url).to eq('https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2')
210
- expect(subject.alternate_identifiers).to eq([{"alternateIdentifier"=>"efdacb04-bcec-49d7-b689-ab3eab0634bf", "alternateIdentifierType"=>"UUID"}])
211
- expect(subject.type).to eq('Article')
208
+ expect(subject.id).to eq("https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2")
209
+ expect(subject.url).to eq("https://citationstyles.org/2020/07/11/seeking-public-comment-on-CSL-1-0-2")
210
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "efdacb04-bcec-49d7-b689-ab3eab0634bf", "alternateIdentifierType" => "UUID" }])
211
+ expect(subject.type).to eq("Article")
212
212
  expect(subject.creators.length).to eq(1)
213
- expect(subject.creators.first).to eq("familyName"=>"Karcher", "givenName"=>"Sebastian", "type"=>"Person")
214
- expect(subject.titles).to eq([{"title"=>"Seeking Public Comment on CSL 1.0.2 Release"}])
215
- expect(subject.license).to eq('id' => 'CC-BY-4.0',
216
- 'url' => 'https://creativecommons.org/licenses/by/4.0/legalcode')
217
- expect(subject.date).to eq('published' => '2020-07-11', 'updated' => '2020-07-11')
218
- expect(subject.descriptions.first['description']).to start_with("Over the past few months, Citation Style Language developers have worked to address a backlog of feature requests. This work will be reflected in two upcoming releases.")
219
- expect(subject.publisher).to eq('name' => 'Citation Style Language')
220
- expect(subject.subjects).to eq([{"subject"=>"Engineering and technology"},
221
- {"schemeUri"=>"http://www.oecd.org/science/inno/38235147.pdf",
222
- "subject"=>"FOS: Engineering and technology",
223
- "subjectScheme"=>"Fields of Science and Technology (FOS)"}])
224
- expect(subject.language).to eq('en')
225
- expect(subject.container).to eq("identifier"=>"https://citationstyles.org/", "identifierType"=>"URL", "title"=>"Citation Style Language", "type"=>"Periodical")
213
+ expect(subject.creators.first).to eq("familyName" => "Karcher", "givenName" => "Sebastian", "type" => "Person")
214
+ expect(subject.titles).to eq([{ "title" => "Seeking Public Comment on CSL 1.0.2 Release" }])
215
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
216
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
217
+ expect(subject.date).to eq("published" => "2020-07-11", "updated" => "2020-07-11")
218
+ expect(subject.descriptions.first["description"]).to start_with("Over the past few months, Citation Style Language developers have worked to address a backlog of feature requests. This work will be reflected in two upcoming releases.")
219
+ expect(subject.publisher).to eq("name" => "Citation Style Language")
220
+ expect(subject.subjects).to eq([{ "subject" => "Engineering and technology" },
221
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
222
+ "subject" => "FOS: Engineering and technology",
223
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
224
+ expect(subject.language).to eq("en")
225
+ expect(subject.container).to eq("identifier" => "https://citationstyles.org/", "identifierType" => "URL", "title" => "Citation Style Language", "type" => "Periodical")
226
+ end
227
+
228
+ it "ghost post with organizational author" do
229
+ input = "https://rogue-scholar.org/api/posts/5561f8e4-2ff1-4186-a8d5-8dacb3afe414"
230
+ subject = described_class.new(input: input)
231
+ puts subject.errors
232
+ # expect(subject.valid?).to be true
233
+ expect(subject.id).to eq("https://libscie.org/ku-leuven-supports-researchequals")
234
+ expect(subject.url).to eq("https://libscie.org/ku-leuven-supports-researchequals")
235
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "5561f8e4-2ff1-4186-a8d5-8dacb3afe414", "alternateIdentifierType" => "UUID" }])
236
+ expect(subject.type).to eq("Article")
237
+ expect(subject.creators.length).to eq(1)
238
+ expect(subject.creators.first).to eq("id"=>"https://ror.org/0342dzm54", "name"=>"Liberate Science", "type"=>"Organization")
239
+ expect(subject.titles).to eq([{ "title" => "KU Leuven supports ResearchEquals" }])
240
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
241
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
242
+ expect(subject.date).to eq("published" => "2023-05-09")
243
+ expect(subject.descriptions.first["description"]).to start_with("KU Leuven is now an inaugural supporting member of ResearchEquals")
244
+ expect(subject.publisher).to eq("name" => "Liberate Science")
245
+ expect(subject.subjects).to eq([{ "subject" => "Social sciences" },
246
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
247
+ "subject" => "FOS: Social sciences",
248
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
249
+ expect(subject.language).to eq("en")
250
+ expect(subject.container).to eq("identifier" => "https://libscie.org/", "identifierType" => "URL", "title" => "Liberate Science", "type" => "Periodical")
251
+ expect(subject.references).to be_nil
252
+ end
253
+
254
+ it "blog post with non-url id" do
255
+ input = "https://rogue-scholar.org/api/posts/1898d2d7-4d87-4487-96c4-3073cf99e9a5"
256
+ subject = described_class.new(input: input)
257
+ expect(subject.valid?).to be true
258
+ expect(subject.id).to eq("http://sfmatheson.blogspot.com/2023/01/quintessence-of-dust-2023-restart-why.html")
259
+ expect(subject.url).to eq("http://sfmatheson.blogspot.com/2023/01/quintessence-of-dust-2023-restart-why.html")
260
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "1898d2d7-4d87-4487-96c4-3073cf99e9a5", "alternateIdentifierType" => "UUID" }])
261
+ expect(subject.type).to eq("Article")
262
+ expect(subject.creators.length).to eq(1)
263
+ expect(subject.creators.first).to eq("familyName"=>"Matheson", "givenName"=>"Stephen", "type"=>"Person")
264
+ expect(subject.titles).to eq([{ "title" => "Quintessence of Dust 2023 restart: the why" }])
265
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
266
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
267
+ expect(subject.date).to eq("published"=>"2023-01-09", "updated"=>"2023-04-02")
268
+ expect(subject.descriptions.first["description"]).to start_with("It's early January 2023, a little before sunset in Tucson.")
269
+ expect(subject.publisher).to eq("name" => "Quintessence of Dust")
270
+ expect(subject.subjects).to eq([{ "subject" => "Social sciences" },
271
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
272
+ "subject" => "FOS: Social sciences",
273
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
274
+ expect(subject.language).to eq("en")
275
+ expect(subject.container).to eq("identifier" => "http://sfmatheson.blogspot.com/", "identifierType" => "URL", "title" => "Quintessence of Dust", "type" => "Periodical")
276
+ expect(subject.references).to be_nil
277
+ end
278
+
279
+ it "substack post with broken reference" do
280
+ input = "https://rogue-scholar.org/api/posts/2b105b29-acbc-4eae-9ff1-368803f36a4d"
281
+ subject = described_class.new(input: input)
282
+ expect(subject.valid?).to be true
283
+ expect(subject.id).to eq("https://doi.org/10.59350/bbcsr-r4b59")
284
+ expect(subject.url).to eq("https://markrubin.substack.com/p/the-preregistration-prescriptiveness")
285
+ expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "2b105b29-acbc-4eae-9ff1-368803f36a4d", "alternateIdentifierType" => "UUID" }])
286
+ expect(subject.type).to eq("Article")
287
+ expect(subject.creators.length).to eq(1)
288
+ expect(subject.creators.first).to eq("familyName" => "Rubin", "givenName" => "Mark", "type" => "Person")
289
+ expect(subject.titles).to eq([{ "title" => "The Preregistration Prescriptiveness Trade-Off and Unknown Unknowns in Science" }])
290
+ expect(subject.license).to eq("id" => "CC-BY-4.0",
291
+ "url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
292
+ expect(subject.date).to eq("published" => "2023-06-06")
293
+ expect(subject.descriptions.first["description"]).to start_with("Comments on Van Drimmelen (2023)")
294
+ expect(subject.publisher).to eq("name" => "Critical Metascience")
295
+ expect(subject.subjects).to eq([{ "subject" => "Social sciences" },
296
+ { "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
297
+ "subject" => "FOS: Social sciences",
298
+ "subjectScheme" => "Fields of Science and Technology (FOS)" }])
299
+ expect(subject.language).to eq("en")
300
+ expect(subject.container).to eq("identifier" => "https://markrubin.substack.com", "identifierType" => "URL", "title" => "Critical Metascience", "type" => "Periodical")
301
+ expect(subject.references.length).to eq(16)
302
+ expect(subject.references.first).to eq("key" => "ref1", "doi" => "https://doi.org/10.3386/w27250")
226
303
  end
227
304
  end
228
305
 
229
- context 'get json_feed' do
230
- it 'unregistered posts' do
306
+ context "get json_feed" do
307
+ it "unregistered posts" do
231
308
  response = subject.get_json_feed_unregistered
232
309
  expect(response).to eq("ca2a7df4-f3b9-487c-82e9-27f54de75ea8")
233
310
  end
234
311
 
235
- it 'not indexed posts' do
236
- response = subject.get_json_feed_not_indexed('2023-01-01')
312
+ it "not indexed posts" do
313
+ response = subject.get_json_feed_not_indexed("2023-01-01")
237
314
  expect(response).to eq("ab58e412-06eb-42b7-b81a-d340825b9d48")
238
315
  end
239
316
 
240
- it 'by blog_id' do
241
- response = subject.get_json_feed_by_blog('tyfqw20')
317
+ it "by blog_id" do
318
+ response = subject.get_json_feed_by_blog("tyfqw20")
242
319
  expect(response).to eq("3e1278f6-e7c0-43e1-bb54-6829e1344c0d")
243
320
  end
244
321
  end
322
+
323
+ context "get doi_prefix for blog" do
324
+ it "by blog_id" do
325
+ response = subject.get_doi_prefix_by_blog_id("tyfqw20")
326
+ expect(response).to eq("10.59350")
327
+ end
328
+
329
+ it "by blog post uuid" do
330
+ response = subject.get_doi_prefix_by_json_feed_item_uuid("1898d2d7-4d87-4487-96c4-3073cf99e9a5")
331
+ expect(response).to eq("10.59350")
332
+ end
333
+
334
+ it "by blog post uuid specific prefix" do
335
+ response = subject.get_doi_prefix_by_json_feed_item_uuid("2b22bbba-bcba-4072-94cc-3f88442fff88")
336
+ expect(response).to eq("10.54900")
337
+ end
338
+ end
245
339
  end
data/spec/utils_spec.rb CHANGED
@@ -101,6 +101,14 @@ describe Commonmeta::Metadata, vcr: true do
101
101
  end
102
102
  end
103
103
 
104
+ context "validate_ror" do
105
+ it "validate_ror" do
106
+ ror = "https://ror.org/0342dzm54"
107
+ response = subject.validate_ror(ror)
108
+ expect(response).to eq("0342dzm54")
109
+ end
110
+ end
111
+
104
112
  context "validate_orcid_scheme" do
105
113
  it "validate_orcid_scheme" do
106
114
  orcid = "http://orcid.org"