commonmeta-ruby 3.4.5 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +8 -8
- data/csl-data.json +538 -0
- data/lib/commonmeta/author_utils.rb +103 -71
- data/lib/commonmeta/crossref_utils.rb +31 -25
- data/lib/commonmeta/metadata.rb +8 -14
- data/lib/commonmeta/metadata_utils.rb +4 -3
- data/lib/commonmeta/readers/bibtex_reader.rb +3 -3
- data/lib/commonmeta/readers/cff_reader.rb +7 -6
- data/lib/commonmeta/readers/codemeta_reader.rb +3 -3
- data/lib/commonmeta/readers/crossref_reader.rb +131 -124
- data/lib/commonmeta/readers/crossref_xml_reader.rb +7 -6
- data/lib/commonmeta/readers/csl_reader.rb +3 -4
- data/lib/commonmeta/readers/datacite_reader.rb +5 -5
- data/lib/commonmeta/readers/json_feed_reader.rb +8 -4
- data/lib/commonmeta/readers/npm_reader.rb +2 -2
- data/lib/commonmeta/readers/ris_reader.rb +1 -1
- data/lib/commonmeta/readers/schema_org_reader.rb +6 -4
- data/lib/commonmeta/schema_utils.rb +1 -1
- data/lib/commonmeta/utils.rb +4 -2
- data/lib/commonmeta/version.rb +1 -1
- data/lib/commonmeta/writers/bibtex_writer.rb +1 -1
- data/lib/commonmeta/writers/cff_writer.rb +5 -4
- data/lib/commonmeta/writers/codemeta_writer.rb +4 -2
- data/lib/commonmeta/writers/csv_writer.rb +4 -2
- data/lib/commonmeta/writers/datacite_writer.rb +1 -1
- data/lib/commonmeta/writers/jats_writer.rb +9 -5
- data/lib/commonmeta/writers/ris_writer.rb +2 -1
- data/lib/commonmeta/writers/schema_org_writer.rb +7 -4
- data/resources/{commonmeta_v0.9.3.json → commonmeta_v0.10.3.json} +138 -55
- data/resources/csl-citation.json +99 -0
- data/spec/author_utils_spec.rb +16 -16
- data/spec/cli_spec.rb +1 -1
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_crossref_metadata/missing_contributor.yml +307 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/SoftwareSourceCode.yml +76 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/archived_wordpress_post.yml +119 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/book_oup.yml +107 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/journal_article_plos.yml +407 -0
- data/spec/metadata_spec.rb +2 -2
- data/spec/readers/bibtex_reader_spec.rb +5 -5
- data/spec/readers/cff_reader_spec.rb +127 -127
- data/spec/readers/codemeta_reader_spec.rb +11 -11
- data/spec/readers/crossref_reader_spec.rb +844 -835
- data/spec/readers/crossref_xml_reader_spec.rb +899 -901
- data/spec/readers/csl_reader_spec.rb +33 -33
- data/spec/readers/datacite_reader_spec.rb +106 -103
- data/spec/readers/json_feed_reader_spec.rb +68 -40
- data/spec/readers/npm_reader_spec.rb +32 -33
- data/spec/readers/ris_reader_spec.rb +36 -36
- data/spec/readers/schema_org_reader_spec.rb +289 -288
- data/spec/writers/codemeta_writer_spec.rb +19 -20
- data/spec/writers/crossref_xml_writer_spec.rb +73 -37
- data/spec/writers/datacite_writer_spec.rb +2 -1
- metadata +10 -3
@@ -1,134 +1,134 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "spec_helper"
|
4
4
|
|
5
5
|
describe Commonmeta::Metadata, vcr: true do
|
6
|
-
let(:fixture_path) {
|
6
|
+
let(:fixture_path) { "spec/fixtures/" }
|
7
7
|
|
8
|
-
context
|
9
|
-
it
|
8
|
+
context "get schema_org raw" do
|
9
|
+
it "BlogPosting" do
|
10
10
|
input = "#{fixture_path}schema_org.json"
|
11
11
|
subject = described_class.new(input: input)
|
12
12
|
expect(subject.raw).to eq(File.read(input).strip)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
context
|
17
|
-
it
|
18
|
-
input =
|
16
|
+
context "get schema_org metadata" do
|
17
|
+
it "BlogPosting" do
|
18
|
+
input = "https://blog.front-matter.io/posts/eating-your-own-dog-food"
|
19
19
|
subject = described_class.new(input: input)
|
20
20
|
# expect(subject.valid?).to be true
|
21
|
-
expect(subject.id).to eq(
|
22
|
-
expect(subject.url).to eq(
|
23
|
-
expect(subject.type).to eq(
|
24
|
-
expect(subject.
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
expect(subject.titles).to eq([{
|
29
|
-
expect(subject.descriptions.first[
|
30
|
-
expect(subject.subjects).to eq([{
|
31
|
-
expect(subject.date).to eq(
|
32
|
-
|
21
|
+
expect(subject.id).to eq("https://doi.org/10.53731/r79vxn1-97aq74v-ag58n")
|
22
|
+
expect(subject.url).to eq("https://blog.front-matter.io/posts/eating-your-own-dog-food")
|
23
|
+
expect(subject.type).to eq("Article")
|
24
|
+
expect(subject.contributors).to eq([{ "familyName" => "Fenner",
|
25
|
+
"givenName" => "Martin",
|
26
|
+
"id" => "https://orcid.org/0000-0003-1419-2405",
|
27
|
+
"type" => "Person", "contributorRoles" => ["Author"] }])
|
28
|
+
expect(subject.titles).to eq([{ "title" => "Eating your own Dog Food" }])
|
29
|
+
expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
|
30
|
+
expect(subject.subjects).to eq([{ "subject" => "Feature" }])
|
31
|
+
expect(subject.date).to eq("published" => "2016-12-20T00:00:00Z",
|
32
|
+
"updated" => "2022-08-15T09:06:22Z")
|
33
33
|
expect(subject.references.length).to eq(0)
|
34
|
-
expect(subject.publisher).to eq(
|
34
|
+
expect(subject.publisher).to eq("name" => "Front Matter")
|
35
35
|
end
|
36
36
|
|
37
|
-
it
|
38
|
-
input =
|
39
|
-
subject = described_class.new(input: input, doi:
|
37
|
+
it "BlogPosting with new DOI" do
|
38
|
+
input = "https://blog.front-matter.io/posts/eating-your-own-dog-food"
|
39
|
+
subject = described_class.new(input: input, doi: "10.5438/0000-00ss")
|
40
40
|
# expect(subject.valid?).to be true
|
41
|
-
expect(subject.id).to eq(
|
42
|
-
expect(subject.url).to eq(
|
43
|
-
expect(subject.type).to eq(
|
41
|
+
expect(subject.id).to eq("https://doi.org/10.5438/0000-00ss")
|
42
|
+
expect(subject.url).to eq("https://blog.front-matter.io/posts/eating-your-own-dog-food")
|
43
|
+
expect(subject.type).to eq("Article")
|
44
44
|
end
|
45
45
|
|
46
|
-
it
|
46
|
+
it "BlogPosting with type as array" do
|
47
47
|
input = "#{fixture_path}schema_org_type_as_array.json"
|
48
48
|
subject = described_class.new(input: input)
|
49
49
|
# expect(subject.valid?).to be true
|
50
|
-
expect(subject.id).to eq(
|
51
|
-
expect(subject.url).to eq(
|
52
|
-
expect(subject.type).to eq(
|
53
|
-
expect(subject.
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
expect(subject.titles).to eq([{
|
58
|
-
expect(subject.descriptions.first[
|
59
|
-
expect(subject.subjects).to eq([{
|
60
|
-
{
|
61
|
-
expect(subject.date).to eq(
|
62
|
-
|
63
|
-
|
50
|
+
expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
|
51
|
+
expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
|
52
|
+
expect(subject.type).to eq("Article")
|
53
|
+
expect(subject.contributors).to eq([{ "affiliation" => [{ "name" => "DataCite" }],
|
54
|
+
"familyName" => "Fenner", "givenName" => "Martin",
|
55
|
+
"id" => "https://orcid.org/0000-0003-1419-2405",
|
56
|
+
"type" => "Person", "contributorRoles" => ["Author"] }])
|
57
|
+
expect(subject.titles).to eq([{ "title" => "Eating your own Dog Food" }])
|
58
|
+
expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
|
59
|
+
expect(subject.subjects).to eq([{ "subject" => "Datacite" }, { "subject" => "Doi" },
|
60
|
+
{ "subject" => "Metadata" }, { "subject" => "Featured" }])
|
61
|
+
expect(subject.date).to eq("created" => "2016-12-20",
|
62
|
+
"published" => "2016-12-20",
|
63
|
+
"updated" => "2016-12-20")
|
64
64
|
expect(subject.references.length).to eq(2)
|
65
|
-
expect(subject.references.last).to eq(
|
66
|
-
|
67
|
-
expect(subject.publisher).to eq(
|
65
|
+
expect(subject.references.last).to eq("doi" => "10.5438/55e5-t5c0",
|
66
|
+
"key" => "https://doi.org/10.5438/55e5-t5c0")
|
67
|
+
expect(subject.publisher).to eq("name" => "DataCite")
|
68
68
|
end
|
69
69
|
|
70
|
-
context
|
71
|
-
it
|
72
|
-
input =
|
70
|
+
context "get schema_org metadata front matter" do
|
71
|
+
it "BlogPosting" do
|
72
|
+
input = "https://blog.front-matter.io/posts/step-forward-for-software-citation"
|
73
73
|
subject = described_class.new(input: input)
|
74
74
|
# expect(subject.valid?).to be true
|
75
|
-
expect(subject.id).to eq(
|
76
|
-
expect(subject.url).to eq(
|
77
|
-
expect(subject.type).to eq(
|
78
|
-
expect(subject.
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
expect(subject.titles).to eq([{
|
83
|
-
expect(subject.descriptions.first[
|
84
|
-
expect(subject.subjects).to eq([{
|
85
|
-
expect(subject.date).to eq(
|
86
|
-
|
75
|
+
expect(subject.id).to eq("https://doi.org/10.53731/r9531p1-97aq74v-ag78v")
|
76
|
+
expect(subject.url).to eq("https://blog.front-matter.io/posts/step-forward-for-software-citation")
|
77
|
+
expect(subject.type).to eq("Article")
|
78
|
+
expect(subject.contributors).to eq([{ "familyName" => "Fenner",
|
79
|
+
"givenName" => "Martin",
|
80
|
+
"id" => "https://orcid.org/0000-0003-1419-2405",
|
81
|
+
"type" => "Person", "contributorRoles" => ["Author"] }])
|
82
|
+
expect(subject.titles).to eq([{ "title" => "A step forward for software citation: GitHub's enhanced software citation support" }])
|
83
|
+
expect(subject.descriptions.first["description"]).to start_with("On August 19, GitHub announced software citation")
|
84
|
+
expect(subject.subjects).to eq([{ "subject" => "News" }])
|
85
|
+
expect(subject.date).to eq("published" => "2021-08-24T16:57:24Z",
|
86
|
+
"updated" => "2022-08-15T19:05:14Z")
|
87
87
|
expect(subject.references.length).to eq(0)
|
88
|
-
expect(subject.container).to eq("identifier"=>"https://blog.front-matter.io/", "identifierType"=>"URL", "title"=>"Front Matter", "type"=>"Periodical")
|
89
|
-
expect(subject.publisher).to eq(
|
88
|
+
expect(subject.container).to eq("identifier" => "https://blog.front-matter.io/", "identifierType" => "URL", "title" => "Front Matter", "type" => "Periodical")
|
89
|
+
expect(subject.publisher).to eq("name" => "Front Matter")
|
90
90
|
end
|
91
91
|
end
|
92
92
|
|
93
|
-
it
|
94
|
-
input =
|
95
|
-
subject = described_class.new(input: input, from:
|
93
|
+
it "zenodo" do
|
94
|
+
input = "https://www.zenodo.org/record/1196821"
|
95
|
+
subject = described_class.new(input: input, from: "schema_org")
|
96
96
|
# expect(subject.valid?).to be true
|
97
|
-
expect(subject.language).to eq(
|
98
|
-
expect(subject.id).to eq(
|
99
|
-
expect(subject.url).to eq(
|
100
|
-
expect(subject.type).to eq(
|
101
|
-
expect(subject.titles).to eq([{
|
102
|
-
expect(subject.
|
103
|
-
expect(subject.
|
104
|
-
|
105
|
-
expect(subject.publisher).to eq(
|
106
|
-
expect(subject.subjects).to eq([{
|
107
|
-
{
|
108
|
-
{
|
109
|
-
{
|
110
|
-
{
|
111
|
-
{
|
112
|
-
{
|
113
|
-
{
|
114
|
-
{
|
115
|
-
{
|
116
|
-
{
|
117
|
-
{
|
97
|
+
expect(subject.language).to eq("eng")
|
98
|
+
expect(subject.id).to eq("https://doi.org/10.5281/zenodo.1196821")
|
99
|
+
expect(subject.url).to eq("https://zenodo.org/record/1196821")
|
100
|
+
expect(subject.type).to eq("Dataset")
|
101
|
+
expect(subject.titles).to eq([{ "title" => "PsPM-SC4B: SCR, ECG, EMG, PSR and respiration measurements in a delay fear conditioning task with auditory CS and electrical US" }])
|
102
|
+
expect(subject.contributors.size).to eq(6)
|
103
|
+
expect(subject.contributors.first).to eq("type" => "Person", "contributorRoles" => ["Author"], "givenName" => "Matthias",
|
104
|
+
"familyName" => "Staib", "id" => "https://orcid.org/0000-0001-9688-838X", "affiliation" => [{ "name" => "University of Zurich, Zurich, Switzerland" }])
|
105
|
+
expect(subject.publisher).to eq("name" => "Zenodo")
|
106
|
+
expect(subject.subjects).to eq([{ "subject" => "Pupil size response" },
|
107
|
+
{ "subject" => "Skin conductance response" },
|
108
|
+
{ "subject" => "Electrocardiogram" },
|
109
|
+
{ "subject" => "Electromyogram" },
|
110
|
+
{ "subject" => "Electrodermal activity" },
|
111
|
+
{ "subject" => "Galvanic skin response" },
|
112
|
+
{ "subject" => "Psr" },
|
113
|
+
{ "subject" => "Scr" },
|
114
|
+
{ "subject" => "Ecg" },
|
115
|
+
{ "subject" => "Emg" },
|
116
|
+
{ "subject" => "Eda" },
|
117
|
+
{ "subject" => "Gsr" }])
|
118
118
|
end
|
119
119
|
|
120
|
-
it
|
121
|
-
input =
|
122
|
-
subject = described_class.new(input: input, from:
|
120
|
+
it "pangaea" do
|
121
|
+
input = "https://doi.pangaea.de/10.1594/PANGAEA.836178"
|
122
|
+
subject = described_class.new(input: input, from: "schema_org")
|
123
123
|
# expect(subject.valid?).to be true
|
124
|
-
expect(subject.id).to eq(
|
125
|
-
expect(subject.url).to eq(
|
126
|
-
expect(subject.type).to eq(
|
127
|
-
expect(subject.titles).to eq([{
|
128
|
-
expect(subject.
|
129
|
-
expect(subject.
|
130
|
-
|
131
|
-
expect(subject.publisher).to eq(
|
124
|
+
expect(subject.id).to eq("https://doi.org/10.1594/pangaea.836178")
|
125
|
+
expect(subject.url).to eq("https://doi.pangaea.de/10.1594/PANGAEA.836178")
|
126
|
+
expect(subject.type).to eq("Dataset")
|
127
|
+
expect(subject.titles).to eq([{ "title" => "Hydrological and meteorological investigations in a lake near Kangerlussuaq, west Greenland" }])
|
128
|
+
expect(subject.contributors.size).to eq(8)
|
129
|
+
expect(subject.contributors.first).to eq("type" => "Person", "contributorRoles" => ["Author"],
|
130
|
+
"givenName" => "Emma", "familyName" => "Johansson")
|
131
|
+
expect(subject.publisher).to eq("name" => "PANGAEA")
|
132
132
|
end
|
133
133
|
|
134
134
|
# TODO: check redirections
|
@@ -140,44 +140,44 @@ describe Commonmeta::Metadata, vcr: true do
|
|
140
140
|
# expect(subject.url).to eq("https://doi.org/10.3334/ornldaac/1339")
|
141
141
|
# expect(subject.type).to eq("bibtex"=>"misc", "citeproc"=>"article-journal", "ris"=>"GEN", "schemaOrg"=>"DataSet")
|
142
142
|
# expect(subject.titles).to eq([{"title"=>"Soil Moisture Profiles and Temperature Data from SoilSCAPE Sites, USA"}])
|
143
|
-
# expect(subject.
|
144
|
-
# expect(subject.
|
143
|
+
# expect(subject.contributors.size).to eq(12)
|
144
|
+
# expect(subject.contributors.first).to eq("familyName"=>"MOGHADDAM", "givenName"=>"M.", "name"=>"MOGHADDAM, M.", "type"=>"Person", "nameIdentifiers"=>[], "affiliation" => [])
|
145
145
|
# end
|
146
146
|
|
147
|
-
it
|
148
|
-
input =
|
149
|
-
subject = described_class.new(input: input, from:
|
147
|
+
it "harvard dataverse" do
|
148
|
+
input = "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NJ7XSO"
|
149
|
+
subject = described_class.new(input: input, from: "schema_org")
|
150
150
|
# expect(subject.valid?).to be true
|
151
|
-
expect(subject.id).to eq(
|
152
|
-
expect(subject.type).to eq(
|
153
|
-
expect(subject.titles).to eq([{
|
154
|
-
expect(subject.container).to eq(
|
155
|
-
|
156
|
-
expect(subject.
|
157
|
-
expect(subject.subjects).to eq([{
|
158
|
-
{
|
159
|
-
{
|
151
|
+
expect(subject.id).to eq("https://doi.org/10.7910/dvn/nj7xso")
|
152
|
+
expect(subject.type).to eq("Dataset")
|
153
|
+
expect(subject.titles).to eq([{ "title" => "Summary data ankylosing spondylitis GWAS" }])
|
154
|
+
expect(subject.container).to eq("identifier" => "https://dataverse.harvard.edu",
|
155
|
+
"identifierType" => "URL", "title" => "Harvard Dataverse", "type" => "DataRepository")
|
156
|
+
expect(subject.contributors).to eq([{ "name" => "International Genetics of Ankylosing Spondylitis Consortium (IGAS)", "type" => "Organization", "contributorRoles" => ["Author"] }])
|
157
|
+
expect(subject.subjects).to eq([{ "subject" => "Medicine, health and life sciences" },
|
158
|
+
{ "subject" => "genome-wide association studies" },
|
159
|
+
{ "subject" => "Ankylosing spondylitis" }])
|
160
160
|
end
|
161
161
|
|
162
|
-
it
|
163
|
-
input =
|
164
|
-
subject = described_class.new(input: input, from:
|
162
|
+
it "upstream blog" do
|
163
|
+
input = "https://upstream.force11.org/elife-reviewed-preprints-interview-with-fiona-hutton"
|
164
|
+
subject = described_class.new(input: input, from: "schema_org")
|
165
165
|
expect(subject.valid?).to be true
|
166
|
-
expect(subject.id).to eq(
|
167
|
-
expect(subject.type).to eq(
|
168
|
-
expect(subject.titles).to eq([{
|
169
|
-
expect(subject.container).to eq(
|
170
|
-
|
171
|
-
expect(subject.
|
172
|
-
expect(subject.
|
173
|
-
|
174
|
-
|
175
|
-
expect(subject.subjects).to eq([{
|
176
|
-
expect(subject.publisher).to eq(
|
177
|
-
expect(subject.date).to eq(
|
178
|
-
|
179
|
-
expect(subject.license).to eq(
|
180
|
-
|
166
|
+
expect(subject.id).to eq("https://doi.org/10.54900/8d7emer-rm2pg72")
|
167
|
+
expect(subject.type).to eq("Article")
|
168
|
+
expect(subject.titles).to eq([{ "title" => "eLife Reviewed Preprints: Interview with Fiona Hutton" }])
|
169
|
+
expect(subject.container).to eq("identifier" => "https://upstream.force11.org/",
|
170
|
+
"identifierType" => "URL", "title" => "Upstream", "type" => "Periodical")
|
171
|
+
expect(subject.contributors.size).to eq(2)
|
172
|
+
expect(subject.contributors.first).to eq("familyName" => "Hutton",
|
173
|
+
"givenName" => "Fiona",
|
174
|
+
"type" => "Person", "contributorRoles" => ["Author"])
|
175
|
+
expect(subject.subjects).to eq([{ "subject" => "Interviews" }])
|
176
|
+
expect(subject.publisher).to eq("name" => "Upstream")
|
177
|
+
expect(subject.date).to eq("published" => "2022-11-15T10:29:38Z",
|
178
|
+
"updated" => "2023-06-17T20:18:54Z")
|
179
|
+
expect(subject.license).to eq("id" => "CC-BY-4.0",
|
180
|
+
"url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
|
181
181
|
end
|
182
182
|
|
183
183
|
# TODO: check 403 status in DOI resolver
|
@@ -189,225 +189,226 @@ describe Commonmeta::Metadata, vcr: true do
|
|
189
189
|
# expect(subject.type).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
|
190
190
|
# expect(subject.titles).to eq([{"title"=>"Summary data ankylosing spondylitis GWAS"}])
|
191
191
|
# expect(subject.container).to eq("identifier"=>"https://dataverse.harvard.edu", "identifierType"=>"URL", "title"=>"Harvard Dataverse", "type"=>"DataRepository")
|
192
|
-
# expect(subject.
|
192
|
+
# expect(subject.contributors).to eq([{"name" => "International Genetics Of Ankylosing Spondylitis Consortium (IGAS)", "nameIdentifiers"=>[], "affiliation" => []}])
|
193
193
|
# end
|
194
194
|
end
|
195
195
|
|
196
|
-
context
|
197
|
-
it
|
196
|
+
context "get schema_org metadata as string" do
|
197
|
+
it "BlogPosting" do
|
198
198
|
input = "#{fixture_path}schema_org.json"
|
199
199
|
subject = described_class.new(input: input)
|
200
200
|
# expect(subject.valid?).to be true
|
201
|
-
expect(subject.language).to eq(
|
202
|
-
expect(subject.id).to eq(
|
203
|
-
expect(subject.url).to eq(
|
204
|
-
expect(subject.type).to eq(
|
205
|
-
expect(subject.
|
206
|
-
|
207
|
-
|
208
|
-
expect(subject.titles).to eq([{
|
209
|
-
expect(subject.descriptions.first[
|
210
|
-
expect(subject.subjects).to eq([{
|
211
|
-
{
|
212
|
-
expect(subject.date).to eq(
|
213
|
-
|
214
|
-
|
201
|
+
expect(subject.language).to eq("en")
|
202
|
+
expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
|
203
|
+
expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
|
204
|
+
expect(subject.type).to eq("Article")
|
205
|
+
expect(subject.contributors).to eq([{ "familyName" => "Fenner", "givenName" => "Martin",
|
206
|
+
"id" => "https://orcid.org/0000-0003-1419-2405",
|
207
|
+
"type" => "Person", "contributorRoles" => ["Author"] }])
|
208
|
+
expect(subject.titles).to eq([{ "title" => "Eating your own Dog Food" }])
|
209
|
+
expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
|
210
|
+
expect(subject.subjects).to eq([{ "subject" => "Datacite" }, { "subject" => "Doi" },
|
211
|
+
{ "subject" => "Metadata" }, { "subject" => "Featured" }])
|
212
|
+
expect(subject.date).to eq("created" => "2016-12-20",
|
213
|
+
"published" => "2016-12-20",
|
214
|
+
"updated" => "2016-12-20")
|
215
215
|
expect(subject.references.length).to eq(2)
|
216
|
-
expect(subject.references.last).to eq(
|
217
|
-
|
218
|
-
expect(subject.publisher).to eq(
|
216
|
+
expect(subject.references.last).to eq("doi" => "10.5438/55e5-t5c0",
|
217
|
+
"key" => "https://doi.org/10.5438/55e5-t5c0")
|
218
|
+
expect(subject.publisher).to eq("name" => "DataCite")
|
219
219
|
end
|
220
220
|
|
221
|
-
it
|
221
|
+
it "GTEx dataset" do
|
222
222
|
input = "#{fixture_path}schema_org_gtex.json"
|
223
223
|
subject = described_class.new(input: input)
|
224
224
|
# expect(subject.valid?).to be true
|
225
|
-
expect(subject.id).to eq(
|
226
|
-
expect(subject.alternate_identifiers).to eq([{
|
227
|
-
|
228
|
-
expect(subject.url).to eq(
|
229
|
-
expect(subject.
|
230
|
-
expect(subject.type).to eq(
|
231
|
-
expect(subject.
|
232
|
-
|
233
|
-
expect(subject.titles).to eq([{
|
234
|
-
expect(subject.version).to eq(
|
235
|
-
expect(subject.subjects).to eq([{
|
236
|
-
{
|
237
|
-
expect(subject.date).to eq(
|
238
|
-
expect(subject.container).to eq(
|
239
|
-
expect(subject.publisher).to eq(
|
225
|
+
expect(subject.id).to eq("https://doi.org/10.25491/d50j-3083")
|
226
|
+
expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "687610993",
|
227
|
+
"alternateIdentifierType" => "md5" }])
|
228
|
+
expect(subject.url).to eq("https://ors.datacite.org/doi:/10.25491/d50j-3083")
|
229
|
+
expect(subject.files).to eq([{ "url" => "https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz" }])
|
230
|
+
expect(subject.type).to eq("Dataset")
|
231
|
+
expect(subject.contributors).to eq([{ "name" => "The GTEx Consortium",
|
232
|
+
"type" => "Organization", "contributorRoles" => ["Author"] }])
|
233
|
+
expect(subject.titles).to eq([{ "title" => "Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery" }])
|
234
|
+
expect(subject.version).to eq("v7")
|
235
|
+
expect(subject.subjects).to eq([{ "subject" => "Gtex" }, { "subject" => "Annotation" },
|
236
|
+
{ "subject" => "Phenotype" }, { "subject" => "Gene regulation" }, { "subject" => "Transcriptomics" }])
|
237
|
+
expect(subject.date).to eq("published" => "2017")
|
238
|
+
expect(subject.container).to eq("title" => "GTEx", "type" => "DataRepository")
|
239
|
+
expect(subject.publisher).to eq("name" => "GTEx")
|
240
240
|
expect(subject.funding_references.length).to eq(7)
|
241
241
|
expect(subject.funding_references.first).to eq(
|
242
|
-
|
242
|
+
"funderIdentifier" => "https://doi.org/10.13039/100000052", "funderIdentifierType" => "Crossref Funder ID", "funderName" => "Common Fund of the Office of the Director of the NIH",
|
243
243
|
)
|
244
244
|
end
|
245
245
|
|
246
|
-
it
|
246
|
+
it "TOPMed dataset" do
|
247
247
|
input = "#{fixture_path}schema_org_topmed.json"
|
248
248
|
subject = described_class.new(input: input)
|
249
249
|
# expect(subject.valid?).to be true
|
250
|
-
expect(subject.alternate_identifiers).to eq([{
|
251
|
-
|
252
|
-
{
|
253
|
-
|
254
|
-
{
|
255
|
-
|
256
|
-
expect(subject.url).to eq(
|
257
|
-
expect(subject.
|
258
|
-
|
259
|
-
|
260
|
-
expect(subject.
|
261
|
-
expect(subject.
|
262
|
-
expect(subject.
|
263
|
-
|
264
|
-
|
265
|
-
expect(subject.
|
266
|
-
expect(subject.
|
267
|
-
|
268
|
-
'key' => 'https://doi.org/10.23725/2g4s-qv04' }])
|
250
|
+
expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "3b33f6b9338fccab0901b7d317577ea3",
|
251
|
+
"alternateIdentifierType" => "md5" },
|
252
|
+
{ "alternateIdentifier" => "ark:/99999/fk41CrU4eszeLUDe",
|
253
|
+
"alternateIdentifierType" => "minid" },
|
254
|
+
{ "alternateIdentifier" => "dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7",
|
255
|
+
"alternateIdentifierType" => "dataguid" }])
|
256
|
+
expect(subject.url).to eq("https://ors.datacite.org/doi:/10.23725/8na3-9s47")
|
257
|
+
expect(subject.files).to eq([{ "url" => "s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram" },
|
258
|
+
{ "url" => "gs://topmed-irc-share/public/NWD165827.recab.cram" }])
|
259
|
+
expect(subject.type).to eq("Dataset")
|
260
|
+
expect(subject.contributors).to eq([{ "name" => "TOPMed IRC", "type" => "Organization", "contributorRoles" => ["Author"] }])
|
261
|
+
expect(subject.titles).to eq([{ "title" => "NWD165827.recab.cram" }])
|
262
|
+
expect(subject.subjects).to eq([{ "subject" => "Topmed" },
|
263
|
+
{ "subject" => "Whole genome sequencing" }])
|
264
|
+
expect(subject.date).to eq("published" => "2017-11-30")
|
265
|
+
expect(subject.publisher).to eq("name" => "TOPMed")
|
266
|
+
expect(subject.references).to eq([{ "doi" => "10.23725/2g4s-qv04",
|
267
|
+
"key" => "https://doi.org/10.23725/2g4s-qv04" }])
|
269
268
|
expect(subject.funding_references).to eq([{
|
270
|
-
|
271
|
-
|
269
|
+
"funderIdentifier" => "https://doi.org/10.13039/100000050", "funderIdentifierType" => "Crossref Funder ID", "funderName" => "National Heart, Lung, and Blood Institute (NHLBI)",
|
270
|
+
}])
|
272
271
|
end
|
273
272
|
|
274
|
-
it
|
273
|
+
it "tdl_iodp dataset" do
|
275
274
|
input = "#{fixture_path}schema_org_tdl_iodp_invalid_authors.json"
|
276
275
|
subject = described_class.new(input: input)
|
277
276
|
expect(subject.valid?).to be false
|
278
277
|
end
|
279
278
|
|
280
|
-
it
|
279
|
+
it "geolocation" do
|
281
280
|
input = "#{fixture_path}schema_org_geolocation.json"
|
282
281
|
subject = described_class.new(input: input)
|
283
282
|
|
284
283
|
# expect(subject.valid?).to be true
|
285
|
-
expect(subject.id).to eq(
|
284
|
+
expect(subject.id).to eq("https://doi.org/10.6071/z7wc73")
|
286
285
|
expect(subject.alternate_identifiers).to be_nil
|
287
|
-
expect(subject.type).to eq(
|
288
|
-
expect(subject.
|
289
|
-
expect(subject.
|
290
|
-
|
291
|
-
expect(subject.titles).to eq([{
|
292
|
-
expect(subject.subjects).to eq([{
|
293
|
-
{
|
294
|
-
{
|
295
|
-
{
|
296
|
-
{
|
297
|
-
{
|
298
|
-
{
|
299
|
-
{
|
300
|
-
expect(subject.date).to eq(
|
301
|
-
expect(subject.publisher).to eq(
|
302
|
-
expect(subject.funding_references).to eq([{
|
286
|
+
expect(subject.type).to eq("Dataset")
|
287
|
+
expect(subject.contributors.length).to eq(6)
|
288
|
+
expect(subject.contributors.first).to eq("familyName" => "Bales", "givenName" => "Roger",
|
289
|
+
"type" => "Person", "contributorRoles" => ["Author"])
|
290
|
+
expect(subject.titles).to eq([{ "title" => "Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature" }])
|
291
|
+
expect(subject.subjects).to eq([{ "subject" => "Earth sciences" },
|
292
|
+
{ "subject" => "Soil moisture" },
|
293
|
+
{ "subject" => "Soil temperature" },
|
294
|
+
{ "subject" => "Snow depth" },
|
295
|
+
{ "subject" => "Air temperature" },
|
296
|
+
{ "subject" => "Water balance" },
|
297
|
+
{ "subject" => "Nevada" },
|
298
|
+
{ "subject" => "Sierra (mountain range)" }])
|
299
|
+
expect(subject.date).to eq("published" => "2013", "updated" => "2014-10-17")
|
300
|
+
expect(subject.publisher).to eq("name" => "UC Merced")
|
301
|
+
expect(subject.funding_references).to eq([{ "funderName" => "National Science Foundation, Division of Earth Sciences, Critical Zone Observatories" }])
|
303
302
|
expect(subject.geo_locations).to eq([{
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
303
|
+
"geoLocationPlace" => "Providence Creek (Lower, Upper and P301)", "geoLocationPoint" => {
|
304
|
+
"pointLatitude" => "37.047756", "pointLongitude" => "-119.221094",
|
305
|
+
},
|
306
|
+
}])
|
308
307
|
end
|
309
308
|
|
310
|
-
it
|
309
|
+
it "geolocation geoshape" do
|
311
310
|
input = "#{fixture_path}schema_org_geoshape.json"
|
312
311
|
subject = described_class.new(input: input)
|
313
312
|
|
314
313
|
# expect(subject.valid?).to be true
|
315
|
-
expect(subject.language).to eq(
|
316
|
-
expect(subject.id).to eq(
|
317
|
-
expect(subject.type).to eq(
|
318
|
-
expect(subject.
|
319
|
-
expect(subject.
|
320
|
-
|
321
|
-
expect(subject.titles).to eq([{
|
322
|
-
expect(subject.date).to eq(
|
323
|
-
expect(subject.publisher).to eq(
|
324
|
-
expect(subject.geo_locations).to eq([{
|
325
|
-
|
314
|
+
expect(subject.language).to eq("en")
|
315
|
+
expect(subject.id).to eq("https://doi.org/10.1594/pangaea.842237")
|
316
|
+
expect(subject.type).to eq("Dataset")
|
317
|
+
expect(subject.contributors.length).to eq(2)
|
318
|
+
expect(subject.contributors.first).to eq("name" => "Tara Oceans Consortium, Coordinators",
|
319
|
+
"type" => "Organization", "contributorRoles" => ["Author"])
|
320
|
+
expect(subject.titles).to eq([{ "title" => "Registry of all stations from the Tara Oceans Expedition (2009-2013)" }])
|
321
|
+
expect(subject.date).to eq("published" => "2015-02-03")
|
322
|
+
expect(subject.publisher).to eq("name" => "PANGAEA")
|
323
|
+
expect(subject.geo_locations).to eq([{ "geoLocationBox" => { "eastBoundLongitude" => "174.9006",
|
324
|
+
"northBoundLatitude" => "79.6753", "southBoundLatitude" => "-64.3088", "westBoundLongitude" => "-168.5182" } }])
|
326
325
|
end
|
327
326
|
|
328
|
-
it
|
327
|
+
it "schema_org list" do
|
329
328
|
data = File.read("#{fixture_path}schema_org_list.json").strip
|
330
329
|
input = JSON.parse(data).first.to_json
|
331
330
|
subject = described_class.new(input: input)
|
332
331
|
# expect(subject.valid?).to be true
|
333
|
-
expect(subject.id).to eq(
|
334
|
-
expect(subject.alternate_identifiers).to eq([{
|
335
|
-
|
336
|
-
{
|
337
|
-
|
338
|
-
{
|
339
|
-
|
340
|
-
expect(subject.url).to eq(
|
341
|
-
expect(subject.
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
expect(subject.
|
347
|
-
expect(subject.
|
348
|
-
|
349
|
-
expect(subject.
|
350
|
-
|
332
|
+
expect(subject.id).to eq("https://doi.org/10.23725/7jg3-v803")
|
333
|
+
expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "ark:/99999/fk4E1n6n1YHKxPk",
|
334
|
+
"alternateIdentifierType" => "minid" },
|
335
|
+
{ "alternateIdentifier" => "dg.4503/01b048d0-e128-4cb0-94e9-b2d2cab7563d",
|
336
|
+
"alternateIdentifierType" => "dataguid" },
|
337
|
+
{ "alternateIdentifier" => "f9e72bdf25bf4b4f0e581d9218fec2eb",
|
338
|
+
"alternateIdentifierType" => "md5" }])
|
339
|
+
expect(subject.url).to eq("https://ors.datacite.org/doi:/10.23725/7jg3-v803")
|
340
|
+
expect(subject.files).to eq([
|
341
|
+
{ "url" => "s3://cgp-commons-public/topmed_open_access/44a8837b-4456-5709-b56b-54e23000f13a/NWD100953.recab.cram" },
|
342
|
+
{ "url" => "gs://topmed-irc-share/public/NWD100953.recab.cram" },
|
343
|
+
{ "url" => "dos://dos.commons.ucsc-cgp.org/01b048d0-e128-4cb0-94e9-b2d2cab7563d?version=2018-05-26T133719.491772Z" },
|
344
|
+
])
|
345
|
+
expect(subject.type).to eq("Dataset")
|
346
|
+
expect(subject.contributors).to eq([{ "name" => "TOPMed", "type" => "Organization", "contributorRoles" => ["Author"] }])
|
347
|
+
expect(subject.titles).to eq([{ "title" => "NWD100953.recab.cram" }])
|
348
|
+
expect(subject.subjects).to eq([{ "subject" => "Topmed" },
|
349
|
+
{ "subject" => "Whole genome sequencing" }])
|
350
|
+
expect(subject.date).to eq("published" => "2017-11-30")
|
351
|
+
expect(subject.publisher).to eq("name" => "TOPMed")
|
351
352
|
expect(subject.funding_references).to eq([{
|
352
|
-
|
353
|
-
|
353
|
+
"funderIdentifier" => "https://doi.org/10.13039/100000050", "funderIdentifierType" => "Crossref Funder ID", "funderName" => "National Heart, Lung, and Blood Institute (NHLBI)",
|
354
|
+
}])
|
354
355
|
end
|
355
356
|
|
356
|
-
it
|
357
|
+
it "aida dataset" do
|
357
358
|
input = "#{fixture_path}aida.json"
|
358
359
|
subject = described_class.new(input: input)
|
359
360
|
|
360
361
|
# expect(subject.valid?).to be true
|
361
|
-
expect(subject.id).to eq(
|
362
|
-
expect(subject.url).to eq(
|
363
|
-
expect(subject.type).to eq(
|
364
|
-
# expect(subject.
|
365
|
-
expect(subject.titles).to eq([{
|
366
|
-
expect(subject.version).to eq(
|
367
|
-
expect(subject.subjects).to eq([{
|
368
|
-
{
|
369
|
-
expect(subject.date).to eq(
|
370
|
-
|
371
|
-
|
372
|
-
expect(subject.id).to eq(
|
373
|
-
expect(subject.publisher).to eq(
|
374
|
-
expect(subject.license).to eq(
|
362
|
+
expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
|
363
|
+
expect(subject.url).to eq("https://doi.aida.medtech4health.se/10.23698/aida/drov")
|
364
|
+
expect(subject.type).to eq("Dataset")
|
365
|
+
# expect(subject.contributors).to eq([{"familyName"=>"Lindman", "givenName"=>"Karin", "name"=>"Lindman, Karin", "nameIdentifiers"=>[{"nameIdentifier"=> "https://orcid.org/0000-0003-1298-517X", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "type"=>"Person"}])
|
366
|
+
expect(subject.titles).to eq([{ "title" => "Ovary data from the Visual Sweden project DROID" }])
|
367
|
+
expect(subject.version).to eq("1.0")
|
368
|
+
expect(subject.subjects).to eq([{ "subject" => "Pathology" }, { "subject" => "Whole slide imaging" },
|
369
|
+
{ "subject" => "Annotated" }])
|
370
|
+
expect(subject.date).to eq("created" => "2019-01-09",
|
371
|
+
"published" => "2019-01-09",
|
372
|
+
"updated" => "2019-01-09")
|
373
|
+
expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
|
374
|
+
expect(subject.publisher).to eq("name" => "AIDA")
|
375
|
+
expect(subject.license).to eq("url" => "https://datasets.aida.medtech4health.se/10.23698/aida/drov#license")
|
375
376
|
end
|
376
377
|
|
377
|
-
it
|
378
|
+
it "from attributes" do
|
378
379
|
subject = described_class.new(input: nil,
|
379
|
-
from:
|
380
|
-
doi:
|
381
|
-
|
382
|
-
|
383
|
-
titles: [{
|
380
|
+
from: "schema_org",
|
381
|
+
doi: "10.5281/zenodo.1239",
|
382
|
+
contributors: [{ "type" => "Person", "contributorRoles" => ["Author"], "name" => "Jahn, Najko", "givenName" => "Najko",
|
383
|
+
"familyName" => "Jahn" }],
|
384
|
+
titles: [{ "title" => "Publication Fp7 Funding Acknowledgment - Plos Openaire" }],
|
384
385
|
descriptions: [{
|
385
|
-
|
386
|
+
"description" => "The dataset contains a sample of metadata describing papers", "descriptionType" => "Abstract",
|
386
387
|
}],
|
387
|
-
publisher: {
|
388
|
-
date: {
|
389
|
-
funding_references: [{
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
type:
|
388
|
+
publisher: { "name" => "Zenodo" },
|
389
|
+
date: { "published" => "2013-04-03" },
|
390
|
+
funding_references: [{ "awardNumber" => "246686",
|
391
|
+
"awardTitle" => "Open Access Infrastructure for Research in Europe",
|
392
|
+
"awardUri" => "info:eu-repo/grantAgreement/EC/FP7/246686/",
|
393
|
+
"funderIdentifier" => "https://doi.org/10.13039/501100000780",
|
394
|
+
"funderIdentifierType" => "Crossref Funder ID",
|
395
|
+
"funderName" => "European Commission" }],
|
396
|
+
type: "Dataset")
|
396
397
|
# expect(subject.valid?).to be true
|
397
|
-
expect(subject.id).to eq(
|
398
|
-
expect(subject.type).to eq(
|
399
|
-
expect(subject.
|
400
|
-
|
401
|
-
expect(subject.titles).to eq([{
|
402
|
-
expect(subject.descriptions.first[
|
403
|
-
expect(subject.date).to eq(
|
404
|
-
expect(subject.publisher).to eq(
|
405
|
-
expect(subject.funding_references).to eq([{
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
398
|
+
expect(subject.id).to eq("https://doi.org/10.5281/zenodo.1239")
|
399
|
+
expect(subject.type).to eq("Dataset")
|
400
|
+
expect(subject.contributors).to eq([{ "familyName" => "Jahn", "givenName" => "Najko",
|
401
|
+
"name" => "Jahn, Najko", "type" => "Person", "contributorRoles" => ["Author"] }])
|
402
|
+
expect(subject.titles).to eq([{ "title" => "Publication Fp7 Funding Acknowledgment - Plos Openaire" }])
|
403
|
+
expect(subject.descriptions.first["description"]).to start_with("The dataset contains a sample of metadata describing papers")
|
404
|
+
expect(subject.date).to eq("published" => "2013-04-03")
|
405
|
+
expect(subject.publisher).to eq("name" => "Zenodo")
|
406
|
+
expect(subject.funding_references).to eq([{ "awardNumber" => "246686",
|
407
|
+
"awardTitle" => "Open Access Infrastructure for Research in Europe",
|
408
|
+
"awardUri" => "info:eu-repo/grantAgreement/EC/FP7/246686/",
|
409
|
+
"funderIdentifier" => "https://doi.org/10.13039/501100000780",
|
410
|
+
"funderIdentifierType" => "Crossref Funder ID",
|
411
|
+
"funderName" => "European Commission" }])
|
411
412
|
end
|
412
413
|
end
|
413
414
|
end
|