briard 2.4.2 → 2.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +18 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -9
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -39
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/schema_org_writer_spec.rb +329 -294
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +242 -166
- data/.github/workflows/release.yml +0 -47
@@ -3,366 +3,401 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Briard::Metadata, vcr: true do
|
6
|
-
context
|
7
|
-
it
|
8
|
-
input =
|
9
|
-
subject =
|
6
|
+
context 'write metadata as schema_org' do
|
7
|
+
it 'journal article' do
|
8
|
+
input = '10.7554/eLife.01567'
|
9
|
+
subject = described_class.new(input: input, from: 'crossref')
|
10
10
|
json = JSON.parse(subject.schema_org)
|
11
|
-
expect(json[
|
12
|
-
expect(json[
|
13
|
-
expect(json[
|
14
|
-
expect(json[
|
15
|
-
|
16
|
-
expect(json[
|
17
|
-
expect(json[
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
11
|
+
expect(json['@id']).to eq('https://doi.org/10.7554/elife.01567')
|
12
|
+
expect(json['@type']).to eq('ScholarlyArticle')
|
13
|
+
expect(json['isPartOf']).to eq('@type' => 'Periodical', 'issn' => '2050-084X')
|
14
|
+
expect(json['periodical']).to eq('@type' => 'Journal', 'firstPage' => 'e01567',
|
15
|
+
'identifier' => '2050-084X', 'identifierType' => 'ISSN', 'name' => 'eLife', 'volume' => '3')
|
16
|
+
expect(json['citation'].length).to eq(27)
|
17
|
+
expect(json['citation'].first).to eq('@id' => 'https://doi.org/10.1038/nature02100',
|
18
|
+
'@type' => 'CreativeWork')
|
19
|
+
expect(json['funder']).to eq([{ 'name' => 'SystemsX', '@type' => 'Organization' },
|
20
|
+
{ 'name' => 'EMBO',
|
21
|
+
'@type' => 'Organization',
|
22
|
+
'@id' => 'https://doi.org/10.13039/501100003043' },
|
23
|
+
{ 'name' => 'Swiss National Science Foundation',
|
24
|
+
'@type' => 'Organization',
|
25
|
+
'@id' => 'https://doi.org/10.13039/501100001711' },
|
26
|
+
{ 'name' => 'University of Lausanne',
|
27
|
+
'@type' => 'Organization',
|
28
|
+
'@id' => 'https://doi.org/10.13039/501100006390' }])
|
29
|
+
expect(json['license']).to eq('https://creativecommons.org/licenses/by/3.0/legalcode')
|
28
30
|
end
|
29
31
|
|
30
|
-
it
|
31
|
-
input =
|
32
|
-
subject =
|
32
|
+
it 'maremma schema.org JSON' do
|
33
|
+
input = 'https://github.com/datacite/maremma'
|
34
|
+
subject = described_class.new(input: input, from: 'codemeta')
|
33
35
|
json = JSON.parse(subject.schema_org)
|
34
|
-
expect(json[
|
35
|
-
expect(json[
|
36
|
-
expect(json[
|
37
|
-
expect(json[
|
36
|
+
expect(json['@id']).to eq('https://doi.org/10.5438/qeg0-3gm3')
|
37
|
+
expect(json['@type']).to eq('SoftwareSourceCode')
|
38
|
+
expect(json['name']).to eq('Maremma: a Ruby library for simplified network calls')
|
39
|
+
expect(json['author']).to eq('name' => 'Martin Fenner', 'givenName' => 'Martin',
|
40
|
+
'familyName' => 'Fenner', '@type' => 'Person', '@id' => 'https://orcid.org/0000-0003-0077-4738', 'affiliation' => { '@type' => 'Organization', 'name' => 'DataCite' })
|
38
41
|
end
|
39
42
|
|
40
|
-
it
|
41
|
-
input =
|
42
|
-
subject =
|
43
|
+
it 'Schema.org JSON' do
|
44
|
+
input = 'https://doi.org/10.5281/ZENODO.48440'
|
45
|
+
subject = described_class.new(input: input, from: 'datacite')
|
43
46
|
json = JSON.parse(subject.schema_org)
|
44
|
-
expect(json[
|
45
|
-
expect(json[
|
46
|
-
expect(json[
|
47
|
-
expect(json[
|
47
|
+
expect(json['@id']).to eq('https://doi.org/10.5281/zenodo.48440')
|
48
|
+
expect(json['@type']).to eq('SoftwareSourceCode')
|
49
|
+
expect(json['name']).to eq('Analysis Tools For Crossover Experiment Of Ui Using Choice Architecture')
|
50
|
+
expect(json['license']).to eq(['https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode',
|
51
|
+
'info:eu-repo/semantics/openAccess'])
|
48
52
|
end
|
49
53
|
|
50
|
-
it
|
51
|
-
input =
|
52
|
-
subject =
|
54
|
+
it 'Another Schema.org JSON' do
|
55
|
+
input = 'https://doi.org/10.5061/DRYAD.8515'
|
56
|
+
subject = described_class.new(input: input, from: 'datacite')
|
53
57
|
json = JSON.parse(subject.schema_org)
|
54
|
-
expect(json[
|
55
|
-
expect(json[
|
56
|
-
expect(json[
|
57
|
-
expect(json[
|
58
|
+
expect(json['@id']).to eq('https://doi.org/10.5061/dryad.8515')
|
59
|
+
expect(json['@type']).to eq('Dataset')
|
60
|
+
expect(json['license']).to eq('https://creativecommons.org/publicdomain/zero/1.0/legalcode')
|
61
|
+
expect(json['keywords']).to eq('plasmodium, malaria, mitochondrial genome, parasites')
|
58
62
|
end
|
59
63
|
|
60
|
-
it
|
61
|
-
input =
|
62
|
-
subject =
|
64
|
+
it 'Schema.org JSON IsSupplementTo' do
|
65
|
+
input = 'https://doi.org/10.5517/CC8H01S'
|
66
|
+
subject = described_class.new(input: input)
|
63
67
|
json = JSON.parse(subject.schema_org)
|
64
|
-
expect(json[
|
65
|
-
expect(json[
|
66
|
-
expect(json[
|
68
|
+
expect(json['@id']).to eq('https://doi.org/10.5517/cc8h01s')
|
69
|
+
expect(json['@type']).to eq('Dataset')
|
70
|
+
expect(json['@reverse']).to eq('isBasedOn' => {
|
71
|
+
'@id' => 'https://doi.org/10.1107/s1600536804021154', '@type' => 'ScholarlyArticle'
|
72
|
+
})
|
67
73
|
end
|
68
74
|
|
69
|
-
it
|
70
|
-
input =
|
71
|
-
subject =
|
75
|
+
it 'Schema.org JSON Cyark' do
|
76
|
+
input = 'https://doi.org/10.26301/jgf3-jm06'
|
77
|
+
subject = described_class.new(input: input)
|
72
78
|
json = JSON.parse(subject.schema_org)
|
73
|
-
expect(json[
|
74
|
-
expect(json[
|
79
|
+
expect(json['@id']).to eq('https://doi.org/10.26301/jgf3-jm06')
|
80
|
+
expect(json['@type']).to eq('Dataset')
|
75
81
|
end
|
76
82
|
|
77
|
-
it
|
78
|
-
input = fixture_path
|
79
|
-
subject =
|
83
|
+
it 'rdataone' do
|
84
|
+
input = "#{fixture_path}codemeta.json"
|
85
|
+
subject = described_class.new(input: input, from: 'codemeta')
|
80
86
|
json = JSON.parse(subject.schema_org)
|
81
|
-
expect(json[
|
82
|
-
expect(json[
|
83
|
-
expect(json[
|
84
|
-
expect(json[
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
87
|
+
expect(json['@id']).to eq('https://doi.org/10.5063/f1m61h5x')
|
88
|
+
expect(json['@type']).to eq('SoftwareSourceCode')
|
89
|
+
expect(json['name']).to eq('R Interface to the DataONE REST API')
|
90
|
+
expect(json['author']).to eq([{ 'name' => 'Matt Jones',
|
91
|
+
'givenName' => 'Matt',
|
92
|
+
'familyName' => 'Jones',
|
93
|
+
'@type' => 'Person',
|
94
|
+
'@id' => 'https://orcid.org/0000-0003-0077-4738',
|
95
|
+
'affiliation' => { '@type' => 'Organization',
|
96
|
+
'name' => 'NCEAS' } },
|
97
|
+
{ 'name' => 'Peter Slaughter',
|
98
|
+
'givenName' => 'Peter',
|
99
|
+
'familyName' => 'Slaughter',
|
100
|
+
'@type' => 'Person',
|
101
|
+
'@id' => 'https://orcid.org/0000-0002-2192-403X',
|
102
|
+
'affiliation' => { '@type' => 'Organization',
|
103
|
+
'name' => 'NCEAS' } },
|
104
|
+
{ 'name' => 'University Of California, Santa Barbara',
|
105
|
+
'@type' => 'Organization' }])
|
106
|
+
expect(json['version']).to eq('2.0.0')
|
107
|
+
expect(json['keywords']).to eq('data sharing, data repository, dataone')
|
99
108
|
end
|
100
109
|
|
101
|
-
it
|
102
|
-
input =
|
103
|
-
subject =
|
110
|
+
it 'Funding' do
|
111
|
+
input = 'https://doi.org/10.5438/6423'
|
112
|
+
subject = described_class.new(input: input)
|
104
113
|
json = JSON.parse(subject.schema_org)
|
105
|
-
expect(json[
|
106
|
-
expect(json[
|
107
|
-
expect(json[
|
108
|
-
expect(json[
|
109
|
-
expect(json[
|
110
|
-
|
114
|
+
expect(json['@id']).to eq('https://doi.org/10.5438/6423')
|
115
|
+
expect(json['@type']).to eq('Collection')
|
116
|
+
expect(json['hasPart'].length).to eq(25)
|
117
|
+
expect(json['hasPart'].first).to eq('@type' => 'CreativeWork', '@id' => 'https://doi.org/10.5281/zenodo.30799')
|
118
|
+
expect(json['funder']).to eq('@id' => 'https://doi.org/10.13039/501100000780',
|
119
|
+
'@type' => 'Organization', 'name' => 'European Commission')
|
120
|
+
expect(json['license']).to eq('https://creativecommons.org/licenses/by/4.0/legalcode')
|
111
121
|
end
|
112
122
|
|
113
|
-
it
|
114
|
-
input =
|
115
|
-
subject =
|
123
|
+
it 'Funding OpenAIRE' do
|
124
|
+
input = 'https://doi.org/10.5281/ZENODO.1239'
|
125
|
+
subject = described_class.new(input: input)
|
116
126
|
json = JSON.parse(subject.schema_org)
|
117
|
-
expect(json[
|
118
|
-
expect(json[
|
119
|
-
expect(json[
|
120
|
-
|
127
|
+
expect(json['@id']).to eq('https://doi.org/10.5281/zenodo.1239')
|
128
|
+
expect(json['@type']).to eq('Dataset')
|
129
|
+
expect(json['funder']).to eq('@id' => 'https://doi.org/10.13039/501100000780',
|
130
|
+
'@type' => 'Organization', 'name' => 'European Commission')
|
131
|
+
expect(json['license']).to eq(['https://creativecommons.org/publicdomain/zero/1.0/legalcode',
|
132
|
+
'info:eu-repo/semantics/openAccess'])
|
121
133
|
end
|
122
134
|
|
123
|
-
it
|
124
|
-
input =
|
125
|
-
subject =
|
135
|
+
it 'subject scheme' do
|
136
|
+
input = 'https://doi.org/10.4232/1.2745'
|
137
|
+
subject = described_class.new(input: input, from: 'datacite')
|
126
138
|
json = JSON.parse(subject.schema_org)
|
127
|
-
expect(json[
|
128
|
-
expect(json[
|
129
|
-
expect(json[
|
130
|
-
expect(json[
|
139
|
+
expect(json['@id']).to eq('https://doi.org/10.4232/1.2745')
|
140
|
+
expect(json['@type']).to eq('Dataset')
|
141
|
+
expect(json['name']).to eq('Flash Eurobarometer 54 (Madrid Summit)')
|
142
|
+
expect(json['keywords']).to eq('KAT12 International Institutions, Relations, Conditions, Internationale Politik und Institutionen, Regierung, politische Systeme, Parteien und Verbände, Wirtschaftssysteme und wirtschaftliche Entwicklung, International politics and organisation, Government, political systems and organisation, Economic systems and development')
|
131
143
|
end
|
132
144
|
|
133
|
-
it
|
134
|
-
input =
|
135
|
-
subject =
|
145
|
+
it 'subject scheme multiple keywords' do
|
146
|
+
input = 'https://doi.org/10.1594/pangaea.721193'
|
147
|
+
subject = described_class.new(input: input, from: 'datacite')
|
136
148
|
json = JSON.parse(subject.schema_org)
|
137
|
-
expect(json[
|
138
|
-
expect(json[
|
139
|
-
expect(json[
|
140
|
-
expect(json[
|
141
|
-
expect(json[
|
149
|
+
expect(json['@id']).to eq('https://doi.org/10.1594/pangaea.721193')
|
150
|
+
expect(json['@type']).to eq('Dataset')
|
151
|
+
expect(json['name']).to eq('Seawater carbonate chemistry and processes during experiments with Crassostrea gigas, 2007, supplement to: Kurihara, Haruko; Kato, Shoji; Ishimatsu, Atsushi (2007): Effects of increased seawater pCO2 on early development of the oyster Crassostrea gigas. Aquatic Biology, 1(1), 91-98')
|
152
|
+
expect(json['keywords']).to include('animalia, bottles or small containers/aquaria (<20 l)')
|
153
|
+
expect(json['license']).to eq('https://creativecommons.org/licenses/by/3.0/legalcode')
|
142
154
|
end
|
143
155
|
|
144
|
-
it
|
145
|
-
input = fixture_path
|
146
|
-
url =
|
147
|
-
content_url =
|
148
|
-
subject =
|
156
|
+
it 'author is organization' do
|
157
|
+
input = "#{fixture_path}gtex.xml"
|
158
|
+
url = 'https://ors.datacite.org/doi:/10.25491/9hx8-ke93'
|
159
|
+
content_url = 'https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz'
|
160
|
+
subject = described_class.new(input: input, url: url, content_url: content_url,
|
161
|
+
from: 'datacite')
|
149
162
|
json = JSON.parse(subject.schema_org)
|
150
|
-
expect(json[
|
151
|
-
expect(json[
|
152
|
-
expect(json[
|
153
|
-
expect(json[
|
154
|
-
expect(json[
|
155
|
-
expect(json[
|
156
|
-
expect(json[
|
157
|
-
expect(json[
|
158
|
-
|
163
|
+
expect(json['@id']).to eq('https://doi.org/10.25491/9hx8-ke93')
|
164
|
+
expect(json['@type']).to eq('Dataset')
|
165
|
+
expect(json['author']).to eq('@type' => 'Organization', 'name' => 'The GTEx Consortium')
|
166
|
+
expect(json['url']).to eq('https://ors.datacite.org/doi:/10.25491/9hx8-ke93')
|
167
|
+
expect(json['encodingFormat']).to eq('application/tar')
|
168
|
+
expect(json['contentSize']).to eq('15.7M')
|
169
|
+
expect(json['contentUrl']).to eq('https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz')
|
170
|
+
expect(json['includedInDataCatalog']).to eq(
|
171
|
+
'@id' => 'https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000663', '@type' => 'DataCatalog', 'name' => 'GTEx'
|
172
|
+
)
|
173
|
+
expect(json['@reverse']).to eq('isBasedOn' => { '@id' => 'https://doi.org/10.1038/nmeth.4407',
|
174
|
+
'@type' => 'ScholarlyArticle' })
|
159
175
|
end
|
160
176
|
|
161
|
-
it
|
162
|
-
input =
|
163
|
-
subject =
|
177
|
+
it 'series information' do
|
178
|
+
input = '10.4229/23RDEUPVSEC2008-5CO.8.3'
|
179
|
+
subject = described_class.new(input: input, from: 'datacite')
|
164
180
|
json = JSON.parse(subject.schema_org)
|
165
|
-
expect(json[
|
166
|
-
expect(json[
|
167
|
-
expect(json[
|
168
|
-
expect(json[
|
169
|
-
expect(json[
|
170
|
-
|
181
|
+
expect(json['@id']).to eq('https://doi.org/10.4229/23rdeupvsec2008-5co.8.3')
|
182
|
+
expect(json['@type']).to eq('ScholarlyArticle')
|
183
|
+
expect(json['name']).to eq('Rural Electrification With Hybrid Power Systems Based on Renewables - Technical System Configurations From the Point of View of the European Industry')
|
184
|
+
expect(json['author'].count).to eq(3)
|
185
|
+
expect(json['author'].first).to eq('@type' => 'Person', 'name' => 'P. Llamas', 'givenName' => 'P.',
|
186
|
+
'familyName' => 'Llamas')
|
187
|
+
expect(json['periodical']).to eq('@type' => 'Series', 'firstPage' => 'Spain; 3353',
|
188
|
+
'lastPage' => '3356', 'name' => '23rd European Photovoltaic Solar Energy Conference and Exhibition', 'volume' => '1-5 September 2008')
|
171
189
|
end
|
172
190
|
|
173
|
-
it
|
174
|
-
input =
|
175
|
-
subject =
|
191
|
+
it 'data catalog' do
|
192
|
+
input = '10.25491/8KMC-G314'
|
193
|
+
subject = described_class.new(input: input, from: 'datacite')
|
176
194
|
json = JSON.parse(subject.schema_org)
|
177
|
-
expect(json[
|
178
|
-
expect(json[
|
179
|
-
expect(json[
|
180
|
-
expect(json[
|
181
|
-
expect(json[
|
182
|
-
expect(json[
|
183
|
-
|
195
|
+
expect(json['@id']).to eq('https://doi.org/10.25491/8kmc-g314')
|
196
|
+
expect(json['@type']).to eq('Dataset')
|
197
|
+
expect(json['name']).to eq('Covariates used in eQTL analysis. Includes genotyping principal components and PEER factors')
|
198
|
+
expect(json['author']).to eq('@type' => 'Organization', 'name' => 'The GTEx Consortium')
|
199
|
+
expect(json['includedInDataCatalog']).to eq('@type' => 'DataCatalog', 'name' => 'GTEx')
|
200
|
+
expect(json['identifier']).to eq('@type' => 'PropertyValue', 'propertyID' => 'md5',
|
201
|
+
'value' => 'c7c89fe7366d50cd75448aa603c9de58')
|
202
|
+
expect(json['contentUrl']).to eq('https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_covariates.tar.gz')
|
184
203
|
end
|
185
204
|
|
186
|
-
it
|
187
|
-
input =
|
188
|
-
subject =
|
205
|
+
it 'alternate identifiers' do
|
206
|
+
input = '10.23725/8na3-9s47'
|
207
|
+
subject = described_class.new(input: input, from: 'datacite')
|
189
208
|
json = JSON.parse(subject.schema_org)
|
190
|
-
expect(json[
|
191
|
-
expect(json[
|
192
|
-
expect(json[
|
193
|
-
expect(json[
|
194
|
-
expect(json[
|
195
|
-
expect(json[
|
196
|
-
[{
|
197
|
-
|
198
|
-
|
199
|
-
{
|
200
|
-
|
201
|
-
|
202
|
-
{
|
203
|
-
|
204
|
-
|
209
|
+
expect(json['@id']).to eq('https://doi.org/10.23725/8na3-9s47')
|
210
|
+
expect(json['@type']).to eq('Dataset')
|
211
|
+
expect(json['name']).to eq('NWD165827.recab.cram')
|
212
|
+
expect(json['author']).to eq('name' => 'TOPMed')
|
213
|
+
expect(json['includedInDataCatalog'].nil?).to be(true)
|
214
|
+
expect(json['identifier']).to eq(
|
215
|
+
[{ '@type' => 'PropertyValue',
|
216
|
+
'propertyID' => 'minid',
|
217
|
+
'value' => 'ark:/99999/fk41CrU4eszeLUDe' },
|
218
|
+
{ '@type' => 'PropertyValue',
|
219
|
+
'propertyID' => 'dataguid',
|
220
|
+
'value' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7' },
|
221
|
+
{ '@type' => 'PropertyValue',
|
222
|
+
'propertyID' => 'md5',
|
223
|
+
'value' => '3b33f6b9338fccab0901b7d317577ea3' }]
|
224
|
+
)
|
225
|
+
expect(json['contentUrl']).to include(
|
226
|
+
's3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram', 'gs://topmed-irc-share/public/NWD165827.recab.cram'
|
205
227
|
)
|
206
|
-
expect(json["contentUrl"]).to include("s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram", "gs://topmed-irc-share/public/NWD165827.recab.cram")
|
207
228
|
end
|
208
229
|
|
209
|
-
it
|
210
|
-
input = fixture_path
|
211
|
-
subject =
|
230
|
+
it 'affiliation identifier' do
|
231
|
+
input = "#{fixture_path}datacite-example-affiliation.xml"
|
232
|
+
subject = described_class.new(input: input)
|
212
233
|
json = JSON.parse(subject.schema_org)
|
213
|
-
expect(json[
|
214
|
-
expect(json[
|
215
|
-
expect(json[
|
216
|
-
expect(json[
|
217
|
-
expect(json[
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
234
|
+
expect(json['@id']).to eq('https://doi.org/10.5072/example-full')
|
235
|
+
expect(json['@type']).to eq('SoftwareSourceCode')
|
236
|
+
expect(json['name']).to eq('Full DataCite XML Example')
|
237
|
+
expect(json['author'].length).to eq(3)
|
238
|
+
expect(json['author'].first).to eq('@id' => 'https://orcid.org/0000-0001-5000-0007',
|
239
|
+
'@type' => 'Person',
|
240
|
+
'affiliation' => { '@id' => 'https://ror.org/04wxnsj81', '@type' => 'Organization',
|
241
|
+
'name' => 'DataCite' },
|
242
|
+
'familyName' => 'Miller',
|
243
|
+
'givenName' => 'Elizabeth',
|
244
|
+
'name' => 'Elizabeth Miller')
|
245
|
+
expect(json['identifier']).to eq(
|
246
|
+
{ '@type' => 'PropertyValue',
|
247
|
+
'propertyID' => 'URL',
|
248
|
+
'value' => 'https://schema.datacite.org/meta/kernel-4.2/example/datacite-example-full-v4.2.xml' }
|
227
249
|
)
|
228
|
-
expect(json[
|
250
|
+
expect(json['license']).to eq('https://creativecommons.org/publicdomain/zero/1.0/legalcode')
|
229
251
|
end
|
230
252
|
|
231
|
-
it
|
232
|
-
input = fixture_path
|
233
|
-
doi =
|
234
|
-
subject =
|
253
|
+
it 'geo_location_point' do
|
254
|
+
input = "#{fixture_path}datacite-example-geolocation-2.xml"
|
255
|
+
doi = '10.6071/Z7WC73'
|
256
|
+
subject = described_class.new(input: input, doi: doi)
|
235
257
|
json = JSON.parse(subject.schema_org)
|
236
|
-
expect(json[
|
237
|
-
expect(json[
|
238
|
-
expect(json[
|
239
|
-
expect(json[
|
240
|
-
expect(json[
|
241
|
-
|
242
|
-
expect(json[
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
258
|
+
expect(json['@id']).to eq('https://doi.org/10.6071/z7wc73')
|
259
|
+
expect(json['@type']).to eq('Dataset')
|
260
|
+
expect(json['name']).to eq('Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature')
|
261
|
+
expect(json['author'].length).to eq(6)
|
262
|
+
expect(json['author'][2]).to eq('@id' => 'https://orcid.org/0000-0002-8862-1404',
|
263
|
+
'@type' => 'Person', 'familyName' => 'Stacy', 'givenName' => 'Erin', 'name' => 'Erin Stacy', 'affiliation' => { '@type' => 'Organization', 'name' => 'UC Merced' })
|
264
|
+
expect(json['includedInDataCatalog'].nil?).to be(true)
|
265
|
+
expect(json['spatialCoverage']).to eq([{ '@type' => 'Place',
|
266
|
+
'geo' =>
|
267
|
+
{ '@type' => 'GeoCoordinates',
|
268
|
+
'address' => 'Providence Creek (Lower, Upper and P301)',
|
269
|
+
'latitude' => '37.047756',
|
270
|
+
'longitude' => '-119.221094' } },
|
271
|
+
{ '@type' => 'Place',
|
272
|
+
'geo' =>
|
273
|
+
{ '@type' => 'GeoShape',
|
274
|
+
'address' => 'Providence Creek (Lower, Upper and P301)',
|
275
|
+
'box' => '37.046 -119.211 37.075 -119.182' } }])
|
276
|
+
expect(json['license']).to eq('https://creativecommons.org/licenses/by/4.0/legalcode')
|
254
277
|
end
|
255
278
|
|
256
|
-
it
|
257
|
-
input =
|
258
|
-
subject =
|
279
|
+
it 'geo_location_box' do
|
280
|
+
input = '10.1594/PANGAEA.842237'
|
281
|
+
subject = described_class.new(input: input, from: 'datacite')
|
259
282
|
json = JSON.parse(subject.schema_org)
|
260
|
-
expect(json[
|
261
|
-
expect(json[
|
262
|
-
expect(json[
|
263
|
-
expect(json[
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
expect(json[
|
270
|
-
expect(json[
|
271
|
-
|
283
|
+
expect(json['@id']).to eq('https://doi.org/10.1594/pangaea.842237')
|
284
|
+
expect(json['@type']).to eq('Dataset')
|
285
|
+
expect(json['name']).to eq('Registry of all stations from the Tara Oceans Expedition (2009-2013)')
|
286
|
+
expect(json['author']).to eq([{ 'familyName' => 'Tara Oceans Consortium',
|
287
|
+
'givenName' => 'Coordinators',
|
288
|
+
'name' => 'Coordinators Tara Oceans Consortium' },
|
289
|
+
{ 'familyName' => 'Tara Oceans Expedition',
|
290
|
+
'givenName' => 'Participants',
|
291
|
+
'name' => 'Participants Tara Oceans Expedition' }])
|
292
|
+
expect(json['includedInDataCatalog'].nil?).to be(true)
|
293
|
+
expect(json['spatialCoverage']).to eq('@type' => 'Place',
|
294
|
+
'geo' => {
|
295
|
+
'@type' => 'GeoShape', 'box' => '-64.3088 -168.5182 79.6753 174.9006'
|
296
|
+
})
|
297
|
+
expect(json['license']).to eq('https://creativecommons.org/licenses/by/3.0/legalcode')
|
272
298
|
end
|
273
299
|
|
274
|
-
it
|
275
|
-
input = fixture_path
|
276
|
-
subject =
|
300
|
+
it 'geo_location_polygon' do
|
301
|
+
input = "#{fixture_path}datacite-example-polygon-v4.1.xml"
|
302
|
+
subject = described_class.new(input: input)
|
277
303
|
json = JSON.parse(subject.schema_org)
|
278
|
-
expect(json[
|
279
|
-
expect(json[
|
280
|
-
expect(json[
|
281
|
-
expect(json[
|
282
|
-
|
283
|
-
expect(json[
|
284
|
-
expect(json[
|
304
|
+
expect(json['@id']).to eq('https://doi.org/10.5072/example-polygon')
|
305
|
+
expect(json['@type']).to eq('Dataset')
|
306
|
+
expect(json['name']).to eq('Meteo measurements at the Sand Motor')
|
307
|
+
expect(json['author']).to eq('@type' => 'Person', 'familyName' => 'Den Heijer', 'givenName' => 'C',
|
308
|
+
'name' => 'C Den Heijer')
|
309
|
+
expect(json['includedInDataCatalog'].nil?).to be(true)
|
310
|
+
expect(json['spatialCoverage'].dig('geo', 'polygon').length).to eq(34)
|
311
|
+
expect(json['spatialCoverage'].dig('geo',
|
312
|
+
'polygon')[0].first).to eq(['4.1738852605822',
|
313
|
+
'52.03913926329928'])
|
285
314
|
end
|
286
315
|
|
287
|
-
it
|
288
|
-
input = fixture_path
|
289
|
-
subject =
|
316
|
+
it 'from schema_org gtex' do
|
317
|
+
input = "#{fixture_path}schema_org_gtex.json"
|
318
|
+
subject = described_class.new(input: input, from: 'schema_org')
|
290
319
|
json = JSON.parse(subject.schema_org)
|
291
|
-
expect(json[
|
292
|
-
expect(json[
|
293
|
-
expect(json[
|
294
|
-
|
295
|
-
expect(json[
|
296
|
-
expect(json[
|
297
|
-
expect(json[
|
298
|
-
expect(json[
|
299
|
-
expect(json[
|
300
|
-
expect(json[
|
301
|
-
expect(json[
|
302
|
-
expect(json[
|
303
|
-
expect(json[
|
304
|
-
expect(json[
|
305
|
-
expect(json[
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
320
|
+
expect(json['@id']).to eq('https://doi.org/10.25491/d50j-3083')
|
321
|
+
expect(json['@type']).to eq('Dataset')
|
322
|
+
expect(json['identifier']).to eq('@type' => 'PropertyValue', 'propertyID' => 'md5',
|
323
|
+
'value' => '687610993')
|
324
|
+
expect(json['url']).to eq('https://ors.datacite.org/doi:/10.25491/d50j-3083')
|
325
|
+
expect(json['additionalType']).to eq('Gene expression matrices')
|
326
|
+
expect(json['name']).to eq('Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery')
|
327
|
+
expect(json['version']).to eq('v7')
|
328
|
+
expect(json['author']).to eq('@type' => 'Organization', 'name' => 'The GTEx Consortium')
|
329
|
+
expect(json['keywords']).to eq('gtex, annotation, phenotype, gene regulation, transcriptomics')
|
330
|
+
expect(json['datePublished']).to eq('2017')
|
331
|
+
expect(json['contentUrl']).to eq('https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz')
|
332
|
+
expect(json['schemaVersion']).to eq('http://datacite.org/schema/kernel-4')
|
333
|
+
expect(json['includedInDataCatalog']).to eq('@type' => 'DataCatalog', 'name' => 'GTEx')
|
334
|
+
expect(json['publisher']).to eq('@type' => 'Organization', 'name' => 'GTEx')
|
335
|
+
expect(json['funder']).to eq([{ '@id' => 'https://doi.org/10.13039/100000052',
|
336
|
+
'name' => 'Common Fund of the Office of the Director of the NIH',
|
337
|
+
'@type' => 'Organization' },
|
338
|
+
{ '@id' => 'https://doi.org/10.13039/100000054',
|
339
|
+
'name' => 'National Cancer Institute (NCI)',
|
340
|
+
'@type' => 'Organization' },
|
341
|
+
{ '@id' => 'https://doi.org/10.13039/100000051',
|
342
|
+
'name' => 'National Human Genome Research Institute (NHGRI)',
|
343
|
+
'@type' => 'Organization' },
|
344
|
+
{ '@id' => 'https://doi.org/10.13039/100000050',
|
345
|
+
'name' => 'National Heart, Lung, and Blood Institute (NHLBI)',
|
346
|
+
'@type' => 'Organization' },
|
347
|
+
{ '@id' => 'https://doi.org/10.13039/100000026',
|
348
|
+
'name' => 'National Institute on Drug Abuse (NIDA)',
|
349
|
+
'@type' => 'Organization' },
|
350
|
+
{ '@id' => 'https://doi.org/10.13039/100000025',
|
351
|
+
'name' => 'National Institute of Mental Health (NIMH)',
|
352
|
+
'@type' => 'Organization' },
|
353
|
+
{ '@id' => 'https://doi.org/10.13039/100000065',
|
354
|
+
'name' => 'National Institute of Neurological Disorders and Stroke (NINDS)',
|
355
|
+
'@type' => 'Organization' }])
|
356
|
+
expect(json['provider']).to eq('@type' => 'Organization', 'name' => 'DataCite')
|
327
357
|
end
|
328
358
|
|
329
|
-
it
|
330
|
-
input = fixture_path
|
331
|
-
subject =
|
359
|
+
it 'from schema_org topmed' do
|
360
|
+
input = "#{fixture_path}schema_org_topmed.json"
|
361
|
+
subject = described_class.new(input: input, from: 'schema_org')
|
332
362
|
json = JSON.parse(subject.schema_org)
|
333
|
-
expect(json[
|
334
|
-
expect(json[
|
335
|
-
expect(json[
|
336
|
-
[{
|
337
|
-
|
338
|
-
|
339
|
-
{
|
340
|
-
|
341
|
-
|
342
|
-
{
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
expect(json[
|
347
|
-
expect(json[
|
348
|
-
expect(json[
|
349
|
-
expect(json[
|
350
|
-
expect(json[
|
351
|
-
expect(json[
|
352
|
-
expect(json[
|
353
|
-
|
354
|
-
|
355
|
-
expect(json[
|
356
|
-
expect(json[
|
363
|
+
expect(json['@id']).to eq('https://doi.org/10.23725/8na3-9s47')
|
364
|
+
expect(json['@type']).to eq('Dataset')
|
365
|
+
expect(json['identifier']).to eq(
|
366
|
+
[{ '@type' => 'PropertyValue',
|
367
|
+
'propertyID' => 'md5',
|
368
|
+
'value' => '3b33f6b9338fccab0901b7d317577ea3' },
|
369
|
+
{ '@type' => 'PropertyValue',
|
370
|
+
'propertyID' => 'minid',
|
371
|
+
'value' => 'ark:/99999/fk41CrU4eszeLUDe' },
|
372
|
+
{ '@type' => 'PropertyValue',
|
373
|
+
'propertyID' => 'dataguid',
|
374
|
+
'value' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7' }]
|
375
|
+
)
|
376
|
+
expect(json['url']).to eq('https://ors.datacite.org/doi:/10.23725/8na3-9s47')
|
377
|
+
expect(json['additionalType']).to eq('CRAM file')
|
378
|
+
expect(json['name']).to eq('NWD165827.recab.cram')
|
379
|
+
expect(json['author']).to eq('@type' => 'Organization', 'name' => 'TOPMed IRC')
|
380
|
+
expect(json['keywords']).to eq('topmed, whole genome sequencing')
|
381
|
+
expect(json['datePublished']).to eq('2017-11-30')
|
382
|
+
expect(json['contentUrl']).to eq([
|
383
|
+
's3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram', 'gs://topmed-irc-share/public/NWD165827.recab.cram'
|
384
|
+
])
|
385
|
+
expect(json['schemaVersion']).to eq('http://datacite.org/schema/kernel-4')
|
386
|
+
expect(json['publisher']).to eq('@type' => 'Organization', 'name' => 'TOPMed')
|
387
|
+
expect(json['citation']).to eq('@id' => 'https://doi.org/10.23725/2g4s-qv04',
|
388
|
+
'@type' => 'Dataset')
|
389
|
+
expect(json['funder']).to eq('@id' => 'https://doi.org/10.13039/100000050',
|
390
|
+
'@type' => 'Organization', 'name' => 'National Heart, Lung, and Blood Institute (NHLBI)')
|
391
|
+
expect(json['provider']).to eq('@type' => 'Organization', 'name' => 'DataCite')
|
357
392
|
end
|
358
393
|
|
359
|
-
it
|
360
|
-
input =
|
361
|
-
subject =
|
394
|
+
it 'interactive resource without dates' do
|
395
|
+
input = 'https://doi.org/10.34747/g6yb-3412'
|
396
|
+
subject = described_class.new(input: input, from: 'datacite')
|
362
397
|
json = JSON.parse(subject.schema_org)
|
363
|
-
expect(json[
|
364
|
-
expect(json[
|
365
|
-
expect(json[
|
398
|
+
expect(json['@id']).to eq('https://doi.org/10.34747/g6yb-3412')
|
399
|
+
expect(json['@type']).to eq('CreativeWork')
|
400
|
+
expect(json['datePublished']).to eq('2019')
|
366
401
|
end
|
367
402
|
end
|
368
403
|
end
|