briard 2.4.2 → 2.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +18 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -9
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -39
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/schema_org_writer_spec.rb +329 -294
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +242 -166
- data/.github/workflows/release.yml +0 -47
data/spec/author_utils_spec.rb
CHANGED
@@ -3,188 +3,207 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Briard::Metadata, vcr: true do
|
6
|
-
|
6
|
+
subject { described_class.new(input: input, from: 'crossref') }
|
7
7
|
|
8
|
-
|
8
|
+
let(:input) { 'https://doi.org/10.1101/097196' }
|
9
9
|
|
10
|
-
context
|
11
|
-
it
|
12
|
-
author = {
|
10
|
+
context 'is_personal_name?' do
|
11
|
+
it 'has type organization' do
|
12
|
+
author = { 'email' => 'info@ucop.edu', 'name' => 'University of California, Santa Barbara',
|
13
|
+
'role' => { 'namespace' => 'http://www.ngdc.noaa.gov/metadata/published/xsd/schema/resources/Codelist/gmxCodelists.xml#CI_RoleCode', 'roleCode' => 'copyrightHolder' }, 'nameType' => 'Organizational' }
|
13
14
|
expect(subject.is_personal_name?(author)).to be false
|
14
15
|
end
|
15
16
|
|
16
|
-
it
|
17
|
-
author = {
|
17
|
+
it 'has id' do
|
18
|
+
author = { 'id' => 'http://orcid.org/0000-0003-1419-2405', 'givenName' => 'Martin', 'familyName' => 'Fenner', 'name' => 'Martin Fenner' }
|
18
19
|
expect(subject.is_personal_name?(author)).to be true
|
19
20
|
end
|
20
21
|
|
21
|
-
it
|
22
|
-
author = {
|
22
|
+
it 'has orcid id' do
|
23
|
+
author = { 'creatorName' => 'Fenner, Martin', 'givenName' => 'Martin', 'familyName' => 'Fenner',
|
24
|
+
'nameIdentifier' => { 'schemeURI' => 'http://orcid.org/', 'nameIdentifierScheme' => 'ORCID', '__content__' => '0000-0003-1419-2405' } }
|
23
25
|
expect(subject.is_personal_name?(author)).to be true
|
24
26
|
end
|
25
|
-
|
26
|
-
|
27
|
+
|
28
|
+
it 'has family name' do
|
29
|
+
author = { 'givenName' => 'Martin', 'familyName' => 'Fenner', 'name' => 'Martin Fenner' }
|
27
30
|
expect(subject.is_personal_name?(author)).to be true
|
28
31
|
end
|
29
32
|
|
30
|
-
it
|
31
|
-
author = {
|
33
|
+
it 'has comma' do
|
34
|
+
author = { 'name' => 'Fenner, Martin' }
|
32
35
|
expect(subject.is_personal_name?(author)).to be true
|
33
36
|
end
|
34
37
|
|
35
|
-
it
|
36
|
-
author = {
|
38
|
+
it 'has known given name' do
|
39
|
+
author = { 'name' => 'Martin Fenner' }
|
37
40
|
expect(subject.is_personal_name?(author)).to be true
|
38
41
|
end
|
39
42
|
|
40
|
-
it
|
41
|
-
author = {
|
43
|
+
it 'has no info' do
|
44
|
+
author = { 'name' => 'M Fenner' }
|
42
45
|
expect(subject.is_personal_name?(author)).to be false
|
43
46
|
end
|
44
47
|
end
|
45
48
|
|
46
|
-
context
|
47
|
-
it
|
48
|
-
input =
|
49
|
-
subject =
|
50
|
-
meta = Maremma.from_xml(subject.raw).fetch(
|
51
|
-
response = subject.get_one_author(meta.dig(
|
52
|
-
expect(response).to eq(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
response
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
49
|
+
context 'get_one_author' do
|
50
|
+
it 'has familyName' do
|
51
|
+
input = 'https://doi.org/10.5438/4K3M-NYVG'
|
52
|
+
subject = described_class.new(input: input, from: 'datacite')
|
53
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
54
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
55
|
+
expect(response).to eq(
|
56
|
+
'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405',
|
57
|
+
'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'name' => 'Fenner, Martin', 'givenName' => 'Martin', 'familyName' => 'Fenner'
|
58
|
+
)
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'has name in sort-order' do
|
62
|
+
input = 'https://doi.org/10.5061/dryad.8515'
|
63
|
+
subject = described_class.new(input: input, from: 'datacite')
|
64
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
65
|
+
response = subject.get_one_author(meta.dig('creators', 'creator').first)
|
66
|
+
expect(response).to eq('nameType' => 'Personal', 'name' => 'Ollomo, Benjamin',
|
67
|
+
'givenName' => 'Benjamin', 'familyName' => 'Ollomo', 'nameIdentifiers' => [], 'affiliation' => [{ 'affiliationIdentifier' => 'https://ror.org/01wyqb997', 'affiliationIdentifierScheme' => 'ROR', 'name' => 'Centre International de Recherches Médicales de Franceville' }])
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'has name in display-order' do
|
71
|
+
input = 'https://doi.org/10.5281/ZENODO.48440'
|
72
|
+
subject = described_class.new(input: input, from: 'datacite')
|
73
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
74
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
75
|
+
expect(response).to eq('nameType' => 'Personal', 'name' => 'Garza, Kristian',
|
76
|
+
'givenName' => 'Kristian', 'familyName' => 'Garza', 'nameIdentifiers' => [], 'affiliation' => [])
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'has name in display-order with ORCID' do
|
80
|
+
input = 'https://doi.org/10.6084/M9.FIGSHARE.4700788'
|
81
|
+
subject = described_class.new(input: input, from: 'datacite')
|
82
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
83
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
84
|
+
expect(response).to eq('nameType' => 'Personal',
|
85
|
+
'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-4881-1606', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'name' => 'Bedini, Andrea', 'givenName' => 'Andrea', 'familyName' => 'Bedini', 'affiliation' => [])
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'has name in Thai' do
|
89
|
+
input = 'https://doi.org/10.14457/KMITL.res.2006.17'
|
90
|
+
subject = described_class.new(input: input, from: 'datacite')
|
91
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
92
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
93
|
+
expect(response).to eq('name' => 'กัญจนา แซ่เตียว', 'nameIdentifiers' => [],
|
94
|
+
'affiliation' => [])
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'multiple author names in one field' do
|
98
|
+
input = 'https://doi.org/10.7910/dvn/eqtqyo'
|
99
|
+
subject = described_class.new(input: input, from: 'datacite')
|
100
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
101
|
+
response = subject.get_authors(meta.dig('creators', 'creator'))
|
102
|
+
expect(response).to eq([{
|
103
|
+
'name' => 'Enos, Ryan (Harvard University); Fowler, Anthony (University Of Chicago); Vavreck, Lynn (UCLA)', 'nameIdentifiers' => [], 'affiliation' => []
|
104
|
+
}])
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'hyper-authorship' do
|
108
|
+
input = 'https://doi.org/10.17182/HEPDATA.77274.V1'
|
109
|
+
subject = described_class.new(input: input, from: 'datacite')
|
110
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
111
|
+
response = subject.get_authors(meta.dig('creators', 'creator'))
|
112
|
+
expect(response).to eq([{ 'affiliation' => [], 'name' => 'ALICE Collaboration',
|
113
|
+
'nameIdentifiers' => [], 'nameType' => 'Organizational' }])
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'is organization' do
|
117
|
+
author = { 'email' => 'info@ucop.edu',
|
118
|
+
'creatorName' => { '__content__' => 'University of California, Santa Barbara', 'nameType' => 'Organizational' }, 'role' => { 'namespace' => 'http://www.ngdc.noaa.gov/metadata/published/xsd/schema/resources/Codelist/gmxCodelists.xml#CI_RoleCode', 'roleCode' => 'copyrightHolder' } }
|
105
119
|
response = subject.get_one_author(author)
|
106
|
-
expect(response).to eq(
|
120
|
+
expect(response).to eq('nameType' => 'Organizational',
|
121
|
+
'name' => 'University Of California, Santa Barbara', 'nameIdentifiers' => [], 'affiliation' => [])
|
107
122
|
end
|
108
123
|
|
109
|
-
it
|
110
|
-
input =
|
111
|
-
subject =
|
112
|
-
meta = Maremma.from_xml(subject.raw).fetch(
|
113
|
-
response = subject.get_one_author(meta.dig(
|
114
|
-
expect(response).to eq(
|
124
|
+
it 'name with affiliation' do
|
125
|
+
input = '10.11588/DIGLIT.6130'
|
126
|
+
subject = described_class.new(input: input, from: 'datacite')
|
127
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
128
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
129
|
+
expect(response).to eq('nameType' => 'Organizational', 'name' => 'Dr. Störi, Kunstsalon',
|
130
|
+
'nameIdentifiers' => [], 'affiliation' => [])
|
115
131
|
end
|
116
132
|
|
117
|
-
it
|
118
|
-
input =
|
119
|
-
subject =
|
133
|
+
it 'name with affiliation and country' do
|
134
|
+
input = '10.16910/jemr.9.1.2'
|
135
|
+
subject = described_class.new(input: input, from: 'crossref')
|
120
136
|
response = subject.get_one_author(subject.creators.first)
|
121
|
-
expect(response).to eq(
|
122
|
-
|
123
|
-
|
137
|
+
expect(response).to eq('familyName' => 'Eraslan',
|
138
|
+
'givenName' => 'Sukru',
|
139
|
+
'name' => 'Eraslan, Sukru')
|
124
140
|
end
|
125
141
|
|
126
|
-
it
|
127
|
-
input =
|
128
|
-
subject =
|
129
|
-
meta = Maremma.from_xml(subject.raw).fetch(
|
130
|
-
response = subject.get_one_author(meta.dig(
|
131
|
-
expect(response).to eq(
|
132
|
-
"name" => "Unknown",
|
133
|
-
"nameIdentifiers" => [])
|
142
|
+
it 'name with role' do
|
143
|
+
input = '10.14463/GBV:873056442'
|
144
|
+
subject = described_class.new(input: input, from: 'datacite')
|
145
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
146
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
147
|
+
expect(response).to eq('affiliation' => [], 'name' => 'Unknown', 'nameIdentifiers' => [])
|
134
148
|
end
|
135
149
|
|
136
|
-
it
|
137
|
-
input =
|
138
|
-
subject =
|
139
|
-
meta = Maremma.from_xml(subject.raw).fetch(
|
140
|
-
response = subject.get_one_author(meta.dig(
|
141
|
-
expect(response).to eq(
|
150
|
+
it 'multiple name_identifier' do
|
151
|
+
input = '10.24350/CIRM.V.19028803'
|
152
|
+
subject = described_class.new(input: input, from: 'datacite')
|
153
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
154
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
155
|
+
expect(response).to eq('nameType' => 'Personal', 'name' => 'Dubos, Thomas',
|
156
|
+
'givenName' => 'Thomas', 'familyName' => 'Dubos', 'affiliation' => [{ 'name' => 'École Polytechnique Laboratoire de Météorologie Dynamique' }], 'nameIdentifiers' => [{ 'nameIdentifier' => 'http://isni.org/isni/0000 0003 5752 6882', 'nameIdentifierScheme' => 'ISNI', 'schemeUri' => 'http://isni.org/isni/' }, { 'nameIdentifier' => 'https://orcid.org/0000-0003-4514-4211', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }])
|
142
157
|
end
|
143
158
|
|
144
|
-
it
|
145
|
-
input = fixture_path
|
146
|
-
subject =
|
147
|
-
meta = Maremma.from_xml(subject.raw).fetch(
|
148
|
-
response = subject.get_one_author(meta.dig(
|
149
|
-
expect(response).to eq(
|
159
|
+
it 'nameType organizational' do
|
160
|
+
input = "#{fixture_path}gtex.xml"
|
161
|
+
subject = described_class.new(input: input, from: 'datacite')
|
162
|
+
meta = Maremma.from_xml(subject.raw).fetch('resource', {})
|
163
|
+
response = subject.get_one_author(meta.dig('creators', 'creator'))
|
164
|
+
expect(response).to eq('nameType' => 'Organizational', 'name' => 'The GTEx Consortium',
|
165
|
+
'nameIdentifiers' => [], 'affiliation' => [])
|
150
166
|
end
|
151
167
|
|
152
|
-
it
|
153
|
-
input =
|
154
|
-
subject =
|
155
|
-
expect(subject.creators.first).to eq(
|
168
|
+
it 'only familyName and givenName' do
|
169
|
+
input = 'https://doi.pangaea.de/10.1594/PANGAEA.836178'
|
170
|
+
subject = described_class.new(input: input, from: 'schema_org')
|
171
|
+
expect(subject.creators.first).to eq('nameType' => 'Personal', 'name' => 'Johansson, Emma',
|
172
|
+
'givenName' => 'Emma', 'familyName' => 'Johansson')
|
156
173
|
end
|
157
174
|
end
|
158
175
|
|
159
|
-
context
|
160
|
-
let(:author_with_organization)
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
176
|
+
context 'authors_as_string' do
|
177
|
+
let(:author_with_organization) do
|
178
|
+
[{ 'type' => 'Person',
|
179
|
+
'id' => 'http://orcid.org/0000-0003-0077-4738',
|
180
|
+
'name' => 'Matt Jones' },
|
181
|
+
{ 'type' => 'Person',
|
182
|
+
'id' => 'http://orcid.org/0000-0002-2192-403X',
|
183
|
+
'name' => 'Peter Slaughter' },
|
184
|
+
{ 'type' => 'Organization',
|
185
|
+
'id' => 'http://orcid.org/0000-0002-3957-2474',
|
186
|
+
'name' => 'University of California, Santa Barbara' }]
|
187
|
+
end
|
188
|
+
|
189
|
+
it 'author' do
|
171
190
|
response = subject.authors_as_string(subject.creators)
|
172
|
-
expect(response).to eq(
|
191
|
+
expect(response).to eq('Fenner, Martin and Crosas, Merc?? and Grethe, Jeffrey and Kennedy, David and Hermjakob, Henning and Rocca-Serra, Philippe and Durand, Gustavo and Berjon, Robin and Karcher, Sebastian and Martone, Maryann and Clark, Timothy')
|
173
192
|
end
|
174
193
|
|
175
|
-
it
|
194
|
+
it 'single author' do
|
176
195
|
response = subject.authors_as_string(subject.creators.first)
|
177
|
-
expect(response).to eq(
|
196
|
+
expect(response).to eq('Fenner, Martin')
|
178
197
|
end
|
179
198
|
|
180
|
-
it
|
199
|
+
it 'no author' do
|
181
200
|
response = subject.authors_as_string(nil)
|
182
|
-
expect(response).to
|
201
|
+
expect(response.nil?).to be(true)
|
183
202
|
end
|
184
203
|
|
185
|
-
it
|
204
|
+
it 'with organization' do
|
186
205
|
response = subject.authors_as_string(author_with_organization)
|
187
|
-
expect(response).to eq(
|
206
|
+
expect(response).to eq('Matt Jones and Peter Slaughter and {University of California, Santa Barbara}')
|
188
207
|
end
|
189
208
|
end
|
190
209
|
end
|