briard 2.4.2 → 2.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +144 -620
  6. data/.rubocop_todo.yml +76 -0
  7. data/CHANGELOG.md +18 -0
  8. data/Gemfile +2 -0
  9. data/Gemfile.lock +43 -9
  10. data/Rakefile +1 -1
  11. data/{bolognese.gemspec → briard.gemspec} +46 -39
  12. data/lib/briard/array.rb +2 -2
  13. data/lib/briard/author_utils.rb +79 -71
  14. data/lib/briard/cli.rb +12 -13
  15. data/lib/briard/crossref_utils.rb +73 -61
  16. data/lib/briard/datacite_utils.rb +132 -106
  17. data/lib/briard/doi_utils.rb +10 -10
  18. data/lib/briard/metadata.rb +96 -106
  19. data/lib/briard/metadata_utils.rb +87 -78
  20. data/lib/briard/readers/bibtex_reader.rb +65 -65
  21. data/lib/briard/readers/cff_reader.rb +88 -70
  22. data/lib/briard/readers/citeproc_reader.rb +90 -84
  23. data/lib/briard/readers/codemeta_reader.rb +68 -50
  24. data/lib/briard/readers/crosscite_reader.rb +2 -2
  25. data/lib/briard/readers/crossref_reader.rb +249 -210
  26. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  27. data/lib/briard/readers/datacite_reader.rb +225 -189
  28. data/lib/briard/readers/npm_reader.rb +49 -42
  29. data/lib/briard/readers/ris_reader.rb +82 -80
  30. data/lib/briard/readers/schema_org_reader.rb +182 -159
  31. data/lib/briard/string.rb +1 -1
  32. data/lib/briard/utils.rb +4 -4
  33. data/lib/briard/version.rb +3 -1
  34. data/lib/briard/whitelist_scrubber.rb +11 -4
  35. data/lib/briard/writers/bibtex_writer.rb +14 -8
  36. data/lib/briard/writers/cff_writer.rb +33 -26
  37. data/lib/briard/writers/codemeta_writer.rb +19 -15
  38. data/lib/briard/writers/csv_writer.rb +6 -4
  39. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  40. data/lib/briard/writers/jats_writer.rb +33 -28
  41. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  42. data/lib/briard/writers/ris_writer.rb +30 -18
  43. data/lib/briard/writers/turtle_writer.rb +1 -1
  44. data/lib/briard.rb +6 -6
  45. data/rubocop.sarif +0 -0
  46. data/spec/array_spec.rb +5 -5
  47. data/spec/author_utils_spec.rb +151 -132
  48. data/spec/datacite_utils_spec.rb +135 -83
  49. data/spec/doi_utils_spec.rb +168 -164
  50. data/spec/find_from_format_spec.rb +69 -69
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  52. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  53. data/spec/metadata_spec.rb +91 -90
  54. data/spec/readers/bibtex_reader_spec.rb +43 -38
  55. data/spec/readers/cff_reader_spec.rb +165 -153
  56. data/spec/readers/citeproc_reader_spec.rb +45 -40
  57. data/spec/readers/codemeta_reader_spec.rb +128 -115
  58. data/spec/readers/crosscite_reader_spec.rb +34 -24
  59. data/spec/readers/crossref_reader_spec.rb +1098 -939
  60. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  61. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  62. data/spec/readers/npm_reader_spec.rb +48 -43
  63. data/spec/readers/ris_reader_spec.rb +53 -47
  64. data/spec/readers/schema_org_reader_spec.rb +329 -267
  65. data/spec/spec_helper.rb +6 -5
  66. data/spec/utils_spec.rb +371 -347
  67. data/spec/writers/bibtex_writer_spec.rb +143 -143
  68. data/spec/writers/cff_writer_spec.rb +96 -90
  69. data/spec/writers/citation_writer_spec.rb +34 -33
  70. data/spec/writers/citeproc_writer_spec.rb +226 -224
  71. data/spec/writers/codemeta_writer_spec.rb +18 -16
  72. data/spec/writers/crosscite_writer_spec.rb +91 -73
  73. data/spec/writers/crossref_writer_spec.rb +99 -91
  74. data/spec/writers/csv_writer_spec.rb +70 -70
  75. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  76. data/spec/writers/datacite_writer_spec.rb +417 -322
  77. data/spec/writers/jats_writer_spec.rb +177 -161
  78. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  79. data/spec/writers/ris_writer_spec.rb +162 -162
  80. data/spec/writers/schema_org_writer_spec.rb +329 -294
  81. data/spec/writers/turtle_writer_spec.rb +47 -47
  82. metadata +242 -166
  83. data/.github/workflows/release.yml +0 -47
@@ -3,41 +3,54 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Briard::Metadata, vcr: true do
6
- let(:input) { "https://doi.org/10.5061/DRYAD.8515" }
6
+ subject { described_class.new(input: input, from: 'datacite') }
7
7
 
8
- subject { Briard::Metadata.new(input: input, from: "datacite") }
8
+ let(:input) { 'https://doi.org/10.5061/DRYAD.8515' }
9
9
 
10
- context "insert_identifier" do
11
- it "doi" do
12
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_identifier(xml) }.to_xml
10
+ context 'insert_identifier' do
11
+ it 'doi' do
12
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
13
+ subject.insert_identifier(xml)
14
+ end.to_xml
13
15
  response = Maremma.from_xml(xml)
14
- expect(response["identifier"]).to eq("identifierType"=>"DOI", "__content__"=>"10.5061/dryad.8515")
16
+ expect(response['identifier']).to eq('identifierType' => 'DOI',
17
+ '__content__' => '10.5061/dryad.8515')
15
18
  end
16
19
  end
17
20
 
18
- context "insert_creators" do
19
- it "insert" do
20
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_creators(xml) }.to_xml
21
+ context 'insert_creators' do
22
+ it 'insert' do
23
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
24
+ subject.insert_creators(xml)
25
+ end.to_xml
21
26
  response = Maremma.from_xml(xml)
22
- expect(response.dig("creators", "creator").first).to eq("affiliation" => {"__content__"=>"Centre International de Recherches Médicales de Franceville", "affiliationIdentifier"=>"https://ror.org/01wyqb997", "affiliationIdentifierScheme"=>"ROR"}, "creatorName"=>{"__content__"=>"Ollomo, Benjamin", "nameType"=>"Personal"}, "familyName"=>"Ollomo", "givenName"=>"Benjamin")
27
+ expect(response.dig('creators',
28
+ 'creator').first).to eq(
29
+ 'affiliation' => { '__content__' => 'Centre International de Recherches Médicales de Franceville',
30
+ 'affiliationIdentifier' => 'https://ror.org/01wyqb997', 'affiliationIdentifierScheme' => 'ROR' }, 'creatorName' => { '__content__' => 'Ollomo, Benjamin', 'nameType' => 'Personal' }, 'familyName' => 'Ollomo', 'givenName' => 'Benjamin'
31
+ )
23
32
  end
24
33
  end
25
34
 
26
- context "insert_contributors" do
27
- it "none" do
28
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_contributors(xml) }.to_xml
35
+ context 'insert_contributors' do
36
+ it 'none' do
37
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
38
+ subject.insert_contributors(xml)
39
+ end.to_xml
29
40
  response = Maremma.from_xml(xml)
30
- expect(response).to be_nil
41
+ expect(response.nil?).to be(true)
31
42
  end
32
43
  end
33
44
 
34
- context "insert_person" do
35
- it "creator only name" do
36
- person = { "name" => "Carberry, Josiah" }
37
- type = "creator"
38
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_person(xml, person, type) }.to_xml
45
+ context 'insert_person' do
46
+ it 'creator only name' do
47
+ person = { 'name' => 'Carberry, Josiah' }
48
+ type = 'creator'
49
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
50
+ subject.insert_person(xml, person, type)
51
+ end.to_xml
39
52
  response = Maremma.from_xml(xml)
40
- expect(response).to eq("creatorName"=>"Carberry, Josiah")
53
+ expect(response).to eq('creatorName' => 'Carberry, Josiah')
41
54
  end
42
55
 
43
56
  # it "creator given and family name" do
@@ -49,35 +62,45 @@ describe Briard::Metadata, vcr: true do
49
62
  # end
50
63
  end
51
64
 
52
- context "insert_titles" do
53
- it "insert" do
54
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_titles(xml) }.to_xml
65
+ context 'insert_titles' do
66
+ it 'insert' do
67
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
68
+ subject.insert_titles(xml)
69
+ end.to_xml
55
70
  response = Maremma.from_xml(xml)
56
- expect(response.dig("titles", "title")).to eq("Data from: A new malaria agent in African hominids.")
71
+ expect(response.dig('titles',
72
+ 'title')).to eq('Data from: A new malaria agent in African hominids.')
57
73
  end
58
74
  end
59
75
 
60
- context "insert_publisher" do
61
- it "insert" do
62
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_publisher(xml) }.to_xml
76
+ context 'insert_publisher' do
77
+ it 'insert' do
78
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
79
+ subject.insert_publisher(xml)
80
+ end.to_xml
63
81
  response = Maremma.from_xml(xml)
64
- expect(response["publisher"]).to eq("Dryad")
82
+ expect(response['publisher']).to eq('Dryad')
65
83
  end
66
84
  end
67
85
 
68
- context "insert_publication_year" do
69
- it "insert" do
70
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_publication_year(xml) }.to_xml
86
+ context 'insert_publication_year' do
87
+ it 'insert' do
88
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
89
+ subject.insert_publication_year(xml)
90
+ end.to_xml
71
91
  response = Maremma.from_xml(xml)
72
- expect(response["publicationYear"]).to eq("2011")
92
+ expect(response['publicationYear']).to eq('2011')
73
93
  end
74
94
  end
75
95
 
76
- context "insert_resource_type" do
77
- it "insert" do
78
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_resource_type(xml) }.to_xml
96
+ context 'insert_resource_type' do
97
+ it 'insert' do
98
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
99
+ subject.insert_resource_type(xml)
100
+ end.to_xml
79
101
  response = Maremma.from_xml(xml)
80
- expect(response["resourceType"]).to eq("resourceTypeGeneral"=>"Dataset", "__content__"=>"dataset")
102
+ expect(response['resourceType']).to eq('resourceTypeGeneral' => 'Dataset',
103
+ '__content__' => 'dataset')
81
104
  end
82
105
  end
83
106
 
@@ -89,90 +112,119 @@ describe Briard::Metadata, vcr: true do
89
112
  # end
90
113
  # end
91
114
 
92
- context "insert_dates" do
93
- it "insert" do
94
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_dates(xml) }.to_xml
115
+ context 'insert_dates' do
116
+ it 'insert' do
117
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
118
+ subject.insert_dates(xml)
119
+ end.to_xml
95
120
  response = Maremma.from_xml(xml)
96
- expect(response.dig("dates", "date")).to eq([{"__content__"=>"2011-02-01T17:22:41Z", "dateType"=>"Available"}, {"__content__"=>"2011", "dateType"=>"Issued"}])
121
+ expect(response.dig('dates',
122
+ 'date')).to eq([{ '__content__' => '2011-02-01T17:22:41Z', 'dateType' => 'Available' },
123
+ { '__content__' => '2011', 'dateType' => 'Issued' }])
97
124
  end
98
125
  end
99
126
 
100
- context "insert_subjects" do
101
- it "insert" do
102
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_subjects(xml) }.to_xml
127
+ context 'insert_subjects' do
128
+ it 'insert' do
129
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
130
+ subject.insert_subjects(xml)
131
+ end.to_xml
103
132
  response = Maremma.from_xml(xml)
104
- expect(response.dig("subjects", "subject")).to eq(["plasmodium", "malaria", "mitochondrial genome", "parasites"])
133
+ expect(response.dig('subjects',
134
+ 'subject')).to eq(['plasmodium', 'malaria', 'mitochondrial genome',
135
+ 'parasites'])
105
136
  end
106
137
  end
107
138
 
108
- context "insert_version" do
109
- it "insert" do
110
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_version(xml) }.to_xml
139
+ context 'insert_version' do
140
+ it 'insert' do
141
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
142
+ subject.insert_version(xml)
143
+ end.to_xml
111
144
  response = Maremma.from_xml(xml)
112
- expect(response.fetch("version", nil)).to eq("1")
145
+ expect(response.fetch('version', nil)).to eq('1')
113
146
  end
114
147
  end
115
148
 
116
- context "insert_sizes" do
117
- it "insert" do
118
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_sizes(xml) }.to_xml
149
+ context 'insert_sizes' do
150
+ it 'insert' do
151
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
152
+ subject.insert_sizes(xml)
153
+ end.to_xml
119
154
  response = Maremma.from_xml(xml)
120
- expect(response.fetch("sizes", nil)).to eq("size"=>"107328 bytes")
155
+ expect(response.fetch('sizes', nil)).to eq('size' => '107328 bytes')
121
156
  end
122
157
  end
123
158
 
124
- context "insert_formats" do
125
- let(:input) { IO.read(fixture_path + 'datacite-empty-sizes.xml') }
126
-
127
- subject { Briard::Metadata.new(input: input, from: "datacite") }
159
+ context 'insert_formats' do
160
+ subject { described_class.new(input: input, from: 'datacite') }
128
161
 
129
- it "insert" do
130
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_formats(xml) }.to_xml
162
+ let(:input) { File.read("#{fixture_path}datacite-empty-sizes.xml") }
163
+
164
+ it 'insert' do
165
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
166
+ subject.insert_formats(xml)
167
+ end.to_xml
131
168
  response = Maremma.from_xml(xml)
132
- expect(response.fetch("formats", nil)).to eq("format" => "text")
169
+ expect(response.fetch('formats', nil)).to eq('format' => 'text')
133
170
  end
134
171
  end
135
172
 
136
- context "insert_language" do
137
- it "insert" do
138
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_language(xml) }.to_xml
173
+ context 'insert_language' do
174
+ it 'insert' do
175
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
176
+ subject.insert_language(xml)
177
+ end.to_xml
139
178
  response = Maremma.from_xml(xml)
140
- expect(response.fetch("language", nil)).to eq("en")
179
+ expect(response.fetch('language', nil)).to eq('en')
141
180
  end
142
181
  end
143
182
 
144
- context "insert_related_identifiers" do
145
- it "related_identifier" do
183
+ context 'insert_related_identifiers' do
184
+ it 'related_identifier' do
146
185
  expect(subject.related_identifiers.length).to eq(1)
147
- expect(subject.related_identifiers.first).to eq("relatedIdentifier"=>"10.1371/journal.ppat.1000446", "relatedIdentifierType"=>"DOI", "relationType"=>"IsCitedBy")
186
+ expect(subject.related_identifiers.first).to eq(
187
+ 'relatedIdentifier' => '10.1371/journal.ppat.1000446', 'relatedIdentifierType' => 'DOI', 'relationType' => 'IsCitedBy'
188
+ )
148
189
  end
149
190
 
150
- it "insert" do
151
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_related_identifiers(xml) }.to_xml
191
+ it 'insert' do
192
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
193
+ subject.insert_related_identifiers(xml)
194
+ end.to_xml
152
195
  response = Maremma.from_xml(xml)
153
- expect(response.dig("relatedIdentifiers", "relatedIdentifier")).to eq("__content__"=>"10.1371/journal.ppat.1000446", "relatedIdentifierType"=>"DOI", "relationType"=>"IsCitedBy")
196
+ expect(response.dig('relatedIdentifiers',
197
+ 'relatedIdentifier')).to eq('__content__' => '10.1371/journal.ppat.1000446',
198
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'IsCitedBy')
154
199
  end
155
200
  end
156
201
 
157
- context "insert_rights_list" do
158
- it "insert" do
159
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_rights_list(xml) }.to_xml
202
+ context 'insert_rights_list' do
203
+ it 'insert' do
204
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
205
+ subject.insert_rights_list(xml)
206
+ end.to_xml
160
207
  response = Maremma.from_xml(xml)
161
- expect(response.dig("rightsList", "rights")).to eq("__content__" => "Creative Commons Zero v1.0 Universal",
162
- "rightsIdentifier" => "cc0-1.0",
163
- "rightsIdentifierScheme" => "SPDX",
164
- "rightsURI" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
165
- "schemeURI" => "https://spdx.org/licenses/")
208
+ expect(response.dig('rightsList',
209
+ 'rights')).to eq('__content__' => 'Creative Commons Zero v1.0 Universal',
210
+ 'rightsIdentifier' => 'cc0-1.0',
211
+ 'rightsIdentifierScheme' => 'SPDX',
212
+ 'rightsURI' => 'https://creativecommons.org/publicdomain/zero/1.0/legalcode',
213
+ 'schemeURI' => 'https://spdx.org/licenses/')
166
214
  end
167
215
  end
168
216
 
169
- context "insert_descriptions" do
170
- it "insert" do
171
- input = "https://doi.org/10.5438/4K3M-NYVG"
172
- subject = Briard::Metadata.new(input: input, from: "datacite")
173
- xml = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml| subject.insert_descriptions(xml) }.to_xml
217
+ context 'insert_descriptions' do
218
+ it 'insert' do
219
+ input = 'https://doi.org/10.5438/4K3M-NYVG'
220
+ subject = described_class.new(input: input, from: 'datacite')
221
+ xml = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
222
+ subject.insert_descriptions(xml)
223
+ end.to_xml
174
224
  response = Maremma.from_xml(xml)
175
- expect(response.dig("descriptions", "description")).to eq("descriptionType" => "Abstract", "__content__" => "Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...")
225
+ expect(response.dig('descriptions',
226
+ 'description')).to eq('descriptionType' => 'Abstract',
227
+ '__content__' => 'Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...')
176
228
  end
177
229
  end
178
230
  end