briard 2.4.1 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -6
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -38
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +250 -160
- data/.github/workflows/release.yml +0 -47
data/lib/briard/cli.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
# frozen_string_literal: true
|
3
2
|
|
4
|
-
require
|
3
|
+
require 'thor'
|
5
4
|
|
6
5
|
require_relative 'doi_utils'
|
7
6
|
require_relative 'utils'
|
@@ -18,18 +17,18 @@ module Briard
|
|
18
17
|
# from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
|
19
18
|
map %w[--version -v] => :__print_version
|
20
19
|
|
21
|
-
desc
|
20
|
+
desc '--version, -v', 'print the version'
|
22
21
|
def __print_version
|
23
22
|
puts Briard::VERSION
|
24
23
|
end
|
25
24
|
|
26
|
-
desc
|
27
|
-
method_option :from, aliases:
|
28
|
-
method_option :to, aliases:
|
29
|
-
method_option :regenerate, :
|
30
|
-
method_option :style, aliases:
|
31
|
-
method_option :locale, aliases:
|
32
|
-
method_option :show_errors, :
|
25
|
+
desc '', 'convert metadata'
|
26
|
+
method_option :from, aliases: '-f'
|
27
|
+
method_option :to, aliases: '-t', default: 'schema_org'
|
28
|
+
method_option :regenerate, type: :boolean, force: false
|
29
|
+
method_option :style, aliases: '-s', default: 'apa'
|
30
|
+
method_option :locale, aliases: '-l', default: 'en-US'
|
31
|
+
method_option :show_errors, type: :boolean, force: false
|
33
32
|
method_option :depositor
|
34
33
|
method_option :email
|
35
34
|
method_option :registrant
|
@@ -44,16 +43,16 @@ module Briard
|
|
44
43
|
depositor: options[:depositor],
|
45
44
|
email: options[:email],
|
46
45
|
registrant: options[:registrant])
|
47
|
-
to = options[:to] ||
|
46
|
+
to = options[:to] || 'schema_org'
|
48
47
|
|
49
48
|
if options[:show_errors] && !metadata.valid?
|
50
|
-
|
49
|
+
warn metadata.errors
|
51
50
|
else
|
52
51
|
puts metadata.send(to)
|
53
52
|
end
|
54
53
|
end
|
55
54
|
|
56
|
-
desc
|
55
|
+
desc '', 'encode'
|
57
56
|
def encode(prefix)
|
58
57
|
puts encode_doi(prefix)
|
59
58
|
end
|
@@ -6,17 +6,17 @@ module Briard
|
|
6
6
|
# variables CROSSREF_DEPOSITOR_NAME, CROSSREF_DEPOSITOR_EMAIL and CROSSREF_REGISTRANT,
|
7
7
|
# e.g. in a .env file
|
8
8
|
def crossref_xml
|
9
|
-
@crossref_xml ||= Nokogiri::XML::Builder.new(:
|
9
|
+
@crossref_xml ||= Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
|
10
10
|
xml.doi_batch(crossref_root_attributes) do
|
11
11
|
xml.head do
|
12
12
|
# we use a uuid as batch_id
|
13
13
|
xml.doi_batch_id(SecureRandom.uuid)
|
14
14
|
xml.timestamp(Time.now.utc.strftime('%Y%m%d%H%M%S'))
|
15
15
|
xml.depositor do
|
16
|
-
xml.depositor_name(ENV
|
17
|
-
xml.email_address(ENV
|
16
|
+
xml.depositor_name(ENV.fetch('CROSSREF_DEPOSITOR_NAME', nil))
|
17
|
+
xml.email_address(ENV.fetch('CROSSREF_DEPOSITOR_EMAIL', nil))
|
18
18
|
end
|
19
|
-
xml.registrant(ENV
|
19
|
+
xml.registrant(ENV.fetch('CROSSREF_REGISTRANT', nil))
|
20
20
|
end
|
21
21
|
xml.body do
|
22
22
|
insert_crossref_work(xml)
|
@@ -26,10 +26,10 @@ module Briard
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def crossref_errors(xml: nil)
|
29
|
-
filepath = File.expand_path(
|
29
|
+
filepath = File.expand_path('../../resources/crossref/crossref5.3.1.xsd', __dir__)
|
30
30
|
schema = Nokogiri::XML::Schema(open(filepath))
|
31
31
|
|
32
|
-
schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map
|
32
|
+
schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map(&:to_s).unwrap
|
33
33
|
rescue Nokogiri::XML::SyntaxError => e
|
34
34
|
e.message
|
35
35
|
end
|
@@ -37,9 +37,10 @@ module Briard
|
|
37
37
|
def insert_crossref_work(xml)
|
38
38
|
return xml if doi.blank?
|
39
39
|
|
40
|
-
|
40
|
+
case types['resourceTypeGeneral']
|
41
|
+
when 'JournalArticle'
|
41
42
|
insert_journal(xml)
|
42
|
-
|
43
|
+
when 'Preprint'
|
43
44
|
insert_posted_content(xml)
|
44
45
|
end
|
45
46
|
end
|
@@ -47,15 +48,15 @@ module Briard
|
|
47
48
|
def insert_journal(xml)
|
48
49
|
xml.journal do
|
49
50
|
if language.present?
|
50
|
-
xml.journal_metadata(
|
51
|
-
xml.full_title(container[
|
51
|
+
xml.journal_metadata('language' => language) do
|
52
|
+
xml.full_title(container['title'])
|
52
53
|
end
|
53
54
|
else
|
54
55
|
xml.journal_metadata do
|
55
|
-
xml.full_title(container[
|
56
|
+
xml.full_title(container['title'])
|
56
57
|
end
|
57
58
|
end
|
58
|
-
xml.journal_article(
|
59
|
+
xml.journal_article('publication_type' => 'full_text') do
|
59
60
|
insert_crossref_titles(xml)
|
60
61
|
insert_crossref_creators(xml)
|
61
62
|
insert_crossref_publication_date(xml)
|
@@ -70,7 +71,7 @@ module Briard
|
|
70
71
|
end
|
71
72
|
|
72
73
|
def insert_posted_content(xml)
|
73
|
-
posted_content = {
|
74
|
+
posted_content = { 'type' => 'other', 'language' => language }.compact
|
74
75
|
|
75
76
|
xml.posted_content(posted_content) do
|
76
77
|
insert_group_title(xml)
|
@@ -89,29 +90,35 @@ module Briard
|
|
89
90
|
def insert_group_title(xml)
|
90
91
|
return xml if subjects.blank?
|
91
92
|
|
92
|
-
xml.group_title(subjects.first[
|
93
|
+
xml.group_title(subjects.first['subject'].titleize)
|
93
94
|
end
|
94
95
|
|
95
96
|
def insert_crossref_creators(xml)
|
96
97
|
xml.contributors do
|
97
98
|
Array.wrap(creators).each_with_index do |au, index|
|
98
|
-
xml.person_name(
|
99
|
-
|
99
|
+
xml.person_name('contributor_role' => 'author',
|
100
|
+
'sequence' => index.zero? ? 'first' : 'additional') do
|
101
|
+
insert_crossref_person(xml, au, 'author')
|
100
102
|
end
|
101
103
|
end
|
102
104
|
end
|
103
105
|
end
|
104
106
|
|
105
|
-
def insert_crossref_person(xml, person,
|
106
|
-
person_name = person[
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
107
|
+
def insert_crossref_person(xml, person, _type)
|
108
|
+
person_name = if person['familyName'].present?
|
109
|
+
[person['familyName'], person['givenName']].compact.join(', ')
|
110
|
+
else
|
111
|
+
person['name']
|
112
|
+
end
|
113
|
+
xml.given_name(person['givenName']) if person['givenName'].present?
|
114
|
+
xml.surname(person['familyName']) if person['familyName'].present?
|
115
|
+
if person.dig('nameIdentifiers', 0, 'nameIdentifierScheme') == 'ORCID'
|
116
|
+
xml.ORCID(person.dig('nameIdentifiers', 0, 'nameIdentifier'))
|
111
117
|
end
|
112
|
-
Array.wrap(person[
|
113
|
-
attributes = {
|
114
|
-
|
118
|
+
Array.wrap(person['affiliation']).each do |affiliation|
|
119
|
+
attributes = { 'affiliationIdentifier' => affiliation['affiliationIdentifier'],
|
120
|
+
'affiliationIdentifierScheme' => affiliation['affiliationIdentifierScheme'], 'schemeURI' => affiliation['schemeUri'] }.compact
|
121
|
+
xml.affiliation(affiliation['name'], attributes)
|
115
122
|
end
|
116
123
|
end
|
117
124
|
|
@@ -119,7 +126,7 @@ module Briard
|
|
119
126
|
xml.titles do
|
120
127
|
Array.wrap(titles).each do |title|
|
121
128
|
if title.is_a?(Hash)
|
122
|
-
xml.title(title[
|
129
|
+
xml.title(title['title'])
|
123
130
|
else
|
124
131
|
xml.title(title)
|
125
132
|
end
|
@@ -129,13 +136,13 @@ module Briard
|
|
129
136
|
|
130
137
|
def insert_citation_list(xml)
|
131
138
|
# filter out references
|
132
|
-
references = related_identifiers.find_all { |ri| ri[
|
139
|
+
references = related_identifiers.find_all { |ri| ri['relationType'] == 'References' }
|
133
140
|
return xml if references.blank?
|
134
141
|
|
135
142
|
xml.citation_list do
|
136
143
|
references.each do |ref|
|
137
144
|
xml.citation do
|
138
|
-
xml.doi(ref[
|
145
|
+
xml.doi(ref['relatedIdentifier'])
|
139
146
|
end
|
140
147
|
end
|
141
148
|
end
|
@@ -157,20 +164,24 @@ module Briard
|
|
157
164
|
# end
|
158
165
|
|
159
166
|
def insert_crossref_alternate_identifiers(xml)
|
160
|
-
alternate_identifier = Array.wrap(identifiers).
|
167
|
+
alternate_identifier = Array.wrap(identifiers).reject do |r|
|
168
|
+
r['identifierType'] == 'DOI'
|
169
|
+
end.first
|
161
170
|
return xml if alternate_identifier.blank?
|
162
171
|
|
163
|
-
xml.item_number(alternate_identifier[
|
172
|
+
xml.item_number(alternate_identifier['identifier'],
|
173
|
+
'item_number_type' => alternate_identifier['identifierType'])
|
164
174
|
end
|
165
175
|
|
166
176
|
def insert_crossref_access_indicators(xml)
|
167
177
|
return xml if rights_list.blank?
|
168
178
|
|
169
|
-
rights_uri = Array.wrap(rights_list).map { |l| l[
|
179
|
+
rights_uri = Array.wrap(rights_list).map { |l| l['rightsUri'] }.first
|
170
180
|
|
171
|
-
xml.program(
|
172
|
-
|
173
|
-
xml.license_ref(rights_uri,
|
181
|
+
xml.program('xmlns' => 'http://www.crossref.org/AccessIndicators.xsd',
|
182
|
+
'name' => 'AccessIndicators') do
|
183
|
+
xml.license_ref(rights_uri, 'applies_to' => 'vor')
|
184
|
+
xml.license_ref(rights_uri, 'applies_to' => 'tdm')
|
174
185
|
end
|
175
186
|
end
|
176
187
|
|
@@ -206,7 +217,7 @@ module Briard
|
|
206
217
|
xml.subjects do
|
207
218
|
subjects.each do |subject|
|
208
219
|
if subject.is_a?(Hash)
|
209
|
-
xml.subject(subject[
|
220
|
+
xml.subject(subject['subject'])
|
210
221
|
else
|
211
222
|
xml.subject(subject)
|
212
223
|
end
|
@@ -220,7 +231,6 @@ module Briard
|
|
220
231
|
# xml.version(version_info)
|
221
232
|
# end
|
222
233
|
|
223
|
-
|
224
234
|
def insert_crossref_language(xml)
|
225
235
|
return xml unless language.present?
|
226
236
|
|
@@ -231,8 +241,8 @@ module Briard
|
|
231
241
|
return xml if date_registered.blank?
|
232
242
|
|
233
243
|
date = get_datetime_from_iso8601(date_registered)
|
234
|
-
|
235
|
-
xml.publication_date(
|
244
|
+
|
245
|
+
xml.publication_date('media_type' => 'online') do
|
236
246
|
xml.month(date.month) if date.month.present?
|
237
247
|
xml.day(date.day) if date.day.present?
|
238
248
|
xml.year(date.year) if date.year.present?
|
@@ -240,7 +250,7 @@ module Briard
|
|
240
250
|
end
|
241
251
|
|
242
252
|
def insert_posted_date(xml)
|
243
|
-
date_posted = get_date(dates,
|
253
|
+
date_posted = get_date(dates, 'Issued')
|
244
254
|
return xml if date_posted.blank?
|
245
255
|
|
246
256
|
date = get_datetime_from_iso8601(date_posted)
|
@@ -266,9 +276,9 @@ module Briard
|
|
266
276
|
xml.doi_data do
|
267
277
|
xml.doi(doi)
|
268
278
|
xml.resource(url)
|
269
|
-
xml.collection(
|
279
|
+
xml.collection('property' => 'text-mining') do
|
270
280
|
xml.item do
|
271
|
-
xml.resource(url,
|
281
|
+
xml.resource(url, 'mime_type' => 'text/html')
|
272
282
|
end
|
273
283
|
end
|
274
284
|
end
|
@@ -283,25 +293,27 @@ module Briard
|
|
283
293
|
r = rights
|
284
294
|
else
|
285
295
|
r = {}
|
286
|
-
r[
|
287
|
-
r[
|
296
|
+
r['rights'] = rights
|
297
|
+
r['rightsUri'] = normalize_id(rights)
|
288
298
|
end
|
289
299
|
|
290
300
|
attributes = {
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
301
|
+
'rightsURI' => r['rightsUri'],
|
302
|
+
'rightsIdentifier' => r['rightsIdentifier'],
|
303
|
+
'rightsIdentifierScheme' => r['rightsIdentifierScheme'],
|
304
|
+
'schemeURI' => r['schemeUri'],
|
305
|
+
'xml:lang' => r['lang']
|
296
306
|
}.compact
|
297
307
|
|
298
|
-
xml.rights(r[
|
308
|
+
xml.rights(r['rights'], attributes)
|
299
309
|
end
|
300
310
|
end
|
301
311
|
end
|
302
312
|
|
303
313
|
def insert_crossref_issn(xml)
|
304
|
-
issn = container.to_h.fetch('identifierType', nil) ==
|
314
|
+
issn = if container.to_h.fetch('identifierType', nil) == 'ISSN'
|
315
|
+
container.to_h.fetch('identifier', nil)
|
316
|
+
end
|
305
317
|
|
306
318
|
return xml if issn.blank?
|
307
319
|
|
@@ -315,22 +327,22 @@ module Briard
|
|
315
327
|
d = descriptions.first
|
316
328
|
else
|
317
329
|
d = {}
|
318
|
-
d[
|
330
|
+
d['description'] = descriptions.first
|
319
331
|
end
|
320
332
|
|
321
|
-
xml.abstract(
|
322
|
-
xml.p(d[
|
333
|
+
xml.abstract('xmlns' => 'http://www.ncbi.nlm.nih.gov/JATS1') do
|
334
|
+
xml.p(d['description'])
|
323
335
|
end
|
324
336
|
end
|
325
337
|
|
326
338
|
def crossref_root_attributes
|
327
|
-
{
|
328
|
-
|
329
|
-
:
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
:
|
339
|
+
{ 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
|
340
|
+
'xsi:schemaLocation': 'http://www.crossref.org/schema/5.3.1 https://www.crossref.org/schemas/crossref5.3.1.xsd',
|
341
|
+
xmlns: 'http://www.crossref.org/schema/5.3.1',
|
342
|
+
'xmlns:jats': 'http://www.ncbi.nlm.nih.gov/JATS1',
|
343
|
+
'xmlns:fr': 'http://www.crossref.org/fundref.xsd',
|
344
|
+
'xmlns:mml': 'http://www.w3.org/1998/Math/MathML',
|
345
|
+
version: '5.3.1' }
|
334
346
|
end
|
335
347
|
end
|
336
|
-
end
|
348
|
+
end
|