briard 2.4.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -6
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -38
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +250 -160
- data/.github/workflows/release.yml +0 -47
data/lib/briard/cli.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
# frozen_string_literal: true
|
3
2
|
|
4
|
-
require
|
3
|
+
require 'thor'
|
5
4
|
|
6
5
|
require_relative 'doi_utils'
|
7
6
|
require_relative 'utils'
|
@@ -18,18 +17,18 @@ module Briard
|
|
18
17
|
# from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
|
19
18
|
map %w[--version -v] => :__print_version
|
20
19
|
|
21
|
-
desc
|
20
|
+
desc '--version, -v', 'print the version'
|
22
21
|
def __print_version
|
23
22
|
puts Briard::VERSION
|
24
23
|
end
|
25
24
|
|
26
|
-
desc
|
27
|
-
method_option :from, aliases:
|
28
|
-
method_option :to, aliases:
|
29
|
-
method_option :regenerate, :
|
30
|
-
method_option :style, aliases:
|
31
|
-
method_option :locale, aliases:
|
32
|
-
method_option :show_errors, :
|
25
|
+
desc '', 'convert metadata'
|
26
|
+
method_option :from, aliases: '-f'
|
27
|
+
method_option :to, aliases: '-t', default: 'schema_org'
|
28
|
+
method_option :regenerate, type: :boolean, force: false
|
29
|
+
method_option :style, aliases: '-s', default: 'apa'
|
30
|
+
method_option :locale, aliases: '-l', default: 'en-US'
|
31
|
+
method_option :show_errors, type: :boolean, force: false
|
33
32
|
method_option :depositor
|
34
33
|
method_option :email
|
35
34
|
method_option :registrant
|
@@ -44,16 +43,16 @@ module Briard
|
|
44
43
|
depositor: options[:depositor],
|
45
44
|
email: options[:email],
|
46
45
|
registrant: options[:registrant])
|
47
|
-
to = options[:to] ||
|
46
|
+
to = options[:to] || 'schema_org'
|
48
47
|
|
49
48
|
if options[:show_errors] && !metadata.valid?
|
50
|
-
|
49
|
+
warn metadata.errors
|
51
50
|
else
|
52
51
|
puts metadata.send(to)
|
53
52
|
end
|
54
53
|
end
|
55
54
|
|
56
|
-
desc
|
55
|
+
desc '', 'encode'
|
57
56
|
def encode(prefix)
|
58
57
|
puts encode_doi(prefix)
|
59
58
|
end
|
@@ -6,17 +6,17 @@ module Briard
|
|
6
6
|
# variables CROSSREF_DEPOSITOR_NAME, CROSSREF_DEPOSITOR_EMAIL and CROSSREF_REGISTRANT,
|
7
7
|
# e.g. in a .env file
|
8
8
|
def crossref_xml
|
9
|
-
@crossref_xml ||= Nokogiri::XML::Builder.new(:
|
9
|
+
@crossref_xml ||= Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
|
10
10
|
xml.doi_batch(crossref_root_attributes) do
|
11
11
|
xml.head do
|
12
12
|
# we use a uuid as batch_id
|
13
13
|
xml.doi_batch_id(SecureRandom.uuid)
|
14
14
|
xml.timestamp(Time.now.utc.strftime('%Y%m%d%H%M%S'))
|
15
15
|
xml.depositor do
|
16
|
-
xml.depositor_name(ENV
|
17
|
-
xml.email_address(ENV
|
16
|
+
xml.depositor_name(ENV.fetch('CROSSREF_DEPOSITOR_NAME', nil))
|
17
|
+
xml.email_address(ENV.fetch('CROSSREF_DEPOSITOR_EMAIL', nil))
|
18
18
|
end
|
19
|
-
xml.registrant(ENV
|
19
|
+
xml.registrant(ENV.fetch('CROSSREF_REGISTRANT', nil))
|
20
20
|
end
|
21
21
|
xml.body do
|
22
22
|
insert_crossref_work(xml)
|
@@ -26,10 +26,10 @@ module Briard
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def crossref_errors(xml: nil)
|
29
|
-
filepath = File.expand_path(
|
29
|
+
filepath = File.expand_path('../../resources/crossref/crossref5.3.1.xsd', __dir__)
|
30
30
|
schema = Nokogiri::XML::Schema(open(filepath))
|
31
31
|
|
32
|
-
schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map
|
32
|
+
schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map(&:to_s).unwrap
|
33
33
|
rescue Nokogiri::XML::SyntaxError => e
|
34
34
|
e.message
|
35
35
|
end
|
@@ -37,9 +37,10 @@ module Briard
|
|
37
37
|
def insert_crossref_work(xml)
|
38
38
|
return xml if doi.blank?
|
39
39
|
|
40
|
-
|
40
|
+
case types['resourceTypeGeneral']
|
41
|
+
when 'JournalArticle'
|
41
42
|
insert_journal(xml)
|
42
|
-
|
43
|
+
when 'Preprint'
|
43
44
|
insert_posted_content(xml)
|
44
45
|
end
|
45
46
|
end
|
@@ -47,15 +48,15 @@ module Briard
|
|
47
48
|
def insert_journal(xml)
|
48
49
|
xml.journal do
|
49
50
|
if language.present?
|
50
|
-
xml.journal_metadata(
|
51
|
-
xml.full_title(container[
|
51
|
+
xml.journal_metadata('language' => language) do
|
52
|
+
xml.full_title(container['title'])
|
52
53
|
end
|
53
54
|
else
|
54
55
|
xml.journal_metadata do
|
55
|
-
xml.full_title(container[
|
56
|
+
xml.full_title(container['title'])
|
56
57
|
end
|
57
58
|
end
|
58
|
-
xml.journal_article(
|
59
|
+
xml.journal_article('publication_type' => 'full_text') do
|
59
60
|
insert_crossref_titles(xml)
|
60
61
|
insert_crossref_creators(xml)
|
61
62
|
insert_crossref_publication_date(xml)
|
@@ -70,7 +71,7 @@ module Briard
|
|
70
71
|
end
|
71
72
|
|
72
73
|
def insert_posted_content(xml)
|
73
|
-
posted_content = {
|
74
|
+
posted_content = { 'type' => 'other', 'language' => language }.compact
|
74
75
|
|
75
76
|
xml.posted_content(posted_content) do
|
76
77
|
insert_group_title(xml)
|
@@ -89,29 +90,35 @@ module Briard
|
|
89
90
|
def insert_group_title(xml)
|
90
91
|
return xml if subjects.blank?
|
91
92
|
|
92
|
-
xml.group_title(subjects.first[
|
93
|
+
xml.group_title(subjects.first['subject'].titleize)
|
93
94
|
end
|
94
95
|
|
95
96
|
def insert_crossref_creators(xml)
|
96
97
|
xml.contributors do
|
97
98
|
Array.wrap(creators).each_with_index do |au, index|
|
98
|
-
xml.person_name(
|
99
|
-
|
99
|
+
xml.person_name('contributor_role' => 'author',
|
100
|
+
'sequence' => index.zero? ? 'first' : 'additional') do
|
101
|
+
insert_crossref_person(xml, au, 'author')
|
100
102
|
end
|
101
103
|
end
|
102
104
|
end
|
103
105
|
end
|
104
106
|
|
105
|
-
def insert_crossref_person(xml, person,
|
106
|
-
person_name = person[
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
107
|
+
def insert_crossref_person(xml, person, _type)
|
108
|
+
person_name = if person['familyName'].present?
|
109
|
+
[person['familyName'], person['givenName']].compact.join(', ')
|
110
|
+
else
|
111
|
+
person['name']
|
112
|
+
end
|
113
|
+
xml.given_name(person['givenName']) if person['givenName'].present?
|
114
|
+
xml.surname(person['familyName']) if person['familyName'].present?
|
115
|
+
if person.dig('nameIdentifiers', 0, 'nameIdentifierScheme') == 'ORCID'
|
116
|
+
xml.ORCID(person.dig('nameIdentifiers', 0, 'nameIdentifier'))
|
111
117
|
end
|
112
|
-
Array.wrap(person[
|
113
|
-
attributes = {
|
114
|
-
|
118
|
+
Array.wrap(person['affiliation']).each do |affiliation|
|
119
|
+
attributes = { 'affiliationIdentifier' => affiliation['affiliationIdentifier'],
|
120
|
+
'affiliationIdentifierScheme' => affiliation['affiliationIdentifierScheme'], 'schemeURI' => affiliation['schemeUri'] }.compact
|
121
|
+
xml.affiliation(affiliation['name'], attributes)
|
115
122
|
end
|
116
123
|
end
|
117
124
|
|
@@ -119,7 +126,7 @@ module Briard
|
|
119
126
|
xml.titles do
|
120
127
|
Array.wrap(titles).each do |title|
|
121
128
|
if title.is_a?(Hash)
|
122
|
-
xml.title(title[
|
129
|
+
xml.title(title['title'])
|
123
130
|
else
|
124
131
|
xml.title(title)
|
125
132
|
end
|
@@ -129,13 +136,13 @@ module Briard
|
|
129
136
|
|
130
137
|
def insert_citation_list(xml)
|
131
138
|
# filter out references
|
132
|
-
references = related_identifiers.find_all { |ri| ri[
|
139
|
+
references = related_identifiers.find_all { |ri| ri['relationType'] == 'References' }
|
133
140
|
return xml if references.blank?
|
134
141
|
|
135
142
|
xml.citation_list do
|
136
143
|
references.each do |ref|
|
137
144
|
xml.citation do
|
138
|
-
xml.doi(ref[
|
145
|
+
xml.doi(ref['relatedIdentifier'])
|
139
146
|
end
|
140
147
|
end
|
141
148
|
end
|
@@ -157,20 +164,24 @@ module Briard
|
|
157
164
|
# end
|
158
165
|
|
159
166
|
def insert_crossref_alternate_identifiers(xml)
|
160
|
-
alternate_identifier = Array.wrap(identifiers).
|
167
|
+
alternate_identifier = Array.wrap(identifiers).reject do |r|
|
168
|
+
r['identifierType'] == 'DOI'
|
169
|
+
end.first
|
161
170
|
return xml if alternate_identifier.blank?
|
162
171
|
|
163
|
-
xml.item_number(alternate_identifier[
|
172
|
+
xml.item_number(alternate_identifier['identifier'],
|
173
|
+
'item_number_type' => alternate_identifier['identifierType'])
|
164
174
|
end
|
165
175
|
|
166
176
|
def insert_crossref_access_indicators(xml)
|
167
177
|
return xml if rights_list.blank?
|
168
178
|
|
169
|
-
rights_uri = Array.wrap(rights_list).map { |l| l[
|
179
|
+
rights_uri = Array.wrap(rights_list).map { |l| l['rightsUri'] }.first
|
170
180
|
|
171
|
-
xml.program(
|
172
|
-
|
173
|
-
xml.license_ref(rights_uri,
|
181
|
+
xml.program('xmlns' => 'http://www.crossref.org/AccessIndicators.xsd',
|
182
|
+
'name' => 'AccessIndicators') do
|
183
|
+
xml.license_ref(rights_uri, 'applies_to' => 'vor')
|
184
|
+
xml.license_ref(rights_uri, 'applies_to' => 'tdm')
|
174
185
|
end
|
175
186
|
end
|
176
187
|
|
@@ -206,7 +217,7 @@ module Briard
|
|
206
217
|
xml.subjects do
|
207
218
|
subjects.each do |subject|
|
208
219
|
if subject.is_a?(Hash)
|
209
|
-
xml.subject(subject[
|
220
|
+
xml.subject(subject['subject'])
|
210
221
|
else
|
211
222
|
xml.subject(subject)
|
212
223
|
end
|
@@ -220,7 +231,6 @@ module Briard
|
|
220
231
|
# xml.version(version_info)
|
221
232
|
# end
|
222
233
|
|
223
|
-
|
224
234
|
def insert_crossref_language(xml)
|
225
235
|
return xml unless language.present?
|
226
236
|
|
@@ -231,8 +241,8 @@ module Briard
|
|
231
241
|
return xml if date_registered.blank?
|
232
242
|
|
233
243
|
date = get_datetime_from_iso8601(date_registered)
|
234
|
-
|
235
|
-
xml.publication_date(
|
244
|
+
|
245
|
+
xml.publication_date('media_type' => 'online') do
|
236
246
|
xml.month(date.month) if date.month.present?
|
237
247
|
xml.day(date.day) if date.day.present?
|
238
248
|
xml.year(date.year) if date.year.present?
|
@@ -240,7 +250,7 @@ module Briard
|
|
240
250
|
end
|
241
251
|
|
242
252
|
def insert_posted_date(xml)
|
243
|
-
date_posted = get_date(dates,
|
253
|
+
date_posted = get_date(dates, 'Issued')
|
244
254
|
return xml if date_posted.blank?
|
245
255
|
|
246
256
|
date = get_datetime_from_iso8601(date_posted)
|
@@ -266,9 +276,9 @@ module Briard
|
|
266
276
|
xml.doi_data do
|
267
277
|
xml.doi(doi)
|
268
278
|
xml.resource(url)
|
269
|
-
xml.collection(
|
279
|
+
xml.collection('property' => 'text-mining') do
|
270
280
|
xml.item do
|
271
|
-
xml.resource(url,
|
281
|
+
xml.resource(url, 'mime_type' => 'text/html')
|
272
282
|
end
|
273
283
|
end
|
274
284
|
end
|
@@ -283,25 +293,27 @@ module Briard
|
|
283
293
|
r = rights
|
284
294
|
else
|
285
295
|
r = {}
|
286
|
-
r[
|
287
|
-
r[
|
296
|
+
r['rights'] = rights
|
297
|
+
r['rightsUri'] = normalize_id(rights)
|
288
298
|
end
|
289
299
|
|
290
300
|
attributes = {
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
301
|
+
'rightsURI' => r['rightsUri'],
|
302
|
+
'rightsIdentifier' => r['rightsIdentifier'],
|
303
|
+
'rightsIdentifierScheme' => r['rightsIdentifierScheme'],
|
304
|
+
'schemeURI' => r['schemeUri'],
|
305
|
+
'xml:lang' => r['lang']
|
296
306
|
}.compact
|
297
307
|
|
298
|
-
xml.rights(r[
|
308
|
+
xml.rights(r['rights'], attributes)
|
299
309
|
end
|
300
310
|
end
|
301
311
|
end
|
302
312
|
|
303
313
|
def insert_crossref_issn(xml)
|
304
|
-
issn = container.to_h.fetch('identifierType', nil) ==
|
314
|
+
issn = if container.to_h.fetch('identifierType', nil) == 'ISSN'
|
315
|
+
container.to_h.fetch('identifier', nil)
|
316
|
+
end
|
305
317
|
|
306
318
|
return xml if issn.blank?
|
307
319
|
|
@@ -315,22 +327,22 @@ module Briard
|
|
315
327
|
d = descriptions.first
|
316
328
|
else
|
317
329
|
d = {}
|
318
|
-
d[
|
330
|
+
d['description'] = descriptions.first
|
319
331
|
end
|
320
332
|
|
321
|
-
xml.abstract(
|
322
|
-
xml.p(d[
|
333
|
+
xml.abstract('xmlns' => 'http://www.ncbi.nlm.nih.gov/JATS1') do
|
334
|
+
xml.p(d['description'])
|
323
335
|
end
|
324
336
|
end
|
325
337
|
|
326
338
|
def crossref_root_attributes
|
327
|
-
{
|
328
|
-
|
329
|
-
:
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
:
|
339
|
+
{ 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
|
340
|
+
'xsi:schemaLocation': 'http://www.crossref.org/schema/5.3.1 https://www.crossref.org/schemas/crossref5.3.1.xsd',
|
341
|
+
xmlns: 'http://www.crossref.org/schema/5.3.1',
|
342
|
+
'xmlns:jats': 'http://www.ncbi.nlm.nih.gov/JATS1',
|
343
|
+
'xmlns:fr': 'http://www.crossref.org/fundref.xsd',
|
344
|
+
'xmlns:mml': 'http://www.w3.org/1998/Math/MathML',
|
345
|
+
version: '5.3.1' }
|
334
346
|
end
|
335
347
|
end
|
336
|
-
end
|
348
|
+
end
|