briard 2.4.1 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +22 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +43 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -38
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +250 -160
  81. data/.github/workflows/release.yml +0 -47
data/lib/briard/cli.rb CHANGED
@@ -1,7 +1,6 @@
1
- # encoding: UTF-8
2
1
  # frozen_string_literal: true
3
2
 
4
- require "thor"
3
+ require 'thor'
5
4
 
6
5
  require_relative 'doi_utils'
7
6
  require_relative 'utils'
@@ -18,18 +17,18 @@ module Briard
18
17
  # from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
19
18
  map %w[--version -v] => :__print_version
20
19
 
21
- desc "--version, -v", "print the version"
20
+ desc '--version, -v', 'print the version'
22
21
  def __print_version
23
22
  puts Briard::VERSION
24
23
  end
25
24
 
26
- desc "", "convert metadata"
27
- method_option :from, aliases: "-f"
28
- method_option :to, aliases: "-t", default: "schema_org"
29
- method_option :regenerate, :type => :boolean, :force => false
30
- method_option :style, aliases: "-s", default: "apa"
31
- method_option :locale, aliases: "-l", default: "en-US"
32
- method_option :show_errors, :type => :boolean, :force => false
25
+ desc '', 'convert metadata'
26
+ method_option :from, aliases: '-f'
27
+ method_option :to, aliases: '-t', default: 'schema_org'
28
+ method_option :regenerate, type: :boolean, force: false
29
+ method_option :style, aliases: '-s', default: 'apa'
30
+ method_option :locale, aliases: '-l', default: 'en-US'
31
+ method_option :show_errors, type: :boolean, force: false
33
32
  method_option :depositor
34
33
  method_option :email
35
34
  method_option :registrant
@@ -44,16 +43,16 @@ module Briard
44
43
  depositor: options[:depositor],
45
44
  email: options[:email],
46
45
  registrant: options[:registrant])
47
- to = options[:to] || "schema_org"
46
+ to = options[:to] || 'schema_org'
48
47
 
49
48
  if options[:show_errors] && !metadata.valid?
50
- $stderr.puts metadata.errors
49
+ warn metadata.errors
51
50
  else
52
51
  puts metadata.send(to)
53
52
  end
54
53
  end
55
54
 
56
- desc "", "encode"
55
+ desc '', 'encode'
57
56
  def encode(prefix)
58
57
  puts encode_doi(prefix)
59
58
  end
data/lib/briard/crossref_utils.rb CHANGED
@@ -6,17 +6,17 @@ module Briard
6
6
  # variables CROSSREF_DEPOSITOR_NAME, CROSSREF_DEPOSITOR_EMAIL and CROSSREF_REGISTRANT,
7
7
  # e.g. in a .env file
8
8
  def crossref_xml
9
- @crossref_xml ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
9
+ @crossref_xml ||= Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
10
10
  xml.doi_batch(crossref_root_attributes) do
11
11
  xml.head do
12
12
  # we use a uuid as batch_id
13
13
  xml.doi_batch_id(SecureRandom.uuid)
14
14
  xml.timestamp(Time.now.utc.strftime('%Y%m%d%H%M%S'))
15
15
  xml.depositor do
16
- xml.depositor_name(ENV['CROSSREF_DEPOSITOR_NAME'])
17
- xml.email_address(ENV['CROSSREF_DEPOSITOR_EMAIL'])
16
+ xml.depositor_name(ENV.fetch('CROSSREF_DEPOSITOR_NAME', nil))
17
+ xml.email_address(ENV.fetch('CROSSREF_DEPOSITOR_EMAIL', nil))
18
18
  end
19
- xml.registrant(ENV['CROSSREF_REGISTRANT'])
19
+ xml.registrant(ENV.fetch('CROSSREF_REGISTRANT', nil))
20
20
  end
21
21
  xml.body do
22
22
  insert_crossref_work(xml)
@@ -26,10 +26,10 @@ module Briard
26
26
  end
27
27
 
28
28
  def crossref_errors(xml: nil)
29
- filepath = File.expand_path("../../../resources/crossref/crossref5.3.1.xsd", __FILE__)
29
+ filepath = File.expand_path('../../resources/crossref/crossref5.3.1.xsd', __dir__)
30
30
  schema = Nokogiri::XML::Schema(open(filepath))
31
31
 
32
- schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map { |error| error.to_s }.unwrap
32
+ schema.validate(Nokogiri::XML(xml, nil, 'UTF-8')).map(&:to_s).unwrap
33
33
  rescue Nokogiri::XML::SyntaxError => e
34
34
  e.message
35
35
  end
@@ -37,9 +37,10 @@ module Briard
37
37
  def insert_crossref_work(xml)
38
38
  return xml if doi.blank?
39
39
 
40
- if types["resourceTypeGeneral"] == "JournalArticle"
40
+ case types['resourceTypeGeneral']
41
+ when 'JournalArticle'
41
42
  insert_journal(xml)
42
- elsif types["resourceTypeGeneral"] == "Preprint"
43
+ when 'Preprint'
43
44
  insert_posted_content(xml)
44
45
  end
45
46
  end
@@ -47,15 +48,15 @@ module Briard
47
48
  def insert_journal(xml)
48
49
  xml.journal do
49
50
  if language.present?
50
- xml.journal_metadata("language" => language) do
51
- xml.full_title(container["title"])
51
+ xml.journal_metadata('language' => language) do
52
+ xml.full_title(container['title'])
52
53
  end
53
54
  else
54
55
  xml.journal_metadata do
55
- xml.full_title(container["title"])
56
+ xml.full_title(container['title'])
56
57
  end
57
58
  end
58
- xml.journal_article("publication_type" => "full_text") do
59
+ xml.journal_article('publication_type' => 'full_text') do
59
60
  insert_crossref_titles(xml)
60
61
  insert_crossref_creators(xml)
61
62
  insert_crossref_publication_date(xml)
@@ -70,7 +71,7 @@ module Briard
70
71
  end
71
72
 
72
73
  def insert_posted_content(xml)
73
- posted_content = { "type" => "other", "language" => language }.compact
74
+ posted_content = { 'type' => 'other', 'language' => language }.compact
74
75
 
75
76
  xml.posted_content(posted_content) do
76
77
  insert_group_title(xml)
@@ -89,29 +90,35 @@ module Briard
89
90
  def insert_group_title(xml)
90
91
  return xml if subjects.blank?
91
92
 
92
- xml.group_title(subjects.first["subject"].titleize)
93
+ xml.group_title(subjects.first['subject'].titleize)
93
94
  end
94
95
 
95
96
  def insert_crossref_creators(xml)
96
97
  xml.contributors do
97
98
  Array.wrap(creators).each_with_index do |au, index|
98
- xml.person_name("contributor_role" => "author", "sequence" => index == 0 ? "first" : "additional") do
99
- insert_crossref_person(xml, au, "author")
99
+ xml.person_name('contributor_role' => 'author',
100
+ 'sequence' => index.zero? ? 'first' : 'additional') do
101
+ insert_crossref_person(xml, au, 'author')
100
102
  end
101
103
  end
102
104
  end
103
105
  end
104
106
 
105
- def insert_crossref_person(xml, person, type)
106
- person_name = person["familyName"].present? ? [person["familyName"], person["givenName"]].compact.join(", ") : person["name"]
107
- xml.given_name(person["givenName"]) if person["givenName"].present?
108
- xml.surname(person["familyName"]) if person["familyName"].present?
109
- if person.dig("nameIdentifiers", 0, "nameIdentifierScheme") == "ORCID"
110
- xml.ORCID(person.dig("nameIdentifiers", 0, "nameIdentifier"))
107
+ def insert_crossref_person(xml, person, _type)
108
+ person_name = if person['familyName'].present?
109
+ [person['familyName'], person['givenName']].compact.join(', ')
110
+ else
111
+ person['name']
112
+ end
113
+ xml.given_name(person['givenName']) if person['givenName'].present?
114
+ xml.surname(person['familyName']) if person['familyName'].present?
115
+ if person.dig('nameIdentifiers', 0, 'nameIdentifierScheme') == 'ORCID'
116
+ xml.ORCID(person.dig('nameIdentifiers', 0, 'nameIdentifier'))
111
117
  end
112
- Array.wrap(person["affiliation"]).each do |affiliation|
113
- attributes = { "affiliationIdentifier" => affiliation["affiliationIdentifier"], "affiliationIdentifierScheme" => affiliation["affiliationIdentifierScheme"], "schemeURI" => affiliation["schemeUri"] }.compact
114
- xml.affiliation(affiliation["name"], attributes)
118
+ Array.wrap(person['affiliation']).each do |affiliation|
119
+ attributes = { 'affiliationIdentifier' => affiliation['affiliationIdentifier'],
120
+ 'affiliationIdentifierScheme' => affiliation['affiliationIdentifierScheme'], 'schemeURI' => affiliation['schemeUri'] }.compact
121
+ xml.affiliation(affiliation['name'], attributes)
115
122
  end
116
123
  end
117
124
 
@@ -119,7 +126,7 @@ module Briard
119
126
  xml.titles do
120
127
  Array.wrap(titles).each do |title|
121
128
  if title.is_a?(Hash)
122
- xml.title(title["title"])
129
+ xml.title(title['title'])
123
130
  else
124
131
  xml.title(title)
125
132
  end
@@ -129,13 +136,13 @@ module Briard
129
136
 
130
137
  def insert_citation_list(xml)
131
138
  # filter out references
132
- references = related_identifiers.find_all { |ri| ri["relationType"] == "References" }
139
+ references = related_identifiers.find_all { |ri| ri['relationType'] == 'References' }
133
140
  return xml if references.blank?
134
141
 
135
142
  xml.citation_list do
136
143
  references.each do |ref|
137
144
  xml.citation do
138
- xml.doi(ref["relatedIdentifier"])
145
+ xml.doi(ref['relatedIdentifier'])
139
146
  end
140
147
  end
141
148
  end
@@ -157,20 +164,24 @@ module Briard
157
164
  # end
158
165
 
159
166
  def insert_crossref_alternate_identifiers(xml)
160
- alternate_identifier = Array.wrap(identifiers).select { |r| r["identifierType"] != "DOI" }.first
167
+ alternate_identifier = Array.wrap(identifiers).reject do |r|
168
+ r['identifierType'] == 'DOI'
169
+ end.first
161
170
  return xml if alternate_identifier.blank?
162
171
 
163
- xml.item_number(alternate_identifier["identifier"], "item_number_type" => alternate_identifier["identifierType"])
172
+ xml.item_number(alternate_identifier['identifier'],
173
+ 'item_number_type' => alternate_identifier['identifierType'])
164
174
  end
165
175
 
166
176
  def insert_crossref_access_indicators(xml)
167
177
  return xml if rights_list.blank?
168
178
 
169
- rights_uri = Array.wrap(rights_list).map { |l| l["rightsUri"] }.first
179
+ rights_uri = Array.wrap(rights_list).map { |l| l['rightsUri'] }.first
170
180
 
171
- xml.program("xmlns" => "http://www.crossref.org/AccessIndicators.xsd", "name" => "AccessIndicators") do
172
- xml.license_ref(rights_uri, "applies_to" => "vor")
173
- xml.license_ref(rights_uri, "applies_to" => "tdm")
181
+ xml.program('xmlns' => 'http://www.crossref.org/AccessIndicators.xsd',
182
+ 'name' => 'AccessIndicators') do
183
+ xml.license_ref(rights_uri, 'applies_to' => 'vor')
184
+ xml.license_ref(rights_uri, 'applies_to' => 'tdm')
174
185
  end
175
186
  end
176
187
 
@@ -206,7 +217,7 @@ module Briard
206
217
  xml.subjects do
207
218
  subjects.each do |subject|
208
219
  if subject.is_a?(Hash)
209
- xml.subject(subject["subject"])
220
+ xml.subject(subject['subject'])
210
221
  else
211
222
  xml.subject(subject)
212
223
  end
@@ -220,7 +231,6 @@ module Briard
220
231
  # xml.version(version_info)
221
232
  # end
222
233
 
223
-
224
234
  def insert_crossref_language(xml)
225
235
  return xml unless language.present?
226
236
 
@@ -231,8 +241,8 @@ module Briard
231
241
  return xml if date_registered.blank?
232
242
 
233
243
  date = get_datetime_from_iso8601(date_registered)
234
-
235
- xml.publication_date("media_type" => "online") do
244
+
245
+ xml.publication_date('media_type' => 'online') do
236
246
  xml.month(date.month) if date.month.present?
237
247
  xml.day(date.day) if date.day.present?
238
248
  xml.year(date.year) if date.year.present?
@@ -240,7 +250,7 @@ module Briard
240
250
  end
241
251
 
242
252
  def insert_posted_date(xml)
243
- date_posted = get_date(dates, "Issued")
253
+ date_posted = get_date(dates, 'Issued')
244
254
  return xml if date_posted.blank?
245
255
 
246
256
  date = get_datetime_from_iso8601(date_posted)
@@ -266,9 +276,9 @@ module Briard
266
276
  xml.doi_data do
267
277
  xml.doi(doi)
268
278
  xml.resource(url)
269
- xml.collection("property" => "text-mining") do
279
+ xml.collection('property' => 'text-mining') do
270
280
  xml.item do
271
- xml.resource(url, "mime_type" => "text/html")
281
+ xml.resource(url, 'mime_type' => 'text/html')
272
282
  end
273
283
  end
274
284
  end
@@ -283,25 +293,27 @@ module Briard
283
293
  r = rights
284
294
  else
285
295
  r = {}
286
- r["rights"] = rights
287
- r["rightsUri"] = normalize_id(rights)
296
+ r['rights'] = rights
297
+ r['rightsUri'] = normalize_id(rights)
288
298
  end
289
299
 
290
300
  attributes = {
291
- "rightsURI" => r["rightsUri"],
292
- "rightsIdentifier" => r["rightsIdentifier"],
293
- "rightsIdentifierScheme" => r["rightsIdentifierScheme"],
294
- "schemeURI" => r["schemeUri"],
295
- "xml:lang" => r["lang"]
301
+ 'rightsURI' => r['rightsUri'],
302
+ 'rightsIdentifier' => r['rightsIdentifier'],
303
+ 'rightsIdentifierScheme' => r['rightsIdentifierScheme'],
304
+ 'schemeURI' => r['schemeUri'],
305
+ 'xml:lang' => r['lang']
296
306
  }.compact
297
307
 
298
- xml.rights(r["rights"], attributes)
308
+ xml.rights(r['rights'], attributes)
299
309
  end
300
310
  end
301
311
  end
302
312
 
303
313
  def insert_crossref_issn(xml)
304
- issn = container.to_h.fetch('identifierType', nil) == "ISSN" ? container.to_h.fetch('identifier', nil) : nil
314
+ issn = if container.to_h.fetch('identifierType', nil) == 'ISSN'
315
+ container.to_h.fetch('identifier', nil)
316
+ end
305
317
 
306
318
  return xml if issn.blank?
307
319
 
@@ -315,22 +327,22 @@ module Briard
315
327
  d = descriptions.first
316
328
  else
317
329
  d = {}
318
- d["description"] = descriptions.first
330
+ d['description'] = descriptions.first
319
331
  end
320
332
 
321
- xml.abstract("xmlns" => "http://www.ncbi.nlm.nih.gov/JATS1") do
322
- xml.p(d["description"])
333
+ xml.abstract('xmlns' => 'http://www.ncbi.nlm.nih.gov/JATS1') do
334
+ xml.p(d['description'])
323
335
  end
324
336
  end
325
337
 
326
338
  def crossref_root_attributes
327
- { :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
328
- :'xsi:schemaLocation' => 'http://www.crossref.org/schema/5.3.1 https://www.crossref.org/schemas/crossref5.3.1.xsd',
329
- :'xmlns' => 'http://www.crossref.org/schema/5.3.1',
330
- :'xmlns:jats' => 'http://www.ncbi.nlm.nih.gov/JATS1',
331
- :'xmlns:fr' => 'http://www.crossref.org/fundref.xsd',
332
- :'xmlns:mml' => 'http://www.w3.org/1998/Math/MathML',
333
- :'version' => '5.3.1' }
339
+ { 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
340
+ 'xsi:schemaLocation': 'http://www.crossref.org/schema/5.3.1 https://www.crossref.org/schemas/crossref5.3.1.xsd',
341
+ xmlns: 'http://www.crossref.org/schema/5.3.1',
342
+ 'xmlns:jats': 'http://www.ncbi.nlm.nih.gov/JATS1',
343
+ 'xmlns:fr': 'http://www.crossref.org/fundref.xsd',
344
+ 'xmlns:mml': 'http://www.w3.org/1998/Math/MathML',
345
+ version: '5.3.1' }
334
346
  end
335
347
  end
336
- end
348
+ end