briard 2.8.2 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/Gemfile.lock +30 -20
  4. data/briard.gemspec +1 -0
  5. data/lib/briard/metadata.rb +4 -2
  6. data/lib/briard/metadata_utils.rb +2 -0
  7. data/lib/briard/readers/datacite_reader.rb +1 -1
  8. data/lib/briard/readers/schema_org_reader.rb +24 -7
  9. data/lib/briard/schema_utils.rb +15 -0
  10. data/lib/briard/version.rb +1 -1
  11. data/resources/json-schema/briard_schema.json +462 -0
  12. data/spec/fixtures/crosscite.json +1 -1
  13. data/spec/fixtures/datacite.json +1 -1
  14. data/spec/fixtures/datacite_software.json +1 -1
  15. data/spec/fixtures/datacite_software_version.json +1 -1
  16. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting.yml +18 -18
  17. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting_with_new_DOI.yml +22 -22
  18. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/get_schema_org_metadata_front_matter/BlogPosting.yml +22 -22
  19. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/harvard_dataverse.yml +6 -6
  20. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/pangaea.yml +10 -10
  21. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/zenodo.yml +8 -8
  22. data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/doi_not_found.yml +105 -0
  23. data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/is_valid.yml +65 -0
  24. data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_valid_/is_valid.yml +65 -0
  25. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/another_schema_org_from_front-matter.yml +22 -22
  26. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/journal_article_from_datacite.yml +6 -6
  27. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/posted_content.yml +9 -9
  28. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_another_science_blog.yml +9 -9
  29. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_front_matter.yml +22 -22
  30. data/spec/metadata_spec.rb +2 -0
  31. data/spec/readers/crossref_json_reader_spec.rb +1 -1
  32. data/spec/readers/datacite_json_reader_spec.rb +2 -2
  33. data/spec/readers/datacite_reader_spec.rb +36 -36
  34. data/spec/readers/npm_reader_spec.rb +3 -3
  35. data/spec/readers/schema_org_reader_spec.rb +26 -3
  36. data/spec/schema_utils_spec.rb +24 -0
  37. data/spec/writers/crossref_writer_spec.rb +1 -1
  38. data/spec/writers/datacite_writer_spec.rb +6 -6
  39. metadata +22 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b82034f66939dab07cc20186304bcdbffdde06c43abc4c614ae2bae68e4d0f72
4
- data.tar.gz: b8f5d74bff45e5f016e7ec094860c632b01b66df5f104501b3c4bae6fc4adc8c
3
+ metadata.gz: 18ccea8677e7dd2e7c7fc253a23e060c8778f81780f55904ce5572c3d6c9d685
4
+ data.tar.gz: c9c42bd2d6fe73495da63b8280a2d06b172ad8013af605103d17c2c32523cc05
5
5
  SHA512:
6
- metadata.gz: 4d143feff904aef51b688359e9888d83ac627566febea50fcfd7d5dec001c0634f268a3918aa12c4813e4f2c2a8acbb190c75a9ff7d6bbb580e26925ca292c2f
7
- data.tar.gz: 4185d017bb16a13ee4e10a5e0b9335178e8b8bf36b6d951cc82976814176d0a254818235570c6806812f5e7b01b3026492044ca7923964185221e860abfc3ed5
6
+ metadata.gz: fac8077f5d561ad88c74be7f14da043e14a6dfb0a2ba36874905d39ac2f9a994adf4d7e21efd8885d1e9e8d7db21463970a5faf02b26366f3cb13efede9122bb
7
+ data.tar.gz: 48e8f3b859cf3a753eda735b579f6d6c43197fdc635ea2a215e12d0ef3078990ccf36ec0f215f09b7e846acf17427547b8acca172551b1d13bf94ab668ab5939
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## [v2.8.2](https://github.com/front-matter/briard/tree/v2.8.2) (2022-11-23)
4
+
5
+ [Full Changelog](https://github.com/front-matter/briard/compare/v2.8.0...v2.8.2)
6
+
3
7
  ## [v2.8.0](https://github.com/front-matter/briard/tree/v2.8.0) (2022-11-22)
4
8
 
5
9
  [Full Changelog](https://github.com/front-matter/briard/compare/v2.6.5...v2.8.0)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- briard (2.8.2)
4
+ briard (2.9.1)
5
5
  activesupport (>= 4.2.5, < 8.0)
6
6
  base32-url (>= 0.5.0, < 1)
7
7
  benchmark_methods (~> 0.7)
@@ -18,6 +18,7 @@ PATH
18
18
  gender_detector (~> 0.1.2)
19
19
  iso8601 (~> 0.9.1)
20
20
  json-ld-preloaded (~> 3.1, >= 3.1.3)
21
+ json_schemer (~> 0.2.23)
21
22
  jsonlint (~> 0.3.0)
22
23
  loofah (~> 2.19)
23
24
  maremma (>= 4.9.7, < 5)
@@ -75,6 +76,8 @@ GEM
75
76
  scanf (~> 1.0)
76
77
  sxp (~> 1.2)
77
78
  unicode-types (~> 1.7)
79
+ ecma-re-validator (0.4.0)
80
+ regexp_parser (~> 2.2)
78
81
  edtf (3.1.0)
79
82
  activesupport (>= 3.0, < 8.0)
80
83
  excon (0.71.1)
@@ -99,13 +102,14 @@ GEM
99
102
  haml (5.2.2)
100
103
  temple (>= 0.8.0)
101
104
  tilt
105
+ hana (1.3.7)
102
106
  hashdiff (1.0.1)
103
107
  htmlentities (4.3.4)
104
108
  i18n (1.12.0)
105
109
  concurrent-ruby (~> 1.0)
106
110
  iso8601 (0.9.1)
107
- json (2.6.2)
108
- json-canonicalization (0.3.0)
111
+ json (2.6.3)
112
+ json-canonicalization (0.3.1)
109
113
  json-ld (3.2.3)
110
114
  htmlentities (~> 4.3)
111
115
  json-canonicalization (~> 0.3)
@@ -116,12 +120,17 @@ GEM
116
120
  json-ld-preloaded (3.2.2)
117
121
  json-ld (~> 3.2)
118
122
  rdf (~> 3.2)
123
+ json_schemer (0.2.24)
124
+ ecma-re-validator (~> 0.3)
125
+ hana (~> 1.3)
126
+ regexp_parser (~> 2.0)
127
+ uri_template (~> 0.7)
119
128
  jsonlint (0.3.0)
120
129
  oj (~> 3)
121
130
  optimist (~> 3)
122
131
  latex-decode (0.4.0)
123
132
  link_header (0.0.8)
124
- loofah (2.19.0)
133
+ loofah (2.19.1)
125
134
  crass (~> 1.0.2)
126
135
  nokogiri (>= 1.5.9)
127
136
  maremma (4.9.9)
@@ -139,27 +148,27 @@ GEM
139
148
  oj (>= 2.8.3)
140
149
  oj_mimic_json (~> 1.0, >= 1.0.1)
141
150
  matrix (0.4.2)
142
- mini_portile2 (2.8.0)
143
- minitest (5.16.3)
151
+ mini_portile2 (2.8.1)
152
+ minitest (5.17.0)
144
153
  multi_json (1.15.0)
145
154
  multipart-post (2.2.3)
146
155
  namae (1.1.1)
147
- nokogiri (1.13.9)
156
+ nokogiri (1.13.10)
148
157
  mini_portile2 (~> 2.8.0)
149
158
  racc (~> 1.4)
150
159
  oj (3.13.23)
151
160
  oj_mimic_json (1.0.1)
152
161
  optimist (3.0.1)
153
162
  parallel (1.22.1)
154
- parser (3.1.2.1)
163
+ parser (3.2.0.0)
155
164
  ast (~> 2.4.1)
156
165
  postrank-uri (1.0.24)
157
166
  addressable (>= 2.4.0)
158
167
  nokogiri (>= 1.8.0)
159
168
  public_suffix (>= 2.0.0, < 2.1)
160
169
  public_suffix (2.0.5)
161
- racc (1.6.0)
162
- rack (2.2.4)
170
+ racc (1.6.2)
171
+ rack (2.2.5)
163
172
  rack-test (2.0.2)
164
173
  rack (>= 1.3)
165
174
  rainbow (3.1.1)
@@ -196,34 +205,34 @@ GEM
196
205
  rspec-mocks (~> 3.12.0)
197
206
  rspec-core (3.12.0)
198
207
  rspec-support (~> 3.12.0)
199
- rspec-expectations (3.12.0)
208
+ rspec-expectations (3.12.2)
200
209
  diff-lcs (>= 1.2.0, < 2.0)
201
210
  rspec-support (~> 3.12.0)
202
- rspec-mocks (3.12.0)
211
+ rspec-mocks (3.12.2)
203
212
  diff-lcs (>= 1.2.0, < 2.0)
204
213
  rspec-support (~> 3.12.0)
205
214
  rspec-support (3.12.0)
206
215
  rspec-xsd (0.1.0)
207
216
  nokogiri (~> 1.6)
208
217
  rspec (~> 3)
209
- rubocop (1.39.0)
218
+ rubocop (1.43.0)
210
219
  json (~> 2.3)
211
220
  parallel (~> 1.10)
212
- parser (>= 3.1.2.1)
221
+ parser (>= 3.2.0.0)
213
222
  rainbow (>= 2.2.2, < 4.0)
214
223
  regexp_parser (>= 1.8, < 3.0)
215
224
  rexml (>= 3.2.5, < 4.0)
216
- rubocop-ast (>= 1.23.0, < 2.0)
225
+ rubocop-ast (>= 1.24.1, < 2.0)
217
226
  ruby-progressbar (~> 1.7)
218
- unicode-display_width (>= 1.4.0, < 3.0)
219
- rubocop-ast (1.23.0)
227
+ unicode-display_width (>= 2.4.0, < 3.0)
228
+ rubocop-ast (1.24.1)
220
229
  parser (>= 3.1.1.0)
221
- rubocop-performance (1.15.1)
230
+ rubocop-performance (1.15.2)
222
231
  rubocop (>= 1.7.0, < 2.0)
223
232
  rubocop-ast (>= 0.4.0)
224
233
  rubocop-rake (0.6.0)
225
234
  rubocop (~> 1.0)
226
- rubocop-rspec (2.15.0)
235
+ rubocop-rspec (2.16.0)
227
236
  rubocop (~> 1.33)
228
237
  ruby-progressbar (1.11.0)
229
238
  ruby2_keywords (0.0.5)
@@ -242,9 +251,10 @@ GEM
242
251
  tilt (2.0.11)
243
252
  tzinfo (2.0.5)
244
253
  concurrent-ruby (~> 1.0)
245
- unicode-display_width (2.3.0)
254
+ unicode-display_width (2.4.2)
246
255
  unicode-types (1.8.0)
247
256
  unicode_utils (1.4.0)
257
+ uri_template (0.7.0)
248
258
  vcr (3.0.3)
249
259
  webmock (3.18.1)
250
260
  addressable (>= 2.8.0)
data/briard.gemspec CHANGED
@@ -35,6 +35,7 @@ Gem::Specification.new do |s|
35
35
  s.add_dependency 'iso8601', '~> 0.9.1'
36
36
  s.add_dependency 'json-ld-preloaded', '~> 3.1', '>= 3.1.3'
37
37
  s.add_dependency 'jsonlint', '~> 0.3.0'
38
+ s.add_dependency 'json_schemer', '~> 0.2.23'
38
39
  s.add_dependency 'loofah', '~> 2.19'
39
40
  s.add_dependency 'maremma', '>= 4.9.7', '< 5'
40
41
  s.add_dependency 'namae', '~> 1.0'
data/lib/briard/metadata.rb CHANGED
@@ -163,9 +163,11 @@ module Briard
163
163
  exists? && errors.nil?
164
164
  end
165
165
 
166
- # validate against DataCite schema, unless already errors in the reader
166
+ # Catch errors in the reader
167
+ # Then validate against JSON schema for internal metadata format
168
+ # Then validate against DataCite schema, unless already errors in the reader
167
169
  def errors
168
- meta.fetch('errors', nil) || datacite_errors(xml: datacite, schema_version: schema_version)
170
+ meta.fetch('errors', nil) || json_schema_errors || datacite_errors(xml: datacite, schema_version: schema_version)
169
171
  end
170
172
 
171
173
  def descriptions
data/lib/briard/metadata_utils.rb CHANGED
@@ -4,6 +4,7 @@ require_relative 'doi_utils'
4
4
  require_relative 'author_utils'
5
5
  require_relative 'crossref_utils'
6
6
  require_relative 'datacite_utils'
7
+ require_relative 'schema_utils'
7
8
  require_relative 'utils'
8
9
 
9
10
  require_relative 'readers/bibtex_reader'
@@ -42,6 +43,7 @@ module Briard
42
43
  include Briard::AuthorUtils
43
44
  include Briard::CrossrefUtils
44
45
  include Briard::DataciteUtils
46
+ include Briard::SchemaUtils
45
47
  include Briard::Utils
46
48
 
47
49
  include Briard::Readers::BibtexReader
data/lib/briard/readers/datacite_reader.rb CHANGED
@@ -310,7 +310,7 @@ module Briard
310
310
  'container' => set_container(meta),
311
311
  'publisher' => parse_attributes(meta.fetch('publisher', nil),
312
312
  first: true).to_s.strip.presence,
313
- 'agency' => 'datacite',
313
+ 'agency' => 'DataCite',
314
314
  'funding_references' => funding_references,
315
315
  'dates' => dates,
316
316
  'publication_year' => parse_attributes(meta.fetch('publicationYear', nil),
data/lib/briard/readers/schema_org_reader.rb CHANGED
@@ -43,12 +43,14 @@ module Briard
43
43
  link = doc.css("link[rel='canonical']")
44
44
  hsh['@id'] = link[0]['href'] if link.present?
45
45
 
46
- # workaround if license included but not with schema.org
47
- license = doc.at("meta[name='DCTERMS.license']")
46
+ # workaround if license not included with schema.org
47
+ license = doc.at("meta[name='dc.rights']")
48
48
  hsh['license'] = license['content'] if license.present?
49
49
 
50
50
  # workaround for html language attribute if no language is set via schema.org
51
- lang = doc.at('html')['lang']
51
+ lang = doc.at("meta[name='dc.language']") || doc.at("meta[name='citation_language']")
52
+ lang = lang['content'] if lang.present?
53
+ lang = doc.at('html')['lang'] if lang.blank?
52
54
  hsh['inLanguage'] = lang if hsh['inLanguage'].blank?
53
55
 
54
56
  # workaround if issn not included with schema.org
@@ -57,6 +59,20 @@ module Briard
57
59
  hsh['isPartOf'] = { 'name' => name ? name['content'] : nil,
58
60
  'issn' => issn ? issn['content'] : nil }.compact
59
61
 
62
+ # workaround if not all authors are included with schema.org (e.g. in Ghost metadata)
63
+ authors = doc.css("meta[name='citation_author']").map do |author|
64
+ { 'name' => author['content'] }
65
+ end
66
+ hsh['author'] = hsh['creator'] if hsh['author'].blank? && hsh['creator'].present?
67
+ hsh['author'] = authors if authors.length > Array.wrap(hsh['author']).length
68
+
69
+ # workaround if publisher not included with schema.org (e.g. Zenodo)
70
+ if hsh['publisher'].blank?
71
+ publisher = doc.at("meta[property='og:site_name']")
72
+ publisher = publisher['content'] if publisher.present?
73
+ hsh['publisher'] = { 'name' => publisher }
74
+ end
75
+
60
76
  string = hsh.to_json if hsh.present?
61
77
  end
62
78
 
@@ -106,7 +122,7 @@ module Briard
106
122
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch('editor',
107
123
  nil))))
108
124
  publisher = parse_attributes(meta.fetch('publisher', nil), content: 'name', first: true)
109
-
125
+
110
126
  ct = schema_org == 'Dataset' ? 'includedInDataCatalog' : 'Periodical'
111
127
  container = if meta.fetch(ct, nil).present?
112
128
  url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: 'url',
@@ -125,12 +141,13 @@ module Briard
125
141
  }.compact
126
142
  elsif %w[BlogPosting Article].include?(schema_org)
127
143
  issn = meta.dig('isPartOf', 'issn')
144
+ url = meta.dig('publisher', 'url')
128
145
 
129
146
  {
130
147
  'type' => 'Blog',
131
148
  'title' => meta.dig('isPartOf', 'name'),
132
- 'identifier' => issn,
133
- 'identifierType' => issn.present? ? 'ISSN' : nil
149
+ 'identifier' => issn.presence || url.presence,
150
+ 'identifierType' => issn.present? ? 'ISSN' : 'URL'
134
151
  }.compact
135
152
  else
136
153
  {}
@@ -249,7 +266,7 @@ module Briard
249
266
  [{ 'description' => sanitize(meta.fetch('description')),
250
267
  'descriptionType' => 'Abstract' }]
251
268
  end,
252
- 'rights_list' => rights_list,
269
+ 'rights_list' => rights_list.presence,
253
270
  'version_info' => meta.fetch('version', nil).to_s.presence,
254
271
  'subjects' => subjects,
255
272
  'language' => language,
data/lib/briard/schema_utils.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+ require 'json_schemer'
3
+ require 'pathname'
4
+
5
+ module Briard
6
+ module SchemaUtils
7
+ JSON_SCHEMA = schema = File.read(File.expand_path('../../resources/json-schema/briard_schema.json', __dir__))
8
+
9
+ def json_schema_errors
10
+ schemer = JSONSchemer.schema(JSON_SCHEMA)
11
+ errors = schemer.validate(self.meta).to_a
12
+ errors.map {|err| JSONSchemer::Errors.pretty err }.presence
13
+ end
14
+ end
15
+ end
data/lib/briard/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Briard
4
- VERSION = '2.8.2'
4
+ VERSION = '2.9.1'
5
5
  end