briard 2.8.2 → 2.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/Gemfile.lock +30 -20
  4. data/briard.gemspec +1 -0
  5. data/lib/briard/metadata.rb +4 -2
  6. data/lib/briard/metadata_utils.rb +2 -0
  7. data/lib/briard/readers/datacite_reader.rb +1 -1
  8. data/lib/briard/readers/schema_org_reader.rb +24 -7
  9. data/lib/briard/schema_utils.rb +15 -0
  10. data/lib/briard/version.rb +1 -1
  11. data/resources/json-schema/briard_schema.json +462 -0
  12. data/spec/fixtures/crosscite.json +1 -1
  13. data/spec/fixtures/datacite.json +1 -1
  14. data/spec/fixtures/datacite_software.json +1 -1
  15. data/spec/fixtures/datacite_software_version.json +1 -1
  16. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting.yml +18 -18
  17. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting_with_new_DOI.yml +22 -22
  18. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/get_schema_org_metadata_front_matter/BlogPosting.yml +22 -22
  19. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/harvard_dataverse.yml +6 -6
  20. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/pangaea.yml +10 -10
  21. data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/zenodo.yml +8 -8
  22. data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/doi_not_found.yml +105 -0
  23. data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/is_valid.yml +65 -0
  24. data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_valid_/is_valid.yml +65 -0
  25. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/another_schema_org_from_front-matter.yml +22 -22
  26. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/journal_article_from_datacite.yml +6 -6
  27. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/posted_content.yml +9 -9
  28. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_another_science_blog.yml +9 -9
  29. data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_front_matter.yml +22 -22
  30. data/spec/metadata_spec.rb +2 -0
  31. data/spec/readers/crossref_json_reader_spec.rb +1 -1
  32. data/spec/readers/datacite_json_reader_spec.rb +2 -2
  33. data/spec/readers/datacite_reader_spec.rb +36 -36
  34. data/spec/readers/npm_reader_spec.rb +3 -3
  35. data/spec/readers/schema_org_reader_spec.rb +26 -3
  36. data/spec/schema_utils_spec.rb +24 -0
  37. data/spec/writers/crossref_writer_spec.rb +1 -1
  38. data/spec/writers/datacite_writer_spec.rb +6 -6
  39. metadata +22 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b82034f66939dab07cc20186304bcdbffdde06c43abc4c614ae2bae68e4d0f72
4
- data.tar.gz: b8f5d74bff45e5f016e7ec094860c632b01b66df5f104501b3c4bae6fc4adc8c
3
+ metadata.gz: 18ccea8677e7dd2e7c7fc253a23e060c8778f81780f55904ce5572c3d6c9d685
4
+ data.tar.gz: c9c42bd2d6fe73495da63b8280a2d06b172ad8013af605103d17c2c32523cc05
5
5
  SHA512:
6
- metadata.gz: 4d143feff904aef51b688359e9888d83ac627566febea50fcfd7d5dec001c0634f268a3918aa12c4813e4f2c2a8acbb190c75a9ff7d6bbb580e26925ca292c2f
7
- data.tar.gz: 4185d017bb16a13ee4e10a5e0b9335178e8b8bf36b6d951cc82976814176d0a254818235570c6806812f5e7b01b3026492044ca7923964185221e860abfc3ed5
6
+ metadata.gz: fac8077f5d561ad88c74be7f14da043e14a6dfb0a2ba36874905d39ac2f9a994adf4d7e21efd8885d1e9e8d7db21463970a5faf02b26366f3cb13efede9122bb
7
+ data.tar.gz: 48e8f3b859cf3a753eda735b579f6d6c43197fdc635ea2a215e12d0ef3078990ccf36ec0f215f09b7e846acf17427547b8acca172551b1d13bf94ab668ab5939
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## [v2.8.2](https://github.com/front-matter/briard/tree/v2.8.2) (2022-11-23)
4
+
5
+ [Full Changelog](https://github.com/front-matter/briard/compare/v2.8.0...v2.8.2)
6
+
3
7
  ## [v2.8.0](https://github.com/front-matter/briard/tree/v2.8.0) (2022-11-22)
4
8
 
5
9
  [Full Changelog](https://github.com/front-matter/briard/compare/v2.6.5...v2.8.0)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- briard (2.8.2)
4
+ briard (2.9.1)
5
5
  activesupport (>= 4.2.5, < 8.0)
6
6
  base32-url (>= 0.5.0, < 1)
7
7
  benchmark_methods (~> 0.7)
@@ -18,6 +18,7 @@ PATH
18
18
  gender_detector (~> 0.1.2)
19
19
  iso8601 (~> 0.9.1)
20
20
  json-ld-preloaded (~> 3.1, >= 3.1.3)
21
+ json_schemer (~> 0.2.23)
21
22
  jsonlint (~> 0.3.0)
22
23
  loofah (~> 2.19)
23
24
  maremma (>= 4.9.7, < 5)
@@ -75,6 +76,8 @@ GEM
75
76
  scanf (~> 1.0)
76
77
  sxp (~> 1.2)
77
78
  unicode-types (~> 1.7)
79
+ ecma-re-validator (0.4.0)
80
+ regexp_parser (~> 2.2)
78
81
  edtf (3.1.0)
79
82
  activesupport (>= 3.0, < 8.0)
80
83
  excon (0.71.1)
@@ -99,13 +102,14 @@ GEM
99
102
  haml (5.2.2)
100
103
  temple (>= 0.8.0)
101
104
  tilt
105
+ hana (1.3.7)
102
106
  hashdiff (1.0.1)
103
107
  htmlentities (4.3.4)
104
108
  i18n (1.12.0)
105
109
  concurrent-ruby (~> 1.0)
106
110
  iso8601 (0.9.1)
107
- json (2.6.2)
108
- json-canonicalization (0.3.0)
111
+ json (2.6.3)
112
+ json-canonicalization (0.3.1)
109
113
  json-ld (3.2.3)
110
114
  htmlentities (~> 4.3)
111
115
  json-canonicalization (~> 0.3)
@@ -116,12 +120,17 @@ GEM
116
120
  json-ld-preloaded (3.2.2)
117
121
  json-ld (~> 3.2)
118
122
  rdf (~> 3.2)
123
+ json_schemer (0.2.24)
124
+ ecma-re-validator (~> 0.3)
125
+ hana (~> 1.3)
126
+ regexp_parser (~> 2.0)
127
+ uri_template (~> 0.7)
119
128
  jsonlint (0.3.0)
120
129
  oj (~> 3)
121
130
  optimist (~> 3)
122
131
  latex-decode (0.4.0)
123
132
  link_header (0.0.8)
124
- loofah (2.19.0)
133
+ loofah (2.19.1)
125
134
  crass (~> 1.0.2)
126
135
  nokogiri (>= 1.5.9)
127
136
  maremma (4.9.9)
@@ -139,27 +148,27 @@ GEM
139
148
  oj (>= 2.8.3)
140
149
  oj_mimic_json (~> 1.0, >= 1.0.1)
141
150
  matrix (0.4.2)
142
- mini_portile2 (2.8.0)
143
- minitest (5.16.3)
151
+ mini_portile2 (2.8.1)
152
+ minitest (5.17.0)
144
153
  multi_json (1.15.0)
145
154
  multipart-post (2.2.3)
146
155
  namae (1.1.1)
147
- nokogiri (1.13.9)
156
+ nokogiri (1.13.10)
148
157
  mini_portile2 (~> 2.8.0)
149
158
  racc (~> 1.4)
150
159
  oj (3.13.23)
151
160
  oj_mimic_json (1.0.1)
152
161
  optimist (3.0.1)
153
162
  parallel (1.22.1)
154
- parser (3.1.2.1)
163
+ parser (3.2.0.0)
155
164
  ast (~> 2.4.1)
156
165
  postrank-uri (1.0.24)
157
166
  addressable (>= 2.4.0)
158
167
  nokogiri (>= 1.8.0)
159
168
  public_suffix (>= 2.0.0, < 2.1)
160
169
  public_suffix (2.0.5)
161
- racc (1.6.0)
162
- rack (2.2.4)
170
+ racc (1.6.2)
171
+ rack (2.2.5)
163
172
  rack-test (2.0.2)
164
173
  rack (>= 1.3)
165
174
  rainbow (3.1.1)
@@ -196,34 +205,34 @@ GEM
196
205
  rspec-mocks (~> 3.12.0)
197
206
  rspec-core (3.12.0)
198
207
  rspec-support (~> 3.12.0)
199
- rspec-expectations (3.12.0)
208
+ rspec-expectations (3.12.2)
200
209
  diff-lcs (>= 1.2.0, < 2.0)
201
210
  rspec-support (~> 3.12.0)
202
- rspec-mocks (3.12.0)
211
+ rspec-mocks (3.12.2)
203
212
  diff-lcs (>= 1.2.0, < 2.0)
204
213
  rspec-support (~> 3.12.0)
205
214
  rspec-support (3.12.0)
206
215
  rspec-xsd (0.1.0)
207
216
  nokogiri (~> 1.6)
208
217
  rspec (~> 3)
209
- rubocop (1.39.0)
218
+ rubocop (1.43.0)
210
219
  json (~> 2.3)
211
220
  parallel (~> 1.10)
212
- parser (>= 3.1.2.1)
221
+ parser (>= 3.2.0.0)
213
222
  rainbow (>= 2.2.2, < 4.0)
214
223
  regexp_parser (>= 1.8, < 3.0)
215
224
  rexml (>= 3.2.5, < 4.0)
216
- rubocop-ast (>= 1.23.0, < 2.0)
225
+ rubocop-ast (>= 1.24.1, < 2.0)
217
226
  ruby-progressbar (~> 1.7)
218
- unicode-display_width (>= 1.4.0, < 3.0)
219
- rubocop-ast (1.23.0)
227
+ unicode-display_width (>= 2.4.0, < 3.0)
228
+ rubocop-ast (1.24.1)
220
229
  parser (>= 3.1.1.0)
221
- rubocop-performance (1.15.1)
230
+ rubocop-performance (1.15.2)
222
231
  rubocop (>= 1.7.0, < 2.0)
223
232
  rubocop-ast (>= 0.4.0)
224
233
  rubocop-rake (0.6.0)
225
234
  rubocop (~> 1.0)
226
- rubocop-rspec (2.15.0)
235
+ rubocop-rspec (2.16.0)
227
236
  rubocop (~> 1.33)
228
237
  ruby-progressbar (1.11.0)
229
238
  ruby2_keywords (0.0.5)
@@ -242,9 +251,10 @@ GEM
242
251
  tilt (2.0.11)
243
252
  tzinfo (2.0.5)
244
253
  concurrent-ruby (~> 1.0)
245
- unicode-display_width (2.3.0)
254
+ unicode-display_width (2.4.2)
246
255
  unicode-types (1.8.0)
247
256
  unicode_utils (1.4.0)
257
+ uri_template (0.7.0)
248
258
  vcr (3.0.3)
249
259
  webmock (3.18.1)
250
260
  addressable (>= 2.8.0)
data/briard.gemspec CHANGED
@@ -35,6 +35,7 @@ Gem::Specification.new do |s|
35
35
  s.add_dependency 'iso8601', '~> 0.9.1'
36
36
  s.add_dependency 'json-ld-preloaded', '~> 3.1', '>= 3.1.3'
37
37
  s.add_dependency 'jsonlint', '~> 0.3.0'
38
+ s.add_dependency 'json_schemer', '~> 0.2.23'
38
39
  s.add_dependency 'loofah', '~> 2.19'
39
40
  s.add_dependency 'maremma', '>= 4.9.7', '< 5'
40
41
  s.add_dependency 'namae', '~> 1.0'
data/lib/briard/metadata.rb CHANGED
@@ -163,9 +163,11 @@ module Briard
163
163
  exists? && errors.nil?
164
164
  end
165
165
 
166
- # validate against DataCite schema, unless already errors in the reader
166
+ # Catch errors in the reader
167
+ # Then validate against JSON schema for internal metadata format
168
+ # Then validate against DataCite schema, unless already errors in the reader
167
169
  def errors
168
- meta.fetch('errors', nil) || datacite_errors(xml: datacite, schema_version: schema_version)
170
+ meta.fetch('errors', nil) || json_schema_errors || datacite_errors(xml: datacite, schema_version: schema_version)
169
171
  end
170
172
 
171
173
  def descriptions
data/lib/briard/metadata_utils.rb CHANGED
@@ -4,6 +4,7 @@ require_relative 'doi_utils'
4
4
  require_relative 'author_utils'
5
5
  require_relative 'crossref_utils'
6
6
  require_relative 'datacite_utils'
7
+ require_relative 'schema_utils'
7
8
  require_relative 'utils'
8
9
 
9
10
  require_relative 'readers/bibtex_reader'
@@ -42,6 +43,7 @@ module Briard
42
43
  include Briard::AuthorUtils
43
44
  include Briard::CrossrefUtils
44
45
  include Briard::DataciteUtils
46
+ include Briard::SchemaUtils
45
47
  include Briard::Utils
46
48
 
47
49
  include Briard::Readers::BibtexReader
data/lib/briard/readers/datacite_reader.rb CHANGED
@@ -310,7 +310,7 @@ module Briard
310
310
  'container' => set_container(meta),
311
311
  'publisher' => parse_attributes(meta.fetch('publisher', nil),
312
312
  first: true).to_s.strip.presence,
313
- 'agency' => 'datacite',
313
+ 'agency' => 'DataCite',
314
314
  'funding_references' => funding_references,
315
315
  'dates' => dates,
316
316
  'publication_year' => parse_attributes(meta.fetch('publicationYear', nil),
data/lib/briard/readers/schema_org_reader.rb CHANGED
@@ -43,12 +43,14 @@ module Briard
43
43
  link = doc.css("link[rel='canonical']")
44
44
  hsh['@id'] = link[0]['href'] if link.present?
45
45
 
46
- # workaround if license included but not with schema.org
47
- license = doc.at("meta[name='DCTERMS.license']")
46
+ # workaround if license not included with schema.org
47
+ license = doc.at("meta[name='dc.rights']")
48
48
  hsh['license'] = license['content'] if license.present?
49
49
 
50
50
  # workaround for html language attribute if no language is set via schema.org
51
- lang = doc.at('html')['lang']
51
+ lang = doc.at("meta[name='dc.language']") || doc.at("meta[name='citation_language']")
52
+ lang = lang['content'] if lang.present?
53
+ lang = doc.at('html')['lang'] if lang.blank?
52
54
  hsh['inLanguage'] = lang if hsh['inLanguage'].blank?
53
55
 
54
56
  # workaround if issn not included with schema.org
@@ -57,6 +59,20 @@ module Briard
57
59
  hsh['isPartOf'] = { 'name' => name ? name['content'] : nil,
58
60
  'issn' => issn ? issn['content'] : nil }.compact
59
61
 
62
+ # workaround if not all authors are included with schema.org (e.g. in Ghost metadata)
63
+ authors = doc.css("meta[name='citation_author']").map do |author|
64
+ { 'name' => author['content'] }
65
+ end
66
+ hsh['author'] = hsh['creator'] if hsh['author'].blank? && hsh['creator'].present?
67
+ hsh['author'] = authors if authors.length > Array.wrap(hsh['author']).length
68
+
69
+ # workaround if publisher not included with schema.org (e.g. Zenodo)
70
+ if hsh['publisher'].blank?
71
+ publisher = doc.at("meta[property='og:site_name']")
72
+ publisher = publisher['content'] if publisher.present?
73
+ hsh['publisher'] = { 'name' => publisher }
74
+ end
75
+
60
76
  string = hsh.to_json if hsh.present?
61
77
  end
62
78
 
@@ -106,7 +122,7 @@ module Briard
106
122
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch('editor',
107
123
  nil))))
108
124
  publisher = parse_attributes(meta.fetch('publisher', nil), content: 'name', first: true)
109
-
125
+
110
126
  ct = schema_org == 'Dataset' ? 'includedInDataCatalog' : 'Periodical'
111
127
  container = if meta.fetch(ct, nil).present?
112
128
  url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: 'url',
@@ -125,12 +141,13 @@ module Briard
125
141
  }.compact
126
142
  elsif %w[BlogPosting Article].include?(schema_org)
127
143
  issn = meta.dig('isPartOf', 'issn')
144
+ url = meta.dig('publisher', 'url')
128
145
 
129
146
  {
130
147
  'type' => 'Blog',
131
148
  'title' => meta.dig('isPartOf', 'name'),
132
- 'identifier' => issn,
133
- 'identifierType' => issn.present? ? 'ISSN' : nil
149
+ 'identifier' => issn.presence || url.presence,
150
+ 'identifierType' => issn.present? ? 'ISSN' : 'URL'
134
151
  }.compact
135
152
  else
136
153
  {}
@@ -249,7 +266,7 @@ module Briard
249
266
  [{ 'description' => sanitize(meta.fetch('description')),
250
267
  'descriptionType' => 'Abstract' }]
251
268
  end,
252
- 'rights_list' => rights_list,
269
+ 'rights_list' => rights_list.presence,
253
270
  'version_info' => meta.fetch('version', nil).to_s.presence,
254
271
  'subjects' => subjects,
255
272
  'language' => language,
data/lib/briard/schema_utils.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+ require 'json_schemer'
3
+ require 'pathname'
4
+
5
+ module Briard
6
+ module SchemaUtils
7
+ JSON_SCHEMA = schema = File.read(File.expand_path('../../resources/json-schema/briard_schema.json', __dir__))
8
+
9
+ def json_schema_errors
10
+ schemer = JSONSchemer.schema(JSON_SCHEMA)
11
+ errors = schemer.validate(self.meta).to_a
12
+ errors.map {|err| JSONSchemer::Errors.pretty err }.presence
13
+ end
14
+ end
15
+ end
data/lib/briard/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Briard
4
- VERSION = '2.8.2'
4
+ VERSION = '2.9.1'
5
5
  end