briard 2.8.2 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +30 -20
- data/briard.gemspec +1 -0
- data/lib/briard/metadata.rb +4 -2
- data/lib/briard/metadata_utils.rb +2 -0
- data/lib/briard/readers/datacite_reader.rb +1 -1
- data/lib/briard/readers/schema_org_reader.rb +24 -7
- data/lib/briard/schema_utils.rb +15 -0
- data/lib/briard/version.rb +1 -1
- data/resources/json-schema/briard_schema.json +462 -0
- data/spec/fixtures/crosscite.json +1 -1
- data/spec/fixtures/datacite.json +1 -1
- data/spec/fixtures/datacite_software.json +1 -1
- data/spec/fixtures/datacite_software_version.json +1 -1
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting.yml +18 -18
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting_with_new_DOI.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/get_schema_org_metadata_front_matter/BlogPosting.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/harvard_dataverse.yml +6 -6
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/pangaea.yml +10 -10
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/zenodo.yml +8 -8
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/doi_not_found.yml +105 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/is_valid.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_valid_/is_valid.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/another_schema_org_from_front-matter.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/journal_article_from_datacite.yml +6 -6
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/posted_content.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_another_science_blog.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_front_matter.yml +22 -22
- data/spec/metadata_spec.rb +2 -0
- data/spec/readers/crossref_json_reader_spec.rb +1 -1
- data/spec/readers/datacite_json_reader_spec.rb +2 -2
- data/spec/readers/datacite_reader_spec.rb +36 -36
- data/spec/readers/npm_reader_spec.rb +3 -3
- data/spec/readers/schema_org_reader_spec.rb +26 -3
- data/spec/schema_utils_spec.rb +24 -0
- data/spec/writers/crossref_writer_spec.rb +1 -1
- data/spec/writers/datacite_writer_spec.rb +6 -6
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 18ccea8677e7dd2e7c7fc253a23e060c8778f81780f55904ce5572c3d6c9d685
|
4
|
+
data.tar.gz: c9c42bd2d6fe73495da63b8280a2d06b172ad8013af605103d17c2c32523cc05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fac8077f5d561ad88c74be7f14da043e14a6dfb0a2ba36874905d39ac2f9a994adf4d7e21efd8885d1e9e8d7db21463970a5faf02b26366f3cb13efede9122bb
|
7
|
+
data.tar.gz: 48e8f3b859cf3a753eda735b579f6d6c43197fdc635ea2a215e12d0ef3078990ccf36ec0f215f09b7e846acf17427547b8acca172551b1d13bf94ab668ab5939
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v2.8.2](https://github.com/front-matter/briard/tree/v2.8.2) (2022-11-23)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/front-matter/briard/compare/v2.8.0...v2.8.2)
|
6
|
+
|
3
7
|
## [v2.8.0](https://github.com/front-matter/briard/tree/v2.8.0) (2022-11-22)
|
4
8
|
|
5
9
|
[Full Changelog](https://github.com/front-matter/briard/compare/v2.6.5...v2.8.0)
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
briard (2.
|
4
|
+
briard (2.9.1)
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
6
6
|
base32-url (>= 0.5.0, < 1)
|
7
7
|
benchmark_methods (~> 0.7)
|
@@ -18,6 +18,7 @@ PATH
|
|
18
18
|
gender_detector (~> 0.1.2)
|
19
19
|
iso8601 (~> 0.9.1)
|
20
20
|
json-ld-preloaded (~> 3.1, >= 3.1.3)
|
21
|
+
json_schemer (~> 0.2.23)
|
21
22
|
jsonlint (~> 0.3.0)
|
22
23
|
loofah (~> 2.19)
|
23
24
|
maremma (>= 4.9.7, < 5)
|
@@ -75,6 +76,8 @@ GEM
|
|
75
76
|
scanf (~> 1.0)
|
76
77
|
sxp (~> 1.2)
|
77
78
|
unicode-types (~> 1.7)
|
79
|
+
ecma-re-validator (0.4.0)
|
80
|
+
regexp_parser (~> 2.2)
|
78
81
|
edtf (3.1.0)
|
79
82
|
activesupport (>= 3.0, < 8.0)
|
80
83
|
excon (0.71.1)
|
@@ -99,13 +102,14 @@ GEM
|
|
99
102
|
haml (5.2.2)
|
100
103
|
temple (>= 0.8.0)
|
101
104
|
tilt
|
105
|
+
hana (1.3.7)
|
102
106
|
hashdiff (1.0.1)
|
103
107
|
htmlentities (4.3.4)
|
104
108
|
i18n (1.12.0)
|
105
109
|
concurrent-ruby (~> 1.0)
|
106
110
|
iso8601 (0.9.1)
|
107
|
-
json (2.6.
|
108
|
-
json-canonicalization (0.3.
|
111
|
+
json (2.6.3)
|
112
|
+
json-canonicalization (0.3.1)
|
109
113
|
json-ld (3.2.3)
|
110
114
|
htmlentities (~> 4.3)
|
111
115
|
json-canonicalization (~> 0.3)
|
@@ -116,12 +120,17 @@ GEM
|
|
116
120
|
json-ld-preloaded (3.2.2)
|
117
121
|
json-ld (~> 3.2)
|
118
122
|
rdf (~> 3.2)
|
123
|
+
json_schemer (0.2.24)
|
124
|
+
ecma-re-validator (~> 0.3)
|
125
|
+
hana (~> 1.3)
|
126
|
+
regexp_parser (~> 2.0)
|
127
|
+
uri_template (~> 0.7)
|
119
128
|
jsonlint (0.3.0)
|
120
129
|
oj (~> 3)
|
121
130
|
optimist (~> 3)
|
122
131
|
latex-decode (0.4.0)
|
123
132
|
link_header (0.0.8)
|
124
|
-
loofah (2.19.
|
133
|
+
loofah (2.19.1)
|
125
134
|
crass (~> 1.0.2)
|
126
135
|
nokogiri (>= 1.5.9)
|
127
136
|
maremma (4.9.9)
|
@@ -139,27 +148,27 @@ GEM
|
|
139
148
|
oj (>= 2.8.3)
|
140
149
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
141
150
|
matrix (0.4.2)
|
142
|
-
mini_portile2 (2.8.
|
143
|
-
minitest (5.
|
151
|
+
mini_portile2 (2.8.1)
|
152
|
+
minitest (5.17.0)
|
144
153
|
multi_json (1.15.0)
|
145
154
|
multipart-post (2.2.3)
|
146
155
|
namae (1.1.1)
|
147
|
-
nokogiri (1.13.
|
156
|
+
nokogiri (1.13.10)
|
148
157
|
mini_portile2 (~> 2.8.0)
|
149
158
|
racc (~> 1.4)
|
150
159
|
oj (3.13.23)
|
151
160
|
oj_mimic_json (1.0.1)
|
152
161
|
optimist (3.0.1)
|
153
162
|
parallel (1.22.1)
|
154
|
-
parser (3.
|
163
|
+
parser (3.2.0.0)
|
155
164
|
ast (~> 2.4.1)
|
156
165
|
postrank-uri (1.0.24)
|
157
166
|
addressable (>= 2.4.0)
|
158
167
|
nokogiri (>= 1.8.0)
|
159
168
|
public_suffix (>= 2.0.0, < 2.1)
|
160
169
|
public_suffix (2.0.5)
|
161
|
-
racc (1.6.
|
162
|
-
rack (2.2.
|
170
|
+
racc (1.6.2)
|
171
|
+
rack (2.2.5)
|
163
172
|
rack-test (2.0.2)
|
164
173
|
rack (>= 1.3)
|
165
174
|
rainbow (3.1.1)
|
@@ -196,34 +205,34 @@ GEM
|
|
196
205
|
rspec-mocks (~> 3.12.0)
|
197
206
|
rspec-core (3.12.0)
|
198
207
|
rspec-support (~> 3.12.0)
|
199
|
-
rspec-expectations (3.12.
|
208
|
+
rspec-expectations (3.12.2)
|
200
209
|
diff-lcs (>= 1.2.0, < 2.0)
|
201
210
|
rspec-support (~> 3.12.0)
|
202
|
-
rspec-mocks (3.12.
|
211
|
+
rspec-mocks (3.12.2)
|
203
212
|
diff-lcs (>= 1.2.0, < 2.0)
|
204
213
|
rspec-support (~> 3.12.0)
|
205
214
|
rspec-support (3.12.0)
|
206
215
|
rspec-xsd (0.1.0)
|
207
216
|
nokogiri (~> 1.6)
|
208
217
|
rspec (~> 3)
|
209
|
-
rubocop (1.
|
218
|
+
rubocop (1.43.0)
|
210
219
|
json (~> 2.3)
|
211
220
|
parallel (~> 1.10)
|
212
|
-
parser (>= 3.
|
221
|
+
parser (>= 3.2.0.0)
|
213
222
|
rainbow (>= 2.2.2, < 4.0)
|
214
223
|
regexp_parser (>= 1.8, < 3.0)
|
215
224
|
rexml (>= 3.2.5, < 4.0)
|
216
|
-
rubocop-ast (>= 1.
|
225
|
+
rubocop-ast (>= 1.24.1, < 2.0)
|
217
226
|
ruby-progressbar (~> 1.7)
|
218
|
-
unicode-display_width (>=
|
219
|
-
rubocop-ast (1.
|
227
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
228
|
+
rubocop-ast (1.24.1)
|
220
229
|
parser (>= 3.1.1.0)
|
221
|
-
rubocop-performance (1.15.
|
230
|
+
rubocop-performance (1.15.2)
|
222
231
|
rubocop (>= 1.7.0, < 2.0)
|
223
232
|
rubocop-ast (>= 0.4.0)
|
224
233
|
rubocop-rake (0.6.0)
|
225
234
|
rubocop (~> 1.0)
|
226
|
-
rubocop-rspec (2.
|
235
|
+
rubocop-rspec (2.16.0)
|
227
236
|
rubocop (~> 1.33)
|
228
237
|
ruby-progressbar (1.11.0)
|
229
238
|
ruby2_keywords (0.0.5)
|
@@ -242,9 +251,10 @@ GEM
|
|
242
251
|
tilt (2.0.11)
|
243
252
|
tzinfo (2.0.5)
|
244
253
|
concurrent-ruby (~> 1.0)
|
245
|
-
unicode-display_width (2.
|
254
|
+
unicode-display_width (2.4.2)
|
246
255
|
unicode-types (1.8.0)
|
247
256
|
unicode_utils (1.4.0)
|
257
|
+
uri_template (0.7.0)
|
248
258
|
vcr (3.0.3)
|
249
259
|
webmock (3.18.1)
|
250
260
|
addressable (>= 2.8.0)
|
data/briard.gemspec
CHANGED
@@ -35,6 +35,7 @@ Gem::Specification.new do |s|
|
|
35
35
|
s.add_dependency 'iso8601', '~> 0.9.1'
|
36
36
|
s.add_dependency 'json-ld-preloaded', '~> 3.1', '>= 3.1.3'
|
37
37
|
s.add_dependency 'jsonlint', '~> 0.3.0'
|
38
|
+
s.add_dependency 'json_schemer', '~> 0.2.23'
|
38
39
|
s.add_dependency 'loofah', '~> 2.19'
|
39
40
|
s.add_dependency 'maremma', '>= 4.9.7', '< 5'
|
40
41
|
s.add_dependency 'namae', '~> 1.0'
|
data/lib/briard/metadata.rb
CHANGED
@@ -163,9 +163,11 @@ module Briard
|
|
163
163
|
exists? && errors.nil?
|
164
164
|
end
|
165
165
|
|
166
|
-
#
|
166
|
+
# Catch errors in the reader
|
167
|
+
# Then validate against JSON schema for internal metadata format
|
168
|
+
# Then validate against DataCite schema, unless already errors in the reader
|
167
169
|
def errors
|
168
|
-
meta.fetch('errors', nil) || datacite_errors(xml: datacite, schema_version: schema_version)
|
170
|
+
meta.fetch('errors', nil) || json_schema_errors || datacite_errors(xml: datacite, schema_version: schema_version)
|
169
171
|
end
|
170
172
|
|
171
173
|
def descriptions
|
@@ -4,6 +4,7 @@ require_relative 'doi_utils'
|
|
4
4
|
require_relative 'author_utils'
|
5
5
|
require_relative 'crossref_utils'
|
6
6
|
require_relative 'datacite_utils'
|
7
|
+
require_relative 'schema_utils'
|
7
8
|
require_relative 'utils'
|
8
9
|
|
9
10
|
require_relative 'readers/bibtex_reader'
|
@@ -42,6 +43,7 @@ module Briard
|
|
42
43
|
include Briard::AuthorUtils
|
43
44
|
include Briard::CrossrefUtils
|
44
45
|
include Briard::DataciteUtils
|
46
|
+
include Briard::SchemaUtils
|
45
47
|
include Briard::Utils
|
46
48
|
|
47
49
|
include Briard::Readers::BibtexReader
|
@@ -310,7 +310,7 @@ module Briard
|
|
310
310
|
'container' => set_container(meta),
|
311
311
|
'publisher' => parse_attributes(meta.fetch('publisher', nil),
|
312
312
|
first: true).to_s.strip.presence,
|
313
|
-
'agency' => '
|
313
|
+
'agency' => 'DataCite',
|
314
314
|
'funding_references' => funding_references,
|
315
315
|
'dates' => dates,
|
316
316
|
'publication_year' => parse_attributes(meta.fetch('publicationYear', nil),
|
@@ -43,12 +43,14 @@ module Briard
|
|
43
43
|
link = doc.css("link[rel='canonical']")
|
44
44
|
hsh['@id'] = link[0]['href'] if link.present?
|
45
45
|
|
46
|
-
# workaround if license included
|
47
|
-
license = doc.at("meta[name='
|
46
|
+
# workaround if license not included with schema.org
|
47
|
+
license = doc.at("meta[name='dc.rights']")
|
48
48
|
hsh['license'] = license['content'] if license.present?
|
49
49
|
|
50
50
|
# workaround for html language attribute if no language is set via schema.org
|
51
|
-
lang = doc.at('
|
51
|
+
lang = doc.at("meta[name='dc.language']") || doc.at("meta[name='citation_language']")
|
52
|
+
lang = lang['content'] if lang.present?
|
53
|
+
lang = doc.at('html')['lang'] if lang.blank?
|
52
54
|
hsh['inLanguage'] = lang if hsh['inLanguage'].blank?
|
53
55
|
|
54
56
|
# workaround if issn not included with schema.org
|
@@ -57,6 +59,20 @@ module Briard
|
|
57
59
|
hsh['isPartOf'] = { 'name' => name ? name['content'] : nil,
|
58
60
|
'issn' => issn ? issn['content'] : nil }.compact
|
59
61
|
|
62
|
+
# workaround if not all authors are included with schema.org (e.g. in Ghost metadata)
|
63
|
+
authors = doc.css("meta[name='citation_author']").map do |author|
|
64
|
+
{ 'name' => author['content'] }
|
65
|
+
end
|
66
|
+
hsh['author'] = hsh['creator'] if hsh['author'].blank? && hsh['creator'].present?
|
67
|
+
hsh['author'] = authors if authors.length > Array.wrap(hsh['author']).length
|
68
|
+
|
69
|
+
# workaround if publisher not included with schema.org (e.g. Zenodo)
|
70
|
+
if hsh['publisher'].blank?
|
71
|
+
publisher = doc.at("meta[property='og:site_name']")
|
72
|
+
publisher = publisher['content'] if publisher.present?
|
73
|
+
hsh['publisher'] = { 'name' => publisher }
|
74
|
+
end
|
75
|
+
|
60
76
|
string = hsh.to_json if hsh.present?
|
61
77
|
end
|
62
78
|
|
@@ -106,7 +122,7 @@ module Briard
|
|
106
122
|
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch('editor',
|
107
123
|
nil))))
|
108
124
|
publisher = parse_attributes(meta.fetch('publisher', nil), content: 'name', first: true)
|
109
|
-
|
125
|
+
|
110
126
|
ct = schema_org == 'Dataset' ? 'includedInDataCatalog' : 'Periodical'
|
111
127
|
container = if meta.fetch(ct, nil).present?
|
112
128
|
url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: 'url',
|
@@ -125,12 +141,13 @@ module Briard
|
|
125
141
|
}.compact
|
126
142
|
elsif %w[BlogPosting Article].include?(schema_org)
|
127
143
|
issn = meta.dig('isPartOf', 'issn')
|
144
|
+
url = meta.dig('publisher', 'url')
|
128
145
|
|
129
146
|
{
|
130
147
|
'type' => 'Blog',
|
131
148
|
'title' => meta.dig('isPartOf', 'name'),
|
132
|
-
'identifier' => issn,
|
133
|
-
'identifierType' => issn.present? ? 'ISSN' :
|
149
|
+
'identifier' => issn.presence || url.presence,
|
150
|
+
'identifierType' => issn.present? ? 'ISSN' : 'URL'
|
134
151
|
}.compact
|
135
152
|
else
|
136
153
|
{}
|
@@ -249,7 +266,7 @@ module Briard
|
|
249
266
|
[{ 'description' => sanitize(meta.fetch('description')),
|
250
267
|
'descriptionType' => 'Abstract' }]
|
251
268
|
end,
|
252
|
-
'rights_list' => rights_list,
|
269
|
+
'rights_list' => rights_list.presence,
|
253
270
|
'version_info' => meta.fetch('version', nil).to_s.presence,
|
254
271
|
'subjects' => subjects,
|
255
272
|
'language' => language,
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'json_schemer'
|
3
|
+
require 'pathname'
|
4
|
+
|
5
|
+
module Briard
|
6
|
+
module SchemaUtils
|
7
|
+
JSON_SCHEMA = schema = File.read(File.expand_path('../../resources/json-schema/briard_schema.json', __dir__))
|
8
|
+
|
9
|
+
def json_schema_errors
|
10
|
+
schemer = JSONSchemer.schema(JSON_SCHEMA)
|
11
|
+
errors = schemer.validate(self.meta).to_a
|
12
|
+
errors.map {|err| JSONSchemer::Errors.pretty err }.presence
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/briard/version.rb
CHANGED