briard 2.8.2 → 2.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +30 -20
- data/briard.gemspec +1 -0
- data/lib/briard/metadata.rb +4 -2
- data/lib/briard/metadata_utils.rb +2 -0
- data/lib/briard/readers/datacite_reader.rb +1 -1
- data/lib/briard/readers/schema_org_reader.rb +24 -7
- data/lib/briard/schema_utils.rb +15 -0
- data/lib/briard/version.rb +1 -1
- data/resources/json-schema/briard_schema.json +462 -0
- data/spec/fixtures/crosscite.json +1 -1
- data/spec/fixtures/datacite.json +1 -1
- data/spec/fixtures/datacite_software.json +1 -1
- data/spec/fixtures/datacite_software_version.json +1 -1
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting.yml +18 -18
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/BlogPosting_with_new_DOI.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/get_schema_org_metadata_front_matter/BlogPosting.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/harvard_dataverse.yml +6 -6
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/pangaea.yml +10 -10
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/get_schema_org_metadata/zenodo.yml +8 -8
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/doi_not_found.yml +105 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_errors/is_valid.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/json_schema_valid_/is_valid.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/another_schema_org_from_front-matter.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/journal_article_from_datacite.yml +6 -6
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/posted_content.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_another_science_blog.yml +9 -9
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/write_metadata_as_crossref/schema_org_from_front_matter.yml +22 -22
- data/spec/metadata_spec.rb +2 -0
- data/spec/readers/crossref_json_reader_spec.rb +1 -1
- data/spec/readers/datacite_json_reader_spec.rb +2 -2
- data/spec/readers/datacite_reader_spec.rb +36 -36
- data/spec/readers/npm_reader_spec.rb +3 -3
- data/spec/readers/schema_org_reader_spec.rb +26 -3
- data/spec/schema_utils_spec.rb +24 -0
- data/spec/writers/crossref_writer_spec.rb +1 -1
- data/spec/writers/datacite_writer_spec.rb +6 -6
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 18ccea8677e7dd2e7c7fc253a23e060c8778f81780f55904ce5572c3d6c9d685
|
4
|
+
data.tar.gz: c9c42bd2d6fe73495da63b8280a2d06b172ad8013af605103d17c2c32523cc05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fac8077f5d561ad88c74be7f14da043e14a6dfb0a2ba36874905d39ac2f9a994adf4d7e21efd8885d1e9e8d7db21463970a5faf02b26366f3cb13efede9122bb
|
7
|
+
data.tar.gz: 48e8f3b859cf3a753eda735b579f6d6c43197fdc635ea2a215e12d0ef3078990ccf36ec0f215f09b7e846acf17427547b8acca172551b1d13bf94ab668ab5939
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v2.8.2](https://github.com/front-matter/briard/tree/v2.8.2) (2022-11-23)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/front-matter/briard/compare/v2.8.0...v2.8.2)
|
6
|
+
|
3
7
|
## [v2.8.0](https://github.com/front-matter/briard/tree/v2.8.0) (2022-11-22)
|
4
8
|
|
5
9
|
[Full Changelog](https://github.com/front-matter/briard/compare/v2.6.5...v2.8.0)
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
briard (2.8.2)
|
4
|
+
briard (2.9.1)
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
6
6
|
base32-url (>= 0.5.0, < 1)
|
7
7
|
benchmark_methods (~> 0.7)
|
@@ -18,6 +18,7 @@ PATH
|
|
18
18
|
gender_detector (~> 0.1.2)
|
19
19
|
iso8601 (~> 0.9.1)
|
20
20
|
json-ld-preloaded (~> 3.1, >= 3.1.3)
|
21
|
+
json_schemer (~> 0.2.23)
|
21
22
|
jsonlint (~> 0.3.0)
|
22
23
|
loofah (~> 2.19)
|
23
24
|
maremma (>= 4.9.7, < 5)
|
@@ -75,6 +76,8 @@ GEM
|
|
75
76
|
scanf (~> 1.0)
|
76
77
|
sxp (~> 1.2)
|
77
78
|
unicode-types (~> 1.7)
|
79
|
+
ecma-re-validator (0.4.0)
|
80
|
+
regexp_parser (~> 2.2)
|
78
81
|
edtf (3.1.0)
|
79
82
|
activesupport (>= 3.0, < 8.0)
|
80
83
|
excon (0.71.1)
|
@@ -99,13 +102,14 @@ GEM
|
|
99
102
|
haml (5.2.2)
|
100
103
|
temple (>= 0.8.0)
|
101
104
|
tilt
|
105
|
+
hana (1.3.7)
|
102
106
|
hashdiff (1.0.1)
|
103
107
|
htmlentities (4.3.4)
|
104
108
|
i18n (1.12.0)
|
105
109
|
concurrent-ruby (~> 1.0)
|
106
110
|
iso8601 (0.9.1)
|
107
|
-
json (2.6.
|
108
|
-
json-canonicalization (0.3.
|
111
|
+
json (2.6.3)
|
112
|
+
json-canonicalization (0.3.1)
|
109
113
|
json-ld (3.2.3)
|
110
114
|
htmlentities (~> 4.3)
|
111
115
|
json-canonicalization (~> 0.3)
|
@@ -116,12 +120,17 @@ GEM
|
|
116
120
|
json-ld-preloaded (3.2.2)
|
117
121
|
json-ld (~> 3.2)
|
118
122
|
rdf (~> 3.2)
|
123
|
+
json_schemer (0.2.24)
|
124
|
+
ecma-re-validator (~> 0.3)
|
125
|
+
hana (~> 1.3)
|
126
|
+
regexp_parser (~> 2.0)
|
127
|
+
uri_template (~> 0.7)
|
119
128
|
jsonlint (0.3.0)
|
120
129
|
oj (~> 3)
|
121
130
|
optimist (~> 3)
|
122
131
|
latex-decode (0.4.0)
|
123
132
|
link_header (0.0.8)
|
124
|
-
loofah (2.19.
|
133
|
+
loofah (2.19.1)
|
125
134
|
crass (~> 1.0.2)
|
126
135
|
nokogiri (>= 1.5.9)
|
127
136
|
maremma (4.9.9)
|
@@ -139,27 +148,27 @@ GEM
|
|
139
148
|
oj (>= 2.8.3)
|
140
149
|
oj_mimic_json (~> 1.0, >= 1.0.1)
|
141
150
|
matrix (0.4.2)
|
142
|
-
mini_portile2 (2.8.
|
143
|
-
minitest (5.
|
151
|
+
mini_portile2 (2.8.1)
|
152
|
+
minitest (5.17.0)
|
144
153
|
multi_json (1.15.0)
|
145
154
|
multipart-post (2.2.3)
|
146
155
|
namae (1.1.1)
|
147
|
-
nokogiri (1.13.
|
156
|
+
nokogiri (1.13.10)
|
148
157
|
mini_portile2 (~> 2.8.0)
|
149
158
|
racc (~> 1.4)
|
150
159
|
oj (3.13.23)
|
151
160
|
oj_mimic_json (1.0.1)
|
152
161
|
optimist (3.0.1)
|
153
162
|
parallel (1.22.1)
|
154
|
-
parser (3.
|
163
|
+
parser (3.2.0.0)
|
155
164
|
ast (~> 2.4.1)
|
156
165
|
postrank-uri (1.0.24)
|
157
166
|
addressable (>= 2.4.0)
|
158
167
|
nokogiri (>= 1.8.0)
|
159
168
|
public_suffix (>= 2.0.0, < 2.1)
|
160
169
|
public_suffix (2.0.5)
|
161
|
-
racc (1.6.
|
162
|
-
rack (2.2.
|
170
|
+
racc (1.6.2)
|
171
|
+
rack (2.2.5)
|
163
172
|
rack-test (2.0.2)
|
164
173
|
rack (>= 1.3)
|
165
174
|
rainbow (3.1.1)
|
@@ -196,34 +205,34 @@ GEM
|
|
196
205
|
rspec-mocks (~> 3.12.0)
|
197
206
|
rspec-core (3.12.0)
|
198
207
|
rspec-support (~> 3.12.0)
|
199
|
-
rspec-expectations (3.12.
|
208
|
+
rspec-expectations (3.12.2)
|
200
209
|
diff-lcs (>= 1.2.0, < 2.0)
|
201
210
|
rspec-support (~> 3.12.0)
|
202
|
-
rspec-mocks (3.12.
|
211
|
+
rspec-mocks (3.12.2)
|
203
212
|
diff-lcs (>= 1.2.0, < 2.0)
|
204
213
|
rspec-support (~> 3.12.0)
|
205
214
|
rspec-support (3.12.0)
|
206
215
|
rspec-xsd (0.1.0)
|
207
216
|
nokogiri (~> 1.6)
|
208
217
|
rspec (~> 3)
|
209
|
-
rubocop (1.
|
218
|
+
rubocop (1.43.0)
|
210
219
|
json (~> 2.3)
|
211
220
|
parallel (~> 1.10)
|
212
|
-
parser (>= 3.
|
221
|
+
parser (>= 3.2.0.0)
|
213
222
|
rainbow (>= 2.2.2, < 4.0)
|
214
223
|
regexp_parser (>= 1.8, < 3.0)
|
215
224
|
rexml (>= 3.2.5, < 4.0)
|
216
|
-
rubocop-ast (>= 1.
|
225
|
+
rubocop-ast (>= 1.24.1, < 2.0)
|
217
226
|
ruby-progressbar (~> 1.7)
|
218
|
-
unicode-display_width (>=
|
219
|
-
rubocop-ast (1.
|
227
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
228
|
+
rubocop-ast (1.24.1)
|
220
229
|
parser (>= 3.1.1.0)
|
221
|
-
rubocop-performance (1.15.
|
230
|
+
rubocop-performance (1.15.2)
|
222
231
|
rubocop (>= 1.7.0, < 2.0)
|
223
232
|
rubocop-ast (>= 0.4.0)
|
224
233
|
rubocop-rake (0.6.0)
|
225
234
|
rubocop (~> 1.0)
|
226
|
-
rubocop-rspec (2.
|
235
|
+
rubocop-rspec (2.16.0)
|
227
236
|
rubocop (~> 1.33)
|
228
237
|
ruby-progressbar (1.11.0)
|
229
238
|
ruby2_keywords (0.0.5)
|
@@ -242,9 +251,10 @@ GEM
|
|
242
251
|
tilt (2.0.11)
|
243
252
|
tzinfo (2.0.5)
|
244
253
|
concurrent-ruby (~> 1.0)
|
245
|
-
unicode-display_width (2.
|
254
|
+
unicode-display_width (2.4.2)
|
246
255
|
unicode-types (1.8.0)
|
247
256
|
unicode_utils (1.4.0)
|
257
|
+
uri_template (0.7.0)
|
248
258
|
vcr (3.0.3)
|
249
259
|
webmock (3.18.1)
|
250
260
|
addressable (>= 2.8.0)
|
data/briard.gemspec
CHANGED
@@ -35,6 +35,7 @@ Gem::Specification.new do |s|
|
|
35
35
|
s.add_dependency 'iso8601', '~> 0.9.1'
|
36
36
|
s.add_dependency 'json-ld-preloaded', '~> 3.1', '>= 3.1.3'
|
37
37
|
s.add_dependency 'jsonlint', '~> 0.3.0'
|
38
|
+
s.add_dependency 'json_schemer', '~> 0.2.23'
|
38
39
|
s.add_dependency 'loofah', '~> 2.19'
|
39
40
|
s.add_dependency 'maremma', '>= 4.9.7', '< 5'
|
40
41
|
s.add_dependency 'namae', '~> 1.0'
|
data/lib/briard/metadata.rb
CHANGED
@@ -163,9 +163,11 @@ module Briard
|
|
163
163
|
exists? && errors.nil?
|
164
164
|
end
|
165
165
|
|
166
|
-
#
|
166
|
+
# Catch errors in the reader
|
167
|
+
# Then validate against JSON schema for internal metadata format
|
168
|
+
# Then validate against DataCite schema, unless already errors in the reader
|
167
169
|
def errors
|
168
|
-
meta.fetch('errors', nil) || datacite_errors(xml: datacite, schema_version: schema_version)
|
170
|
+
meta.fetch('errors', nil) || json_schema_errors || datacite_errors(xml: datacite, schema_version: schema_version)
|
169
171
|
end
|
170
172
|
|
171
173
|
def descriptions
|
@@ -4,6 +4,7 @@ require_relative 'doi_utils'
|
|
4
4
|
require_relative 'author_utils'
|
5
5
|
require_relative 'crossref_utils'
|
6
6
|
require_relative 'datacite_utils'
|
7
|
+
require_relative 'schema_utils'
|
7
8
|
require_relative 'utils'
|
8
9
|
|
9
10
|
require_relative 'readers/bibtex_reader'
|
@@ -42,6 +43,7 @@ module Briard
|
|
42
43
|
include Briard::AuthorUtils
|
43
44
|
include Briard::CrossrefUtils
|
44
45
|
include Briard::DataciteUtils
|
46
|
+
include Briard::SchemaUtils
|
45
47
|
include Briard::Utils
|
46
48
|
|
47
49
|
include Briard::Readers::BibtexReader
|
@@ -310,7 +310,7 @@ module Briard
|
|
310
310
|
'container' => set_container(meta),
|
311
311
|
'publisher' => parse_attributes(meta.fetch('publisher', nil),
|
312
312
|
first: true).to_s.strip.presence,
|
313
|
-
'agency' => '
|
313
|
+
'agency' => 'DataCite',
|
314
314
|
'funding_references' => funding_references,
|
315
315
|
'dates' => dates,
|
316
316
|
'publication_year' => parse_attributes(meta.fetch('publicationYear', nil),
|
@@ -43,12 +43,14 @@ module Briard
|
|
43
43
|
link = doc.css("link[rel='canonical']")
|
44
44
|
hsh['@id'] = link[0]['href'] if link.present?
|
45
45
|
|
46
|
-
# workaround if license included
|
47
|
-
license = doc.at("meta[name='
|
46
|
+
# workaround if license not included with schema.org
|
47
|
+
license = doc.at("meta[name='dc.rights']")
|
48
48
|
hsh['license'] = license['content'] if license.present?
|
49
49
|
|
50
50
|
# workaround for html language attribute if no language is set via schema.org
|
51
|
-
lang = doc.at('
|
51
|
+
lang = doc.at("meta[name='dc.language']") || doc.at("meta[name='citation_language']")
|
52
|
+
lang = lang['content'] if lang.present?
|
53
|
+
lang = doc.at('html')['lang'] if lang.blank?
|
52
54
|
hsh['inLanguage'] = lang if hsh['inLanguage'].blank?
|
53
55
|
|
54
56
|
# workaround if issn not included with schema.org
|
@@ -57,6 +59,20 @@ module Briard
|
|
57
59
|
hsh['isPartOf'] = { 'name' => name ? name['content'] : nil,
|
58
60
|
'issn' => issn ? issn['content'] : nil }.compact
|
59
61
|
|
62
|
+
# workaround if not all authors are included with schema.org (e.g. in Ghost metadata)
|
63
|
+
authors = doc.css("meta[name='citation_author']").map do |author|
|
64
|
+
{ 'name' => author['content'] }
|
65
|
+
end
|
66
|
+
hsh['author'] = hsh['creator'] if hsh['author'].blank? && hsh['creator'].present?
|
67
|
+
hsh['author'] = authors if authors.length > Array.wrap(hsh['author']).length
|
68
|
+
|
69
|
+
# workaround if publisher not included with schema.org (e.g. Zenodo)
|
70
|
+
if hsh['publisher'].blank?
|
71
|
+
publisher = doc.at("meta[property='og:site_name']")
|
72
|
+
publisher = publisher['content'] if publisher.present?
|
73
|
+
hsh['publisher'] = { 'name' => publisher }
|
74
|
+
end
|
75
|
+
|
60
76
|
string = hsh.to_json if hsh.present?
|
61
77
|
end
|
62
78
|
|
@@ -106,7 +122,7 @@ module Briard
|
|
106
122
|
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch('editor',
|
107
123
|
nil))))
|
108
124
|
publisher = parse_attributes(meta.fetch('publisher', nil), content: 'name', first: true)
|
109
|
-
|
125
|
+
|
110
126
|
ct = schema_org == 'Dataset' ? 'includedInDataCatalog' : 'Periodical'
|
111
127
|
container = if meta.fetch(ct, nil).present?
|
112
128
|
url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: 'url',
|
@@ -125,12 +141,13 @@ module Briard
|
|
125
141
|
}.compact
|
126
142
|
elsif %w[BlogPosting Article].include?(schema_org)
|
127
143
|
issn = meta.dig('isPartOf', 'issn')
|
144
|
+
url = meta.dig('publisher', 'url')
|
128
145
|
|
129
146
|
{
|
130
147
|
'type' => 'Blog',
|
131
148
|
'title' => meta.dig('isPartOf', 'name'),
|
132
|
-
'identifier' => issn,
|
133
|
-
'identifierType' => issn.present? ? 'ISSN' : nil
|
149
|
+
'identifier' => issn.presence || url.presence,
|
150
|
+
'identifierType' => issn.present? ? 'ISSN' : 'URL'
|
134
151
|
}.compact
|
135
152
|
else
|
136
153
|
{}
|
@@ -249,7 +266,7 @@ module Briard
|
|
249
266
|
[{ 'description' => sanitize(meta.fetch('description')),
|
250
267
|
'descriptionType' => 'Abstract' }]
|
251
268
|
end,
|
252
|
-
'rights_list' => rights_list,
|
269
|
+
'rights_list' => rights_list.presence,
|
253
270
|
'version_info' => meta.fetch('version', nil).to_s.presence,
|
254
271
|
'subjects' => subjects,
|
255
272
|
'language' => language,
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'json_schemer'
|
3
|
+
require 'pathname'
|
4
|
+
|
5
|
+
module Briard
|
6
|
+
module SchemaUtils
|
7
|
+
JSON_SCHEMA = schema = File.read(File.expand_path('../../resources/json-schema/briard_schema.json', __dir__))
|
8
|
+
|
9
|
+
def json_schema_errors
|
10
|
+
schemer = JSONSchemer.schema(JSON_SCHEMA)
|
11
|
+
errors = schemer.validate(self.meta).to_a
|
12
|
+
errors.map {|err| JSONSchemer::Errors.pretty err }.presence
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/briard/version.rb
CHANGED