briard 2.4.1 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.github/workflows/rubocop.yml +50 -0
- data/.rubocop.yml +144 -620
- data/.rubocop_todo.yml +76 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +43 -6
- data/Rakefile +1 -1
- data/{bolognese.gemspec → briard.gemspec} +46 -38
- data/lib/briard/array.rb +2 -2
- data/lib/briard/author_utils.rb +79 -71
- data/lib/briard/cli.rb +12 -13
- data/lib/briard/crossref_utils.rb +73 -61
- data/lib/briard/datacite_utils.rb +132 -106
- data/lib/briard/doi_utils.rb +10 -10
- data/lib/briard/metadata.rb +96 -106
- data/lib/briard/metadata_utils.rb +87 -78
- data/lib/briard/readers/bibtex_reader.rb +65 -65
- data/lib/briard/readers/cff_reader.rb +88 -70
- data/lib/briard/readers/citeproc_reader.rb +90 -84
- data/lib/briard/readers/codemeta_reader.rb +68 -50
- data/lib/briard/readers/crosscite_reader.rb +2 -2
- data/lib/briard/readers/crossref_reader.rb +249 -210
- data/lib/briard/readers/datacite_json_reader.rb +3 -3
- data/lib/briard/readers/datacite_reader.rb +225 -189
- data/lib/briard/readers/npm_reader.rb +49 -42
- data/lib/briard/readers/ris_reader.rb +82 -80
- data/lib/briard/readers/schema_org_reader.rb +182 -159
- data/lib/briard/string.rb +1 -1
- data/lib/briard/utils.rb +4 -4
- data/lib/briard/version.rb +3 -1
- data/lib/briard/whitelist_scrubber.rb +11 -4
- data/lib/briard/writers/bibtex_writer.rb +14 -8
- data/lib/briard/writers/cff_writer.rb +33 -26
- data/lib/briard/writers/codemeta_writer.rb +19 -15
- data/lib/briard/writers/csv_writer.rb +6 -4
- data/lib/briard/writers/datacite_json_writer.rb +8 -2
- data/lib/briard/writers/jats_writer.rb +33 -28
- data/lib/briard/writers/rdf_xml_writer.rb +1 -1
- data/lib/briard/writers/ris_writer.rb +30 -18
- data/lib/briard/writers/turtle_writer.rb +1 -1
- data/lib/briard.rb +6 -6
- data/rubocop.sarif +0 -0
- data/spec/array_spec.rb +5 -5
- data/spec/author_utils_spec.rb +151 -132
- data/spec/datacite_utils_spec.rb +135 -83
- data/spec/doi_utils_spec.rb +168 -164
- data/spec/find_from_format_spec.rb +69 -69
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
- data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
- data/spec/metadata_spec.rb +91 -90
- data/spec/readers/bibtex_reader_spec.rb +43 -38
- data/spec/readers/cff_reader_spec.rb +165 -153
- data/spec/readers/citeproc_reader_spec.rb +45 -40
- data/spec/readers/codemeta_reader_spec.rb +128 -115
- data/spec/readers/crosscite_reader_spec.rb +34 -24
- data/spec/readers/crossref_reader_spec.rb +1098 -939
- data/spec/readers/datacite_json_reader_spec.rb +53 -40
- data/spec/readers/datacite_reader_spec.rb +1541 -1337
- data/spec/readers/npm_reader_spec.rb +48 -43
- data/spec/readers/ris_reader_spec.rb +53 -47
- data/spec/readers/schema_org_reader_spec.rb +329 -267
- data/spec/spec_helper.rb +6 -5
- data/spec/utils_spec.rb +371 -347
- data/spec/writers/bibtex_writer_spec.rb +143 -143
- data/spec/writers/cff_writer_spec.rb +96 -90
- data/spec/writers/citation_writer_spec.rb +34 -33
- data/spec/writers/citeproc_writer_spec.rb +226 -224
- data/spec/writers/codemeta_writer_spec.rb +18 -16
- data/spec/writers/crosscite_writer_spec.rb +91 -73
- data/spec/writers/crossref_writer_spec.rb +99 -91
- data/spec/writers/csv_writer_spec.rb +70 -70
- data/spec/writers/datacite_json_writer_spec.rb +78 -68
- data/spec/writers/datacite_writer_spec.rb +417 -322
- data/spec/writers/jats_writer_spec.rb +177 -161
- data/spec/writers/rdf_xml_writer_spec.rb +68 -63
- data/spec/writers/ris_writer_spec.rb +162 -162
- data/spec/writers/turtle_writer_spec.rb +47 -47
- metadata +250 -160
- data/.github/workflows/release.yml +0 -47
@@ -4,27 +4,30 @@ module Briard
|
|
4
4
|
module Readers
|
5
5
|
module CrossrefReader
|
6
6
|
# CrossRef types from https://api.crossref.org/types
|
7
|
-
def get_crossref(id: nil, **
|
8
|
-
return {
|
7
|
+
def get_crossref(id: nil, **_options)
|
8
|
+
return { 'string' => nil, 'state' => 'not_found' } unless id.present?
|
9
9
|
|
10
10
|
doi = doi_from_url(id)
|
11
11
|
url = "https://api.crossref.org/works/#{doi}/transform/application/vnd.crossref.unixsd+xml"
|
12
|
-
response = Maremma.get(url, accept:
|
13
|
-
string = response.body.fetch(
|
12
|
+
response = Maremma.get(url, accept: 'text/xml;charset=utf-8', raw: true)
|
13
|
+
string = response.body.fetch('data', nil)
|
14
14
|
string = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).to_s if string.present?
|
15
15
|
|
16
|
-
{
|
16
|
+
{ 'string' => string }
|
17
17
|
end
|
18
18
|
|
19
19
|
def read_crossref(string: nil, **options)
|
20
|
-
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
20
|
+
read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
|
21
|
+
:sandbox, :validate, :ra))
|
21
22
|
|
22
23
|
if string.present?
|
23
|
-
m = Maremma.from_xml(string).dig(
|
24
|
-
|
24
|
+
m = Maremma.from_xml(string).dig('crossref_result', 'query_result', 'body', 'query',
|
25
|
+
'doi_record') || {}
|
26
|
+
meta = m.dig('doi_record', 'crossref', 'error').nil? ? m : {}
|
25
27
|
|
26
28
|
# query contains information from outside metadata schema, e.g. publisher name
|
27
|
-
query = Maremma.from_xml(string).dig(
|
29
|
+
query = Maremma.from_xml(string).dig('crossref_result', 'query_result', 'body',
|
30
|
+
'query') || {}
|
28
31
|
else
|
29
32
|
meta = {}
|
30
33
|
query = {}
|
@@ -32,7 +35,7 @@ module Briard
|
|
32
35
|
|
33
36
|
# model should be one of book, conference, database, dissertation, journal, peer_review, posted_content,
|
34
37
|
# report_paper, sa_component, standard
|
35
|
-
model = meta
|
38
|
+
model = meta['crossref'].to_h.keys.last
|
36
39
|
|
37
40
|
resource_type = nil
|
38
41
|
bibliographic_metadata = {}
|
@@ -40,213 +43,240 @@ module Briard
|
|
40
43
|
journal_metadata = nil
|
41
44
|
journal_issue = {}
|
42
45
|
journal_metadata = nil
|
43
|
-
publisher = query.dig(
|
46
|
+
publisher = query.dig('crm_item', 0)
|
44
47
|
publisher = nil unless publisher.is_a?(String)
|
45
48
|
|
46
49
|
case model
|
47
|
-
when
|
48
|
-
book_metadata = meta.dig(
|
49
|
-
book_series_metadata = meta.dig(
|
50
|
-
book_set_metadata = meta.dig(
|
51
|
-
bibliographic_metadata = meta.dig(
|
52
|
-
|
50
|
+
when 'book'
|
51
|
+
book_metadata = meta.dig('crossref', 'book', 'book_metadata')
|
52
|
+
book_series_metadata = meta.dig('crossref', 'book', 'book_series_metadata')
|
53
|
+
book_set_metadata = meta.dig('crossref', 'book', 'book_set_metadata')
|
54
|
+
bibliographic_metadata = meta.dig('crossref', 'book',
|
55
|
+
'content_item') || book_metadata || book_series_metadata || book_set_metadata
|
56
|
+
resource_type = if bibliographic_metadata.fetch('component_type', nil)
|
57
|
+
"book-#{bibliographic_metadata.fetch('component_type')}"
|
58
|
+
else
|
59
|
+
'book'
|
60
|
+
end
|
53
61
|
# publisher = if book_metadata.present?
|
54
62
|
# book_metadata.dig("publisher", "publisher_name")
|
55
63
|
# elsif book_series_metadata.present?
|
56
64
|
# book_series_metadata.dig("publisher", "publisher_name")
|
57
65
|
# end
|
58
|
-
when
|
59
|
-
event_metadata = meta.dig(
|
60
|
-
bibliographic_metadata = meta.dig(
|
61
|
-
when
|
62
|
-
journal_metadata = meta.dig(
|
63
|
-
journal_issue = meta.dig(
|
64
|
-
journal_article = meta.dig(
|
66
|
+
when 'conference'
|
67
|
+
event_metadata = meta.dig('crossref', 'conference', 'event_metadata') || {}
|
68
|
+
bibliographic_metadata = meta.dig('crossref', 'conference', 'conference_paper').to_h
|
69
|
+
when 'journal'
|
70
|
+
journal_metadata = meta.dig('crossref', 'journal', 'journal_metadata') || {}
|
71
|
+
journal_issue = meta.dig('crossref', 'journal', 'journal_issue') || {}
|
72
|
+
journal_article = meta.dig('crossref', 'journal', 'journal_article') || {}
|
65
73
|
bibliographic_metadata = journal_article.presence || journal_issue.presence || journal_metadata
|
66
|
-
program_metadata = bibliographic_metadata.dig(
|
74
|
+
program_metadata = bibliographic_metadata.dig('crossmark', 'custom_metadata',
|
75
|
+
'program') || bibliographic_metadata['program']
|
67
76
|
resource_type = if journal_article.present?
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
when
|
75
|
-
bibliographic_metadata = meta.dig(
|
76
|
-
publisher ||= bibliographic_metadata.dig(
|
77
|
-
when
|
78
|
-
bibliographic_metadata = meta.dig(
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
77
|
+
'journal_article'
|
78
|
+
elsif journal_issue.present?
|
79
|
+
'journal_issue'
|
80
|
+
else
|
81
|
+
'journal'
|
82
|
+
end
|
83
|
+
when 'posted_content'
|
84
|
+
bibliographic_metadata = meta.dig('crossref', 'posted_content').to_h
|
85
|
+
publisher ||= bibliographic_metadata.dig('institution', 'institution_name')
|
86
|
+
when 'sa_component'
|
87
|
+
bibliographic_metadata = meta.dig('crossref', 'sa_component', 'component_list',
|
88
|
+
'component').to_h
|
89
|
+
related_identifier = Array.wrap(query.to_h['crm_item']).find do |cr|
|
90
|
+
cr['name'] == 'relation'
|
91
|
+
end
|
92
|
+
journal_metadata = { 'relatedIdentifier' => related_identifier.to_h.fetch('__content',
|
93
|
+
nil) }
|
94
|
+
when 'database'
|
95
|
+
bibliographic_metadata = meta.dig('crossref', 'database', 'dataset').to_h
|
96
|
+
resource_type = 'dataset'
|
97
|
+
when 'report_paper'
|
98
|
+
bibliographic_metadata = meta.dig('crossref', 'report_paper',
|
99
|
+
'report_paper_metadata').to_h
|
100
|
+
resource_type = 'report'
|
101
|
+
when 'peer_review'
|
102
|
+
bibliographic_metadata = meta.dig('crossref', 'peer_review')
|
103
|
+
when 'dissertation'
|
104
|
+
bibliographic_metadata = meta.dig('crossref', 'dissertation')
|
91
105
|
end
|
92
106
|
|
93
107
|
resource_type = (resource_type || model).to_s.underscore.camelcase.presence
|
94
|
-
schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type] ||
|
108
|
+
schema_org = Briard::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || 'ScholarlyArticle'
|
95
109
|
types = {
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
110
|
+
'resourceTypeGeneral' => Briard::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
|
111
|
+
'resourceType' => resource_type,
|
112
|
+
'schemaOrg' => schema_org,
|
113
|
+
'citeproc' => Briard::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || 'article-journal',
|
114
|
+
'bibtex' => Briard::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || 'misc',
|
115
|
+
'ris' => Briard::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || 'JOUR'
|
102
116
|
}.compact
|
103
117
|
|
104
|
-
titles = if bibliographic_metadata
|
105
|
-
Array.wrap(bibliographic_metadata
|
106
|
-
if r.blank? || (r[
|
118
|
+
titles = if bibliographic_metadata['titles'].present?
|
119
|
+
Array.wrap(bibliographic_metadata['titles']).map do |r|
|
120
|
+
if r.blank? || (r['title'].blank? && r['original_language_title'].blank?)
|
107
121
|
nil
|
108
|
-
elsif r[
|
109
|
-
{
|
110
|
-
elsif r[
|
111
|
-
{
|
122
|
+
elsif r['title'].is_a?(String)
|
123
|
+
{ 'title' => sanitize(r['title']) }
|
124
|
+
elsif r['original_language_title'].present?
|
125
|
+
{ 'title' => sanitize(r.dig('original_language_title', '__content__')),
|
126
|
+
'lang' => r.dig('original_language_title', 'language') }
|
112
127
|
else
|
113
|
-
{
|
128
|
+
{ 'title' => sanitize(r.dig('title', '__content__')) }.compact
|
114
129
|
end
|
115
130
|
end.compact
|
116
131
|
else
|
117
|
-
[{
|
132
|
+
[{ 'title' => ':(unav)' }]
|
118
133
|
end
|
119
134
|
|
120
135
|
date_published = crossref_date_published(bibliographic_metadata)
|
121
136
|
if date_published.present?
|
122
|
-
date_published = {
|
137
|
+
date_published = { 'date' => date_published, 'dateType' => 'Issued' }
|
123
138
|
else
|
124
|
-
date_published = Array.wrap(query.to_h[
|
125
|
-
|
139
|
+
date_published = Array.wrap(query.to_h['crm_item']).find { |cr| cr['name'] == 'created' }
|
140
|
+
if date_published.present?
|
141
|
+
date_published = { 'date' => date_published.fetch('__content__', '')[0..9],
|
142
|
+
'dateType' => 'Issued' }
|
143
|
+
end
|
144
|
+
end
|
145
|
+
date_updated = Array.wrap(query.to_h['crm_item']).find { |cr| cr['name'] == 'last-update' }
|
146
|
+
if date_updated.present?
|
147
|
+
date_updated = { 'date' => date_updated.fetch('__content__', nil),
|
148
|
+
'dateType' => 'Updated' }
|
126
149
|
end
|
127
|
-
date_updated = Array.wrap(query.to_h["crm_item"]).find { |cr| cr["name"] == "last-update" }
|
128
|
-
date_updated = { "date" => date_updated.fetch("__content__", nil), "dateType" => "Updated" } if date_updated.present?
|
129
150
|
|
130
|
-
date_registered = Array.wrap(query.to_h[
|
131
|
-
|
151
|
+
date_registered = Array.wrap(query.to_h['crm_item']).find do |cr|
|
152
|
+
cr['name'] == 'deposit-timestamp'
|
153
|
+
end
|
154
|
+
if date_registered.present?
|
155
|
+
date_registered = get_datetime_from_time(date_registered.fetch('__content__', nil))
|
156
|
+
end
|
132
157
|
|
133
158
|
# check that date is valid iso8601 date
|
134
|
-
date_published = nil unless Date.edtf(date_published.to_h[
|
135
|
-
date_updated = nil unless Date.edtf(date_updated.to_h[
|
159
|
+
date_published = nil unless Date.edtf(date_published.to_h['date']).present?
|
160
|
+
date_updated = nil unless Date.edtf(date_updated.to_h['date']).present?
|
136
161
|
|
137
162
|
dates = [date_published, date_updated].compact
|
138
|
-
publication_year = date_published.to_h.fetch(
|
163
|
+
publication_year = date_published.to_h.fetch('date', '')[0..3].presence
|
139
164
|
|
140
|
-
state = meta.present? || read_options.present? ?
|
165
|
+
state = meta.present? || read_options.present? ? 'findable' : 'not_found'
|
141
166
|
|
142
167
|
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
|
143
168
|
|
144
169
|
container = if journal_metadata.present?
|
145
|
-
issn = normalize_issn(journal_metadata.to_h.fetch(
|
146
|
-
|
147
|
-
{
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
170
|
+
issn = normalize_issn(journal_metadata.to_h.fetch('issn', nil))
|
171
|
+
|
172
|
+
{ 'type' => 'Journal',
|
173
|
+
'identifier' => issn,
|
174
|
+
'identifierType' => issn.present? ? 'ISSN' : nil,
|
175
|
+
'title' => parse_attributes(journal_metadata.to_h['full_title']),
|
176
|
+
'volume' => parse_attributes(journal_issue.dig('journal_volume', 'volume')),
|
177
|
+
'issue' => parse_attributes(journal_issue['issue']),
|
178
|
+
'firstPage' => bibliographic_metadata.dig('pages',
|
179
|
+
'first_page') || parse_attributes(journal_article.to_h.dig('publisher_item', 'item_number'),
|
180
|
+
first: true),
|
181
|
+
'lastPage' => bibliographic_metadata.dig('pages', 'last_page') }.compact
|
155
182
|
|
156
183
|
# By using book_metadata, we can account for where resource_type is `BookChapter` and not assume its a whole book
|
157
184
|
elsif book_metadata.present?
|
158
185
|
identifiers = crossref_alternate_identifiers(book_metadata)
|
159
186
|
|
160
187
|
{
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
188
|
+
'type' => 'Book',
|
189
|
+
'title' => book_metadata.dig('titles', 'title'),
|
190
|
+
'firstPage' => bibliographic_metadata.dig('pages', 'first_page'),
|
191
|
+
'lastPage' => bibliographic_metadata.dig('pages', 'last_page'),
|
192
|
+
'identifiers' => identifiers
|
166
193
|
}.compact
|
167
194
|
|
168
|
-
elsif book_series_metadata.to_h.fetch(
|
169
|
-
issn = normalize_issn(book_series_metadata.dig(
|
195
|
+
elsif book_series_metadata.to_h.fetch('series_metadata', nil).present?
|
196
|
+
issn = normalize_issn(book_series_metadata.dig('series_metadata', 'issn'))
|
170
197
|
|
171
|
-
{
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
198
|
+
{ 'type' => 'Book Series',
|
199
|
+
'identifier' => issn,
|
200
|
+
'identifierType' => issn.present? ? 'ISSN' : nil,
|
201
|
+
'title' => book_series_metadata.dig('series_metadata', 'titles', 'title'),
|
202
|
+
'volume' => bibliographic_metadata.fetch('volume', nil) }.compact
|
176
203
|
end
|
177
204
|
|
178
|
-
id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig(
|
205
|
+
id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig('doi_data',
|
206
|
+
'doi'))
|
179
207
|
|
180
208
|
# Let sections override this in case of alternative metadata structures, such as book chapters, which
|
181
209
|
# have their meta inside `content_item`, but the main book indentifers inside of `book_metadata`
|
182
210
|
identifiers ||= crossref_alternate_identifiers(bibliographic_metadata)
|
183
211
|
|
184
|
-
{
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
212
|
+
{ 'id' => id,
|
213
|
+
'types' => types,
|
214
|
+
'doi' => doi_from_url(id),
|
215
|
+
'url' => parse_attributes(bibliographic_metadata.dig('doi_data', 'resource'),
|
216
|
+
first: true),
|
217
|
+
'titles' => titles,
|
218
|
+
'identifiers' => identifiers,
|
219
|
+
'creators' => crossref_people(bibliographic_metadata, 'author'),
|
220
|
+
'contributors' => crossref_people(bibliographic_metadata, 'editor'),
|
221
|
+
'funding_references' => crossref_funding_reference(program_metadata),
|
222
|
+
'publisher' => publisher,
|
223
|
+
'container' => container,
|
224
|
+
'agency' => agency = options[:ra] || 'crossref',
|
225
|
+
'related_identifiers' => related_identifiers,
|
226
|
+
'dates' => dates,
|
227
|
+
'publication_year' => publication_year,
|
228
|
+
'descriptions' => crossref_description(bibliographic_metadata),
|
229
|
+
'rights_list' => crossref_license(program_metadata),
|
230
|
+
'version_info' => nil,
|
231
|
+
'subjects' => nil,
|
232
|
+
'language' => nil,
|
233
|
+
'sizes' => nil,
|
234
|
+
'schema_version' => 'http://datacite.org/schema/kernel-4',
|
235
|
+
'state' => state,
|
236
|
+
'date_registered' => date_registered }.merge(read_options)
|
209
237
|
end
|
210
238
|
|
211
239
|
def crossref_alternate_identifiers(bibliographic_metadata)
|
212
|
-
if bibliographic_metadata.dig(
|
213
|
-
Array.wrap(bibliographic_metadata.dig(
|
240
|
+
if bibliographic_metadata.dig('publisher_item', 'item_number').present?
|
241
|
+
Array.wrap(bibliographic_metadata.dig('publisher_item', 'item_number')).map do |item|
|
214
242
|
if item.is_a?(String)
|
215
|
-
{
|
216
|
-
|
243
|
+
{ 'identifier' => item,
|
244
|
+
'identifierType' => 'Publisher ID' }
|
217
245
|
else
|
218
|
-
{
|
219
|
-
|
246
|
+
{ 'identifier' => item.fetch('__content__', nil),
|
247
|
+
'identifierType' => item.fetch('item_number_type', nil) || 'Publisher ID' }
|
220
248
|
end
|
221
249
|
end
|
222
|
-
elsif parse_attributes(bibliographic_metadata.fetch(
|
223
|
-
[{
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
250
|
+
elsif parse_attributes(bibliographic_metadata.fetch('item_number', nil)).present?
|
251
|
+
[{ 'identifier' => parse_attributes(bibliographic_metadata.fetch('item_number', nil)),
|
252
|
+
'identifierType' => parse_attributes(bibliographic_metadata.dig('item_number',
|
253
|
+
'item_number_type')) || 'Publisher ID' }]
|
254
|
+
elsif parse_attributes(bibliographic_metadata.fetch('isbn', nil)).present?
|
255
|
+
[{ 'identifier' => parse_attributes(bibliographic_metadata.fetch('isbn', nil), first: true),
|
256
|
+
'identifierType' => 'ISBN' }]
|
228
257
|
else
|
229
258
|
[]
|
230
259
|
end
|
231
260
|
end
|
232
261
|
|
233
262
|
def crossref_description(bibliographic_metadata)
|
234
|
-
abstract = Array.wrap(bibliographic_metadata
|
235
|
-
{
|
263
|
+
abstract = Array.wrap(bibliographic_metadata['abstract']).map do |r|
|
264
|
+
{ 'descriptionType' => 'Abstract',
|
265
|
+
'description' => sanitize(parse_attributes(r, content: 'p')) }.compact
|
236
266
|
end
|
237
267
|
|
238
|
-
description = Array.wrap(bibliographic_metadata
|
239
|
-
{
|
268
|
+
description = Array.wrap(bibliographic_metadata['description']).map do |r|
|
269
|
+
{ 'descriptionType' => 'Other', 'description' => sanitize(parse_attributes(r)) }.compact
|
240
270
|
end
|
241
271
|
|
242
272
|
(abstract + description)
|
243
273
|
end
|
244
274
|
|
245
275
|
def crossref_license(program_metadata)
|
246
|
-
access_indicator = Array.wrap(program_metadata).find { |m| m[
|
276
|
+
access_indicator = Array.wrap(program_metadata).find { |m| m['name'] == 'AccessIndicators' }
|
247
277
|
if access_indicator.present?
|
248
|
-
Array.wrap(access_indicator[
|
249
|
-
hsh_to_spdx(
|
278
|
+
Array.wrap(access_indicator['license_ref']).map do |license|
|
279
|
+
hsh_to_spdx('rightsURI' => parse_attributes(license))
|
250
280
|
end.uniq
|
251
281
|
else
|
252
282
|
[]
|
@@ -254,46 +284,61 @@ module Briard
|
|
254
284
|
end
|
255
285
|
|
256
286
|
def crossref_people(bibliographic_metadata, contributor_role)
|
257
|
-
person = bibliographic_metadata.dig(
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
287
|
+
person = bibliographic_metadata.dig('contributors',
|
288
|
+
'person_name') || bibliographic_metadata['person_name']
|
289
|
+
organization = Array.wrap(bibliographic_metadata.dig('contributors', 'organization'))
|
290
|
+
if contributor_role == 'author' && Array.wrap(person).select do |a|
|
291
|
+
a['contributor_role'] == 'author'
|
292
|
+
end.blank? && Array.wrap(organization).select do |a|
|
293
|
+
a['contributor_role'] == 'author'
|
294
|
+
end.blank?
|
295
|
+
person = [{ 'name' => ':(unav)', 'contributor_role' => 'author' }]
|
296
|
+
end
|
297
|
+
|
298
|
+
(Array.wrap(person) + Array.wrap(organization)).select do |a|
|
299
|
+
a['contributor_role'] == contributor_role
|
300
|
+
end.map do |a|
|
301
|
+
name_identifiers = if normalize_orcid(parse_attributes(a['ORCID'])).present?
|
302
|
+
[{
|
303
|
+
'nameIdentifier' => normalize_orcid(parse_attributes(a['ORCID'])), 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org'
|
304
|
+
}]
|
305
|
+
end
|
306
|
+
if a['surname'].present? || a['given_name'].present? || name_identifiers.present?
|
307
|
+
given_name = parse_attributes(a['given_name'])
|
308
|
+
family_name = parse_attributes(a['surname'])
|
309
|
+
affiliation = Array.wrap(a['affiliation']).map do |a|
|
267
310
|
if a.is_a?(Hash)
|
268
311
|
a
|
269
|
-
elsif a.is_a?(Hash) && a.key?(
|
312
|
+
elsif a.is_a?(Hash) && a.key?('__content__') && a['__content__'].strip.blank?
|
270
313
|
nil
|
271
|
-
elsif a.is_a?(Hash) && a.key?(
|
272
|
-
{
|
314
|
+
elsif a.is_a?(Hash) && a.key?('__content__')
|
315
|
+
{ 'name' => a['__content__'] }
|
273
316
|
elsif a.strip.blank?
|
274
317
|
nil
|
275
318
|
elsif a.is_a?(String)
|
276
|
-
|
319
|
+
{ 'name' => a }
|
277
320
|
end
|
278
321
|
end.compact
|
279
322
|
|
280
|
-
{
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
323
|
+
{ 'nameType' => 'Personal',
|
324
|
+
'nameIdentifiers' => name_identifiers,
|
325
|
+
'name' => [family_name, given_name].compact.join(', '),
|
326
|
+
'givenName' => given_name,
|
327
|
+
'familyName' => family_name,
|
328
|
+
'affiliation' => affiliation.presence,
|
329
|
+
'contributorType' => contributor_role == 'editor' ? 'Editor' : nil }.compact
|
287
330
|
else
|
288
|
-
{
|
289
|
-
|
331
|
+
{ 'nameType' => 'Organizational',
|
332
|
+
'name' => a['name'] || a['__content__'] }
|
290
333
|
end
|
291
334
|
end
|
292
335
|
end
|
293
336
|
|
294
337
|
def crossref_funding_reference(program_metadata)
|
295
|
-
fundref = Array.wrap(program_metadata).find { |a| a[
|
296
|
-
Array.wrap(fundref.fetch(
|
338
|
+
fundref = Array.wrap(program_metadata).find { |a| a['name'] == 'fundref' } || {}
|
339
|
+
Array.wrap(fundref.fetch('assertion', [])).select do |a|
|
340
|
+
a['name'] == 'fundgroup' && a['assertion'].present?
|
341
|
+
end.map do |f|
|
297
342
|
funder_identifier = nil
|
298
343
|
funder_identifier_type = nil
|
299
344
|
funder_name = nil
|
@@ -301,66 +346,60 @@ module Briard
|
|
301
346
|
award_number = nil
|
302
347
|
award_uri = nil
|
303
348
|
|
304
|
-
Array.wrap(f.fetch(
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
349
|
+
Array.wrap(f.fetch('assertion')).each do |a|
|
350
|
+
case a.fetch('name')
|
351
|
+
when 'award_number'
|
352
|
+
award_number = a.fetch('__content__', nil)
|
353
|
+
award_uri = a.fetch('awardURI', nil)
|
354
|
+
when 'funder_name'
|
355
|
+
funder_name = a.fetch('__content__', nil).to_s.squish.presence
|
356
|
+
funder_identifier = validate_funder_doi(a.dig('assertion', '__content__'))
|
357
|
+
funder_identifier_type = 'Crossref Funder ID' if funder_identifier.present?
|
312
358
|
end
|
313
359
|
end
|
314
360
|
|
315
361
|
# funder_name is required in DataCite
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
nil
|
325
|
-
end
|
362
|
+
next unless funder_name.present?
|
363
|
+
|
364
|
+
{ 'funderIdentifier' => funder_identifier,
|
365
|
+
'funderIdentifierType' => funder_identifier_type,
|
366
|
+
'funderName' => funder_name,
|
367
|
+
'awardTitle' => award_title,
|
368
|
+
'awardNumber' => award_number,
|
369
|
+
'awardUri' => award_uri }.compact
|
326
370
|
end.compact
|
327
371
|
end
|
328
372
|
|
329
373
|
def crossref_date_published(bibliographic_metadata)
|
330
|
-
pub_date = Array.wrap(bibliographic_metadata.fetch(
|
331
|
-
|
374
|
+
pub_date = Array.wrap(bibliographic_metadata.fetch('publication_date', nil)).presence ||
|
375
|
+
Array.wrap(bibliographic_metadata.fetch('acceptance_date', nil))
|
332
376
|
if pub_date.present?
|
333
|
-
get_date_from_parts(pub_date.first[
|
334
|
-
|
335
|
-
nil
|
377
|
+
get_date_from_parts(pub_date.first['year'], pub_date.first['month'],
|
378
|
+
pub_date.first['day'])
|
336
379
|
end
|
337
380
|
end
|
338
381
|
|
339
382
|
def crossref_is_part_of(model_metadata)
|
340
|
-
if model_metadata.present? && model_metadata.fetch(
|
341
|
-
{
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
elsif model_metadata.present? && model_metadata.fetch(
|
346
|
-
{
|
347
|
-
|
348
|
-
|
349
|
-
else
|
350
|
-
nil
|
383
|
+
if model_metadata.present? && model_metadata.fetch('issn', nil).present?
|
384
|
+
{ 'relatedIdentifier' => normalize_issn(model_metadata.fetch('issn', nil)),
|
385
|
+
'relationType' => 'IsPartOf',
|
386
|
+
'relatedIdentifierType' => 'ISSN',
|
387
|
+
'resourceTypeGeneral' => 'Collection' }.compact
|
388
|
+
elsif model_metadata.present? && model_metadata.fetch('relatedIdentifier', nil).present?
|
389
|
+
{ 'relatedIdentifier' => model_metadata.fetch('relatedIdentifier', nil),
|
390
|
+
'relationType' => 'IsPartOf',
|
391
|
+
'relatedIdentifierType' => 'DOI' }.compact
|
351
392
|
end
|
352
393
|
end
|
353
394
|
|
354
395
|
def crossref_references(bibliographic_metadata)
|
355
|
-
refs = bibliographic_metadata.dig(
|
356
|
-
Array.wrap(refs).select { |a| a[
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
nil
|
363
|
-
end
|
396
|
+
refs = bibliographic_metadata.dig('citation_list', 'citation')
|
397
|
+
Array.wrap(refs).select { |a| a['doi'].present? }.map do |c|
|
398
|
+
next unless c['doi'].present?
|
399
|
+
|
400
|
+
{ 'relatedIdentifier' => parse_attributes(c['doi']).downcase,
|
401
|
+
'relationType' => 'References',
|
402
|
+
'relatedIdentifierType' => 'DOI' }.compact
|
364
403
|
end.compact.unwrap
|
365
404
|
end
|
366
405
|
end
|