relaton-doi 1.20.1 → 2.0.0.pre.alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/CLAUDE.md +45 -0
- data/README.adoc +24 -25
- data/lib/relaton/doi/crossref.rb +64 -0
- data/lib/relaton/doi/parser.rb +827 -0
- data/lib/relaton/doi/processor.rb +64 -0
- data/lib/relaton/doi/util.rb +8 -0
- data/lib/relaton/doi/version.rb +7 -0
- data/lib/relaton/doi.rb +24 -0
- data/relaton-doi.gemspec +7 -8
- metadata +19 -35
- data/lib/relaton_doi/crossref.rb +0 -62
- data/lib/relaton_doi/parser.rb +0 -806
- data/lib/relaton_doi/processor.rb +0 -57
- data/lib/relaton_doi/util.rb +0 -6
- data/lib/relaton_doi/version.rb +0 -5
- data/lib/relaton_doi.rb +0 -27
|
@@ -0,0 +1,827 @@
|
|
|
1
|
+
module Relaton
|
|
2
|
+
module Doi
|
|
3
|
+
class Parser
|
|
4
|
+
COUNTRIES = %w[USA].freeze
|
|
5
|
+
|
|
6
|
+
TYPES = {
|
|
7
|
+
"book-chapter" => "inbook",
|
|
8
|
+
"book-part" => "inbook",
|
|
9
|
+
"book-section" => "inbook",
|
|
10
|
+
"book-series" => "book",
|
|
11
|
+
"book-set" => "book",
|
|
12
|
+
"book-track" => "inbook",
|
|
13
|
+
"component" => "misc",
|
|
14
|
+
"database" => "dataset",
|
|
15
|
+
"dissertation" => "thesis",
|
|
16
|
+
"edited-book" => "book",
|
|
17
|
+
"grant" => "misc",
|
|
18
|
+
"journal-article" => "article",
|
|
19
|
+
"journal-issue" => "article",
|
|
20
|
+
"journal-volume" => "journal",
|
|
21
|
+
"monograph" => "book",
|
|
22
|
+
"other" => "misc",
|
|
23
|
+
"peer-review" => "article",
|
|
24
|
+
"posted-content" => "dataset",
|
|
25
|
+
"proceedings-article" => "inproceedings",
|
|
26
|
+
"proceedings-series" => "proceedings",
|
|
27
|
+
"reference-book" => "book",
|
|
28
|
+
"reference-entry" => "inbook",
|
|
29
|
+
"report-component" => "techreport",
|
|
30
|
+
"report-series" => "techreport",
|
|
31
|
+
"report" => "techreport",
|
|
32
|
+
}.freeze
|
|
33
|
+
|
|
34
|
+
# Maps Crossref relation-type identifiers to relaton relation types
# (used by #relation_type; unmapped keys fall back to "related" there).
# NOTE(review): constant name is misspelled ("REALATION_TYPES"); renaming
# would break the lookup in #relation_type, so it is kept as-is.
REALATION_TYPES = {
  "is-cited-by" => "isCitedIn",
  "belongs-to" => "related",
  "is-child-of" => "includedIn",
  "is-expression-of" => "expressionOf",
  "has-expression" => "hasExpression",
  "is-manifestation-of" => "manifestationOf",
  "is-manuscript-of" => "draftOf",
  "has-manuscript" => "hasDraft",
  "is-preprint-of" => "draftOf",
  "has-preprint" => "hasDraft",
  "is-replaced-by" => "obsoletedBy",
  "replaces" => "obsoletes",
  "is-translation-of" => "translatedFrom",
  "has-translation" => "hasTranslation",
  "is-version-of" => "editionOf",
  "has-version" => "hasEdition",
  "is-based-on" => "updates",
  "is-basis-for" => "updatedBy",
  "is-comment-on" => "commentaryOf",
  "has-comment" => "hasCommentary",
  "is-continued-by" => "hasSuccessor",
  "continues" => "successorOf",
  "is-derived-from" => "derives",
  "has-derivation" => "derivedFrom",
  "is-documented-by" => "describedBy",
  "documents" => "describes",
  "is-part-of" => "partOf",
  "has-part" => "hasPart",
  "is-review-of" => "reviewOf",
  "has-review" => "hasReview",
  "references" => "cites",
  "is-referenced-by" => "isCitedIn",
  "requires" => "hasComplement",
  "is-required-by" => "complementOf",
  "is-supplement-to" => "complementOf",
  "is-supplemented-by" => "hasComplement",
}.freeze
|
|
72
|
+
|
|
73
|
+
ATTRS = %i[type fetched title docidentifier date source abstract contributor place
|
|
74
|
+
ext relation extent series medium].freeze
|
|
75
|
+
|
|
76
|
+
CROSSREF_API_URL = "https://api.crossref.org/works?query=%{query}&filter=%{filter}".freeze
|
|
77
|
+
MAX_RETRIES = 3
|
|
78
|
+
|
|
79
|
+
#
|
|
80
|
+
# Initialize instance.
|
|
81
|
+
#
|
|
82
|
+
# @param [Hash] src The source hash.
|
|
83
|
+
#
|
|
84
|
+
def initialize(src)
|
|
85
|
+
@src = src
|
|
86
|
+
@item = {}
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
#
|
|
90
|
+
# Initialize instance and parse the source hash.
|
|
91
|
+
#
|
|
92
|
+
# @param [Hash] src The source hash.
|
|
93
|
+
#
|
|
94
|
+
# @return [Bib::ItemData] The bibitem.
|
|
95
|
+
#
|
|
96
|
+
def self.parse(src)
|
|
97
|
+
new(src).parse
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
#
|
|
101
|
+
# Parse the source hash.
|
|
102
|
+
#
|
|
103
|
+
# @return [Bib::ItemData] The bibitem.
|
|
104
|
+
#
|
|
105
|
+
def parse
|
|
106
|
+
ATTRS.each { |m| @item[m] = send "parse_#{m}" }
|
|
107
|
+
create_bibitem @src["DOI"], @item
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
#
|
|
111
|
+
# Create a bibitem from the bibitem hash.
|
|
112
|
+
#
|
|
113
|
+
# @param [String] doi The DOI.
|
|
114
|
+
# @param [Hash] bibitem The bibitem hash.
|
|
115
|
+
#
|
|
116
|
+
# @return [Bib::ItemData] The bibitem.
|
|
117
|
+
#
|
|
118
|
+
def create_bibitem(doi, bibitem) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
|
|
119
|
+
doctype_content = @src["type"]
|
|
120
|
+
case doi
|
|
121
|
+
when /\/nist/
|
|
122
|
+
bibitem[:ext] = Nist::Ext.new(doctype: Nist::Doctype.new(content: doctype_content))
|
|
123
|
+
Nist::ItemData.new(**bibitem)
|
|
124
|
+
when /\/rfc\d+/
|
|
125
|
+
bibitem[:ext] = Ietf::Ext.new(doctype: Ietf::Doctype.new(content: doctype_content))
|
|
126
|
+
Ietf::ItemData.new(**bibitem)
|
|
127
|
+
when /\/0026-1394\//
|
|
128
|
+
bibitem[:ext] = Bipm::Ext.new(doctype: Bipm::Doctype.new(content: doctype_content))
|
|
129
|
+
Bipm::ItemData.new(**bibitem)
|
|
130
|
+
when /\/ieee/
|
|
131
|
+
bibitem[:ext] = Ieee::Ext.new(doctype: Ieee::Doctype.new(content: doctype_content))
|
|
132
|
+
Ieee::ItemData.new(**bibitem)
|
|
133
|
+
else Bib::ItemData.new(**bibitem)
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
#
# Map the Crossref document type to a bibliographic item type.
#
# @return [String] the mapped type from TYPES, or the raw Crossref
#   type string when no mapping exists
#
def parse_type
  crossref_type = @src["type"]
  TYPES.fetch(crossref_type, crossref_type)
end
|
|
145
|
+
|
|
146
|
+
#
|
|
147
|
+
# Parse the ext element with doctype.
|
|
148
|
+
#
|
|
149
|
+
# @return [Bib::Ext] The ext element.
|
|
150
|
+
#
|
|
151
|
+
def parse_ext
|
|
152
|
+
Bib::Ext.new doctype: Bib::Doctype.new(content: @src["type"])
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
#
# Date on which this record was fetched — always today.
#
# @return [String] today's date in ISO 8601 form ("YYYY-MM-DD")
#
def parse_fetched
  Date.today.iso8601
end
|
|
163
|
+
|
|
164
|
+
#
|
|
165
|
+
# Parse titles from the source hash.
|
|
166
|
+
#
|
|
167
|
+
# @return [Array<Bib::Title>] The titles.
|
|
168
|
+
#
|
|
169
|
+
def parse_title # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
170
|
+
if @src["title"].is_a?(Array) && @src["title"].any?
|
|
171
|
+
main_sub_titles
|
|
172
|
+
elsif @src["project"].is_a?(Array) && @src["project"].any?
|
|
173
|
+
project_titles
|
|
174
|
+
elsif @src["container-title"].is_a?(Array) && @src["container-title"].size > 1
|
|
175
|
+
@src["container-title"][0..-2].map { |t| create_title t }
|
|
176
|
+
else []
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
#
|
|
181
|
+
# Parse main and subtitle from the source hash.
|
|
182
|
+
#
|
|
183
|
+
# @return [Array<Bib::Title>] The titles.
|
|
184
|
+
#
|
|
185
|
+
def main_sub_titles
|
|
186
|
+
title = @src["title"].map { |t| create_title t }
|
|
187
|
+
Array(@src["subtitle"]).each { |t| title << create_title(t, "subtitle") }
|
|
188
|
+
Array(@src["short-title"]).each { |t| title << create_title(t, "short") }
|
|
189
|
+
title
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
#
|
|
193
|
+
# Fetch titles from the projects.
|
|
194
|
+
#
|
|
195
|
+
# @return [Array<Bib::Title>] The titles.
|
|
196
|
+
#
|
|
197
|
+
def project_titles
|
|
198
|
+
Array(@src["project"]).reduce([]) do |memo, proj|
|
|
199
|
+
memo + Array(proj["project-title"]).map { |t| create_title t["title"] }
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
#
|
|
204
|
+
# Create a title from the title and type.
|
|
205
|
+
#
|
|
206
|
+
# @param [String] title The title content.
|
|
207
|
+
# @param [String] type The title type. Defaults to "main".
|
|
208
|
+
#
|
|
209
|
+
# @return [Bib::Title] The title.
|
|
210
|
+
#
|
|
211
|
+
def create_title(title, type = "main")
|
|
212
|
+
cnt = str_cleanup title
|
|
213
|
+
Bib::Title.new type: type, content: cnt, script: "Latn"
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
#
|
|
217
|
+
# Parse a docidentifier from the source hash.
|
|
218
|
+
#
|
|
219
|
+
# @return [Array<Bib::Docidentifier>] The docidentifier.
|
|
220
|
+
#
|
|
221
|
+
def parse_docidentifier
|
|
222
|
+
%w[DOI ISBN ISSN].each_with_object([]) do |type, obj|
|
|
223
|
+
prm = type == "DOI"
|
|
224
|
+
Array(@src[type]).each do |id|
|
|
225
|
+
t = issn_type(type, id)
|
|
226
|
+
obj << Bib::Docidentifier.new(type: t, content: id, primary: prm)
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
#
# Refine the identifier type for ISSN identifiers.
#
# Non-ISSN types are returned unchanged. For ISSN identifiers, the
# source's "issn-type" list is consulted to qualify the type (e.g.
# "issn.print"); otherwise the lowercase "issn" is used.
#
# @param [String] type identifier type
# @param [String] id identifier
#
# @return [String] identifier type
#
def issn_type(type, id)
  return type unless type == "ISSN"

  entry = (@src["issn-type"] || []).find { |it| it["value"] == id }
  subtype = entry && entry["type"]
  subtype ? "issn.#{subtype}" : type.downcase
end
|
|
245
|
+
|
|
246
|
+
#
|
|
247
|
+
# Parce dates from the source hash.
|
|
248
|
+
#
|
|
249
|
+
# @return [Array<Bib::Date>] The dates.
|
|
250
|
+
#
|
|
251
|
+
def parse_date # rubocop:disable Metrics/CyclomaticComplexity
|
|
252
|
+
dates = %w[issued published approved].each_with_object([]) do |type, obj|
|
|
253
|
+
next unless @src.dig(type, "date-parts")&.first&.compact&.any?
|
|
254
|
+
|
|
255
|
+
obj << Bib::Date.new(type: type, at: date_type(type))
|
|
256
|
+
end
|
|
257
|
+
if dates.none?
|
|
258
|
+
dates << Bib::Date.new(type: "created", at: date_type("created"))
|
|
259
|
+
end
|
|
260
|
+
dates
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
#
# Join the date parts for the given date type into a string.
#
# Each part is zero-padded to at least two characters, giving
# "YYYY-MM-DD" (or fewer segments when parts are missing).
#
# @param [String] type The date type key in the source hash.
#
# @return [String] The joined date string.
#
def date_type(type)
  parts = @src[type]["date-parts"].first
  padded = parts.map do |part|
    part.to_s.rjust(2, "0")
  end
  padded.join "-"
end
|
|
273
|
+
|
|
274
|
+
#
|
|
275
|
+
# Parse source URIs from the source hash.
|
|
276
|
+
#
|
|
277
|
+
# @return [Array<Bib::Uri>] The source URIs.
|
|
278
|
+
#
|
|
279
|
+
def parse_source # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
|
|
280
|
+
disprefered_links = %w[similarity-checking text-mining]
|
|
281
|
+
links = []
|
|
282
|
+
if @src["URL"]
|
|
283
|
+
links << Bib::Uri.new(type: "DOI", content: @src["URL"])
|
|
284
|
+
end
|
|
285
|
+
[@src["link"], @src.dig("resource", "primary")].flatten.compact.each do |l|
|
|
286
|
+
next if disprefered_links.include? l["intended-application"]
|
|
287
|
+
|
|
288
|
+
type = case l["URL"]
|
|
289
|
+
when /\.pdf$/ then "pdf"
|
|
290
|
+
# when /\/rfc\d+$|iopscience\.iop\.org|ieeexplore\.ieee\.org/
|
|
291
|
+
else "src"
|
|
292
|
+
end
|
|
293
|
+
links << Bib::Uri.new(type: type, content: l["URL"]) # if type
|
|
294
|
+
end
|
|
295
|
+
links
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
#
|
|
299
|
+
# Parse abstract from the source hash.
|
|
300
|
+
#
|
|
301
|
+
# @return [Array<Bib::LocalizedMarkedUpString>] The abstract.
|
|
302
|
+
#
|
|
303
|
+
def parse_abstract
|
|
304
|
+
return [] unless @src["abstract"]
|
|
305
|
+
|
|
306
|
+
content = @src["abstract"]
|
|
307
|
+
abstract = Bib::Abstract.new(
|
|
308
|
+
content: content, language: "en", script: "Latn",
|
|
309
|
+
)
|
|
310
|
+
[abstract]
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
#
|
|
314
|
+
# Parse contributors from the source hash.
|
|
315
|
+
#
|
|
316
|
+
# @return [Array<Bib::Contributor>] The contributors.
|
|
317
|
+
#
|
|
318
|
+
def parse_contributor
|
|
319
|
+
contribs = author_investigators
|
|
320
|
+
contribs += authors_editors_translators
|
|
321
|
+
contribs += contribs_from_parent(contribs)
|
|
322
|
+
contribs << contributor(org_publisher, "publisher")
|
|
323
|
+
contribs += org_aurhorizer
|
|
324
|
+
contribs + org_enabler
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
#
|
|
328
|
+
# Create authors investigators from the source hash.
|
|
329
|
+
#
|
|
330
|
+
# @return [Array<Bib::Contributor>] The authors investigators.
|
|
331
|
+
#
|
|
332
|
+
def author_investigators
|
|
333
|
+
Array(@src["project"]).reduce([]) do |memo, proj|
|
|
334
|
+
memo + create_investigators(proj, "lead-investigator") +
|
|
335
|
+
create_investigators(proj, "investigator")
|
|
336
|
+
end
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
#
|
|
340
|
+
# Create investigators from the project.
|
|
341
|
+
#
|
|
342
|
+
# @param [Hash] project The project hash.
|
|
343
|
+
# @param [String] type The investigator type. "lead-investigator" or "investigator".
|
|
344
|
+
#
|
|
345
|
+
# @return [Array<Bib::Contributor>] The investigators.
|
|
346
|
+
#
|
|
347
|
+
def create_investigators(project, type)
|
|
348
|
+
description = type.gsub("-", " ")
|
|
349
|
+
Array(project[type]).map do |inv|
|
|
350
|
+
contributor(create_person(inv), "author", description)
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
#
|
|
355
|
+
# Create authors editors translators from the source hash.
|
|
356
|
+
#
|
|
357
|
+
# @return [Array<Bib::Contributor>] The authors editors translators.
|
|
358
|
+
#
|
|
359
|
+
def authors_editors_translators
|
|
360
|
+
%w[author editor translator].each_with_object([]) do |type, a|
|
|
361
|
+
@src[type]&.each do |c|
|
|
362
|
+
contrib = if c["family"]
|
|
363
|
+
create_person(c)
|
|
364
|
+
else
|
|
365
|
+
create_org(str_cleanup(c["name"]))
|
|
366
|
+
end
|
|
367
|
+
a << contributor(contrib, type)
|
|
368
|
+
end
|
|
369
|
+
end
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
#
|
|
373
|
+
# Fetch authors and editors from parent if they are not present in the book part.
|
|
374
|
+
#
|
|
375
|
+
# @param [Array<Bib::Contributor>] contribs present contributors
|
|
376
|
+
#
|
|
377
|
+
# @return [Array<Bib::Contributor>] contributors with authors and editors from parent
|
|
378
|
+
#
|
|
379
|
+
def contribs_from_parent(contribs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
380
|
+
return [] unless %w[inbook inproceedings dataset].include?(parse_type) && @src["container-title"]
|
|
381
|
+
|
|
382
|
+
has_authors = contribs.any? { |c| c.role&.any? { |r| r.type == "author" } }
|
|
383
|
+
has_editors = contribs.any? { |c| c.role&.any? { |r| r.type == "editor" } }
|
|
384
|
+
return [] if has_authors && has_editors
|
|
385
|
+
|
|
386
|
+
create_authors_editors(has_authors, "author")
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
#
|
|
390
|
+
# Fetch parent item from Crossref.
|
|
391
|
+
#
|
|
392
|
+
# @return [Hash, nil] parent item
|
|
393
|
+
#
|
|
394
|
+
def parent_item
|
|
395
|
+
@parent_item ||= begin
|
|
396
|
+
query = CGI.escape [@src["container-title"][0], fetch_year].compact.join("+")
|
|
397
|
+
filter = "type:#{%w[book book-set edited-book monograph reference-book].join ',type:'}"
|
|
398
|
+
items = fetch_crossref(query: query, filter: filter)
|
|
399
|
+
items&.detect { |i| i["title"].include? @src["container-title"][0] }
|
|
400
|
+
end
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
#
|
|
404
|
+
# Create authors and editors from parent item.
|
|
405
|
+
#
|
|
406
|
+
# @param [Boolean] has true if authors or editors are present in the book part
|
|
407
|
+
# @param [String] type "author" or "editor"
|
|
408
|
+
#
|
|
409
|
+
# @return [Array<Bib::Contributor>] authors or editors
|
|
410
|
+
#
|
|
411
|
+
def create_authors_editors(has, type)
|
|
412
|
+
return [] if has || !parent_item
|
|
413
|
+
|
|
414
|
+
Array(parent_item[type]).map { |a| contributor(create_person(a), type) }
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
#
|
|
418
|
+
# Cerate an organization publisher from the source hash.
|
|
419
|
+
#
|
|
420
|
+
# @return [Bib::Organization] The organization.
|
|
421
|
+
#
|
|
422
|
+
def org_publisher
|
|
423
|
+
pbr = @src["institution"]&.detect do |i|
|
|
424
|
+
@src["publisher"].include?(i["name"]) ||
|
|
425
|
+
i["name"].include?(@src["publisher"])
|
|
426
|
+
end
|
|
427
|
+
a = pbr["acronym"]&.first if pbr
|
|
428
|
+
create_org(str_cleanup(@src["publisher"]), a)
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
#
# Clean up trailing punctuation and whitespace from a string.
#
# Strips surrounding whitespace, removes any trailing run of commas,
# slashes, or whitespace, then drops a trailing " :".
#
# @param [String] str The string to clean up.
#
# @return [String] The cleaned up string.
#
def str_cleanup(str)
  cleaned = str.strip
  cleaned = cleaned.sub(%r{[,/\s]+$}, "")
  cleaned.sub(/\s:$/, "")
end
|
|
441
|
+
|
|
442
|
+
#
|
|
443
|
+
# Create an organization with properly typed name and abbreviation.
|
|
444
|
+
#
|
|
445
|
+
# @param [String] name The organization name.
|
|
446
|
+
# @param [String, nil] abbreviation The organization abbreviation.
|
|
447
|
+
#
|
|
448
|
+
# @return [Bib::Organization] The organization.
|
|
449
|
+
#
|
|
450
|
+
def create_org(name, abbreviation = nil)
|
|
451
|
+
n = [Bib::TypedLocalizedString.new(content: name)]
|
|
452
|
+
a = abbreviation ? Bib::LocalizedString.new(content: abbreviation) : nil
|
|
453
|
+
Bib::Organization.new name: n, abbreviation: a
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
#
# Parse authorizer contributor from the source hash.
#
# Reads the "standards-body" entry (its "name" and "acronym") and wraps
# it in an organization contributor with role "authorizer". Returns an
# empty array when no standards body is present.
#
# NOTE(review): method name contains a typo ("aurhorizer"); renaming
# would require updating the caller in #parse_contributor, so it is
# kept as-is.
#
# @return [Array<Bib::Contributor>] zero or one authorizer contributors
#
def org_aurhorizer
  return [] unless @src["standards-body"]

  name, acronym = @src["standards-body"].values_at("name", "acronym")
  org = create_org(name, acronym)
  [contributor(org, "authorizer")]
end
|
|
468
|
+
|
|
469
|
+
#
# Parse enabler (funder) contributors from the source hash.
#
# Collects funders from each "project" entry's "funding" list and from
# the top-level "funder" array.
#
# @return [Array<Bib::Contributor>] The enabler contributors.
#
def org_enabler
  from_projects = Array(@src["project"]).each_with_object([]) do |proj, memo|
    # Array() guards against projects without a "funding" key, which
    # previously raised NoMethodError on nil; consistent with the
    # Array() guards used elsewhere in this class.
    Array(proj["funding"]).each do |fund|
      memo << create_enabler(fund.dig("funder", "name"))
    end
  end
  from_projects + Array(@src["funder"]).map { |f| create_enabler f["name"] }
end
|
|
481
|
+
|
|
482
|
+
#
|
|
483
|
+
# Create enabler contributor with type "enabler".
|
|
484
|
+
#
|
|
485
|
+
# @param [String] name The funder name.
|
|
486
|
+
#
|
|
487
|
+
# @return [Bib::Contributor] The enabler contributor.
|
|
488
|
+
#
|
|
489
|
+
def create_enabler(name)
|
|
490
|
+
contributor(create_org(name), "enabler")
|
|
491
|
+
end
|
|
492
|
+
|
|
493
|
+
#
# Create contributor from an entity and a role type.
#
# @param [Bib::Person, Bib::Organization] entity The entity.
# @param [String] type The role type.
# @param [String, nil] description Optional role description.
#
# @return [Bib::Contributor] The contributor.
#
def contributor(entity, type, description = nil)
  # Fixed misspelled parameter name ("descriprion"); the parameter is
  # positional, so call sites are unaffected.
  desc = description ? [Bib::LocalizedMarkedUpString.new(content: description)] : nil
  role = [Bib::Contributor::Role.new(type: type, description: desc)]
  if entity.is_a?(Bib::Person)
    Bib::Contributor.new(role: role, person: entity)
  else
    Bib::Contributor.new(role: role, organization: entity)
  end
end
|
|
510
|
+
|
|
511
|
+
#
|
|
512
|
+
# Create a person from a person hash.
|
|
513
|
+
#
|
|
514
|
+
# @param [Hash] person The person hash.
|
|
515
|
+
#
|
|
516
|
+
# @return [Bib::Person] The person.
|
|
517
|
+
#
|
|
518
|
+
def create_person(person)
|
|
519
|
+
Bib::Person.new(
|
|
520
|
+
name: create_person_name(person),
|
|
521
|
+
affiliation: create_affiliation(person),
|
|
522
|
+
identifier: person_id(person),
|
|
523
|
+
)
|
|
524
|
+
end
|
|
525
|
+
|
|
526
|
+
#
|
|
527
|
+
# Create person affiliations from a person hash.
|
|
528
|
+
#
|
|
529
|
+
# @param [Hash] person The person hash.
|
|
530
|
+
#
|
|
531
|
+
# @return [Array<Bib::Affiliation>] The affiliations.
|
|
532
|
+
#
|
|
533
|
+
def create_affiliation(person)
|
|
534
|
+
(person["affiliation"] || []).map do |a|
|
|
535
|
+
Bib::Affiliation.new organization: create_org(a["name"])
|
|
536
|
+
end
|
|
537
|
+
end
|
|
538
|
+
|
|
539
|
+
#
|
|
540
|
+
# Create a person full name from a person hash.
|
|
541
|
+
#
|
|
542
|
+
# @param [Hash] person The person hash.
|
|
543
|
+
#
|
|
544
|
+
# @return [Bib::FullName] The full name.
|
|
545
|
+
#
|
|
546
|
+
def create_person_name(person)
|
|
547
|
+
surname = titlecase(person["family"])
|
|
548
|
+
sn = Bib::LocalizedString.new(content: surname, language: "en", script: "Latn")
|
|
549
|
+
Bib::FullName.new(
|
|
550
|
+
surname: sn, forename: forename(person), addition: nameaddition(person),
|
|
551
|
+
completename: completename(person), prefix: nameprefix(person)
|
|
552
|
+
)
|
|
553
|
+
end
|
|
554
|
+
|
|
555
|
+
#
# Capitalize each fully upper-case word in a string, leaving words of
# two letters or fewer (and words matching /\.&/) untouched.
#
# @param [String] str the string to transform
#
# @return [String] the transformed string
#
def titlecase(str)
  words = str.split.map do |word|
    all_caps = word.size > 2 && word == word.upcase
    # NOTE(review): /\.&/ matches the literal two-character sequence ".&" —
    # possibly intended as /[.&]/ to skip abbreviations like "J.R." or
    # "AT&T". Behavior preserved as-is; confirm intent.
    if all_caps && !word.match?(/\.&/)
      word.capitalize
    else
      word
    end
  end
  words.join(" ")
end
|
|
572
|
+
|
|
573
|
+
#
|
|
574
|
+
# Create a person name prefix from a person hash.
|
|
575
|
+
#
|
|
576
|
+
# @param [Hash] person The person hash.
|
|
577
|
+
#
|
|
578
|
+
# @return [Array<Bib::LocalizedString>] The name prefix.
|
|
579
|
+
#
|
|
580
|
+
def nameprefix(person)
|
|
581
|
+
return [] unless person["prefix"]
|
|
582
|
+
|
|
583
|
+
[Bib::LocalizedString.new(content: person["prefix"], language: "en", script: "Latn")]
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
#
|
|
587
|
+
# Create a complete name from a person hash.
|
|
588
|
+
#
|
|
589
|
+
# @param [Hash] person The person hash.
|
|
590
|
+
#
|
|
591
|
+
# @return [Bib::LocalizedString] The complete name.
|
|
592
|
+
#
|
|
593
|
+
def completename(person)
|
|
594
|
+
return unless person["name"]
|
|
595
|
+
|
|
596
|
+
Bib::LocalizedString.new(content: person["name"], language: "en", script: "Latn")
|
|
597
|
+
end
|
|
598
|
+
|
|
599
|
+
#
|
|
600
|
+
# Create a forename from a person hash.
|
|
601
|
+
#
|
|
602
|
+
# @param [Hash] person The person hash.
|
|
603
|
+
#
|
|
604
|
+
# @return [Array<Bib::FullNameType::Forename>] The forename.
|
|
605
|
+
#
|
|
606
|
+
def forename(person)
|
|
607
|
+
return [] unless person["given"]
|
|
608
|
+
|
|
609
|
+
fname = titlecase(person["given"])
|
|
610
|
+
[Bib::FullNameType::Forename.new(content: fname, language: "en", script: "Latn")]
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
#
|
|
614
|
+
# Create an addition from a person hash.
|
|
615
|
+
#
|
|
616
|
+
# @param [Hash] person The person hash.
|
|
617
|
+
#
|
|
618
|
+
# @return [Array<Bib::LocalizedString>] The addition.
|
|
619
|
+
#
|
|
620
|
+
def nameaddition(person)
|
|
621
|
+
return [] unless person["suffix"]
|
|
622
|
+
|
|
623
|
+
[Bib::LocalizedString.new(content: person["suffix"], language: "en", script: "Latn")]
|
|
624
|
+
end
|
|
625
|
+
|
|
626
|
+
#
|
|
627
|
+
# Create a person identifier from a person hash.
|
|
628
|
+
#
|
|
629
|
+
# @param [Hash] person The person hash.
|
|
630
|
+
#
|
|
631
|
+
# @return [Array<Bib::Person::Identifier>] The person identifier.
|
|
632
|
+
#
|
|
633
|
+
def person_id(person)
|
|
634
|
+
return [] unless person["ORCID"]
|
|
635
|
+
|
|
636
|
+
[Bib::Person::Identifier.new(type: "orcid", content: person["ORCID"])]
|
|
637
|
+
end
|
|
638
|
+
|
|
639
|
+
#
|
|
640
|
+
# Parse a place from the source hash.
|
|
641
|
+
#
|
|
642
|
+
# @return [Array<Bib::Place>] The place.
|
|
643
|
+
#
|
|
644
|
+
def parse_place # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/AbcSize
|
|
645
|
+
pub_location = @src["publisher-location"] || fetch_location
|
|
646
|
+
return [] unless pub_location
|
|
647
|
+
|
|
648
|
+
pls1, pls2 = pub_location.split(", ")
|
|
649
|
+
pls1 = str_cleanup pls1
|
|
650
|
+
pls2 &&= str_cleanup pls2
|
|
651
|
+
if COUNTRIES.include? pls2
|
|
652
|
+
country = Bib::Place::RegionType.new(content: pls2)
|
|
653
|
+
[Bib::Place.new(city: pls1, country: [country])]
|
|
654
|
+
elsif pls2 && pls2 == pls2&.upcase
|
|
655
|
+
region = Bib::Place::RegionType.new(content: pls2)
|
|
656
|
+
[Bib::Place.new(city: pls1, region: [region])]
|
|
657
|
+
elsif pls1 == pls2 || pls2.nil? || pls2.empty?
|
|
658
|
+
[Bib::Place.new(city: pls1)]
|
|
659
|
+
else
|
|
660
|
+
[Bib::Place.new(city: pls1), Bib::Place.new(city: pls2)]
|
|
661
|
+
end
|
|
662
|
+
end
|
|
663
|
+
|
|
664
|
+
#
|
|
665
|
+
# Fetch location from container.
|
|
666
|
+
#
|
|
667
|
+
# @return [String, nil] The location.
|
|
668
|
+
#
|
|
669
|
+
def fetch_location
|
|
670
|
+
title = @item[:title].first&.content
|
|
671
|
+
qparts = [title, fetch_year, @src["publisher"]]
|
|
672
|
+
query = CGI.escape qparts.compact.join("+").gsub(" ", "+")
|
|
673
|
+
filter = "type:#{%w[book-chapter book-part book-section book-track].join(',type:')}"
|
|
674
|
+
items = fetch_crossref(query: query, filter: filter)
|
|
675
|
+
items&.detect do |i|
|
|
676
|
+
i["publisher-location"] && i["container-title"].include?(title)
|
|
677
|
+
end&.dig("publisher-location")
|
|
678
|
+
end
|
|
679
|
+
|
|
680
|
+
#
|
|
681
|
+
# Parse relations from the source hash.
|
|
682
|
+
#
|
|
683
|
+
# @return [Array<Bib::Relation>] The relations.
|
|
684
|
+
#
|
|
685
|
+
def parse_relation # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
686
|
+
rels = included_in_relation
|
|
687
|
+
@src["relation"].each_with_object(rels) do |(k, v), a|
|
|
688
|
+
type, desc = relation_type k
|
|
689
|
+
Array(v).each do |r|
|
|
690
|
+
rel_item = Crossref.get_by_id r["id"]
|
|
691
|
+
title = rel_item["title"].map { |t| create_title t }
|
|
692
|
+
docid = Bib::Docidentifier.new(content: r["id"], type: "DOI")
|
|
693
|
+
bib = create_bibitem r["id"], title: title, docidentifier: [docid]
|
|
694
|
+
a << Bib::Relation.new(type: type, description: desc, bibitem: bib)
|
|
695
|
+
end
|
|
696
|
+
end
|
|
697
|
+
end
|
|
698
|
+
|
|
699
|
+
#
# Transform a Crossref relation type into a relaton relation type.
#
# @param [String] crtype The crossref relation type.
#
# @return [Array(String, Object)] the relaton relation type and an
#   optional description (nil for mapped types; a
#   Bib::LocalizedMarkedUpString wrapping the raw type for unmapped ones).
#
def relation_type(crtype)
  # The original implementation assigned `desc` inside a `begin` block on
  # the right-hand side of `||`, relying on Ruby's parse-time variable
  # hoisting to make `desc` readable (as nil) when the block never ran.
  # Rewritten explicitly with identical results.
  type = REALATION_TYPES[crtype]
  return [type, nil] if type

  # Unknown Crossref types fall back to the generic "related" type,
  # preserving the original type string as a description.
  ["related", Bib::LocalizedMarkedUpString.new(content: crtype)]
end
|
|
713
|
+
|
|
714
|
+
#
|
|
715
|
+
# Create included in relation.
|
|
716
|
+
#
|
|
717
|
+
# @return [Array<Bib::Relation>] The relations.
|
|
718
|
+
#
|
|
719
|
+
def included_in_relation
|
|
720
|
+
types = %w[
|
|
721
|
+
book book-chapter book-part book-section book-track dataset journal-issue
|
|
722
|
+
journal-value proceedings-article reference-entry report-component
|
|
723
|
+
]
|
|
724
|
+
return [] unless @src["container-title"] && types.include?(@src["type"])
|
|
725
|
+
|
|
726
|
+
@src["container-title"].map do |ct|
|
|
727
|
+
contrib = create_authors_editors false, "editor"
|
|
728
|
+
bib = Bib::ItemBase.new(title: [Bib::Title.new(content: ct)], contributor: contrib)
|
|
729
|
+
Bib::Relation.new(type: "includedIn", bibitem: bib)
|
|
730
|
+
end
|
|
731
|
+
end
|
|
732
|
+
|
|
733
|
+
#
# Fetch the publication year from the source hash.
#
# Tries "published", then "approved", then "created"; raises
# NoMethodError if none of these date hashes is present.
#
# @return [Integer] The year (first element of the first date-parts entry).
#
def fetch_year
  date = @src.values_at("published", "approved", "created").compact.first
  date["date-parts"].first.first
end
|
|
742
|
+
|
|
743
|
+
#
|
|
744
|
+
# Parse an extent from the source hash.
|
|
745
|
+
#
|
|
746
|
+
# @return [Array<Bib::Extent>] The extent.
|
|
747
|
+
#
|
|
748
|
+
def parse_extent # rubocop:disable Metrics/AbcSize
|
|
749
|
+
extent = []
|
|
750
|
+
extent << Bib::Locality.new(type: "volume", reference_from: @src["volume"]) if @src["volume"]
|
|
751
|
+
extent << Bib::Locality.new(type: "issue", reference_from: @src["issue"]) if @src["issue"]
|
|
752
|
+
if @src["page"]
|
|
753
|
+
from, to = @src["page"].split("-")
|
|
754
|
+
extent << Bib::Locality.new(type: "page", reference_from: from, reference_to: to)
|
|
755
|
+
end
|
|
756
|
+
extent.any? ? [Bib::Extent.new(locality: extent)] : []
|
|
757
|
+
end
|
|
758
|
+
|
|
759
|
+
#
|
|
760
|
+
# Parse a series from the source hash.
|
|
761
|
+
#
|
|
762
|
+
# @return [Array<Bib::Series>] The series.
|
|
763
|
+
#
|
|
764
|
+
def parse_series # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
765
|
+
types = %w[inbook incollection inproceedings]
|
|
766
|
+
return [] if !@src["container-title"] || types.include?(@item[:type]) || @src["type"] == "report-component"
|
|
767
|
+
|
|
768
|
+
con_ttl = if main_sub_titles.any? || project_titles.any?
|
|
769
|
+
@src["container-title"]
|
|
770
|
+
elsif @src["container-title"].size > 1
|
|
771
|
+
sct = @src["short-container-title"]&.last
|
|
772
|
+
abbrev = Bib::LocalizedString.new(content: sct) if sct
|
|
773
|
+
@src["container-title"][-1..-1]
|
|
774
|
+
else []
|
|
775
|
+
end
|
|
776
|
+
con_ttl.map do |ct|
|
|
777
|
+
title = Bib::Title.new content: ct
|
|
778
|
+
Bib::Series.new title: [title], abbreviation: abbrev
|
|
779
|
+
end
|
|
780
|
+
end
|
|
781
|
+
|
|
782
|
+
#
|
|
783
|
+
# Parse a medium from the source hash.
|
|
784
|
+
#
|
|
785
|
+
# @return [Bib::Medium, nil] The medium.
|
|
786
|
+
#
|
|
787
|
+
def parse_medium
|
|
788
|
+
genre = @src["degree"]&.first
|
|
789
|
+
return unless genre
|
|
790
|
+
|
|
791
|
+
Bib::Medium.new genre: genre
|
|
792
|
+
end
|
|
793
|
+
|
|
794
|
+
#
# Fetch data from the Crossref API with bounded retry logic.
#
# Builds the request URL from CROSSREF_API_URL, issues a GET via
# Faraday, and interprets the response by status class:
#   2xx -> parsed "message.items" array from the JSON body
#   4xx -> nil (treated as "no results", not an error)
#   other -> Relaton::RequestError
# Network failures (Faraday::Error) are retried up to MAX_RETRIES
# times before raising; malformed JSON raises immediately.
#
# @param [String] query The query string (already URL-escaped by callers).
# @param [String] filter The filter string.
#
# @return [Array<Hash>, nil] Items array from response or nil for 4xx responses.
#
# @raise [Relaton::RequestError] If request fails after retries.
#
def fetch_crossref(query:, filter:) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
  url = format(CROSSREF_API_URL, query: query, filter: filter)
  retries = 0
  begin
    resp = Faraday.get url
    case resp.status
    when 200..299
      JSON.parse(resp.body).dig("message", "items")
    when 400..499
      # Client errors are swallowed deliberately: callers treat nil as
      # "nothing found" (see #parent_item / #fetch_location).
      nil
    else
      raise Relaton::RequestError, "Crossref request failed: #{resp.status} #{resp.body}"
    end
  rescue Faraday::Error => e
    retries += 1
    # Bounded retry: at most MAX_RETRIES additional attempts.
    retry if retries <= MAX_RETRIES
    raise Relaton::RequestError, "Crossref network error after #{MAX_RETRIES} retries: #{e.message}"
  rescue JSON::ParserError => e
    raise Relaton::RequestError, "Crossref JSON parsing error: #{e.message}"
  end
end
|
|
825
|
+
end
|
|
826
|
+
end
|
|
827
|
+
end
|