relaton-ietf 1.7.4 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/README.adoc +30 -4
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +5 -6
- data/grammars/ietf.rng +42 -0
- data/grammars/isodoc.rng +532 -16
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_ietf/committee.rb +8 -0
- data/lib/relaton_ietf/hash_converter.rb +1 -1
- data/lib/relaton_ietf/ietf_bibliographic_item.rb +2 -2
- data/lib/relaton_ietf/ietf_bibliography.rb +5 -1
- data/lib/relaton_ietf/scrapper.rb +354 -340
- data/lib/relaton_ietf/version.rb +1 -1
- data/lib/relaton_ietf/xml_parser.rb +1 -1
- data/lib/relaton_ietf.rb +1 -0
- data/relaton_ietf.gemspec +3 -5
- metadata +11 -10
@@ -1,362 +1,376 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "net/http"
|
4
|
-
require "nokogiri"
|
5
4
|
require "relaton_bib"
|
6
5
|
require "relaton_ietf/ietf_bibliographic_item"
|
7
6
|
|
8
7
|
module RelatonIetf
|
9
|
-
# rubocop:disable Metrics/ModuleLength
|
10
|
-
|
11
8
|
# Scrapper module
|
12
9
|
module Scrapper
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
class << self
|
17
|
-
# @param text [String]
|
18
|
-
# @param is_relation [TrueClass, FalseClass]
|
19
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
20
|
-
def scrape_page(text, is_relation = false)
|
21
|
-
# Remove initial "IETF " string if specified
|
22
|
-
ref = text.gsub(/^IETF /, "")
|
23
|
-
/^(RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
-
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
25
|
-
rfc_item ref, is_relation
|
26
|
-
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
27
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
28
|
-
Net::ProtocolError, SocketError
|
29
|
-
raise RelatonBib::RequestError, "No document found for #{ref} reference."
|
30
|
-
end
|
31
|
-
|
32
|
-
# @param reference [Nokogiri::XML::Element, nil]
|
33
|
-
# @param is_relation [TrueClass, FalseClass]
|
34
|
-
# @param url [String, NilClass]
|
35
|
-
# @param ver [String, NilClass] Internet Draft version
|
36
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
37
|
-
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
38
|
-
return unless reference
|
39
|
-
|
40
|
-
ietf_item(
|
41
|
-
is_relation: is_relation,
|
42
|
-
id: reference[:anchor],
|
43
|
-
type: "standard",
|
44
|
-
docid: docids(reference, ver),
|
45
|
-
status: status(reference),
|
46
|
-
language: [language(reference)],
|
47
|
-
link: link(reference, url, ver),
|
48
|
-
title: titles(reference),
|
49
|
-
formattedref: formattedref(reference),
|
50
|
-
abstract: abstracts(reference),
|
51
|
-
contributor: contributors(reference),
|
52
|
-
relation: relations(reference),
|
53
|
-
date: dates(reference),
|
54
|
-
series: series(reference),
|
55
|
-
place: ["Fremont, CA"],
|
56
|
-
keyword: reference.xpath("front/keyword").map(&:text),
|
57
|
-
doctype: doctype(reference[:anchor])
|
58
|
-
)
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
# @param anchor [String]
|
64
|
-
# @return [String]
|
65
|
-
def doctype(anchor)
|
66
|
-
anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
67
|
-
end
|
68
|
-
|
69
|
-
# @param reference [Nokogiri::XML::Element]
|
70
|
-
# @param url [String]
|
71
|
-
# @param ver [String, NilClass] Internet Draft version
|
72
|
-
# @return [Array<Hash>]
|
73
|
-
def link(reference, url, ver)
|
74
|
-
l = []
|
75
|
-
l << { type: "xml", content: url } if url
|
76
|
-
l << { type: "src", content: reference[:target] } if reference[:target]
|
77
|
-
if /^I-D/.match? reference[:anchor]
|
78
|
-
reference.xpath("format").each do |f|
|
79
|
-
c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
80
|
-
l << { type: f[:type], content: c }
|
81
|
-
end
|
82
|
-
end
|
83
|
-
l
|
84
|
-
end
|
85
|
-
|
86
|
-
# @param attrs [Hash]
|
87
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
88
|
-
def ietf_item(**attrs)
|
89
|
-
attrs[:fetched] = Date.today.to_s unless attrs.delete(:is_relation)
|
90
|
-
attrs[:script] = ["Latn"]
|
91
|
-
RelatonIetf::IetfBibliographicItem.new **attrs
|
92
|
-
end
|
93
|
-
|
94
|
-
# @param ref [String]
|
95
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
96
|
-
def rfc_item(ref, is_relation)
|
97
|
-
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
98
|
-
if /^I-D/.match? ref
|
99
|
-
ref.sub! /-\d{2}/, "" if ver
|
100
|
-
ref.sub! /(?<=I-D\.)draft-/, ""
|
101
|
-
end
|
102
|
-
|
103
|
-
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
104
|
-
doc = Nokogiri::XML get_page(uri)
|
105
|
-
fetch_rfc doc.at("/referencegroup", "/reference"), is_relation, uri, ver
|
106
|
-
end
|
107
|
-
|
108
|
-
# @param reference [Nokogiri::XML::Element]
|
109
|
-
# @return [Hash]
|
110
|
-
def relations(reference)
|
111
|
-
reference.xpath("reference").map do |ref|
|
112
|
-
{ type: "includes", bibitem: fetch_rfc(ref, true) }
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
# @param uri [String]
|
117
|
-
# @return [String] HTTP response body
|
118
|
-
def get_page(uri)
|
119
|
-
res = Net::HTTP.get_response(URI(uri))
|
120
|
-
if res.code != "200"
|
121
|
-
raise RelatonBib::RequestError, "No document found at #{uri}"
|
122
|
-
end
|
123
|
-
|
124
|
-
res.body
|
125
|
-
end
|
126
|
-
|
127
|
-
# @param reference [Nokogiri::XML::Element]
|
128
|
-
# @return [String]
|
129
|
-
def language(reference)
|
130
|
-
reference[:lang] || "en"
|
131
|
-
end
|
132
|
-
|
133
|
-
# @param reference [Nokogiri::XML::Element]
|
134
|
-
# @return [Array<Hash>]
|
135
|
-
def titles(reference)
|
136
|
-
reference.xpath("./front/title").map do |title|
|
137
|
-
{ content: title.text, language: language(reference), script: "Latn" }
|
138
|
-
end
|
139
|
-
end
|
10
|
+
extend RelatonBib::BibXMLParser
|
11
|
+
extend Scrapper
|
140
12
|
|
141
|
-
|
142
|
-
# @return [RelatonBib::FormattedRef, nil]
|
143
|
-
def formattedref(reference)
|
144
|
-
return if reference.at "./fornt/title"
|
13
|
+
FLAVOR = "IETF"
|
145
14
|
|
146
|
-
|
147
|
-
RelatonBib::FormattedRef.new content: cont, language: language(reference), script: "Latn" if cont
|
148
|
-
end
|
149
|
-
|
150
|
-
# @param reference [Nokogiri::XML::Element]
|
151
|
-
# @return [Array<RelatonBib::FormattedString>]
|
152
|
-
def abstracts(ref)
|
153
|
-
ref.xpath("./front/abstract").map do |a|
|
154
|
-
RelatonBib::FormattedString.new(
|
155
|
-
content: a.text.gsub(/\\n\\t{2,4}/, " ").strip,
|
156
|
-
language: language(ref), script: "Latn"
|
157
|
-
)
|
158
|
-
end
|
159
|
-
end
|
160
|
-
|
161
|
-
# @param reference [Nokogiri::XML::Element]
|
162
|
-
# @return [Array<Hash>]
|
163
|
-
def contributors(reference)
|
164
|
-
persons(reference) + organizations(reference)
|
165
|
-
end
|
166
|
-
|
167
|
-
# @param reference [Nokogiri::XML::Element]
|
168
|
-
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
169
|
-
def persons(reference)
|
170
|
-
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
171
|
-
.map do |author|
|
172
|
-
entity = RelatonBib::Person.new(
|
173
|
-
name: full_name(author, reference),
|
174
|
-
affiliation: [affiliation(author)],
|
175
|
-
contact: contacts(author.at("./address"))
|
176
|
-
)
|
177
|
-
{ entity: entity, role: [contributor_role(author)] }
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# @param reference [Nokogiri::XML::Element]
|
182
|
-
# @return [Array<Hash{Symbol=>RelatonBib::Organization,Symbol=>Array<String>}>]
|
183
|
-
def organizations(reference)
|
184
|
-
publisher = { entity: new_org, role: [type: "publisher"] }
|
185
|
-
orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
186
|
-
next mem unless si[:stream]
|
187
|
-
|
188
|
-
mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
189
|
-
end
|
190
|
-
orgs + reference.xpath(
|
191
|
-
"front/author[not(@surname)][not(@fullname)]/organization"
|
192
|
-
).map do |org|
|
193
|
-
{ entity: new_org(org.text, nil), role: [type: "author"] }
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
# @param author [Nokogiri::XML::Element]
|
198
|
-
# @param ref [Nokogiri::XML::Element]
|
199
|
-
# @return [RelatonBib::FullName]
|
200
|
-
def full_name(author, ref)
|
201
|
-
lang = language ref
|
202
|
-
RelatonBib::FullName.new(
|
203
|
-
completename: localized_string(author[:fullname], lang),
|
204
|
-
initial: [localized_string(author[:initials], lang)].compact,
|
205
|
-
surname: localized_string(author[:surname], lang)
|
206
|
-
)
|
207
|
-
end
|
208
|
-
|
209
|
-
# @param content [String]
|
210
|
-
# @param lang [String]
|
211
|
-
# @return [RelatonBib::LocalizedString]
|
212
|
-
def localized_string(content, lang)
|
213
|
-
return unless content
|
214
|
-
|
215
|
-
RelatonBib::LocalizedString.new(content, lang)
|
216
|
-
end
|
217
|
-
|
218
|
-
# @param postal [Nokogiri::XML::Element]
|
219
|
-
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
220
|
-
def contacts(addr)
|
221
|
-
contacts = []
|
222
|
-
return contacts unless addr
|
223
|
-
|
224
|
-
postal = addr.at("./postal")
|
225
|
-
contacts << address(postal) if postal
|
226
|
-
add_contact(contacts, "phone", addr.at("./phone"))
|
227
|
-
add_contact(contacts, "email", addr.at("./email"))
|
228
|
-
add_contact(contacts, "uri", addr.at("./uri"))
|
229
|
-
contacts
|
230
|
-
end
|
231
|
-
|
232
|
-
# @param postal [Nokogiri::XML::Element]
|
233
|
-
# @return [RelatonBib::Address]
|
234
|
-
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
235
|
-
RelatonBib::Address.new(
|
236
|
-
street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
237
|
-
city: postal.at("./city")&.text,
|
238
|
-
postcode: postal.at("./code")&.text,
|
239
|
-
country: postal.at("./country")&.text,
|
240
|
-
state: postal.at("./region")&.text
|
241
|
-
)
|
242
|
-
end
|
243
|
-
|
244
|
-
# @param type [String] allowed "phone", "email" or "uri"
|
245
|
-
# @param value [String]
|
246
|
-
def add_contact(contacts, type, value)
|
247
|
-
return unless value
|
248
|
-
|
249
|
-
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
250
|
-
end
|
251
|
-
|
252
|
-
# @param author [Nokogiri::XML::Element]
|
253
|
-
# @return [RelatonBib::Affiliation]
|
254
|
-
def affiliation(author)
|
255
|
-
organization = author.at("./organization")
|
256
|
-
org = if organization.nil? || organization&.text&.empty?
|
257
|
-
new_org
|
258
|
-
else
|
259
|
-
new_org organization.text, organization[:abbrev]
|
260
|
-
end
|
261
|
-
RelatonBib::Affiliation.new organization: org
|
262
|
-
end
|
263
|
-
|
264
|
-
# @param name [String]
|
265
|
-
# @param abbr [String]
|
266
|
-
# @return [RelatonBib::Organization]
|
267
|
-
def new_org(name = "Internet Engineering Task Force", abbr = "IETF")
|
268
|
-
RelatonBib::Organization.new name: name, abbreviation: abbr
|
269
|
-
end
|
270
|
-
|
271
|
-
# @param author [Nokogiri::XML::Document]
|
272
|
-
# @return [Hash]
|
273
|
-
def contributor_role(author)
|
274
|
-
{ type: author[:role] || "author" }
|
275
|
-
end
|
15
|
+
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
276
16
|
|
277
|
-
|
278
|
-
|
279
|
-
|
17
|
+
# @param text [String]
|
18
|
+
# @param is_relation [TrueClass, FalseClass]
|
19
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
20
|
+
def scrape_page(text, is_relation: false)
|
21
|
+
# Remove initial "IETF " string if specified
|
22
|
+
ref = text.gsub(/^IETF /, "")
|
23
|
+
/^(?:RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
+
ref.sub!(/(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0")) if num
|
25
|
+
rfc_item ref, is_relation
|
26
|
+
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
27
|
+
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
28
|
+
Net::ProtocolError, SocketError
|
29
|
+
raise RelatonBib::RequestError, "No document found for #{ref} reference"
|
30
|
+
end
|
280
31
|
|
281
|
-
|
32
|
+
# @param reference [Nokogiri::XML::Element, nil]
|
33
|
+
# @param is_relation [Boolean] don't add fetched date for relation
|
34
|
+
# @param url [String, NilClass]
|
35
|
+
# @param ver [String, NilClass] Internet Draft version
|
36
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
37
|
+
# def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
38
|
+
# return unless reference
|
39
|
+
|
40
|
+
# hash = {
|
41
|
+
# id: reference[:anchor],
|
42
|
+
# type: "standard",
|
43
|
+
# docid: docids(reference, ver),
|
44
|
+
# status: status(reference),
|
45
|
+
# language: [language(reference)],
|
46
|
+
# script: ["Latn"],
|
47
|
+
# link: link(reference, url, ver),
|
48
|
+
# title: titles(reference),
|
49
|
+
# formattedref: formattedref(reference),
|
50
|
+
# abstract: abstracts(reference),
|
51
|
+
# contributor: contributors(reference),
|
52
|
+
# relation: relations(reference),
|
53
|
+
# date: dates(reference),
|
54
|
+
# series: series(reference),
|
55
|
+
# keyword: reference.xpath("front/keyword").map(&:text),
|
56
|
+
# doctype: doctype(reference[:anchor]),
|
57
|
+
# }
|
58
|
+
# hash[:fetched] = Date.today.to_s unless is_relation
|
59
|
+
# bib_item(**hash)
|
60
|
+
# end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# @param anchor [String]
|
65
|
+
# @return [String]
|
66
|
+
# def doctype(anchor)
|
67
|
+
# anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
68
|
+
# end
|
69
|
+
|
70
|
+
# @param reference [Nokogiri::XML::Element]
|
71
|
+
# @param url [String]
|
72
|
+
# @param ver [String, NilClass] Internet Draft version
|
73
|
+
# @return [Array<Hash>]
|
74
|
+
# def link(reference, url, ver)
|
75
|
+
# l = []
|
76
|
+
# l << { type: "xml", content: url } if url
|
77
|
+
# l << { type: "src", content: reference[:target] } if reference[:target]
|
78
|
+
# if /^I-D/.match? reference[:anchor]
|
79
|
+
# reference.xpath("format").each do |f|
|
80
|
+
# c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
81
|
+
# l << { type: f[:type], content: c }
|
82
|
+
# end
|
83
|
+
# end
|
84
|
+
# l
|
85
|
+
# end
|
86
|
+
|
87
|
+
# @param attrs [Hash]
|
88
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
89
|
+
def bib_item(**attrs)
|
90
|
+
unless attrs.delete(:is_relation)
|
91
|
+
attrs[:fetched] = Date.today.to_s
|
92
|
+
attrs[:place] = ["Fremont, CA"]
|
282
93
|
end
|
94
|
+
RelatonIetf::IetfBibliographicItem.new(**attrs)
|
95
|
+
end
|
283
96
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
d = [date[:year], month(date[:month]),
|
294
|
-
(date[:day] || 1)].compact.join "-"
|
295
|
-
date = Time.parse(d).strftime "%Y-%m-%d"
|
296
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
97
|
+
# @param ref [String]
|
98
|
+
# @param is_relation [Boolean, nil]
|
99
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
100
|
+
def rfc_item(ref, is_relation)
|
101
|
+
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
102
|
+
if /^I-D/.match? ref
|
103
|
+
ref.sub!(/-\d{2}/, "") if ver
|
104
|
+
ref.sub!(/(?<=I-D\.)draft-/, "")
|
297
105
|
end
|
298
106
|
|
299
|
-
#
|
300
|
-
#
|
301
|
-
#
|
302
|
-
#
|
303
|
-
|
304
|
-
|
305
|
-
# @return [Array<RelatonBib::DocumentIdentifier>]
|
306
|
-
#
|
307
|
-
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
308
|
-
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
309
|
-
ret = []
|
310
|
-
if id
|
311
|
-
ret << RelatonBib::DocumentIdentifier.new(
|
312
|
-
type: "IETF", id: id.sub(/^(RFC)/, "\\1 ")
|
313
|
-
)
|
314
|
-
end
|
315
|
-
if (id = reference[:anchor])
|
316
|
-
ret << RelatonBib::DocumentIdentifier.new(type: "rfc-anchor", id: id)
|
317
|
-
end
|
318
|
-
ret + reference.xpath("./seriesInfo").map do |si|
|
319
|
-
next unless ["DOI", "Internet-Draft"].include? si[:name]
|
320
|
-
|
321
|
-
id = si[:value]
|
322
|
-
id.sub! /(?<=-)\d{2}$/, ver if ver && si[:name] == "Internet-Draft"
|
323
|
-
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
324
|
-
end.compact
|
325
|
-
end
|
107
|
+
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
108
|
+
# doc = Nokogiri::XML get_page(uri)
|
109
|
+
# r = doc.at("/referencegroup", "/reference")
|
110
|
+
# fetch_rfc r, is_relation: is_relation, url: uri, ver: ver
|
111
|
+
parse get_page(uri), url: uri, is_relation: is_relation, ver: ver
|
112
|
+
end
|
326
113
|
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
)
|
344
|
-
end.compact
|
345
|
-
end
|
114
|
+
# @param reference [Nokogiri::XML::Element]
|
115
|
+
# @return [Hash]
|
116
|
+
# def relations(reference)
|
117
|
+
# reference.xpath("reference").map do |ref|
|
118
|
+
# { type: "includes", bibitem: fetch_rfc(ref, is_relation: true) }
|
119
|
+
# end
|
120
|
+
# end
|
121
|
+
|
122
|
+
# @param uri [String]
|
123
|
+
# @return [String] HTTP response body
|
124
|
+
def get_page(uri)
|
125
|
+
res = Net::HTTP.get_response(URI(uri))
|
126
|
+
return unless res.code == "200"
|
127
|
+
|
128
|
+
res.body
|
129
|
+
end
|
346
130
|
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
# @return [RelatonBib::DocumentStatus]
|
352
|
-
#
|
353
|
-
def status(reference)
|
354
|
-
st = reference.at("./seriesinfo[@status]")
|
355
|
-
return unless st
|
356
|
-
|
357
|
-
RelatonBib::DocumentStatus.new(stage: st[:status])
|
358
|
-
end
|
131
|
+
# @param wgr [RelatonBib::WorkGroup]
|
132
|
+
# @return [RelatonIetf::Committee]
|
133
|
+
def committee(wgr)
|
134
|
+
Committee.new wgr
|
359
135
|
end
|
136
|
+
|
137
|
+
# @param reference [Nokogiri::XML::Element]
|
138
|
+
# @return [String]
|
139
|
+
# def language(reference)
|
140
|
+
# reference[:lang] || "en"
|
141
|
+
# end
|
142
|
+
|
143
|
+
# @param reference [Nokogiri::XML::Element]
|
144
|
+
# @return [Array<Hash>]
|
145
|
+
# def titles(reference)
|
146
|
+
# reference.xpath("./front/title").map do |title|
|
147
|
+
# { content: title.text, language: language(reference), script: "Latn" }
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
|
151
|
+
# @param reference [Nokogiri::XML::Element]
|
152
|
+
# @return [RelatonBib::FormattedRef, nil]
|
153
|
+
# def formattedref(reference)
|
154
|
+
# return if reference.at "./front/title"
|
155
|
+
|
156
|
+
# cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
157
|
+
# if cont
|
158
|
+
# RelatonBib::FormattedRef.new(
|
159
|
+
# content: cont, language: language(reference), script: "Latn",
|
160
|
+
# )
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
|
164
|
+
# @param ref [Nokogiri::XML::Element]
|
165
|
+
# @return [Array<RelatonBib::FormattedString>]
|
166
|
+
# def abstracts(ref)
|
167
|
+
# ref.xpath("./front/abstract").map do |a|
|
168
|
+
# RelatonBib::FormattedString.new(
|
169
|
+
# content: a.text.gsub(/\\n\\t{2,4}/, " ").strip,
|
170
|
+
# language: language(ref), script: "Latn"
|
171
|
+
# )
|
172
|
+
# end
|
173
|
+
# end
|
174
|
+
|
175
|
+
# @param reference [Nokogiri::XML::Element]
|
176
|
+
# @return [Array<Hash>]
|
177
|
+
# def contributors(reference)
|
178
|
+
# persons(reference) + organizations(reference)
|
179
|
+
# end
|
180
|
+
|
181
|
+
# @param reference [Nokogiri::XML::Element]
|
182
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
183
|
+
# def persons(reference)
|
184
|
+
# reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
185
|
+
# .map do |author|
|
186
|
+
# entity = RelatonBib::Person.new(
|
187
|
+
# name: full_name(author, reference),
|
188
|
+
# affiliation: [affiliation(author)],
|
189
|
+
# contact: contacts(author.at("./address")),
|
190
|
+
# )
|
191
|
+
# { entity: entity, role: [contributor_role(author)] }
|
192
|
+
# end
|
193
|
+
# end
|
194
|
+
|
195
|
+
# @param reference [Nokogiri::XML::Element]
|
196
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Organization,
|
197
|
+
# Symbol=>Array<String>}>]
|
198
|
+
# def organizations(reference)
|
199
|
+
# publisher = { entity: new_org, role: [type: "publisher"] }
|
200
|
+
# orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
201
|
+
# next mem unless si[:stream]
|
202
|
+
|
203
|
+
# mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
204
|
+
# end
|
205
|
+
# orgs + reference.xpath(
|
206
|
+
# "front/author[not(@surname)][not(@fullname)]/organization",
|
207
|
+
# ).map do |org|
|
208
|
+
# { entity: new_org(org.text, nil), role: [type: "author"] }
|
209
|
+
# end
|
210
|
+
# end
|
211
|
+
|
212
|
+
# @param author [Nokogiri::XML::Element]
|
213
|
+
# @param ref [Nokogiri::XML::Element]
|
214
|
+
# @return [RelatonBib::FullName]
|
215
|
+
# def full_name(author, ref)
|
216
|
+
# lang = language ref
|
217
|
+
# RelatonBib::FullName.new(
|
218
|
+
# completename: localized_string(author[:fullname], lang),
|
219
|
+
# initial: [localized_string(author[:initials], lang)].compact,
|
220
|
+
# surname: localized_string(author[:surname], lang),
|
221
|
+
# )
|
222
|
+
# end
|
223
|
+
|
224
|
+
# @param content [String]
|
225
|
+
# @param lang [String]
|
226
|
+
# @return [RelatonBib::LocalizedString]
|
227
|
+
# def localized_string(content, lang)
|
228
|
+
# return unless content
|
229
|
+
|
230
|
+
# RelatonBib::LocalizedString.new(content, lang)
|
231
|
+
# end
|
232
|
+
|
233
|
+
# @param addr [Nokogiri::XML::Element]
|
234
|
+
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
235
|
+
# def contacts(addr)
|
236
|
+
# contacts = []
|
237
|
+
# return contacts unless addr
|
238
|
+
|
239
|
+
# postal = addr.at("./postal")
|
240
|
+
# contacts << address(postal) if postal
|
241
|
+
# add_contact(contacts, "phone", addr.at("./phone"))
|
242
|
+
# add_contact(contacts, "email", addr.at("./email"))
|
243
|
+
# add_contact(contacts, "uri", addr.at("./uri"))
|
244
|
+
# contacts
|
245
|
+
# end
|
246
|
+
|
247
|
+
# @param postal [Nokogiri::XML::Element]
|
248
|
+
# @return [RelatonBib::Address]
|
249
|
+
# def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
250
|
+
# RelatonBib::Address.new(
|
251
|
+
# street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
252
|
+
# city: postal.at("./city")&.text,
|
253
|
+
# postcode: postal.at("./code")&.text,
|
254
|
+
# country: postal.at("./country")&.text,
|
255
|
+
# state: postal.at("./region")&.text,
|
256
|
+
# )
|
257
|
+
# end
|
258
|
+
|
259
|
+
# @param type [String] allowed "phone", "email" or "uri"
|
260
|
+
# @param value [String]
|
261
|
+
# def add_contact(contacts, type, value)
|
262
|
+
# return unless value
|
263
|
+
|
264
|
+
# contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
265
|
+
# end
|
266
|
+
|
267
|
+
# @param author [Nokogiri::XML::Element]
|
268
|
+
# @return [RelatonBib::Affiliation]
|
269
|
+
# def affiliation(author)
|
270
|
+
# organization = author.at("./organization")
|
271
|
+
# org = if organization.nil? || organization&.text&.empty?
|
272
|
+
# new_org
|
273
|
+
# else
|
274
|
+
# new_org organization.text, organization[:abbrev]
|
275
|
+
# end
|
276
|
+
# RelatonBib::Affiliation.new organization: org
|
277
|
+
# end
|
278
|
+
|
279
|
+
# @param name [String]
|
280
|
+
# @param abbr [String]
|
281
|
+
# @return [RelatonBib::Organization]
|
282
|
+
# def new_org(name = "Internet Engineering Task Force", abbr = "IETF")
|
283
|
+
# RelatonBib::Organization.new name: name, abbreviation: abbr
|
284
|
+
# end
|
285
|
+
|
286
|
+
# @param author [Nokogiri::XML::Document]
|
287
|
+
# @return [Hash]
|
288
|
+
# def contributor_role(author)
|
289
|
+
# { type: author[:role] || "author" }
|
290
|
+
# end
|
291
|
+
|
292
|
+
# def month(mon)
|
293
|
+
# return 1 if !mon || mon.empty?
|
294
|
+
# return mon if /^\d+$/.match? mon
|
295
|
+
|
296
|
+
# Date::MONTHNAMES.index(mon)
|
297
|
+
# end
|
298
|
+
|
299
|
+
#
|
300
|
+
# Extract date from reference.
|
301
|
+
#
|
302
|
+
# @param reference [Nokogiri::XML::Element]
|
303
|
+
# @return [Array<RelatonBib::BibliographicDate>] published date.
|
304
|
+
#
|
305
|
+
# def dates(reference)
|
306
|
+
# return unless (date = reference.at "./front/date")
|
307
|
+
|
308
|
+
# d = [date[:year], month(date[:month]),
|
309
|
+
# (date[:day] || 1)].compact.join "-"
|
310
|
+
# date = Time.parse(d).strftime "%Y-%m-%d"
|
311
|
+
# [RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
312
|
+
# end
|
313
|
+
|
314
|
+
#
|
315
|
+
# Extract document identifiers from reference
|
316
|
+
#
|
317
|
+
# @param reference [Nokogiri::XML::Element]
|
318
|
+
# @param ver [String, NilClass] Internet Draft version
|
319
|
+
#
|
320
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
321
|
+
#
|
322
|
+
# def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
323
|
+
# id = (reference[:anchor] || reference[:docName] || reference[:number])
|
324
|
+
# ret = []
|
325
|
+
# if id
|
326
|
+
# ret << RelatonBib::DocumentIdentifier.new(
|
327
|
+
# type: "IETF", id: id.sub(/^(RFC)/, "\\1 "),
|
328
|
+
# )
|
329
|
+
# end
|
330
|
+
# if (id = reference[:anchor])
|
331
|
+
# ret << RelatonBib::DocumentIdentifier.new(type: "rfc-anchor", id: id)
|
332
|
+
# end
|
333
|
+
# names = ["DOI", "Internet-Draft"]
|
334
|
+
# ret + reference.xpath("./seriesInfo").map do |si|
|
335
|
+
# next unless names.include? si[:name]
|
336
|
+
|
337
|
+
# id = si[:value]
|
338
|
+
# id.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
|
339
|
+
# RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
340
|
+
# end.compact
|
341
|
+
# end
|
342
|
+
|
343
|
+
#
|
344
|
+
# Extract series from reference
|
345
|
+
# @param reference [Nokogiri::XML::Element]
|
346
|
+
#
|
347
|
+
# @return [Array<RelatonBib::Series>]
|
348
|
+
#
|
349
|
+
# def series(reference)
|
350
|
+
# reference.xpath("./seriesInfo").map do |si|
|
351
|
+
# next if si[:name] == "DOI" || si[:stream] || si[:status]
|
352
|
+
|
353
|
+
# RelatonBib::Series.new(
|
354
|
+
# title: RelatonBib::TypedTitleString.new(
|
355
|
+
# content: si[:name], language: language(reference), script: "Latn",
|
356
|
+
# ),
|
357
|
+
# number: si[:value],
|
358
|
+
# type: "main",
|
359
|
+
# )
|
360
|
+
# end.compact
|
361
|
+
# end
|
362
|
+
|
363
|
+
#
|
364
|
+
# extract status
|
365
|
+
# @param reference [Nokogiri::XML::Element]
|
366
|
+
#
|
367
|
+
# @return [RelatonBib::DocumentStatus]
|
368
|
+
#
|
369
|
+
# def status(reference)
|
370
|
+
# st = reference.at("./seriesinfo[@status]")
|
371
|
+
# return unless st
|
372
|
+
|
373
|
+
# RelatonBib::DocumentStatus.new(stage: st[:status])
|
374
|
+
# end
|
360
375
|
end
|
361
|
-
# rubocop:enable Metrics/ModuleLength
|
362
376
|
end
|