relaton-ietf 1.7.4 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/README.adoc +30 -4
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +5 -6
- data/grammars/ietf.rng +42 -0
- data/grammars/isodoc.rng +532 -16
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_ietf/committee.rb +8 -0
- data/lib/relaton_ietf/hash_converter.rb +1 -1
- data/lib/relaton_ietf/ietf_bibliographic_item.rb +2 -2
- data/lib/relaton_ietf/ietf_bibliography.rb +5 -1
- data/lib/relaton_ietf/scrapper.rb +354 -340
- data/lib/relaton_ietf/version.rb +1 -1
- data/lib/relaton_ietf/xml_parser.rb +1 -1
- data/lib/relaton_ietf.rb +1 -0
- data/relaton_ietf.gemspec +3 -5
- metadata +11 -10
@@ -1,362 +1,376 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "net/http"
|
4
|
-
require "nokogiri"
|
5
4
|
require "relaton_bib"
|
6
5
|
require "relaton_ietf/ietf_bibliographic_item"
|
7
6
|
|
8
7
|
module RelatonIetf
|
9
|
-
# rubocop:disable Metrics/ModuleLength
|
10
|
-
|
11
8
|
# Scrapper module
|
12
9
|
module Scrapper
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
class << self
|
17
|
-
# @param text [String]
|
18
|
-
# @param is_relation [TrueClass, FalseClass]
|
19
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
20
|
-
def scrape_page(text, is_relation = false)
|
21
|
-
# Remove initial "IETF " string if specified
|
22
|
-
ref = text.gsub(/^IETF /, "")
|
23
|
-
/^(RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
-
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
25
|
-
rfc_item ref, is_relation
|
26
|
-
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
27
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
28
|
-
Net::ProtocolError, SocketError
|
29
|
-
raise RelatonBib::RequestError, "No document found for #{ref} reference."
|
30
|
-
end
|
31
|
-
|
32
|
-
# @param reference [Nokogiri::XML::Element, nil]
|
33
|
-
# @param is_relation [TrueClass, FalseClass]
|
34
|
-
# @param url [String, NilClass]
|
35
|
-
# @param ver [String, NilClass] Internet Draft version
|
36
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
37
|
-
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
38
|
-
return unless reference
|
39
|
-
|
40
|
-
ietf_item(
|
41
|
-
is_relation: is_relation,
|
42
|
-
id: reference[:anchor],
|
43
|
-
type: "standard",
|
44
|
-
docid: docids(reference, ver),
|
45
|
-
status: status(reference),
|
46
|
-
language: [language(reference)],
|
47
|
-
link: link(reference, url, ver),
|
48
|
-
title: titles(reference),
|
49
|
-
formattedref: formattedref(reference),
|
50
|
-
abstract: abstracts(reference),
|
51
|
-
contributor: contributors(reference),
|
52
|
-
relation: relations(reference),
|
53
|
-
date: dates(reference),
|
54
|
-
series: series(reference),
|
55
|
-
place: ["Fremont, CA"],
|
56
|
-
keyword: reference.xpath("front/keyword").map(&:text),
|
57
|
-
doctype: doctype(reference[:anchor])
|
58
|
-
)
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
# @param anchor [String]
|
64
|
-
# @return [String]
|
65
|
-
def doctype(anchor)
|
66
|
-
anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
67
|
-
end
|
68
|
-
|
69
|
-
# @param reference [Nokogiri::XML::Element]
|
70
|
-
# @param url [String]
|
71
|
-
# @param ver [String, NilClass] Internet Draft version
|
72
|
-
# @return [Array<Hash>]
|
73
|
-
def link(reference, url, ver)
|
74
|
-
l = []
|
75
|
-
l << { type: "xml", content: url } if url
|
76
|
-
l << { type: "src", content: reference[:target] } if reference[:target]
|
77
|
-
if /^I-D/.match? reference[:anchor]
|
78
|
-
reference.xpath("format").each do |f|
|
79
|
-
c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
80
|
-
l << { type: f[:type], content: c }
|
81
|
-
end
|
82
|
-
end
|
83
|
-
l
|
84
|
-
end
|
85
|
-
|
86
|
-
# @param attrs [Hash]
|
87
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
88
|
-
def ietf_item(**attrs)
|
89
|
-
attrs[:fetched] = Date.today.to_s unless attrs.delete(:is_relation)
|
90
|
-
attrs[:script] = ["Latn"]
|
91
|
-
RelatonIetf::IetfBibliographicItem.new **attrs
|
92
|
-
end
|
93
|
-
|
94
|
-
# @param ref [String]
|
95
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
96
|
-
def rfc_item(ref, is_relation)
|
97
|
-
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
98
|
-
if /^I-D/.match? ref
|
99
|
-
ref.sub! /-\d{2}/, "" if ver
|
100
|
-
ref.sub! /(?<=I-D\.)draft-/, ""
|
101
|
-
end
|
102
|
-
|
103
|
-
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
104
|
-
doc = Nokogiri::XML get_page(uri)
|
105
|
-
fetch_rfc doc.at("/referencegroup", "/reference"), is_relation, uri, ver
|
106
|
-
end
|
107
|
-
|
108
|
-
# @param reference [Nokogiri::XML::Element]
|
109
|
-
# @return [Hash]
|
110
|
-
def relations(reference)
|
111
|
-
reference.xpath("reference").map do |ref|
|
112
|
-
{ type: "includes", bibitem: fetch_rfc(ref, true) }
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
# @param uri [String]
|
117
|
-
# @return [String] HTTP response body
|
118
|
-
def get_page(uri)
|
119
|
-
res = Net::HTTP.get_response(URI(uri))
|
120
|
-
if res.code != "200"
|
121
|
-
raise RelatonBib::RequestError, "No document found at #{uri}"
|
122
|
-
end
|
123
|
-
|
124
|
-
res.body
|
125
|
-
end
|
126
|
-
|
127
|
-
# @param reference [Nokogiri::XML::Element]
|
128
|
-
# @return [String]
|
129
|
-
def language(reference)
|
130
|
-
reference[:lang] || "en"
|
131
|
-
end
|
132
|
-
|
133
|
-
# @param reference [Nokogiri::XML::Element]
|
134
|
-
# @return [Array<Hash>]
|
135
|
-
def titles(reference)
|
136
|
-
reference.xpath("./front/title").map do |title|
|
137
|
-
{ content: title.text, language: language(reference), script: "Latn" }
|
138
|
-
end
|
139
|
-
end
|
10
|
+
extend RelatonBib::BibXMLParser
|
11
|
+
extend Scrapper
|
140
12
|
|
141
|
-
|
142
|
-
# @return [RelatonBib::FormattedRef, nil]
|
143
|
-
def formattedref(reference)
|
144
|
-
return if reference.at "./fornt/title"
|
13
|
+
FLAVOR = "IETF"
|
145
14
|
|
146
|
-
|
147
|
-
RelatonBib::FormattedRef.new content: cont, language: language(reference), script: "Latn" if cont
|
148
|
-
end
|
149
|
-
|
150
|
-
# @param reference [Nokogiri::XML::Element]
|
151
|
-
# @return [Array<RelatonBib::FormattedString>]
|
152
|
-
def abstracts(ref)
|
153
|
-
ref.xpath("./front/abstract").map do |a|
|
154
|
-
RelatonBib::FormattedString.new(
|
155
|
-
content: a.text.gsub(/\\n\\t{2,4}/, " ").strip,
|
156
|
-
language: language(ref), script: "Latn"
|
157
|
-
)
|
158
|
-
end
|
159
|
-
end
|
160
|
-
|
161
|
-
# @param reference [Nokogiri::XML::Element]
|
162
|
-
# @return [Array<Hash>]
|
163
|
-
def contributors(reference)
|
164
|
-
persons(reference) + organizations(reference)
|
165
|
-
end
|
166
|
-
|
167
|
-
# @param reference [Nokogiri::XML::Element]
|
168
|
-
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
169
|
-
def persons(reference)
|
170
|
-
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
171
|
-
.map do |author|
|
172
|
-
entity = RelatonBib::Person.new(
|
173
|
-
name: full_name(author, reference),
|
174
|
-
affiliation: [affiliation(author)],
|
175
|
-
contact: contacts(author.at("./address"))
|
176
|
-
)
|
177
|
-
{ entity: entity, role: [contributor_role(author)] }
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# @param reference [Nokogiri::XML::Element]
|
182
|
-
# @return [Array<Hash{Symbol=>RelatonBib::Organization,Symbol=>Array<String>}>]
|
183
|
-
def organizations(reference)
|
184
|
-
publisher = { entity: new_org, role: [type: "publisher"] }
|
185
|
-
orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
186
|
-
next mem unless si[:stream]
|
187
|
-
|
188
|
-
mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
189
|
-
end
|
190
|
-
orgs + reference.xpath(
|
191
|
-
"front/author[not(@surname)][not(@fullname)]/organization"
|
192
|
-
).map do |org|
|
193
|
-
{ entity: new_org(org.text, nil), role: [type: "author"] }
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
# @param author [Nokogiri::XML::Element]
|
198
|
-
# @param ref [Nokogiri::XML::Element]
|
199
|
-
# @return [RelatonBib::FullName]
|
200
|
-
def full_name(author, ref)
|
201
|
-
lang = language ref
|
202
|
-
RelatonBib::FullName.new(
|
203
|
-
completename: localized_string(author[:fullname], lang),
|
204
|
-
initial: [localized_string(author[:initials], lang)].compact,
|
205
|
-
surname: localized_string(author[:surname], lang)
|
206
|
-
)
|
207
|
-
end
|
208
|
-
|
209
|
-
# @param content [String]
|
210
|
-
# @param lang [String]
|
211
|
-
# @return [RelatonBib::LocalizedString]
|
212
|
-
def localized_string(content, lang)
|
213
|
-
return unless content
|
214
|
-
|
215
|
-
RelatonBib::LocalizedString.new(content, lang)
|
216
|
-
end
|
217
|
-
|
218
|
-
# @param postal [Nokogiri::XML::Element]
|
219
|
-
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
220
|
-
def contacts(addr)
|
221
|
-
contacts = []
|
222
|
-
return contacts unless addr
|
223
|
-
|
224
|
-
postal = addr.at("./postal")
|
225
|
-
contacts << address(postal) if postal
|
226
|
-
add_contact(contacts, "phone", addr.at("./phone"))
|
227
|
-
add_contact(contacts, "email", addr.at("./email"))
|
228
|
-
add_contact(contacts, "uri", addr.at("./uri"))
|
229
|
-
contacts
|
230
|
-
end
|
231
|
-
|
232
|
-
# @param postal [Nokogiri::XML::Element]
|
233
|
-
# @return [RelatonBib::Address]
|
234
|
-
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
235
|
-
RelatonBib::Address.new(
|
236
|
-
street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
237
|
-
city: postal.at("./city")&.text,
|
238
|
-
postcode: postal.at("./code")&.text,
|
239
|
-
country: postal.at("./country")&.text,
|
240
|
-
state: postal.at("./region")&.text
|
241
|
-
)
|
242
|
-
end
|
243
|
-
|
244
|
-
# @param type [String] allowed "phone", "email" or "uri"
|
245
|
-
# @param value [String]
|
246
|
-
def add_contact(contacts, type, value)
|
247
|
-
return unless value
|
248
|
-
|
249
|
-
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
250
|
-
end
|
251
|
-
|
252
|
-
# @param author [Nokogiri::XML::Element]
|
253
|
-
# @return [RelatonBib::Affiliation]
|
254
|
-
def affiliation(author)
|
255
|
-
organization = author.at("./organization")
|
256
|
-
org = if organization.nil? || organization&.text&.empty?
|
257
|
-
new_org
|
258
|
-
else
|
259
|
-
new_org organization.text, organization[:abbrev]
|
260
|
-
end
|
261
|
-
RelatonBib::Affiliation.new organization: org
|
262
|
-
end
|
263
|
-
|
264
|
-
# @param name [String]
|
265
|
-
# @param abbr [String]
|
266
|
-
# @return [RelatonBib::Organization]
|
267
|
-
def new_org(name = "Internet Engineering Task Force", abbr = "IETF")
|
268
|
-
RelatonBib::Organization.new name: name, abbreviation: abbr
|
269
|
-
end
|
270
|
-
|
271
|
-
# @param author [Nokogiri::XML::Document]
|
272
|
-
# @return [Hash]
|
273
|
-
def contributor_role(author)
|
274
|
-
{ type: author[:role] || "author" }
|
275
|
-
end
|
15
|
+
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
276
16
|
|
277
|
-
|
278
|
-
|
279
|
-
|
17
|
+
# @param text [String]
|
18
|
+
# @param is_relation [TrueClass, FalseClass]
|
19
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
20
|
+
def scrape_page(text, is_relation: false)
|
21
|
+
# Remove initial "IETF " string if specified
|
22
|
+
ref = text.gsub(/^IETF /, "")
|
23
|
+
/^(?:RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
+
ref.sub!(/(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0")) if num
|
25
|
+
rfc_item ref, is_relation
|
26
|
+
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
27
|
+
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
28
|
+
Net::ProtocolError, SocketError
|
29
|
+
raise RelatonBib::RequestError, "No document found for #{ref} reference"
|
30
|
+
end
|
280
31
|
|
281
|
-
|
32
|
+
# @param reference [Nokogiri::XML::Element, nil]
|
33
|
+
# @param is_relation [Boolean] don't add fetched date for relation
|
34
|
+
# @param url [String, NilClass]
|
35
|
+
# @param ver [String, NilClass] Internet Draft version
|
36
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
37
|
+
# def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
38
|
+
# return unless reference
|
39
|
+
|
40
|
+
# hash = {
|
41
|
+
# id: reference[:anchor],
|
42
|
+
# type: "standard",
|
43
|
+
# docid: docids(reference, ver),
|
44
|
+
# status: status(reference),
|
45
|
+
# language: [language(reference)],
|
46
|
+
# script: ["Latn"],
|
47
|
+
# link: link(reference, url, ver),
|
48
|
+
# title: titles(reference),
|
49
|
+
# formattedref: formattedref(reference),
|
50
|
+
# abstract: abstracts(reference),
|
51
|
+
# contributor: contributors(reference),
|
52
|
+
# relation: relations(reference),
|
53
|
+
# date: dates(reference),
|
54
|
+
# series: series(reference),
|
55
|
+
# keyword: reference.xpath("front/keyword").map(&:text),
|
56
|
+
# doctype: doctype(reference[:anchor]),
|
57
|
+
# }
|
58
|
+
# hash[:fetched] = Date.today.to_s unless is_relation
|
59
|
+
# bib_item(**hash)
|
60
|
+
# end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# @param anchor [String]
|
65
|
+
# @return [String]
|
66
|
+
# def doctype(anchor)
|
67
|
+
# anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
68
|
+
# end
|
69
|
+
|
70
|
+
# @param reference [Nokogiri::XML::Element]
|
71
|
+
# @param url [String]
|
72
|
+
# @param ver [String, NilClass] Internet Draft version
|
73
|
+
# @return [Array<Hash>]
|
74
|
+
# def link(reference, url, ver)
|
75
|
+
# l = []
|
76
|
+
# l << { type: "xml", content: url } if url
|
77
|
+
# l << { type: "src", content: reference[:target] } if reference[:target]
|
78
|
+
# if /^I-D/.match? reference[:anchor]
|
79
|
+
# reference.xpath("format").each do |f|
|
80
|
+
# c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
81
|
+
# l << { type: f[:type], content: c }
|
82
|
+
# end
|
83
|
+
# end
|
84
|
+
# l
|
85
|
+
# end
|
86
|
+
|
87
|
+
# @param attrs [Hash]
|
88
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
89
|
+
def bib_item(**attrs)
|
90
|
+
unless attrs.delete(:is_relation)
|
91
|
+
attrs[:fetched] = Date.today.to_s
|
92
|
+
attrs[:place] = ["Fremont, CA"]
|
282
93
|
end
|
94
|
+
RelatonIetf::IetfBibliographicItem.new(**attrs)
|
95
|
+
end
|
283
96
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
d = [date[:year], month(date[:month]),
|
294
|
-
(date[:day] || 1)].compact.join "-"
|
295
|
-
date = Time.parse(d).strftime "%Y-%m-%d"
|
296
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
97
|
+
# @param ref [String]
|
98
|
+
# @param is_relation [Boolean, nil]
|
99
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
100
|
+
def rfc_item(ref, is_relation)
|
101
|
+
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
102
|
+
if /^I-D/.match? ref
|
103
|
+
ref.sub!(/-\d{2}/, "") if ver
|
104
|
+
ref.sub!(/(?<=I-D\.)draft-/, "")
|
297
105
|
end
|
298
106
|
|
299
|
-
#
|
300
|
-
#
|
301
|
-
#
|
302
|
-
#
|
303
|
-
|
304
|
-
|
305
|
-
# @return [Array<RelatonBib::DocumentIdentifier>]
|
306
|
-
#
|
307
|
-
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
308
|
-
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
309
|
-
ret = []
|
310
|
-
if id
|
311
|
-
ret << RelatonBib::DocumentIdentifier.new(
|
312
|
-
type: "IETF", id: id.sub(/^(RFC)/, "\\1 ")
|
313
|
-
)
|
314
|
-
end
|
315
|
-
if (id = reference[:anchor])
|
316
|
-
ret << RelatonBib::DocumentIdentifier.new(type: "rfc-anchor", id: id)
|
317
|
-
end
|
318
|
-
ret + reference.xpath("./seriesInfo").map do |si|
|
319
|
-
next unless ["DOI", "Internet-Draft"].include? si[:name]
|
320
|
-
|
321
|
-
id = si[:value]
|
322
|
-
id.sub! /(?<=-)\d{2}$/, ver if ver && si[:name] == "Internet-Draft"
|
323
|
-
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
324
|
-
end.compact
|
325
|
-
end
|
107
|
+
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
108
|
+
# doc = Nokogiri::XML get_page(uri)
|
109
|
+
# r = doc.at("/referencegroup", "/reference")
|
110
|
+
# fetch_rfc r, is_relation: is_relation, url: uri, ver: ver
|
111
|
+
parse get_page(uri), url: uri, is_relation: is_relation, ver: ver
|
112
|
+
end
|
326
113
|
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
)
|
344
|
-
end.compact
|
345
|
-
end
|
114
|
+
# @param reference [Nokogiri::XML::Element]
|
115
|
+
# @return [Hash]
|
116
|
+
# def relations(reference)
|
117
|
+
# reference.xpath("reference").map do |ref|
|
118
|
+
# { type: "includes", bibitem: fetch_rfc(ref, is_relation: true) }
|
119
|
+
# end
|
120
|
+
# end
|
121
|
+
|
122
|
+
# @param uri [String]
|
123
|
+
# @return [String] HTTP response body
|
124
|
+
def get_page(uri)
|
125
|
+
res = Net::HTTP.get_response(URI(uri))
|
126
|
+
return unless res.code == "200"
|
127
|
+
|
128
|
+
res.body
|
129
|
+
end
|
346
130
|
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
# @return [RelatonBib::DocumentStatus]
|
352
|
-
#
|
353
|
-
def status(reference)
|
354
|
-
st = reference.at("./seriesinfo[@status]")
|
355
|
-
return unless st
|
356
|
-
|
357
|
-
RelatonBib::DocumentStatus.new(stage: st[:status])
|
358
|
-
end
|
131
|
+
# @param wgr [RelatonBib::WorkGroup]
|
132
|
+
# @return [RelatonIetf::Committee]
|
133
|
+
def committee(wgr)
|
134
|
+
Committee.new wgr
|
359
135
|
end
|
136
|
+
|
137
|
+
# @param reference [Nokogiri::XML::Element]
|
138
|
+
# @return [String]
|
139
|
+
# def language(reference)
|
140
|
+
# reference[:lang] || "en"
|
141
|
+
# end
|
142
|
+
|
143
|
+
# @param reference [Nokogiri::XML::Element]
|
144
|
+
# @return [Array<Hash>]
|
145
|
+
# def titles(reference)
|
146
|
+
# reference.xpath("./front/title").map do |title|
|
147
|
+
# { content: title.text, language: language(reference), script: "Latn" }
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
|
151
|
+
# @param reference [Nokogiri::XML::Element]
|
152
|
+
# @return [RelatonBib::FormattedRef, nil]
|
153
|
+
# def formattedref(reference)
|
154
|
+
# return if reference.at "./front/title"
|
155
|
+
|
156
|
+
# cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
157
|
+
# if cont
|
158
|
+
# RelatonBib::FormattedRef.new(
|
159
|
+
# content: cont, language: language(reference), script: "Latn",
|
160
|
+
# )
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
|
164
|
+
# @param reference [Nokogiri::XML::Element]
|
165
|
+
# @return [Array<RelatonBib::FormattedString>]
|
166
|
+
# def abstracts(ref)
|
167
|
+
# ref.xpath("./front/abstract").map do |a|
|
168
|
+
# RelatonBib::FormattedString.new(
|
169
|
+
# content: a.text.gsub(/\\n\\t{2,4}/, " ").strip,
|
170
|
+
# language: language(ref), script: "Latn"
|
171
|
+
# )
|
172
|
+
# end
|
173
|
+
# end
|
174
|
+
|
175
|
+
# @param reference [Nokogiri::XML::Element]
|
176
|
+
# @return [Array<Hash>]
|
177
|
+
# def contributors(reference)
|
178
|
+
# persons(reference) + organizations(reference)
|
179
|
+
# end
|
180
|
+
|
181
|
+
# @param reference [Nokogiri::XML::Element]
|
182
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
183
|
+
# def persons(reference)
|
184
|
+
# reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
185
|
+
# .map do |author|
|
186
|
+
# entity = RelatonBib::Person.new(
|
187
|
+
# name: full_name(author, reference),
|
188
|
+
# affiliation: [affiliation(author)],
|
189
|
+
# contact: contacts(author.at("./address")),
|
190
|
+
# )
|
191
|
+
# { entity: entity, role: [contributor_role(author)] }
|
192
|
+
# end
|
193
|
+
# end
|
194
|
+
|
195
|
+
# @param reference [Nokogiri::XML::Element]
|
196
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Organization,
|
197
|
+
# Symbol=>Array<String>}>]
|
198
|
+
# def organizations(reference)
|
199
|
+
# publisher = { entity: new_org, role: [type: "publisher"] }
|
200
|
+
# orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
201
|
+
# next mem unless si[:stream]
|
202
|
+
|
203
|
+
# mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
204
|
+
# end
|
205
|
+
# orgs + reference.xpath(
|
206
|
+
# "front/author[not(@surname)][not(@fullname)]/organization",
|
207
|
+
# ).map do |org|
|
208
|
+
# { entity: new_org(org.text, nil), role: [type: "author"] }
|
209
|
+
# end
|
210
|
+
# end
|
211
|
+
|
212
|
+
# @param author [Nokogiri::XML::Element]
|
213
|
+
# @param ref [Nokogiri::XML::Element]
|
214
|
+
# @return [RelatonBib::FullName]
|
215
|
+
# def full_name(author, ref)
|
216
|
+
# lang = language ref
|
217
|
+
# RelatonBib::FullName.new(
|
218
|
+
# completename: localized_string(author[:fullname], lang),
|
219
|
+
# initial: [localized_string(author[:initials], lang)].compact,
|
220
|
+
# surname: localized_string(author[:surname], lang),
|
221
|
+
# )
|
222
|
+
# end
|
223
|
+
|
224
|
+
# @param content [String]
|
225
|
+
# @param lang [String]
|
226
|
+
# @return [RelatonBib::LocalizedString]
|
227
|
+
# def localized_string(content, lang)
|
228
|
+
# return unless content
|
229
|
+
|
230
|
+
# RelatonBib::LocalizedString.new(content, lang)
|
231
|
+
# end
|
232
|
+
|
233
|
+
# @param addr [Nokogiri::XML::Element]
|
234
|
+
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
235
|
+
# def contacts(addr)
|
236
|
+
# contacts = []
|
237
|
+
# return contacts unless addr
|
238
|
+
|
239
|
+
# postal = addr.at("./postal")
|
240
|
+
# contacts << address(postal) if postal
|
241
|
+
# add_contact(contacts, "phone", addr.at("./phone"))
|
242
|
+
# add_contact(contacts, "email", addr.at("./email"))
|
243
|
+
# add_contact(contacts, "uri", addr.at("./uri"))
|
244
|
+
# contacts
|
245
|
+
# end
|
246
|
+
|
247
|
+
# @param postal [Nokogiri::XML::Element]
|
248
|
+
# @return [RelatonBib::Address]
|
249
|
+
# def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
250
|
+
# RelatonBib::Address.new(
|
251
|
+
# street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
252
|
+
# city: postal.at("./city")&.text,
|
253
|
+
# postcode: postal.at("./code")&.text,
|
254
|
+
# country: postal.at("./country")&.text,
|
255
|
+
# state: postal.at("./region")&.text,
|
256
|
+
# )
|
257
|
+
# end
|
258
|
+
|
259
|
+
# @param type [String] allowed "phone", "email" or "uri"
|
260
|
+
# @param value [String]
|
261
|
+
# def add_contact(contacts, type, value)
|
262
|
+
# return unless value
|
263
|
+
|
264
|
+
# contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
265
|
+
# end
|
266
|
+
|
267
|
+
# @param author [Nokogiri::XML::Element]
|
268
|
+
# @return [RelatonBib::Affiliation]
|
269
|
+
# def affiliation(author)
|
270
|
+
# organization = author.at("./organization")
|
271
|
+
# org = if organization.nil? || organization&.text&.empty?
|
272
|
+
# new_org
|
273
|
+
# else
|
274
|
+
# new_org organization.text, organization[:abbrev]
|
275
|
+
# end
|
276
|
+
# RelatonBib::Affiliation.new organization: org
|
277
|
+
# end
|
278
|
+
|
279
|
+
# @param name [String]
|
280
|
+
# @param abbr [String]
|
281
|
+
# @return [RelatonBib::Organization]
|
282
|
+
# def new_org(name = "Internet Engineering Task Force", abbr = "IETF")
|
283
|
+
# RelatonBib::Organization.new name: name, abbreviation: abbr
|
284
|
+
# end
|
285
|
+
|
286
|
+
# @param author [Nokogiri::XML::Document]
|
287
|
+
# @return [Hash]
|
288
|
+
# def contributor_role(author)
|
289
|
+
# { type: author[:role] || "author" }
|
290
|
+
# end
|
291
|
+
|
292
|
+
# def month(mon)
|
293
|
+
# return 1 if !mon || mon.empty?
|
294
|
+
# return mon if /^\d+$/.match? mon
|
295
|
+
|
296
|
+
# Date::MONTHNAMES.index(mon)
|
297
|
+
# end
|
298
|
+
|
299
|
+
#
|
300
|
+
# Extract date from reference.
|
301
|
+
#
|
302
|
+
# @param reference [Nokogiri::XML::Element]
|
303
|
+
# @return [Array<RelatonBib::BibliographicDate>] published date.
|
304
|
+
#
|
305
|
+
# def dates(reference)
|
306
|
+
# return unless (date = reference.at "./front/date")
|
307
|
+
|
308
|
+
# d = [date[:year], month(date[:month]),
|
309
|
+
# (date[:day] || 1)].compact.join "-"
|
310
|
+
# date = Time.parse(d).strftime "%Y-%m-%d"
|
311
|
+
# [RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
312
|
+
# end
|
313
|
+
|
314
|
+
#
|
315
|
+
# Extract document identifiers from reference
|
316
|
+
#
|
317
|
+
# @param reference [Nokogiri::XML::Element]
|
318
|
+
# @param ver [String, NilClass] Internet Draft version
|
319
|
+
#
|
320
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
321
|
+
#
|
322
|
+
# def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
323
|
+
# id = (reference[:anchor] || reference[:docName] || reference[:number])
|
324
|
+
# ret = []
|
325
|
+
# if id
|
326
|
+
# ret << RelatonBib::DocumentIdentifier.new(
|
327
|
+
# type: "IETF", id: id.sub(/^(RFC)/, "\\1 "),
|
328
|
+
# )
|
329
|
+
# end
|
330
|
+
# if (id = reference[:anchor])
|
331
|
+
# ret << RelatonBib::DocumentIdentifier.new(type: "rfc-anchor", id: id)
|
332
|
+
# end
|
333
|
+
# names = ["DOI", "Internet-Draft"]
|
334
|
+
# ret + reference.xpath("./seriesInfo").map do |si|
|
335
|
+
# next unless names.include? si[:name]
|
336
|
+
|
337
|
+
# id = si[:value]
|
338
|
+
# id.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
|
339
|
+
# RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
340
|
+
# end.compact
|
341
|
+
# end
|
342
|
+
|
343
|
+
#
|
344
|
+
# Extract series from reference
|
345
|
+
# @param reference [Nokogiri::XML::Element]
|
346
|
+
#
|
347
|
+
# @return [Array<RelatonBib::Series>]
|
348
|
+
#
|
349
|
+
# def series(reference)
|
350
|
+
# reference.xpath("./seriesInfo").map do |si|
|
351
|
+
# next if si[:name] == "DOI" || si[:stream] || si[:status]
|
352
|
+
|
353
|
+
# RelatonBib::Series.new(
|
354
|
+
# title: RelatonBib::TypedTitleString.new(
|
355
|
+
# content: si[:name], language: language(reference), script: "Latn",
|
356
|
+
# ),
|
357
|
+
# number: si[:value],
|
358
|
+
# type: "main",
|
359
|
+
# )
|
360
|
+
# end.compact
|
361
|
+
# end
|
362
|
+
|
363
|
+
#
|
364
|
+
# extract status
|
365
|
+
# @param reference [Nokogiri::XML::Element]
|
366
|
+
#
|
367
|
+
# @return [RelatonBib::DocumentStatus]
|
368
|
+
#
|
369
|
+
# def status(reference)
|
370
|
+
# st = reference.at("./seriesinfo[@status]")
|
371
|
+
# return unless st
|
372
|
+
|
373
|
+
# RelatonBib::DocumentStatus.new(stage: st[:status])
|
374
|
+
# end
|
360
375
|
end
|
361
|
-
# rubocop:enable Metrics/ModuleLength
|
362
376
|
end
|