relaton-ietf 1.7.1 → 1.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_ietf/hash_converter.rb +1 -1
- data/lib/relaton_ietf/scrapper.rb +41 -46
- data/lib/relaton_ietf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe0909b3d346a37c7e6363a2c1b4c2822464b1b8bcaa589e3b9efad9148b24a8
|
4
|
+
data.tar.gz: b1a22b2905d52f0f014a27ea7eb11abe049ec62c2e6918818c6ca5cf3e2af843
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 151fddd3a5168eb80b1fcd0f86a33f58ab222993457c2c1eb8b24a03ccf976b4048b01e7a7ff182957c846a258fe77602114f2de394b285b6291e16baca2cd35
|
7
|
+
data.tar.gz: '08ed81561c2737cc7852c868990064e4bf23bfd270c24f55ecb6948b6b97e7bcff09274baebab5d4003797c0babc1fc1c4dfabbd19e72cfd33e73f05533b4e13'
|
@@ -11,22 +11,18 @@ module RelatonIetf
|
|
11
11
|
# Scrapper module
|
12
12
|
module Scrapper
|
13
13
|
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
14
|
-
RFC_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
|
15
|
-
# ID_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
|
16
14
|
BCP_URI_PATTERN = "https://www.rfc-editor.org/info/CODE"
|
17
15
|
|
18
16
|
class << self
|
19
|
-
# rubocop:disable Metrics/MethodLength
|
20
|
-
|
21
17
|
# @param text [String]
|
22
18
|
# @param is_relation [TrueClass, FalseClass]
|
23
19
|
# @return [RelatonIetf::IetfBibliographicItem]
|
24
20
|
def scrape_page(text, is_relation = false)
|
25
21
|
# Remove initial "IETF " string if specified
|
26
22
|
ref = text.gsub(/^IETF /, "")
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
/^(RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
+
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
25
|
+
rfc_item ref, is_relation
|
30
26
|
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
31
27
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
32
28
|
Net::ProtocolError, SocketError
|
@@ -38,7 +34,7 @@ module RelatonIetf
|
|
38
34
|
# @param url [String, NilClass]
|
39
35
|
# @param ver [String, NilClass] Internet Draft version
|
40
36
|
# @return [RelatonIetf::IetfBibliographicItem]
|
41
|
-
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize
|
37
|
+
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
42
38
|
return unless reference
|
43
39
|
|
44
40
|
ietf_item(
|
@@ -50,8 +46,10 @@ module RelatonIetf
|
|
50
46
|
language: [language(reference)],
|
51
47
|
link: link(reference, url, ver),
|
52
48
|
title: titles(reference),
|
49
|
+
formattedref: formattedref(reference),
|
53
50
|
abstract: abstracts(reference),
|
54
51
|
contributor: contributors(reference),
|
52
|
+
relation: relations(reference),
|
55
53
|
date: dates(reference),
|
56
54
|
series: series(reference),
|
57
55
|
place: ["Fremont, CA"],
|
@@ -59,7 +57,6 @@ module RelatonIetf
|
|
59
57
|
doctype: doctype(reference[:anchor])
|
60
58
|
)
|
61
59
|
end
|
62
|
-
# rubocop:enable Metrics/MethodLength
|
63
60
|
|
64
61
|
private
|
65
62
|
|
@@ -105,35 +102,19 @@ module RelatonIetf
|
|
105
102
|
|
106
103
|
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
107
104
|
doc = Nokogiri::XML get_page(uri)
|
108
|
-
fetch_rfc doc.at("
|
109
|
-
end
|
110
|
-
|
111
|
-
# @param uri_template [String]
|
112
|
-
# @param reference [String]
|
113
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
114
|
-
def bcp_item(uri_template, reference) # rubocop:disable Metrics/MethodLength
|
115
|
-
uri = uri_template.sub "CODE", reference.sub(" ", "").downcase
|
116
|
-
doc = Nokogiri::HTML get_page(uri)
|
117
|
-
ietf_item(
|
118
|
-
id: reference,
|
119
|
-
title: [content: ""],
|
120
|
-
docid: [RelatonBib::DocumentIdentifier.new(type: "IETF", id: reference)],
|
121
|
-
language: ["en"],
|
122
|
-
link: [{ type: "src", content: uri }],
|
123
|
-
relation: fetch_relations(doc),
|
124
|
-
doctype: "rfc"
|
125
|
-
)
|
105
|
+
fetch_rfc doc.at("/referencegroup", "/reference"), is_relation, uri, ver
|
126
106
|
end
|
127
107
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
)
|
108
|
+
# @param reference [Nokogiri::XML::Element]
|
109
|
+
# @return [Hash]
|
110
|
+
def relations(reference)
|
111
|
+
reference.xpath("reference").map do |ref|
|
112
|
+
{ type: "includes", bibitem: fetch_rfc(ref, true) }
|
134
113
|
end
|
135
114
|
end
|
136
115
|
|
116
|
+
# @param uri [String]
|
117
|
+
# @return [String] HTTP response body
|
137
118
|
def get_page(uri)
|
138
119
|
res = Net::HTTP.get_response(URI(uri))
|
139
120
|
if res.code != "200"
|
@@ -143,17 +124,30 @@ module RelatonIetf
|
|
143
124
|
res.body
|
144
125
|
end
|
145
126
|
|
127
|
+
# @param reference [Nokogiri::XML::Element]
|
146
128
|
# @return [String]
|
147
129
|
def language(reference)
|
148
130
|
reference[:lang] || "en"
|
149
131
|
end
|
150
132
|
|
133
|
+
# @param reference [Nokogiri::XML::Element]
|
151
134
|
# @return [Array<Hash>]
|
152
135
|
def titles(reference)
|
153
|
-
|
154
|
-
|
136
|
+
reference.xpath("./front/title").map do |title|
|
137
|
+
{ content: title.text, language: language(reference), script: "Latn" }
|
138
|
+
end
|
155
139
|
end
|
156
140
|
|
141
|
+
# @param reference [Nokogiri::XML::Element]
|
142
|
+
# @return [RelatonBib::FormattedRef, nil]
|
143
|
+
def formattedref(reference)
|
144
|
+
return if reference.at "./fornt/title"
|
145
|
+
|
146
|
+
cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
147
|
+
RelatonBib::FormattedRef.new content: cont, language: language(reference), script: "Latn" if cont
|
148
|
+
end
|
149
|
+
|
150
|
+
# @param reference [Nokogiri::XML::Element]
|
157
151
|
# @return [Array<RelatonBib::FormattedString>]
|
158
152
|
def abstracts(ref)
|
159
153
|
ref.xpath("./front/abstract").map do |a|
|
@@ -164,11 +158,13 @@ module RelatonIetf
|
|
164
158
|
end
|
165
159
|
end
|
166
160
|
|
161
|
+
# @param reference [Nokogiri::XML::Element]
|
167
162
|
# @return [Array<Hash>]
|
168
163
|
def contributors(reference)
|
169
164
|
persons(reference) + organizations(reference)
|
170
165
|
end
|
171
166
|
|
167
|
+
# @param reference [Nokogiri::XML::Element]
|
172
168
|
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
173
169
|
def persons(reference)
|
174
170
|
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
@@ -182,6 +178,7 @@ module RelatonIetf
|
|
182
178
|
end
|
183
179
|
end
|
184
180
|
|
181
|
+
# @param reference [Nokogiri::XML::Element]
|
185
182
|
# @return [Array<Hash{Symbol=>RelatonBib::Organization,Symbol=>Array<String>}>]
|
186
183
|
def organizations(reference)
|
187
184
|
publisher = { entity: new_org, role: [type: "publisher"] }
|
@@ -197,8 +194,8 @@ module RelatonIetf
|
|
197
194
|
end
|
198
195
|
end
|
199
196
|
|
200
|
-
# @param author [Nokogiri::XML::
|
201
|
-
# @param ref [Nokogiri::XML::
|
197
|
+
# @param author [Nokogiri::XML::Element]
|
198
|
+
# @param ref [Nokogiri::XML::Element]
|
202
199
|
# @return [RelatonBib::FullName]
|
203
200
|
def full_name(author, ref)
|
204
201
|
lang = language ref
|
@@ -218,7 +215,7 @@ module RelatonIetf
|
|
218
215
|
RelatonBib::LocalizedString.new(content, lang)
|
219
216
|
end
|
220
217
|
|
221
|
-
# @param postal [Nokogiri::XML::
|
218
|
+
# @param postal [Nokogiri::XML::Element]
|
222
219
|
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
223
220
|
def contacts(addr)
|
224
221
|
contacts = []
|
@@ -232,7 +229,7 @@ module RelatonIetf
|
|
232
229
|
contacts
|
233
230
|
end
|
234
231
|
|
235
|
-
# @param postal [Nokogiri::XML::
|
232
|
+
# @param postal [Nokogiri::XML::Element]
|
236
233
|
# @return [RelatonBib::Address]
|
237
234
|
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
238
235
|
RelatonBib::Address.new(
|
@@ -252,7 +249,7 @@ module RelatonIetf
|
|
252
249
|
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
253
250
|
end
|
254
251
|
|
255
|
-
# @param author [Nokogiri::XML::
|
252
|
+
# @param author [Nokogiri::XML::Element]
|
256
253
|
# @return [RelatonBib::Affiliation]
|
257
254
|
def affiliation(author)
|
258
255
|
organization = author.at("./organization")
|
@@ -287,6 +284,7 @@ module RelatonIetf
|
|
287
284
|
#
|
288
285
|
# Extract date from reference.
|
289
286
|
#
|
287
|
+
# @param reference [Nokogiri::XML::Element]
|
290
288
|
# @return [Array<RelatonBib::BibliographicDate>] published date.
|
291
289
|
#
|
292
290
|
def dates(reference)
|
@@ -298,8 +296,6 @@ module RelatonIetf
|
|
298
296
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
299
297
|
end
|
300
298
|
|
301
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
302
|
-
|
303
299
|
#
|
304
300
|
# Extract document identifiers from reference
|
305
301
|
#
|
@@ -308,7 +304,7 @@ module RelatonIetf
|
|
308
304
|
#
|
309
305
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
310
306
|
#
|
311
|
-
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
307
|
+
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
312
308
|
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
313
309
|
ret = []
|
314
310
|
if id
|
@@ -327,11 +323,10 @@ module RelatonIetf
|
|
327
323
|
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
328
324
|
end.compact
|
329
325
|
end
|
330
|
-
# enable Metrics/MethodLength, Metrics/AbcSize
|
331
326
|
|
332
327
|
#
|
333
328
|
# Extract series from reference
|
334
|
-
# @param reference [Nokogiri::XML::
|
329
|
+
# @param reference [Nokogiri::XML::Element]
|
335
330
|
#
|
336
331
|
# @return [Array<RelatonBib::Series>]
|
337
332
|
#
|
@@ -351,7 +346,7 @@ module RelatonIetf
|
|
351
346
|
|
352
347
|
#
|
353
348
|
# extract status
|
354
|
-
# @param reference [Nokogiri::XML::
|
349
|
+
# @param reference [Nokogiri::XML::Element]
|
355
350
|
#
|
356
351
|
# @return [RelatonBib::DocumentStatus]
|
357
352
|
#
|
data/lib/relaton_ietf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ietf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.1
|
4
|
+
version: 1.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|