relaton-ietf 1.7.1 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_ietf/hash_converter.rb +1 -1
- data/lib/relaton_ietf/scrapper.rb +41 -46
- data/lib/relaton_ietf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe0909b3d346a37c7e6363a2c1b4c2822464b1b8bcaa589e3b9efad9148b24a8
|
4
|
+
data.tar.gz: b1a22b2905d52f0f014a27ea7eb11abe049ec62c2e6918818c6ca5cf3e2af843
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 151fddd3a5168eb80b1fcd0f86a33f58ab222993457c2c1eb8b24a03ccf976b4048b01e7a7ff182957c846a258fe77602114f2de394b285b6291e16baca2cd35
|
7
|
+
data.tar.gz: '08ed81561c2737cc7852c868990064e4bf23bfd270c24f55ecb6948b6b97e7bcff09274baebab5d4003797c0babc1fc1c4dfabbd19e72cfd33e73f05533b4e13'
|
data/lib/relaton_ietf/scrapper.rb
CHANGED
@@ -11,22 +11,18 @@ module RelatonIetf
|
|
11
11
|
# Scrapper module
|
12
12
|
module Scrapper
|
13
13
|
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
14
|
-
RFC_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
|
15
|
-
# ID_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
|
16
14
|
BCP_URI_PATTERN = "https://www.rfc-editor.org/info/CODE"
|
17
15
|
|
18
16
|
class << self
|
19
|
-
# rubocop:disable Metrics/MethodLength
|
20
|
-
|
21
17
|
# @param text [String]
|
22
18
|
# @param is_relation [TrueClass, FalseClass]
|
23
19
|
# @return [RelatonIetf::IetfBibliographicItem]
|
24
20
|
def scrape_page(text, is_relation = false)
|
25
21
|
# Remove initial "IETF " string if specified
|
26
22
|
ref = text.gsub(/^IETF /, "")
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
/^(RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
+
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
25
|
+
rfc_item ref, is_relation
|
30
26
|
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
31
27
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
32
28
|
Net::ProtocolError, SocketError
|
@@ -38,7 +34,7 @@ module RelatonIetf
|
|
38
34
|
# @param url [String, NilClass]
|
39
35
|
# @param ver [String, NilClass] Internet Draft version
|
40
36
|
# @return [RelatonIetf::IetfBibliographicItem]
|
41
|
-
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize
|
37
|
+
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
42
38
|
return unless reference
|
43
39
|
|
44
40
|
ietf_item(
|
@@ -50,8 +46,10 @@ module RelatonIetf
|
|
50
46
|
language: [language(reference)],
|
51
47
|
link: link(reference, url, ver),
|
52
48
|
title: titles(reference),
|
49
|
+
formattedref: formattedref(reference),
|
53
50
|
abstract: abstracts(reference),
|
54
51
|
contributor: contributors(reference),
|
52
|
+
relation: relations(reference),
|
55
53
|
date: dates(reference),
|
56
54
|
series: series(reference),
|
57
55
|
place: ["Fremont, CA"],
|
@@ -59,7 +57,6 @@ module RelatonIetf
|
|
59
57
|
doctype: doctype(reference[:anchor])
|
60
58
|
)
|
61
59
|
end
|
62
|
-
# rubocop:enable Metrics/MethodLength
|
63
60
|
|
64
61
|
private
|
65
62
|
|
@@ -105,35 +102,19 @@ module RelatonIetf
|
|
105
102
|
|
106
103
|
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
107
104
|
doc = Nokogiri::XML get_page(uri)
|
108
|
-
fetch_rfc doc.at("
|
109
|
-
end
|
110
|
-
|
111
|
-
# @param uri_template [String]
|
112
|
-
# @param reference [String]
|
113
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
114
|
-
def bcp_item(uri_template, reference) # rubocop:disable Metrics/MethodLength
|
115
|
-
uri = uri_template.sub "CODE", reference.sub(" ", "").downcase
|
116
|
-
doc = Nokogiri::HTML get_page(uri)
|
117
|
-
ietf_item(
|
118
|
-
id: reference,
|
119
|
-
title: [content: ""],
|
120
|
-
docid: [RelatonBib::DocumentIdentifier.new(type: "IETF", id: reference)],
|
121
|
-
language: ["en"],
|
122
|
-
link: [{ type: "src", content: uri }],
|
123
|
-
relation: fetch_relations(doc),
|
124
|
-
doctype: "rfc"
|
125
|
-
)
|
105
|
+
fetch_rfc doc.at("/referencegroup", "/reference"), is_relation, uri, ver
|
126
106
|
end
|
127
107
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
)
|
108
|
+
# @param reference [Nokogiri::XML::Element]
|
109
|
+
# @return [Hash]
|
110
|
+
def relations(reference)
|
111
|
+
reference.xpath("reference").map do |ref|
|
112
|
+
{ type: "includes", bibitem: fetch_rfc(ref, true) }
|
134
113
|
end
|
135
114
|
end
|
136
115
|
|
116
|
+
# @param uri [String]
|
117
|
+
# @return [String] HTTP response body
|
137
118
|
def get_page(uri)
|
138
119
|
res = Net::HTTP.get_response(URI(uri))
|
139
120
|
if res.code != "200"
|
@@ -143,17 +124,30 @@ module RelatonIetf
|
|
143
124
|
res.body
|
144
125
|
end
|
145
126
|
|
127
|
+
# @param reference [Nokogiri::XML::Element]
|
146
128
|
# @return [String]
|
147
129
|
def language(reference)
|
148
130
|
reference[:lang] || "en"
|
149
131
|
end
|
150
132
|
|
133
|
+
# @param reference [Nokogiri::XML::Element]
|
151
134
|
# @return [Array<Hash>]
|
152
135
|
def titles(reference)
|
153
|
-
|
154
|
-
|
136
|
+
reference.xpath("./front/title").map do |title|
|
137
|
+
{ content: title.text, language: language(reference), script: "Latn" }
|
138
|
+
end
|
155
139
|
end
|
156
140
|
|
141
|
+
# @param reference [Nokogiri::XML::Element]
|
142
|
+
# @return [RelatonBib::FormattedRef, nil]
|
143
|
+
def formattedref(reference)
|
144
|
+
return if reference.at "./fornt/title"
|
145
|
+
|
146
|
+
cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
147
|
+
RelatonBib::FormattedRef.new content: cont, language: language(reference), script: "Latn" if cont
|
148
|
+
end
|
149
|
+
|
150
|
+
# @param reference [Nokogiri::XML::Element]
|
157
151
|
# @return [Array<RelatonBib::FormattedString>]
|
158
152
|
def abstracts(ref)
|
159
153
|
ref.xpath("./front/abstract").map do |a|
|
@@ -164,11 +158,13 @@ module RelatonIetf
|
|
164
158
|
end
|
165
159
|
end
|
166
160
|
|
161
|
+
# @param reference [Nokogiri::XML::Element]
|
167
162
|
# @return [Array<Hash>]
|
168
163
|
def contributors(reference)
|
169
164
|
persons(reference) + organizations(reference)
|
170
165
|
end
|
171
166
|
|
167
|
+
# @param reference [Nokogiri::XML::Element]
|
172
168
|
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
173
169
|
def persons(reference)
|
174
170
|
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
@@ -182,6 +178,7 @@ module RelatonIetf
|
|
182
178
|
end
|
183
179
|
end
|
184
180
|
|
181
|
+
# @param reference [Nokogiri::XML::Element]
|
185
182
|
# @return [Array<Hash{Symbol=>RelatonBib::Organization,Symbol=>Array<String>}>]
|
186
183
|
def organizations(reference)
|
187
184
|
publisher = { entity: new_org, role: [type: "publisher"] }
|
@@ -197,8 +194,8 @@ module RelatonIetf
|
|
197
194
|
end
|
198
195
|
end
|
199
196
|
|
200
|
-
# @param author [Nokogiri::XML::
|
201
|
-
# @param ref [Nokogiri::XML::
|
197
|
+
# @param author [Nokogiri::XML::Element]
|
198
|
+
# @param ref [Nokogiri::XML::Element]
|
202
199
|
# @return [RelatonBib::FullName]
|
203
200
|
def full_name(author, ref)
|
204
201
|
lang = language ref
|
@@ -218,7 +215,7 @@ module RelatonIetf
|
|
218
215
|
RelatonBib::LocalizedString.new(content, lang)
|
219
216
|
end
|
220
217
|
|
221
|
-
# @param postal [Nokogiri::XML::
|
218
|
+
# @param postal [Nokogiri::XML::Element]
|
222
219
|
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
223
220
|
def contacts(addr)
|
224
221
|
contacts = []
|
@@ -232,7 +229,7 @@ module RelatonIetf
|
|
232
229
|
contacts
|
233
230
|
end
|
234
231
|
|
235
|
-
# @param postal [Nokogiri::XML::
|
232
|
+
# @param postal [Nokogiri::XML::Element]
|
236
233
|
# @rerurn [RelatonBib::Address]
|
237
234
|
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
238
235
|
RelatonBib::Address.new(
|
@@ -252,7 +249,7 @@ module RelatonIetf
|
|
252
249
|
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
253
250
|
end
|
254
251
|
|
255
|
-
# @param author [Nokogiri::XML::
|
252
|
+
# @param author [Nokogiri::XML::Element]
|
256
253
|
# @return [RelatonBib::Affiliation]
|
257
254
|
def affiliation(author)
|
258
255
|
organization = author.at("./organization")
|
@@ -287,6 +284,7 @@ module RelatonIetf
|
|
287
284
|
#
|
288
285
|
# Extract date from reference.
|
289
286
|
#
|
287
|
+
# @param reference [Nokogiri::XML::Element]
|
290
288
|
# @return [Array<RelatonBib::BibliographicDate>] published data.
|
291
289
|
#
|
292
290
|
def dates(reference)
|
@@ -298,8 +296,6 @@ module RelatonIetf
|
|
298
296
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
299
297
|
end
|
300
298
|
|
301
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
302
|
-
|
303
299
|
#
|
304
300
|
# Extract document identifiers from reference
|
305
301
|
#
|
@@ -308,7 +304,7 @@ module RelatonIetf
|
|
308
304
|
#
|
309
305
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
310
306
|
#
|
311
|
-
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
307
|
+
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
312
308
|
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
313
309
|
ret = []
|
314
310
|
if id
|
@@ -327,11 +323,10 @@ module RelatonIetf
|
|
327
323
|
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
328
324
|
end.compact
|
329
325
|
end
|
330
|
-
# enable Metrics/MethodLength, Metrics/AbcSize
|
331
326
|
|
332
327
|
#
|
333
328
|
# Extract series form reference
|
334
|
-
# @param reference [Nokogiri::XML::
|
329
|
+
# @param reference [Nokogiri::XML::Element]
|
335
330
|
#
|
336
331
|
# @return [Array<RelatonBib::Series>]
|
337
332
|
#
|
@@ -351,7 +346,7 @@ module RelatonIetf
|
|
351
346
|
|
352
347
|
#
|
353
348
|
# extract status
|
354
|
-
# @param reference [Nokogiri::XML::
|
349
|
+
# @param reference [Nokogiri::XML::Element]
|
355
350
|
#
|
356
351
|
# @return [RelatonBib::DocumentStatus]
|
357
352
|
#
|
data/lib/relaton_ietf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ietf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.1
|
4
|
+
version: 1.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|