relaton-ietf 1.7.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_ietf.rb +1 -6
- data/lib/relaton_ietf/hash_converter.rb +8 -17
- data/lib/relaton_ietf/scrapper.rb +33 -59
- data/lib/relaton_ietf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90f95ffe25f46c8cd5ae35332277f6b39bd787c7840a30c9315b4ca393076033
|
4
|
+
data.tar.gz: 0e591717abcf1ae0ff27feb518469ca5703103c9d8184fea7286b3eb43b3764c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d4cee3c808eedf011dbf3ee0199b6baa70c4e390821a88a685e34dd486a53ba330cda666597521c70f26670c9063cb20c98c867139bee710efe857f0623d734
|
7
|
+
data.tar.gz: 1e9297ebf9b3897ce346a0b1fdedf04779220c7f0210a10fa667a67df923382293029cc3126c0edf6a40857712e9bbf3a72d7fb2f4d94fc90a40f896b8e5dbc2
|
data/lib/relaton_ietf.rb
CHANGED
@@ -6,11 +6,6 @@ require "relaton_ietf/ietf_bibliographic_item"
|
|
6
6
|
require "relaton_ietf/xml_parser"
|
7
7
|
require "relaton_ietf/hash_converter"
|
8
8
|
|
9
|
-
# if defined? Relaton
|
10
|
-
# require_relative "relaton_ietf/processor"
|
11
|
-
# Relaton::Registry.instance.register(RelatonIetf::Processor)
|
12
|
-
# end
|
13
|
-
|
14
9
|
require "relaton/provider_ietf"
|
15
10
|
|
16
11
|
module RelatonIetf
|
@@ -22,4 +17,4 @@ module RelatonIetf
|
|
22
17
|
grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
|
23
18
|
Digest::MD5.hexdigest grammars
|
24
19
|
end
|
25
|
-
end
|
20
|
+
end
|
data/lib/relaton_ietf/hash_converter.rb
CHANGED
@@ -1,23 +1,14 @@
|
|
1
1
|
module RelatonIetf
|
2
2
|
class HashConverter < RelatonBib::HashConverter
|
3
3
|
class << self
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
# @
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# doctype_hash_to_bib(ret)
|
13
|
-
# ret
|
14
|
-
# end
|
15
|
-
|
16
|
-
# private
|
17
|
-
|
18
|
-
# def doctype_hash_to_bib(ret)
|
19
|
-
# ret
|
20
|
-
# end
|
4
|
+
#
|
5
|
+
# Overrides superclass's method
|
6
|
+
#
|
7
|
+
# @param item [Hash]
|
8
|
+
# @return [RelatonIec::IecBibliographicItem]
|
9
|
+
def bib_item(item)
|
10
|
+
IecBibliographicItem.new(item)
|
11
|
+
end
|
21
12
|
end
|
22
13
|
end
|
23
14
|
end
|
data/lib/relaton_ietf/scrapper.rb
CHANGED
@@ -10,6 +10,7 @@ module RelatonIetf
|
|
10
10
|
|
11
11
|
# Scrapper module
|
12
12
|
module Scrapper
|
13
|
+
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
13
14
|
RFC_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
|
14
15
|
# ID_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
|
15
16
|
BCP_URI_PATTERN = "https://www.rfc-editor.org/info/CODE"
|
@@ -23,18 +24,8 @@ module RelatonIetf
|
|
23
24
|
def scrape_page(text, is_relation = false)
|
24
25
|
# Remove initial "IETF " string if specified
|
25
26
|
ref = text.gsub(/^IETF /, "")
|
26
|
-
|
27
|
-
|
28
|
-
when /^RFC/ then rfc_item [""], ref, is_relation
|
29
|
-
when /^I-D/ then rfc_item ["3"], ref, is_relation
|
30
|
-
when /^W3C/ then rfc_item ["4", "2"], ref, is_relation
|
31
|
-
when /^(ANSI|CCITT|FIPS|IANA|ISO|ITU|NIST|OASIS|PKCS)/
|
32
|
-
rfc_item ["2"], ref, is_relation
|
33
|
-
when /^(3GPP|SDO-3GPP)/ then rfc_item ["5"], ref, is_relation
|
34
|
-
when /^IEEE/ then rfc_item ["6", "2"], ref, is_relation
|
35
|
-
when /^BCP/ then bcp_item BCP_URI_PATTERN.dup, ref
|
36
|
-
else
|
37
|
-
raise RelatonBib::RequestError, "#{ref}: not recognised for RFC"
|
27
|
+
if ref.match? /^BCP/ then bcp_item BCP_URI_PATTERN.dup, ref
|
28
|
+
else rfc_item ref, is_relation
|
38
29
|
end
|
39
30
|
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
40
31
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
@@ -42,12 +33,12 @@ module RelatonIetf
|
|
42
33
|
raise RelatonBib::RequestError, "No document found for #{ref} reference."
|
43
34
|
end
|
44
35
|
|
45
|
-
# @param reference [
|
36
|
+
# @param reference [Nokogiri::XML::Element, nil]
|
46
37
|
# @param is_relation [TrueClass, FalseClass]
|
47
38
|
# @param url [String, NilClass]
|
48
39
|
# @param ver [String, NilClass] Internet Draft version
|
49
40
|
# @return [RelatonIetf::IetfBibliographicItem]
|
50
|
-
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil)
|
41
|
+
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil) # rubocop:disable Metrics/AbcSize
|
51
42
|
return unless reference
|
52
43
|
|
53
44
|
ietf_item(
|
@@ -65,7 +56,7 @@ module RelatonIetf
|
|
65
56
|
series: series(reference),
|
66
57
|
place: ["Fremont, CA"],
|
67
58
|
keyword: reference.xpath("front/keyword").map(&:text),
|
68
|
-
doctype: doctype(reference[:anchor])
|
59
|
+
doctype: doctype(reference[:anchor])
|
69
60
|
)
|
70
61
|
end
|
71
62
|
# rubocop:enable Metrics/MethodLength
|
@@ -75,7 +66,7 @@ module RelatonIetf
|
|
75
66
|
# @param anchor [String]
|
76
67
|
# @return [String]
|
77
68
|
def doctype(anchor)
|
78
|
-
anchor
|
69
|
+
anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
79
70
|
end
|
80
71
|
|
81
72
|
# @param reference [Nokogiri::XML::Element]
|
@@ -86,7 +77,7 @@ module RelatonIetf
|
|
86
77
|
l = []
|
87
78
|
l << { type: "xml", content: url } if url
|
88
79
|
l << { type: "src", content: reference[:target] } if reference[:target]
|
89
|
-
if reference[:anchor]
|
80
|
+
if /^I-D/.match? reference[:anchor]
|
90
81
|
reference.xpath("format").each do |f|
|
91
82
|
c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
92
83
|
l << { type: f[:type], content: c }
|
@@ -103,35 +94,24 @@ module RelatonIetf
|
|
103
94
|
RelatonIetf::IetfBibliographicItem.new **attrs
|
104
95
|
end
|
105
96
|
|
106
|
-
# @param uri_nums [Array<String>]
|
107
97
|
# @param ref [String]
|
108
98
|
# @return [RelatonIetf::IetfBibliographicItem]
|
109
|
-
def rfc_item(
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
/(
|
114
|
-
if n == "3"
|
115
|
-
ref.sub! /-\d{2}/, "" if ver
|
116
|
-
ref.sub! /(?<=I-D\.)draft-/, ""
|
117
|
-
end
|
118
|
-
|
119
|
-
uri = "#{RFC_URI_PATTERN}#{n}/reference.#{ref.sub(/\s|\u00a0/, ".")}.xml"
|
120
|
-
begin
|
121
|
-
doc = Nokogiri::XML get_page(uri)
|
122
|
-
resp = fetch_rfc doc.at("//reference"), is_relation, uri, ver
|
123
|
-
return resp if resp
|
124
|
-
rescue RelatonBib::RequestError => e
|
125
|
-
error = e
|
126
|
-
end
|
99
|
+
def rfc_item(ref, is_relation)
|
100
|
+
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
101
|
+
if /^I-D/.match? ref
|
102
|
+
ref.sub! /-\d{2}/, "" if ver
|
103
|
+
ref.sub! /(?<=I-D\.)draft-/, ""
|
127
104
|
end
|
128
|
-
|
105
|
+
|
106
|
+
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
107
|
+
doc = Nokogiri::XML get_page(uri)
|
108
|
+
fetch_rfc doc.at("//reference"), is_relation, uri, ver
|
129
109
|
end
|
130
110
|
|
131
111
|
# @param uri_template [String]
|
132
112
|
# @param reference [String]
|
133
113
|
# @return [RelatonIetf::IetfBibliographicItem]
|
134
|
-
def bcp_item(uri_template, reference)
|
114
|
+
def bcp_item(uri_template, reference) # rubocop:disable Metrics/MethodLength
|
135
115
|
uri = uri_template.sub "CODE", reference.sub(" ", "").downcase
|
136
116
|
doc = Nokogiri::HTML get_page(uri)
|
137
117
|
ietf_item(
|
@@ -141,7 +121,7 @@ module RelatonIetf
|
|
141
121
|
language: ["en"],
|
142
122
|
link: [{ type: "src", content: uri }],
|
143
123
|
relation: fetch_relations(doc),
|
144
|
-
doctype: "rfc"
|
124
|
+
doctype: "rfc"
|
145
125
|
)
|
146
126
|
end
|
147
127
|
|
@@ -149,7 +129,7 @@ module RelatonIetf
|
|
149
129
|
doc.xpath("//table/tr/td/a[contains(., 'RFC')]").map do |r|
|
150
130
|
RelatonBib::DocumentRelation.new(
|
151
131
|
type: "merges",
|
152
|
-
bibitem: scrape_page(r.text, true)
|
132
|
+
bibitem: scrape_page(r.text, true)
|
153
133
|
)
|
154
134
|
end
|
155
135
|
end
|
@@ -163,10 +143,6 @@ module RelatonIetf
|
|
163
143
|
res.body
|
164
144
|
end
|
165
145
|
|
166
|
-
# def make_uri(uri_template, reference)
|
167
|
-
# uri_template.gsub("CODE", reference)
|
168
|
-
# end
|
169
|
-
|
170
146
|
# @return [String]
|
171
147
|
def language(reference)
|
172
148
|
reference[:lang] || "en"
|
@@ -195,12 +171,12 @@ module RelatonIetf
|
|
195
171
|
|
196
172
|
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
197
173
|
def persons(reference)
|
198
|
-
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
199
|
-
map do |author|
|
174
|
+
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
175
|
+
.map do |author|
|
200
176
|
entity = RelatonBib::Person.new(
|
201
177
|
name: full_name(author, reference),
|
202
178
|
affiliation: [affiliation(author)],
|
203
|
-
contact: contacts(author.at("./address"))
|
179
|
+
contact: contacts(author.at("./address"))
|
204
180
|
)
|
205
181
|
{ entity: entity, role: [contributor_role(author)] }
|
206
182
|
end
|
@@ -215,7 +191,7 @@ module RelatonIetf
|
|
215
191
|
mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
216
192
|
end
|
217
193
|
orgs + reference.xpath(
|
218
|
-
"front/author[not(@surname)][not(@fullname)]/organization"
|
194
|
+
"front/author[not(@surname)][not(@fullname)]/organization"
|
219
195
|
).map do |org|
|
220
196
|
{ entity: new_org(org.text, nil), role: [type: "author"] }
|
221
197
|
end
|
@@ -229,7 +205,7 @@ module RelatonIetf
|
|
229
205
|
RelatonBib::FullName.new(
|
230
206
|
completename: localized_string(author[:fullname], lang),
|
231
207
|
initial: [localized_string(author[:initials], lang)].compact,
|
232
|
-
surname: localized_string(author[:surname], lang)
|
208
|
+
surname: localized_string(author[:surname], lang)
|
233
209
|
)
|
234
210
|
end
|
235
211
|
|
@@ -258,13 +234,13 @@ module RelatonIetf
|
|
258
234
|
|
259
235
|
# @param postal [Nokogiri::XML::Document]
|
260
236
|
# @return [RelatonBib::Address]
|
261
|
-
def address(postal)
|
237
|
+
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
262
238
|
RelatonBib::Address.new(
|
263
239
|
street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
264
240
|
city: postal.at("./city")&.text,
|
265
241
|
postcode: postal.at("./code")&.text,
|
266
242
|
country: postal.at("./country")&.text,
|
267
|
-
state: postal.at("./region")&.text
|
243
|
+
state: postal.at("./region")&.text
|
268
244
|
)
|
269
245
|
end
|
270
246
|
|
@@ -303,7 +279,7 @@ module RelatonIetf
|
|
303
279
|
|
304
280
|
def month(mon)
|
305
281
|
return 1 if !mon || mon.empty?
|
306
|
-
return mon if /^\d
|
282
|
+
return mon if /^\d+$/.match? mon
|
307
283
|
|
308
284
|
Date::MONTHNAMES.index(mon)
|
309
285
|
end
|
@@ -332,12 +308,12 @@ module RelatonIetf
|
|
332
308
|
#
|
333
309
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
334
310
|
#
|
335
|
-
def docids(reference, ver)
|
311
|
+
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
336
312
|
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
337
313
|
ret = []
|
338
314
|
if id
|
339
315
|
ret << RelatonBib::DocumentIdentifier.new(
|
340
|
-
type: "IETF", id: id.sub(/^(RFC)/, "\\1 ")
|
316
|
+
type: "IETF", id: id.sub(/^(RFC)/, "\\1 ")
|
341
317
|
)
|
342
318
|
end
|
343
319
|
if (id = reference[:anchor])
|
@@ -365,10 +341,10 @@ module RelatonIetf
|
|
365
341
|
|
366
342
|
RelatonBib::Series.new(
|
367
343
|
title: RelatonBib::TypedTitleString.new(
|
368
|
-
content: si[:name], language: language(reference), script: "Latn"
|
344
|
+
content: si[:name], language: language(reference), script: "Latn"
|
369
345
|
),
|
370
346
|
number: si[:value],
|
371
|
-
type: "main"
|
347
|
+
type: "main"
|
372
348
|
)
|
373
349
|
end.compact
|
374
350
|
end
|
@@ -383,9 +359,7 @@ module RelatonIetf
|
|
383
359
|
st = reference.at("./seriesinfo[@status]")
|
384
360
|
return unless st
|
385
361
|
|
386
|
-
RelatonBib::DocumentStatus.new(
|
387
|
-
stage: st[:status],
|
388
|
-
)
|
362
|
+
RelatonBib::DocumentStatus.new(stage: st[:status])
|
389
363
|
end
|
390
364
|
end
|
391
365
|
end
|
data/lib/relaton_ietf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ietf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|