relaton-ietf 1.7.1 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +46 -0
- data/README.adoc +16 -9
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +4 -6
- data/grammars/ietf.rng +39 -0
- data/grammars/isodoc.rng +460 -6
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_ietf/hash_converter.rb +2 -2
- data/lib/relaton_ietf/ietf_bibliographic_item.rb +7 -0
- data/lib/relaton_ietf/ietf_bibliography.rb +5 -1
- data/lib/relaton_ietf/processor.rb +1 -2
- data/lib/relaton_ietf/scrapper.rb +63 -61
- data/lib/relaton_ietf/version.rb +1 -1
- data/lib/relaton_ietf/xml_parser.rb +1 -1
- data/relaton_ietf.gemspec +4 -4
- metadata +8 -38
- data/.github/workflows/macos.yml +0 -34
- data/.github/workflows/ubuntu.yml +0 -33
- data/.github/workflows/windows.yml +0 -35
data/grammars/reqt.rng
CHANGED
@@ -30,15 +30,34 @@
|
|
30
30
|
<data type="boolean"/>
|
31
31
|
</attribute>
|
32
32
|
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
33
36
|
<optional>
|
34
37
|
<attribute name="subsequence"/>
|
35
38
|
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
36
49
|
<attribute name="id">
|
37
50
|
<data type="ID"/>
|
38
51
|
</attribute>
|
39
52
|
<optional>
|
40
53
|
<attribute name="filename"/>
|
41
54
|
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
42
61
|
<optional>
|
43
62
|
<ref name="reqtitle"/>
|
44
63
|
</optional>
|
@@ -48,9 +67,9 @@
|
|
48
67
|
<optional>
|
49
68
|
<ref name="subject"/>
|
50
69
|
</optional>
|
51
|
-
<
|
70
|
+
<zeroOrMore>
|
52
71
|
<ref name="reqinherit"/>
|
53
|
-
</
|
72
|
+
</zeroOrMore>
|
54
73
|
<zeroOrMore>
|
55
74
|
<ref name="classification"/>
|
56
75
|
</zeroOrMore>
|
@@ -135,6 +154,16 @@
|
|
135
154
|
<data type="boolean"/>
|
136
155
|
</attribute>
|
137
156
|
</optional>
|
157
|
+
<optional>
|
158
|
+
<attribute name="keep-with-next">
|
159
|
+
<data type="boolean"/>
|
160
|
+
</attribute>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-lines-together">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
138
167
|
<oneOrMore>
|
139
168
|
<ref name="BasicBlock"/>
|
140
169
|
</oneOrMore>
|
@@ -5,9 +5,9 @@ module RelatonIetf
|
|
5
5
|
# Overrides superclass's method
|
6
6
|
#
|
7
7
|
# @param item [Hash]
|
8
|
-
# @retirn [
|
8
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
9
9
|
def bib_item(item)
|
10
|
-
|
10
|
+
IetfBibliographicItem.new(**item)
|
11
11
|
end
|
12
12
|
end
|
13
13
|
end
|
@@ -17,6 +17,13 @@ module RelatonIetf
|
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
20
|
+
# @param hash [Hash]
|
21
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
22
|
+
def self.from_hash(hash)
|
23
|
+
item_hash = ::RelatonIetf::HashConverter.hash_to_bib(hash)
|
24
|
+
new **item_hash
|
25
|
+
end
|
26
|
+
|
20
27
|
# @param opts [Hash]
|
21
28
|
# @option opts [Nokogiri::XML::Builder] :builder XML builder
|
22
29
|
# @option opts [Boolean] :bibdata
|
@@ -20,7 +20,11 @@ module RelatonIetf
|
|
20
20
|
def get(code, _year = nil, _opts = {})
|
21
21
|
warn "[relaton-ietf] (\"#{code}\") fetching..."
|
22
22
|
result = search code
|
23
|
-
|
23
|
+
if result
|
24
|
+
warn "[relaton-ietf] (\"#{code}\") found #{result.docidentifier.first.id}"
|
25
|
+
else
|
26
|
+
warn "[relaton-ietf] (\"#{code}\") not found"
|
27
|
+
end
|
24
28
|
result
|
25
29
|
end
|
26
30
|
end
|
@@ -27,8 +27,7 @@ module RelatonIetf
|
|
27
27
|
# @param hash [Hash]
|
28
28
|
# @return [RelatonIetf::IetfBibliographicItem]
|
29
29
|
def hash_to_bib(hash)
|
30
|
-
|
31
|
-
::RelatonIetf::IetfBibliographicItem.new item_hash
|
30
|
+
::RelatonIetf::IetfBibliographicItem.from_hash hash
|
32
31
|
end
|
33
32
|
|
34
33
|
# Returns hash of XML grammar
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "net/http"
|
4
|
-
require "nokogiri"
|
5
4
|
require "relaton_bib"
|
6
5
|
require "relaton_ietf/ietf_bibliographic_item"
|
7
6
|
|
@@ -11,26 +10,22 @@ module RelatonIetf
|
|
11
10
|
# Scrapper module
|
12
11
|
module Scrapper
|
13
12
|
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
14
|
-
RFC_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
|
15
|
-
# ID_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
|
16
13
|
BCP_URI_PATTERN = "https://www.rfc-editor.org/info/CODE"
|
17
14
|
|
18
15
|
class << self
|
19
|
-
# rubocop:disable Metrics/MethodLength
|
20
|
-
|
21
16
|
# @param text [String]
|
22
17
|
# @param is_relation [TrueClass, FalseClass]
|
23
18
|
# @return [RelatonIetf::IetfBibliographicItem]
|
24
|
-
def scrape_page(text, is_relation
|
19
|
+
def scrape_page(text, is_relation: false)
|
25
20
|
# Remove initial "IETF " string if specified
|
26
21
|
ref = text.gsub(/^IETF /, "")
|
27
|
-
|
28
|
-
|
29
|
-
|
22
|
+
/^(?:RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
23
|
+
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
24
|
+
rfc_item ref, is_relation
|
30
25
|
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
31
26
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
32
27
|
Net::ProtocolError, SocketError
|
33
|
-
raise RelatonBib::RequestError, "No document found for #{ref} reference
|
28
|
+
raise RelatonBib::RequestError, "No document found for #{ref} reference"
|
34
29
|
end
|
35
30
|
|
36
31
|
# @param reference [Nokogiri::XML::Element, nil]
|
@@ -38,7 +33,7 @@ module RelatonIetf
|
|
38
33
|
# @param url [String, NilClass]
|
39
34
|
# @param ver [String, NilClass] Internet Draft version
|
40
35
|
# @return [RelatonIetf::IetfBibliographicItem]
|
41
|
-
def fetch_rfc(reference, is_relation
|
36
|
+
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
42
37
|
return unless reference
|
43
38
|
|
44
39
|
ietf_item(
|
@@ -50,16 +45,17 @@ module RelatonIetf
|
|
50
45
|
language: [language(reference)],
|
51
46
|
link: link(reference, url, ver),
|
52
47
|
title: titles(reference),
|
48
|
+
formattedref: formattedref(reference),
|
53
49
|
abstract: abstracts(reference),
|
54
50
|
contributor: contributors(reference),
|
51
|
+
relation: relations(reference),
|
55
52
|
date: dates(reference),
|
56
53
|
series: series(reference),
|
57
54
|
place: ["Fremont, CA"],
|
58
55
|
keyword: reference.xpath("front/keyword").map(&:text),
|
59
|
-
doctype: doctype(reference[:anchor])
|
56
|
+
doctype: doctype(reference[:anchor]),
|
60
57
|
)
|
61
58
|
end
|
62
|
-
# rubocop:enable Metrics/MethodLength
|
63
59
|
|
64
60
|
private
|
65
61
|
|
@@ -95,6 +91,7 @@ module RelatonIetf
|
|
95
91
|
end
|
96
92
|
|
97
93
|
# @param ref [String]
|
94
|
+
# @param is_relation [Boolean, nil]
|
98
95
|
# @return [RelatonIetf::IetfBibliographicItem]
|
99
96
|
def rfc_item(ref, is_relation)
|
100
97
|
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
@@ -105,55 +102,58 @@ module RelatonIetf
|
|
105
102
|
|
106
103
|
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
107
104
|
doc = Nokogiri::XML get_page(uri)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
# @param uri_template [String]
|
112
|
-
# @param reference [String]
|
113
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
114
|
-
def bcp_item(uri_template, reference) # rubocop:disable Metrics/MethodLength
|
115
|
-
uri = uri_template.sub "CODE", reference.sub(" ", "").downcase
|
116
|
-
doc = Nokogiri::HTML get_page(uri)
|
117
|
-
ietf_item(
|
118
|
-
id: reference,
|
119
|
-
title: [content: ""],
|
120
|
-
docid: [RelatonBib::DocumentIdentifier.new(type: "IETF", id: reference)],
|
121
|
-
language: ["en"],
|
122
|
-
link: [{ type: "src", content: uri }],
|
123
|
-
relation: fetch_relations(doc),
|
124
|
-
doctype: "rfc"
|
125
|
-
)
|
105
|
+
r = doc.at("/referencegroup", "/reference")
|
106
|
+
fetch_rfc r, is_relation: is_relation, url: uri, ver: ver
|
126
107
|
end
|
127
108
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
)
|
109
|
+
# @param reference [Nokogiri::XML::Element]
|
110
|
+
# @return [Array<Hash>]
|
111
|
+
def relations(reference)
|
112
|
+
reference.xpath("reference").map do |ref|
|
113
|
+
{ type: "includes", bibitem: fetch_rfc(ref, is_relation: true) }
|
134
114
|
end
|
135
115
|
end
|
136
116
|
|
117
|
+
# @param uri [String]
|
118
|
+
# @return [String, nil] HTTP response body, or nil unless the response code is "200"
|
137
119
|
def get_page(uri)
|
138
120
|
res = Net::HTTP.get_response(URI(uri))
|
139
|
-
|
140
|
-
|
141
|
-
|
121
|
+
return unless res.code == "200"
|
122
|
+
|
123
|
+
# raise RelatonBib::RequestError, "No document found at #{uri}"
|
124
|
+
# end
|
142
125
|
|
143
126
|
res.body
|
144
127
|
end
|
145
128
|
|
129
|
+
# @param reference [Nokogiri::XML::Element]
|
146
130
|
# @return [String]
|
147
131
|
def language(reference)
|
148
132
|
reference[:lang] || "en"
|
149
133
|
end
|
150
134
|
|
135
|
+
# @param reference [Nokogiri::XML::Element]
|
151
136
|
# @return [Array<Hash>]
|
152
137
|
def titles(reference)
|
153
|
-
|
154
|
-
|
138
|
+
reference.xpath("./front/title").map do |title|
|
139
|
+
{ content: title.text, language: language(reference), script: "Latn" }
|
140
|
+
end
|
155
141
|
end
|
156
142
|
|
143
|
+
# @param reference [Nokogiri::XML::Element]
|
144
|
+
# @return [RelatonBib::FormattedRef, nil]
|
145
|
+
def formattedref(reference)
|
146
|
+
return if reference.at "./fornt/title"
|
147
|
+
|
148
|
+
cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
149
|
+
if cont
|
150
|
+
RelatonBib::FormattedRef.new(
|
151
|
+
content: cont, language: language(reference), script: "Latn",
|
152
|
+
)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
# @param ref [Nokogiri::XML::Element]
|
157
157
|
# @return [Array<RelatonBib::FormattedString>]
|
158
158
|
def abstracts(ref)
|
159
159
|
ref.xpath("./front/abstract").map do |a|
|
@@ -164,11 +164,13 @@ module RelatonIetf
|
|
164
164
|
end
|
165
165
|
end
|
166
166
|
|
167
|
+
# @param reference [Nokogiri::XML::Element]
|
167
168
|
# @return [Array<Hash>]
|
168
169
|
def contributors(reference)
|
169
170
|
persons(reference) + organizations(reference)
|
170
171
|
end
|
171
172
|
|
173
|
+
# @param reference [Nokogiri::XML::Element]
|
172
174
|
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
173
175
|
def persons(reference)
|
174
176
|
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
@@ -176,13 +178,15 @@ module RelatonIetf
|
|
176
178
|
entity = RelatonBib::Person.new(
|
177
179
|
name: full_name(author, reference),
|
178
180
|
affiliation: [affiliation(author)],
|
179
|
-
contact: contacts(author.at("./address"))
|
181
|
+
contact: contacts(author.at("./address")),
|
180
182
|
)
|
181
183
|
{ entity: entity, role: [contributor_role(author)] }
|
182
184
|
end
|
183
185
|
end
|
184
186
|
|
185
|
-
# @
|
187
|
+
# @param reference [Nokogiri::XML::Element]
|
188
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Organization,
|
189
|
+
# Symbol=>Array<String>}>]
|
186
190
|
def organizations(reference)
|
187
191
|
publisher = { entity: new_org, role: [type: "publisher"] }
|
188
192
|
orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
@@ -191,21 +195,21 @@ module RelatonIetf
|
|
191
195
|
mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
192
196
|
end
|
193
197
|
orgs + reference.xpath(
|
194
|
-
"front/author[not(@surname)][not(@fullname)]/organization"
|
198
|
+
"front/author[not(@surname)][not(@fullname)]/organization",
|
195
199
|
).map do |org|
|
196
200
|
{ entity: new_org(org.text, nil), role: [type: "author"] }
|
197
201
|
end
|
198
202
|
end
|
199
203
|
|
200
|
-
# @param author [Nokogiri::XML::
|
201
|
-
# @param ref [Nokogiri::XML::
|
204
|
+
# @param author [Nokogiri::XML::Element]
|
205
|
+
# @param ref [Nokogiri::XML::Element]
|
202
206
|
# @return [RelatonBib::FullName]
|
203
207
|
def full_name(author, ref)
|
204
208
|
lang = language ref
|
205
209
|
RelatonBib::FullName.new(
|
206
210
|
completename: localized_string(author[:fullname], lang),
|
207
211
|
initial: [localized_string(author[:initials], lang)].compact,
|
208
|
-
surname: localized_string(author[:surname], lang)
|
212
|
+
surname: localized_string(author[:surname], lang),
|
209
213
|
)
|
210
214
|
end
|
211
215
|
|
@@ -218,7 +222,7 @@ module RelatonIetf
|
|
218
222
|
RelatonBib::LocalizedString.new(content, lang)
|
219
223
|
end
|
220
224
|
|
221
|
-
# @param postal [Nokogiri::XML::
|
225
|
+
# @param addr [Nokogiri::XML::Element]
|
222
226
|
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
223
227
|
def contacts(addr)
|
224
228
|
contacts = []
|
@@ -232,7 +236,7 @@ module RelatonIetf
|
|
232
236
|
contacts
|
233
237
|
end
|
234
238
|
|
235
|
-
# @param postal [Nokogiri::XML::
|
239
|
+
# @param postal [Nokogiri::XML::Element]
|
236
240
|
# @return [RelatonBib::Address]
|
237
241
|
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
238
242
|
RelatonBib::Address.new(
|
@@ -240,7 +244,7 @@ module RelatonIetf
|
|
240
244
|
city: postal.at("./city")&.text,
|
241
245
|
postcode: postal.at("./code")&.text,
|
242
246
|
country: postal.at("./country")&.text,
|
243
|
-
state: postal.at("./region")&.text
|
247
|
+
state: postal.at("./region")&.text,
|
244
248
|
)
|
245
249
|
end
|
246
250
|
|
@@ -252,7 +256,7 @@ module RelatonIetf
|
|
252
256
|
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
253
257
|
end
|
254
258
|
|
255
|
-
# @param author [Nokogiri::XML::
|
259
|
+
# @param author [Nokogiri::XML::Element]
|
256
260
|
# @return [RelatonBib::Affiliation]
|
257
261
|
def affiliation(author)
|
258
262
|
organization = author.at("./organization")
|
@@ -287,6 +291,7 @@ module RelatonIetf
|
|
287
291
|
#
|
288
292
|
# Extract date from reference.
|
289
293
|
#
|
294
|
+
# @param reference [Nokogiri::XML::Element]
|
290
295
|
# @return [Array<RelatonBib::BibliographicDate>] published date.
|
291
296
|
#
|
292
297
|
def dates(reference)
|
@@ -298,8 +303,6 @@ module RelatonIetf
|
|
298
303
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
299
304
|
end
|
300
305
|
|
301
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
302
|
-
|
303
306
|
#
|
304
307
|
# Extract document identifiers from reference
|
305
308
|
#
|
@@ -308,12 +311,12 @@ module RelatonIetf
|
|
308
311
|
#
|
309
312
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
310
313
|
#
|
311
|
-
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
314
|
+
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
312
315
|
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
313
316
|
ret = []
|
314
317
|
if id
|
315
318
|
ret << RelatonBib::DocumentIdentifier.new(
|
316
|
-
type: "IETF", id: id.sub(/^(RFC)/, "\\1 ")
|
319
|
+
type: "IETF", id: id.sub(/^(RFC)/, "\\1 "),
|
317
320
|
)
|
318
321
|
end
|
319
322
|
if (id = reference[:anchor])
|
@@ -327,11 +330,10 @@ module RelatonIetf
|
|
327
330
|
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
328
331
|
end.compact
|
329
332
|
end
|
330
|
-
# enable Metrics/MethodLength, Metrics/AbcSize
|
331
333
|
|
332
334
|
#
|
333
335
|
# Extract series from reference
|
334
|
-
# @param reference [Nokogiri::XML::
|
336
|
+
# @param reference [Nokogiri::XML::Element]
|
335
337
|
#
|
336
338
|
# @return [Array<RelatonBib::Series>]
|
337
339
|
#
|
@@ -341,17 +343,17 @@ module RelatonIetf
|
|
341
343
|
|
342
344
|
RelatonBib::Series.new(
|
343
345
|
title: RelatonBib::TypedTitleString.new(
|
344
|
-
content: si[:name], language: language(reference), script: "Latn"
|
346
|
+
content: si[:name], language: language(reference), script: "Latn",
|
345
347
|
),
|
346
348
|
number: si[:value],
|
347
|
-
type: "main"
|
349
|
+
type: "main",
|
348
350
|
)
|
349
351
|
end.compact
|
350
352
|
end
|
351
353
|
|
352
354
|
#
|
353
355
|
# extract status
|
354
|
-
# @param reference [Nokogiri::XML::
|
356
|
+
# @param reference [Nokogiri::XML::Element]
|
355
357
|
#
|
356
358
|
# @return [RelatonBib::DocumentStatus]
|
357
359
|
#
|
data/lib/relaton_ietf/version.rb
CHANGED