relaton-ietf 1.7.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +46 -0
- data/README.adoc +16 -9
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +4 -6
- data/grammars/ietf.rng +39 -0
- data/grammars/isodoc.rng +460 -6
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_ietf/hash_converter.rb +2 -2
- data/lib/relaton_ietf/ietf_bibliographic_item.rb +7 -0
- data/lib/relaton_ietf/ietf_bibliography.rb +5 -1
- data/lib/relaton_ietf/processor.rb +1 -2
- data/lib/relaton_ietf/scrapper.rb +63 -61
- data/lib/relaton_ietf/version.rb +1 -1
- data/lib/relaton_ietf/xml_parser.rb +1 -1
- data/relaton_ietf.gemspec +4 -4
- metadata +8 -38
- data/.github/workflows/macos.yml +0 -34
- data/.github/workflows/ubuntu.yml +0 -33
- data/.github/workflows/windows.yml +0 -35
data/grammars/reqt.rng
CHANGED
@@ -30,15 +30,34 @@
|
|
30
30
|
<data type="boolean"/>
|
31
31
|
</attribute>
|
32
32
|
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
33
36
|
<optional>
|
34
37
|
<attribute name="subsequence"/>
|
35
38
|
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
36
49
|
<attribute name="id">
|
37
50
|
<data type="ID"/>
|
38
51
|
</attribute>
|
39
52
|
<optional>
|
40
53
|
<attribute name="filename"/>
|
41
54
|
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
42
61
|
<optional>
|
43
62
|
<ref name="reqtitle"/>
|
44
63
|
</optional>
|
@@ -48,9 +67,9 @@
|
|
48
67
|
<optional>
|
49
68
|
<ref name="subject"/>
|
50
69
|
</optional>
|
51
|
-
<
|
70
|
+
<zeroOrMore>
|
52
71
|
<ref name="reqinherit"/>
|
53
|
-
</
|
72
|
+
</zeroOrMore>
|
54
73
|
<zeroOrMore>
|
55
74
|
<ref name="classification"/>
|
56
75
|
</zeroOrMore>
|
@@ -135,6 +154,16 @@
|
|
135
154
|
<data type="boolean"/>
|
136
155
|
</attribute>
|
137
156
|
</optional>
|
157
|
+
<optional>
|
158
|
+
<attribute name="keep-with-next">
|
159
|
+
<data type="boolean"/>
|
160
|
+
</attribute>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-lines-together">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
138
167
|
<oneOrMore>
|
139
168
|
<ref name="BasicBlock"/>
|
140
169
|
</oneOrMore>
|
@@ -5,9 +5,9 @@ module RelatonIetf
|
|
5
5
|
# Overrides superclass's method
|
6
6
|
#
|
7
7
|
# @param item [Hash]
|
8
|
-
# @retirn [
|
8
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
9
9
|
def bib_item(item)
|
10
|
-
|
10
|
+
IetfBibliographicItem.new(**item)
|
11
11
|
end
|
12
12
|
end
|
13
13
|
end
|
@@ -17,6 +17,13 @@ module RelatonIetf
|
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
20
|
+
# @param hash [Hash]
|
21
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
22
|
+
def self.from_hash(hash)
|
23
|
+
item_hash = ::RelatonIetf::HashConverter.hash_to_bib(hash)
|
24
|
+
new **item_hash
|
25
|
+
end
|
26
|
+
|
20
27
|
# @param opts [Hash]
|
21
28
|
# @option opts [Nokogiri::XML::Builder] :builder XML builder
|
22
29
|
# @option opts [Boolean] :bibdata
|
@@ -20,7 +20,11 @@ module RelatonIetf
|
|
20
20
|
def get(code, _year = nil, _opts = {})
|
21
21
|
warn "[relaton-ietf] (\"#{code}\") fetching..."
|
22
22
|
result = search code
|
23
|
-
|
23
|
+
if result
|
24
|
+
warn "[relaton-ietf] (\"#{code}\") found #{result.docidentifier.first.id}"
|
25
|
+
else
|
26
|
+
warn "[relaton-ietf] (\"#{code}\") not found"
|
27
|
+
end
|
24
28
|
result
|
25
29
|
end
|
26
30
|
end
|
@@ -27,8 +27,7 @@ module RelatonIetf
|
|
27
27
|
# @param hash [Hash]
|
28
28
|
# @return [RelatonIetf::IetfBibliographicItem]
|
29
29
|
def hash_to_bib(hash)
|
30
|
-
|
31
|
-
::RelatonIetf::IetfBibliographicItem.new item_hash
|
30
|
+
::RelatonIetf::IetfBibliographicItem.from_hash hash
|
32
31
|
end
|
33
32
|
|
34
33
|
# Returns hash of XML grammar
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "net/http"
|
4
|
-
require "nokogiri"
|
5
4
|
require "relaton_bib"
|
6
5
|
require "relaton_ietf/ietf_bibliographic_item"
|
7
6
|
|
@@ -11,26 +10,22 @@ module RelatonIetf
|
|
11
10
|
# Scrapper module
|
12
11
|
module Scrapper
|
13
12
|
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
14
|
-
RFC_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
|
15
|
-
# ID_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
|
16
13
|
BCP_URI_PATTERN = "https://www.rfc-editor.org/info/CODE"
|
17
14
|
|
18
15
|
class << self
|
19
|
-
# rubocop:disable Metrics/MethodLength
|
20
|
-
|
21
16
|
# @param text [String]
|
22
17
|
# @param is_relation [TrueClass, FalseClass]
|
23
18
|
# @return [RelatonIetf::IetfBibliographicItem]
|
24
|
-
def scrape_page(text, is_relation
|
19
|
+
def scrape_page(text, is_relation: false)
|
25
20
|
# Remove initial "IETF " string if specified
|
26
21
|
ref = text.gsub(/^IETF /, "")
|
27
|
-
|
28
|
-
|
29
|
-
|
22
|
+
/^(?:RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
23
|
+
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
24
|
+
rfc_item ref, is_relation
|
30
25
|
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
31
26
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
32
27
|
Net::ProtocolError, SocketError
|
33
|
-
raise RelatonBib::RequestError, "No document found for #{ref} reference
|
28
|
+
raise RelatonBib::RequestError, "No document found for #{ref} reference"
|
34
29
|
end
|
35
30
|
|
36
31
|
# @param reference [Nokogiri::XML::Element, nil]
|
@@ -38,7 +33,7 @@ module RelatonIetf
|
|
38
33
|
# @param url [String, NilClass]
|
39
34
|
# @param ver [String, NilClass] Internet Draft version
|
40
35
|
# @return [RelatonIetf::IetfBibliographicItem]
|
41
|
-
def fetch_rfc(reference, is_relation
|
36
|
+
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
42
37
|
return unless reference
|
43
38
|
|
44
39
|
ietf_item(
|
@@ -50,16 +45,17 @@ module RelatonIetf
|
|
50
45
|
language: [language(reference)],
|
51
46
|
link: link(reference, url, ver),
|
52
47
|
title: titles(reference),
|
48
|
+
formattedref: formattedref(reference),
|
53
49
|
abstract: abstracts(reference),
|
54
50
|
contributor: contributors(reference),
|
51
|
+
relation: relations(reference),
|
55
52
|
date: dates(reference),
|
56
53
|
series: series(reference),
|
57
54
|
place: ["Fremont, CA"],
|
58
55
|
keyword: reference.xpath("front/keyword").map(&:text),
|
59
|
-
doctype: doctype(reference[:anchor])
|
56
|
+
doctype: doctype(reference[:anchor]),
|
60
57
|
)
|
61
58
|
end
|
62
|
-
# rubocop:enable Metrics/MethodLength
|
63
59
|
|
64
60
|
private
|
65
61
|
|
@@ -95,6 +91,7 @@ module RelatonIetf
|
|
95
91
|
end
|
96
92
|
|
97
93
|
# @param ref [String]
|
94
|
+
# @param is_relation [Boolean, nil]
|
98
95
|
# @return [RelatonIetf::IetfBibliographicItem]
|
99
96
|
def rfc_item(ref, is_relation)
|
100
97
|
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
@@ -105,55 +102,58 @@ module RelatonIetf
|
|
105
102
|
|
106
103
|
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
107
104
|
doc = Nokogiri::XML get_page(uri)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
# @param uri_template [String]
|
112
|
-
# @param reference [String]
|
113
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
114
|
-
def bcp_item(uri_template, reference) # rubocop:disable Metrics/MethodLength
|
115
|
-
uri = uri_template.sub "CODE", reference.sub(" ", "").downcase
|
116
|
-
doc = Nokogiri::HTML get_page(uri)
|
117
|
-
ietf_item(
|
118
|
-
id: reference,
|
119
|
-
title: [content: ""],
|
120
|
-
docid: [RelatonBib::DocumentIdentifier.new(type: "IETF", id: reference)],
|
121
|
-
language: ["en"],
|
122
|
-
link: [{ type: "src", content: uri }],
|
123
|
-
relation: fetch_relations(doc),
|
124
|
-
doctype: "rfc"
|
125
|
-
)
|
105
|
+
r = doc.at("/referencegroup", "/reference")
|
106
|
+
fetch_rfc r, is_relation: is_relation, url: uri, ver: ver
|
126
107
|
end
|
127
108
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
)
|
109
|
+
# @param reference [Nokogiri::XML::Element]
|
110
|
+
# @return [Hash]
|
111
|
+
def relations(reference)
|
112
|
+
reference.xpath("reference").map do |ref|
|
113
|
+
{ type: "includes", bibitem: fetch_rfc(ref, is_relation: true) }
|
134
114
|
end
|
135
115
|
end
|
136
116
|
|
117
|
+
# @param uri [String]
|
118
|
+
# @return [String] HTTP response body
|
137
119
|
def get_page(uri)
|
138
120
|
res = Net::HTTP.get_response(URI(uri))
|
139
|
-
|
140
|
-
|
141
|
-
|
121
|
+
return unless res.code == "200"
|
122
|
+
|
123
|
+
# raise RelatonBib::RequestError, "No document found at #{uri}"
|
124
|
+
# end
|
142
125
|
|
143
126
|
res.body
|
144
127
|
end
|
145
128
|
|
129
|
+
# @param reference [Nokogiri::XML::Element]
|
146
130
|
# @return [String]
|
147
131
|
def language(reference)
|
148
132
|
reference[:lang] || "en"
|
149
133
|
end
|
150
134
|
|
135
|
+
# @param reference [Nokogiri::XML::Element]
|
151
136
|
# @return [Array<Hash>]
|
152
137
|
def titles(reference)
|
153
|
-
|
154
|
-
|
138
|
+
reference.xpath("./front/title").map do |title|
|
139
|
+
{ content: title.text, language: language(reference), script: "Latn" }
|
140
|
+
end
|
155
141
|
end
|
156
142
|
|
143
|
+
# @param reference [Nokogiri::XML::Element]
|
144
|
+
# @return [RelatonBib::FormattedRef, nil]
|
145
|
+
def formattedref(reference)
|
146
|
+
return if reference.at "./fornt/title"
|
147
|
+
|
148
|
+
cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
149
|
+
if cont
|
150
|
+
RelatonBib::FormattedRef.new(
|
151
|
+
content: cont, language: language(reference), script: "Latn",
|
152
|
+
)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
# @param ref [Nokogiri::XML::Element]
|
157
157
|
# @return [Array<RelatonBib::FormattedString>]
|
158
158
|
def abstracts(ref)
|
159
159
|
ref.xpath("./front/abstract").map do |a|
|
@@ -164,11 +164,13 @@ module RelatonIetf
|
|
164
164
|
end
|
165
165
|
end
|
166
166
|
|
167
|
+
# @param reference [Nokogiri::XML::Element]
|
167
168
|
# @return [Array<Hash>]
|
168
169
|
def contributors(reference)
|
169
170
|
persons(reference) + organizations(reference)
|
170
171
|
end
|
171
172
|
|
173
|
+
# @param reference [Nokogiri::XML::Element]
|
172
174
|
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
173
175
|
def persons(reference)
|
174
176
|
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
@@ -176,13 +178,15 @@ module RelatonIetf
|
|
176
178
|
entity = RelatonBib::Person.new(
|
177
179
|
name: full_name(author, reference),
|
178
180
|
affiliation: [affiliation(author)],
|
179
|
-
contact: contacts(author.at("./address"))
|
181
|
+
contact: contacts(author.at("./address")),
|
180
182
|
)
|
181
183
|
{ entity: entity, role: [contributor_role(author)] }
|
182
184
|
end
|
183
185
|
end
|
184
186
|
|
185
|
-
# @
|
187
|
+
# @param reference [Nokogiri::XML::Element]
|
188
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Organization,
|
189
|
+
# Symbol=>Array<String>}>]
|
186
190
|
def organizations(reference)
|
187
191
|
publisher = { entity: new_org, role: [type: "publisher"] }
|
188
192
|
orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
@@ -191,21 +195,21 @@ module RelatonIetf
|
|
191
195
|
mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
192
196
|
end
|
193
197
|
orgs + reference.xpath(
|
194
|
-
"front/author[not(@surname)][not(@fullname)]/organization"
|
198
|
+
"front/author[not(@surname)][not(@fullname)]/organization",
|
195
199
|
).map do |org|
|
196
200
|
{ entity: new_org(org.text, nil), role: [type: "author"] }
|
197
201
|
end
|
198
202
|
end
|
199
203
|
|
200
|
-
# @param author [Nokogiri::XML::
|
201
|
-
# @param ref [Nokogiri::XML::
|
204
|
+
# @param author [Nokogiri::XML::Element]
|
205
|
+
# @param ref [Nokogiri::XML::Element]
|
202
206
|
# @return [RelatonBib::FullName]
|
203
207
|
def full_name(author, ref)
|
204
208
|
lang = language ref
|
205
209
|
RelatonBib::FullName.new(
|
206
210
|
completename: localized_string(author[:fullname], lang),
|
207
211
|
initial: [localized_string(author[:initials], lang)].compact,
|
208
|
-
surname: localized_string(author[:surname], lang)
|
212
|
+
surname: localized_string(author[:surname], lang),
|
209
213
|
)
|
210
214
|
end
|
211
215
|
|
@@ -218,7 +222,7 @@ module RelatonIetf
|
|
218
222
|
RelatonBib::LocalizedString.new(content, lang)
|
219
223
|
end
|
220
224
|
|
221
|
-
# @param postal [Nokogiri::XML::
|
225
|
+
# @param addr [Nokogiri::XML::Element]
|
222
226
|
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
223
227
|
def contacts(addr)
|
224
228
|
contacts = []
|
@@ -232,7 +236,7 @@ module RelatonIetf
|
|
232
236
|
contacts
|
233
237
|
end
|
234
238
|
|
235
|
-
# @param postal [Nokogiri::XML::
|
239
|
+
# @param postal [Nokogiri::XML::Element]
|
236
240
|
# @return [RelatonBib::Address]
|
237
241
|
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
238
242
|
RelatonBib::Address.new(
|
@@ -240,7 +244,7 @@ module RelatonIetf
|
|
240
244
|
city: postal.at("./city")&.text,
|
241
245
|
postcode: postal.at("./code")&.text,
|
242
246
|
country: postal.at("./country")&.text,
|
243
|
-
state: postal.at("./region")&.text
|
247
|
+
state: postal.at("./region")&.text,
|
244
248
|
)
|
245
249
|
end
|
246
250
|
|
@@ -252,7 +256,7 @@ module RelatonIetf
|
|
252
256
|
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
253
257
|
end
|
254
258
|
|
255
|
-
# @param author [Nokogiri::XML::
|
259
|
+
# @param author [Nokogiri::XML::Element]
|
256
260
|
# @return [RelatonBib::Affiliation]
|
257
261
|
def affiliation(author)
|
258
262
|
organization = author.at("./organization")
|
@@ -287,6 +291,7 @@ module RelatonIetf
|
|
287
291
|
#
|
288
292
|
# Extract date from reference.
|
289
293
|
#
|
294
|
+
# @param reference [Nokogiri::XML::Element]
|
290
295
|
# @return [Array<RelatonBib::BibliographicDate>] published data.
|
291
296
|
#
|
292
297
|
def dates(reference)
|
@@ -298,8 +303,6 @@ module RelatonIetf
|
|
298
303
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
299
304
|
end
|
300
305
|
|
301
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
302
|
-
|
303
306
|
#
|
304
307
|
# Extract document identifiers from reference
|
305
308
|
#
|
@@ -308,12 +311,12 @@ module RelatonIetf
|
|
308
311
|
#
|
309
312
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
310
313
|
#
|
311
|
-
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
314
|
+
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
312
315
|
id = (reference[:anchor] || reference[:docName] || reference[:number])
|
313
316
|
ret = []
|
314
317
|
if id
|
315
318
|
ret << RelatonBib::DocumentIdentifier.new(
|
316
|
-
type: "IETF", id: id.sub(/^(RFC)/, "\\1 ")
|
319
|
+
type: "IETF", id: id.sub(/^(RFC)/, "\\1 "),
|
317
320
|
)
|
318
321
|
end
|
319
322
|
if (id = reference[:anchor])
|
@@ -327,11 +330,10 @@ module RelatonIetf
|
|
327
330
|
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
328
331
|
end.compact
|
329
332
|
end
|
330
|
-
# enable Metrics/MethodLength, Metrics/AbcSize
|
331
333
|
|
332
334
|
#
|
333
335
|
# Extract series from reference
|
334
|
-
# @param reference [Nokogiri::XML::
|
336
|
+
# @param reference [Nokogiri::XML::Element]
|
335
337
|
#
|
336
338
|
# @return [Array<RelatonBib::Series>]
|
337
339
|
#
|
@@ -341,17 +343,17 @@ module RelatonIetf
|
|
341
343
|
|
342
344
|
RelatonBib::Series.new(
|
343
345
|
title: RelatonBib::TypedTitleString.new(
|
344
|
-
content: si[:name], language: language(reference), script: "Latn"
|
346
|
+
content: si[:name], language: language(reference), script: "Latn",
|
345
347
|
),
|
346
348
|
number: si[:value],
|
347
|
-
type: "main"
|
349
|
+
type: "main",
|
348
350
|
)
|
349
351
|
end.compact
|
350
352
|
end
|
351
353
|
|
352
354
|
#
|
353
355
|
# extract status
|
354
|
-
# @param reference [Nokogiri::XML::
|
356
|
+
# @param reference [Nokogiri::XML::Element]
|
355
357
|
#
|
356
358
|
# @return [RelatonBib::DocumentStatus]
|
357
359
|
#
|
data/lib/relaton_ietf/version.rb
CHANGED