relaton-ietf 1.9.9 → 1.9.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.adoc +1 -1
- data/lib/relaton_ietf/data_fetcher.rb +18 -12
- data/lib/relaton_ietf/rfc_entry.rb +89 -9
- data/lib/relaton_ietf/rfc_index_entry.rb +6 -5
- data/lib/relaton_ietf/version.rb +1 -1
- data/relaton_ietf.gemspec +2 -2
- metadata +4 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7b66f8a6c856c44ac8b16ab20f9efe24f1c87e3087afd02870938e38cf28d76a
|
4
|
+
data.tar.gz: 8ea240983edf3b80fec6de69de1845d80e318fbd12a724e13d56c6b7901c2deb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aaed3229833cb24cc7694ee1ac5d817097905c1e520ff8e24e15fbb43ef615548eafb646e992f255cbb7492319ceae139457df8992e143b1ca5c248bb8d6982d
|
7
|
+
data.tar.gz: '08b765ebc09ff7c328f4c8059a33acb559e6e1ebb0bdcd7911ea2d71a8bcece6ef5d0ffbee6120876940a1df04a93a5e67a9ad6edf83305588c4955e7ba66309'
|
data/.gitignore
CHANGED
data/README.adoc
CHANGED
@@ -138,7 +138,7 @@ RelatonIetf::IetfBibliographicItem.from_hash hash
|
|
138
138
|
There are IETF datasets that can be converted into RelatonXML/BibXML/BibYAML formats:
|
139
139
|
|
140
140
|
- `ietf-rfcsubseries` - https://www.rfc-editor.org/rfc-index.xml (`<bcp-entry>`, `<fyi-entry>`, `<std-entry>`)
|
141
|
-
- `ietf-internet-drafts` -
|
141
|
+
- `ietf-internet-drafts` - converts files from the local directory `./bibxml-ids`. Use the `rsync -avcizxL rsync.ietf.org::bibxml-ids ./bibxml-ids` command to fetch the files.
|
142
142
|
- `ietf-rfc-entries` - https://www.rfc-editor.org/rfc-index.xml (`<rfc-entry>`)
|
143
143
|
|
144
144
|
The method `RelatonIetf::DataFetcher.fetch(source, output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require "rubygems"
|
2
|
-
require "rubygems/package"
|
3
|
-
require "zlib"
|
1
|
+
# require "rubygems"
|
2
|
+
# require "rubygems/package"
|
3
|
+
# require "zlib"
|
4
4
|
require "relaton_ietf/rfc_index_entry"
|
5
5
|
require "relaton_ietf/rfc_entry"
|
6
6
|
|
@@ -64,22 +64,28 @@ module RelatonIetf
|
|
64
64
|
# Fetches ietf-internet-drafts documents
|
65
65
|
#
|
66
66
|
def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength
|
67
|
-
gz = OpenURI.open_uri("https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz")
|
68
|
-
z = Zlib::GzipReader.new(gz)
|
69
|
-
io = StringIO.new(z.read)
|
70
|
-
z.close
|
71
|
-
Gem::Package::TarReader.new io do |tar|
|
72
|
-
|
73
|
-
|
67
|
+
# gz = OpenURI.open_uri("https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz")
|
68
|
+
# z = Zlib::GzipReader.new(gz)
|
69
|
+
# io = StringIO.new(z.read)
|
70
|
+
# z.close
|
71
|
+
# Gem::Package::TarReader.new io do |tar|
|
72
|
+
# tar.each do |tarfile|
|
73
|
+
# next if tarfile.directory?
|
74
74
|
|
75
|
-
|
76
|
-
|
75
|
+
# save_doc BibXMLParser.parse(tarfile.read)
|
76
|
+
# end
|
77
|
+
# end
|
78
|
+
Dir["bibxml-ids/*.xml"].each do |file|
|
79
|
+
save_doc BibXMLParser.parse(File.read(file, encoding: "UTF-8"))
|
77
80
|
end
|
78
81
|
end
|
79
82
|
|
80
83
|
def fetch_ieft_rfcs
|
81
84
|
rfc_index.xpath("xmlns:rfc-entry").each do |doc|
|
82
85
|
save_doc RfcEntry.parse(doc)
|
86
|
+
rescue StandardError => e
|
87
|
+
warn "Error parsing #{doc.at('./xmlns:doc-id').text}: #{e.message}"
|
88
|
+
warn e.backtrace[0..5].join("\n")
|
83
89
|
end
|
84
90
|
end
|
85
91
|
|
@@ -41,17 +41,35 @@ module RelatonIetf
|
|
41
41
|
abstract: parse_abstract,
|
42
42
|
relation: parse_relation,
|
43
43
|
status: parse_status,
|
44
|
+
series: parse_series,
|
44
45
|
editorialgroup: parse_editorialgroup,
|
45
46
|
)
|
46
47
|
end
|
47
48
|
|
49
|
+
#
|
50
|
+
# Parse series
|
51
|
+
#
|
52
|
+
# @return [Array<RelatonBib::Series>] series
|
53
|
+
#
|
54
|
+
def parse_series
|
55
|
+
title = RelatonBib::TypedTitleString.new(content: "RFC")
|
56
|
+
@doc.xpath("./xmlns:is-also/xmlns:doc-id").map do |s|
|
57
|
+
/^(?<name>\D+)(?<num>\d+)/ =~ s.text
|
58
|
+
t = RelatonBib::TypedTitleString.new(content: name)
|
59
|
+
RelatonBib::Series.new title: t, number: num.gsub(/^0+/, "")
|
60
|
+
end + [RelatonBib::Series.new(title: title, number: docnum)]
|
61
|
+
end
|
62
|
+
|
48
63
|
#
|
49
64
|
# Parse document identifiers
|
50
65
|
#
|
51
66
|
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
|
52
67
|
#
|
53
68
|
def parse_docid
|
54
|
-
ids = [
|
69
|
+
ids = [
|
70
|
+
RelatonBib::DocumentIdentifier.new(id: pub_id, type: "IETF"),
|
71
|
+
RelatonBib::DocumentIdentifier.new(id: code, type: "IETF", scope: "anchor"),
|
72
|
+
]
|
55
73
|
doi = @doc.at("./xmlns:doi").text
|
56
74
|
ids << RelatonBib::DocumentIdentifier.new(id: doi, type: "DOI")
|
57
75
|
ids
|
@@ -73,7 +91,16 @@ module RelatonIetf
|
|
73
91
|
# @return [String] PubID
|
74
92
|
#
|
75
93
|
def pub_id
|
76
|
-
"
|
94
|
+
"RFC #{docnum}"
|
95
|
+
end
|
96
|
+
|
97
|
+
#
|
98
|
+
# Parse document number
|
99
|
+
#
|
100
|
+
# @return [String] document number
|
101
|
+
#
|
102
|
+
def docnum
|
103
|
+
/\d+$/.match(code).to_s.sub(/^0+/, "")
|
77
104
|
end
|
78
105
|
|
79
106
|
#
|
@@ -115,16 +142,66 @@ module RelatonIetf
|
|
115
142
|
#
|
116
143
|
# @return [Array<RelatonBib::ContributionInfo>] document contributors
|
117
144
|
#
|
118
|
-
def parse_contributor
|
119
|
-
@doc.xpath("./xmlns:author").map do |contributor|
|
145
|
+
def parse_contributor # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
|
146
|
+
@doc.xpath("./xmlns:author").map do |contributor| # rubocop:disable Metrics/BlockLength
|
120
147
|
n = contributor.at("./xmlns:name").text
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
148
|
+
case n
|
149
|
+
when "ISO"
|
150
|
+
entity = RelatonBib::Organization.new(abbrev: n, name: "International Organization for Standardization")
|
151
|
+
when "International Organization for Standardization"
|
152
|
+
entity = RelatonBib::Organization.new(abbrev: "ISO", name: n)
|
153
|
+
when "IAB"
|
154
|
+
entity = RelatonBib::Organization.new(abbrev: n, name: "Internet Architecture Board")
|
155
|
+
when "IESG"
|
156
|
+
entity = RelatonBib::Organization.new(abbrev: n, name: "Internet Engineering Steering Group")
|
157
|
+
when "Internet Engineering Steering Group", "Federal Networking Council", "Internet Architecture Board",
|
158
|
+
"Internet Activities Board", "Defense Advanced Research Projects Agency", "National Science Foundation",
|
159
|
+
"National Research Council", "National Bureau of Standards"
|
160
|
+
abbr = n.split.map { |w| w[0] if w[0] == w[0].upcase }.join
|
161
|
+
entity = RelatonBib::Organization.new(abbrev: abbr, name: n)
|
162
|
+
when "IETF Secretariat"
|
163
|
+
entity = RelatonBib::Organization.new(abbrev: "IETF", name: n)
|
164
|
+
when "Audio-Video Transport Working Group", /North American Directory Forum/, "EARN Staff",
|
165
|
+
"Vietnamese Standardization Working Group", "ACM SIGUCCS", "ESCC X.500/X.400 Task Force",
|
166
|
+
"Sun Microsystems", "NetBIOS Working Group in the Defense Advanced Research Projects Agency",
|
167
|
+
"End-to-End Services Task Force", "Network Technical Advisory Group", "Bolt Beranek",
|
168
|
+
"Newman Laboratories", "Gateway Algorithms and Data Structures Task Force",
|
169
|
+
"Network Information Center. Stanford Research Institute", "RFC Editor",
|
170
|
+
"Information Sciences Institute University of Southern California"
|
171
|
+
entity = RelatonBib::Organization.new(name: n)
|
172
|
+
when "Internet Assigned Numbers Authority (IANA)"
|
173
|
+
entity = RelatonBib::Organization.new(abbrev: "IANA", name: "Internet Assigned Numbers Authority")
|
174
|
+
when "ESnet Site Coordinating Comittee (ESCC)"
|
175
|
+
entity = RelatonBib::Organization.new(abbrev: "ESCC", name: "ESnet Site Coordinating Comittee")
|
176
|
+
when "Energy Sciences Network (ESnet)"
|
177
|
+
entity = RelatonBib::Organization.new(abbrev: "ESnet", name: "Energy Sciences Network")
|
178
|
+
when "International Telegraph and Telephone Consultative Committee of the International Telecommunication Union"
|
179
|
+
entity = RelatonBib::Organization.new(abbrev: "CCITT", name: n)
|
180
|
+
else
|
181
|
+
# int, snm = n.split
|
182
|
+
/^(?:(?<int>(?:\p{Lu}+(?:-\w|\(\w\))?\.{0,2}[-\s]?)+)\s)?(?<snm>[[:alnum:]\s'-.]+)$/ =~ n
|
183
|
+
surname = RelatonBib::LocalizedString.new(snm, "en", "Latn")
|
184
|
+
name = RelatonBib::LocalizedString.new(n, "en", "Latn")
|
185
|
+
fname = RelatonBib::FullName.new(completename: name, initial: initials(int), surname: surname)
|
186
|
+
entity = RelatonBib::Person.new(name: fname)
|
187
|
+
end
|
188
|
+
RelatonBib::ContributionInfo.new(entity: entity, role: [{ type: "author" }])
|
125
189
|
end
|
126
190
|
end
|
127
191
|
|
192
|
+
#
|
193
|
+
# Create initials
|
194
|
+
#
|
195
|
+
# @param [String] int
|
196
|
+
#
|
197
|
+
# @return [Array<RelatonBib::LocalizedString>]
|
198
|
+
#
|
199
|
+
def initials(int)
|
200
|
+
return [] unless int
|
201
|
+
|
202
|
+
int.split(/\.-?\s?|\s/).map { |i| RelatonBib::LocalizedString.new i, "en", "Latn" }
|
203
|
+
end
|
204
|
+
|
128
205
|
#
|
129
206
|
# Parse document keywords
|
130
207
|
#
|
@@ -141,7 +218,10 @@ module RelatonIetf
|
|
141
218
|
#
|
142
219
|
def parse_abstract
|
143
220
|
@doc.xpath("./xmlns:abstract").map do |c|
|
144
|
-
|
221
|
+
content = c.xpath("./xmlns:p").map do |p|
|
222
|
+
"<#{p.name}>#{p.text.strip}</#{p.name}>"
|
223
|
+
end.join
|
224
|
+
RelatonBib::FormattedString.new(content: content, language: "en",
|
145
225
|
script: "Latn", format: "text/html")
|
146
226
|
end
|
147
227
|
end
|
@@ -19,7 +19,7 @@ module RelatonIetf
|
|
19
19
|
#
|
20
20
|
# @param [Nokogiri::XML::Element] doc document
|
21
21
|
#
|
22
|
-
# @return [RelatonIetf:
|
22
|
+
# @return [RelatonIetf::IetfBibliographicItem, nil]
|
23
23
|
#
|
24
24
|
def self.parse(doc)
|
25
25
|
doc_id = doc.at("./xmlns:doc-id")
|
@@ -54,13 +54,14 @@ module RelatonIetf
|
|
54
54
|
|
55
55
|
def parse_docid
|
56
56
|
[
|
57
|
-
RelatonBib::DocumentIdentifier.new(type: "IETF",
|
57
|
+
RelatonBib::DocumentIdentifier.new(type: "IETF", id: pub_id),
|
58
|
+
RelatonBib::DocumentIdentifier.new(type: "IETF", scope: "anchor", id: @doc_id),
|
58
59
|
]
|
59
60
|
end
|
60
61
|
|
61
|
-
|
62
|
-
|
63
|
-
|
62
|
+
def pub_id
|
63
|
+
"#{@name.upcase} #{@shortnum}"
|
64
|
+
end
|
64
65
|
|
65
66
|
def anchor
|
66
67
|
"#{@name.upcase}#{@shortnum}"
|
data/lib/relaton_ietf/version.rb
CHANGED
data/relaton_ietf.gemspec
CHANGED
@@ -38,6 +38,6 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.add_development_dependency "vcr"
|
39
39
|
spec.add_development_dependency "webmock"
|
40
40
|
|
41
|
-
spec.add_dependency "relaton-bib", ">= 1.9.
|
42
|
-
spec.add_dependency "zlib", "~> 1.1.0"
|
41
|
+
spec.add_dependency "relaton-bib", ">= 1.9.19"
|
42
|
+
# spec.add_dependency "zlib", "~> 1.1.0"
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ietf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.9
|
4
|
+
version: 1.9.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -128,28 +128,14 @@ dependencies:
|
|
128
128
|
requirements:
|
129
129
|
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.9.
|
131
|
+
version: 1.9.19
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 1.9.
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: zlib
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: 1.1.0
|
146
|
-
type: :runtime
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - "~>"
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: 1.1.0
|
138
|
+
version: 1.9.19
|
153
139
|
description: "RelatonIetf: retrieve IETF Standards for bibliographic use \nusing the
|
154
140
|
BibliographicItem model.\n\nFormerly known as rfcbib.\n"
|
155
141
|
email:
|