relaton-ietf 1.9.0 → 1.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.adoc +50 -0
- data/lib/relaton_ietf/committee.rb +8 -0
- data/lib/relaton_ietf/data_fetcher.rb +130 -0
- data/lib/relaton_ietf/ietf_bibliographic_item.rb +2 -2
- data/lib/relaton_ietf/processor.rb +14 -1
- data/lib/relaton_ietf/rfc_entry.rb +186 -0
- data/lib/relaton_ietf/rfc_index_entry.rb +60 -0
- data/lib/relaton_ietf/scrapper.rb +354 -346
- data/lib/relaton_ietf/version.rb +1 -1
- data/lib/relaton_ietf/xml_parser.rb +1 -1
- data/lib/relaton_ietf.rb +2 -0
- data/relaton_ietf.gemspec +2 -1
- metadata +22 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 639209bd0a93d43f7d9d671d709a40e635ef7978ac33b502afde91b64e03c857
|
4
|
+
data.tar.gz: d33475e9becda05d7eadc6b9e48e3df700aa4620dc6081811708faddd84b650f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87f20d0e95cc16aa9462b78cf67eef1ca40e0124bda85ec2a480de960b44d33ef333083ce25fdd2fff6e109b071fdbf2cf27226c210eef65331b1786daa59f85
|
7
|
+
data.tar.gz: 3865277ac823c1fe10731a813ca5085d9e3ced27bb5af6e1bb53cb9a7c952c76e47f249cbb6cc013293c5ba2f46410f309689be307e55e1fc1aff4d508219d88
|
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -90,6 +90,21 @@ item.to_xml bibdata: true
|
|
90
90
|
</bibdata>"
|
91
91
|
----
|
92
92
|
|
93
|
+
=== Typed links
|
94
|
+
|
95
|
+
IETF documents may have `src`, `xml`, and `doi` link types.
|
96
|
+
|
97
|
+
* `src` - web publication
|
98
|
+
* `xml` - BibXML publication
|
99
|
+
* `doi` - DOI reference
|
100
|
+
|
101
|
+
[source,ruby]
|
102
|
+
----
|
103
|
+
item.link
|
104
|
+
=> [#<RelatonBib::TypedUri:0x00007fe8b287a120 @content=#<Addressable::URI:0x7e4 URI:https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference.RFC.8341.xml>, @type="xml">,
|
105
|
+
#<RelatonBib::TypedUri:0x00007fe8b2237ec0 @content=#<Addressable::URI:0x7f8 URI:https://www.rfc-editor.org/info/rfc8341>, @type="src">]
|
106
|
+
----
|
107
|
+
|
93
108
|
=== Parse a file locally
|
94
109
|
|
95
110
|
[source,ruby]
|
@@ -118,6 +133,41 @@ RelatonIetf::IetfBibliographicItem.from_hash hash
|
|
118
133
|
...
|
119
134
|
----
|
120
135
|
|
136
|
+
=== Fetch data
|
137
|
+
|
138
|
+
There are IETF datasets that can be converted into RelatonXML/BibXML/BibYAML formats:
|
139
|
+
|
140
|
+
- `ietf-rfcsubseries` - https://www.rfc-editor.org/rfc-index.xml (`<bcp-entry>`, `<fyi-entry>`, `<std-entry>`)
|
141
|
+
- `ietf-internet-drafts` - https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz
|
142
|
+
- `ietf-rfc-entries` - https://www.rfc-editor.org/rfc-index.xml (`<rfc-entry>`)
|
143
|
+
|
144
|
+
The method `RelatonIetf::DataFetcher.fetch(source, output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
145
|
+
|
146
|
+
Arguments:
|
147
|
+
|
148
|
+
- `source` - dataset name (`ietf-rfcsubseries`, `ietf-internet-drafts`, or `ietf-rfc-entries`)
|
149
|
+
- `output` - folder to save documents (default './data').
|
150
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
151
|
+
|
152
|
+
For the `ietf-rfcsubseries` dataset, only a special XML format is supported:
|
153
|
+
|
154
|
+
[source,xml]
|
155
|
+
----
|
156
|
+
<referencegroup anchor="BCP14" target="https://www.rfc-editor.org/info/bcp14">
|
157
|
+
<xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.2119.xml" />
|
158
|
+
<xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.8174.xml" />
|
159
|
+
</referencegroup>
|
160
|
+
----
|
161
|
+
|
162
|
+
[source,ruby]
|
163
|
+
----
|
164
|
+
RelatonIetf::DataFetcher.fetch "ietf-internet-drafts"
|
165
|
+
Started at: 2021-12-17 10:23:20 +0100
|
166
|
+
Stopped at: 2021-12-17 10:29:19 +0100
|
167
|
+
Done in: 360 sec.
|
168
|
+
=> nil
|
169
|
+
----
|
170
|
+
|
121
171
|
== Contributing
|
122
172
|
|
123
173
|
Bug reports and pull requests are welcome on GitHub at https://github.com/metanorma/relaton-ietf.
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "rubygems/package"
|
3
|
+
require "zlib"
|
4
|
+
require "relaton_ietf/rfc_index_entry"
|
5
|
+
require "relaton_ietf/rfc_entry"
|
6
|
+
|
7
|
+
module RelatonIetf
|
8
|
+
class DataFetcher
|
9
|
+
#
|
10
|
+
# Data fetcher initializer
|
11
|
+
#
|
12
|
+
# @param [String] source source name
|
13
|
+
# @param [String] output directory to save files
|
14
|
+
# @param [String] format format of output files (xml, yaml, bibxml);
|
15
|
+
# for ietf-rfcsubseries source only: xml
|
16
|
+
#
|
17
|
+
def initialize(source, output, format)
|
18
|
+
@source = source
|
19
|
+
@output = output
|
20
|
+
@format = source == "ietf-rfcsubseries" ? "rfcxml" : format
|
21
|
+
@ext = @format.sub(/^bib|^rfc/, "")
|
22
|
+
@files = []
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# Initialize fetcher and run fetch
|
27
|
+
#
|
28
|
+
# @param [String] source source name
|
29
|
+
# @param [String] output directory to save files, default: "data"
|
30
|
+
# @param [String] format format of output files (xml, yaml, bibxml);
|
31
|
+
# default: yaml; for ietf-rfcsubseries source only: xml
|
32
|
+
#
|
33
|
+
def self.fetch(source, output: "data", format: "yaml")
|
34
|
+
t1 = Time.now
|
35
|
+
puts "Started at: #{t1}"
|
36
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
37
|
+
new(source, output, format).fetch
|
38
|
+
t2 = Time.now
|
39
|
+
puts "Stopped at: #{t2}"
|
40
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
41
|
+
end
|
42
|
+
|
43
|
+
#
|
44
|
+
# Fetch documents
|
45
|
+
#
|
46
|
+
def fetch
|
47
|
+
case @source
|
48
|
+
when "ietf-rfcsubseries" then fetch_ieft_rfcsubseries
|
49
|
+
when "ietf-internet-drafts" then fetch_ieft_internet_drafts
|
50
|
+
when "ietf-rfc-entries" then fetch_ieft_rfcs
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
#
|
55
|
+
# Fetches ietf-rfcsubseries documents
|
56
|
+
#
|
57
|
+
def fetch_ieft_rfcsubseries
|
58
|
+
rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry").each do |doc|
|
59
|
+
save_doc RfcIndexEntry.parse(doc)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
#
|
64
|
+
# Fetches ietf-internet-drafts documents
|
65
|
+
#
|
66
|
+
def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength
|
67
|
+
gz = OpenURI.open_uri("https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz")
|
68
|
+
z = Zlib::GzipReader.new(gz)
|
69
|
+
io = StringIO.new(z.read)
|
70
|
+
z.close
|
71
|
+
Gem::Package::TarReader.new io do |tar|
|
72
|
+
tar.each do |tarfile|
|
73
|
+
next if tarfile.directory?
|
74
|
+
|
75
|
+
save_doc RelatonBib::BibXMLParser.parse(tarfile.read)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def fetch_ieft_rfcs
|
81
|
+
rfc_index.xpath("xmlns:rfc-entry").each do |doc|
|
82
|
+
save_doc RfcEntry.parse(doc)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def rfc_index
|
87
|
+
uri = URI "https://www.rfc-editor.org/rfc-index.xml"
|
88
|
+
Nokogiri::XML(Net::HTTP.get(uri)).at("/xmlns:rfc-index")
|
89
|
+
end
|
90
|
+
|
91
|
+
#
|
92
|
+
# Save document to file
|
93
|
+
#
|
94
|
+
# @param [RelatonIetf::RfcIndexEntry, nil] rfc index entry
|
95
|
+
#
|
96
|
+
def save_doc(entry) # rubocop:disable Metrics/MethodLength
|
97
|
+
return unless entry
|
98
|
+
|
99
|
+
c = case @format
|
100
|
+
when "xml" then entry.to_xml(bibdata: true)
|
101
|
+
when "yaml" then entry.to_hash.to_yaml
|
102
|
+
when "rfcxml" then entry.to_xml
|
103
|
+
else entry.send("to_#{@format}")
|
104
|
+
end
|
105
|
+
file = file_name entry
|
106
|
+
if @files.include? file
|
107
|
+
warn "File #{file} already exists. Document: #{entry.docnumber}"
|
108
|
+
else
|
109
|
+
@files << file
|
110
|
+
end
|
111
|
+
File.write file, c, encoding: "UTF-8"
|
112
|
+
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# Generate file name
|
116
|
+
#
|
117
|
+
# @param [RelatonIetf::RfcIndexEntry] entry
|
118
|
+
#
|
119
|
+
# @return [String] file name
|
120
|
+
#
|
121
|
+
def file_name(entry)
|
122
|
+
id = if entry.respond_to? :docidentifier
|
123
|
+
entry.docidentifier.detect { |i| i.type == "Internet-Draft" }&.id&.downcase
|
124
|
+
end
|
125
|
+
id ||= entry.docnumber.upcase
|
126
|
+
name = id.gsub(/[\s,:\/]/, "_").squeeze("_")
|
127
|
+
File.join @output, "#{name}.#{@ext}"
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
@@ -21,7 +21,7 @@ module RelatonIetf
|
|
21
21
|
# @return [RelatonIetf::IetfBibliographicItem]
|
22
22
|
def self.from_hash(hash)
|
23
23
|
item_hash = ::RelatonIetf::HashConverter.hash_to_bib(hash)
|
24
|
-
new
|
24
|
+
new(**item_hash)
|
25
25
|
end
|
26
26
|
|
27
27
|
# @param opts [Hash]
|
@@ -32,7 +32,7 @@ module RelatonIetf
|
|
32
32
|
# @return [String] XML
|
33
33
|
def to_xml(**opts)
|
34
34
|
opts[:date_format] ||= :short
|
35
|
-
super
|
35
|
+
super(**opts)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
end
|
@@ -3,11 +3,12 @@ require "relaton_ietf/xml_parser"
|
|
3
3
|
|
4
4
|
module RelatonIetf
|
5
5
|
class Processor < Relaton::Processor
|
6
|
-
def initialize
|
6
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
7
7
|
@short = :relaton_ietf
|
8
8
|
@prefix = "IETF"
|
9
9
|
@defaultprefix = /^RFC /
|
10
10
|
@idtype = "IETF"
|
11
|
+
@datasets = %w[ietf-rfcsubseries ietf-internet-drafts ietf-rfc-entries]
|
11
12
|
end
|
12
13
|
|
13
14
|
# @param code [String]
|
@@ -18,6 +19,18 @@ module RelatonIetf
|
|
18
19
|
::RelatonIetf::IetfBibliography.get(code, date, opts)
|
19
20
|
end
|
20
21
|
|
22
|
+
#
|
23
|
+
# Fetch all the documents from https://www.rfc-editor.org/rfc-index.xml
|
24
|
+
#
|
25
|
+
# @param [String] source source name
|
26
|
+
# @param [Hash] opts
|
27
|
+
# @option opts [String] :output directory to output documents
|
28
|
+
# @option opts [String] :format
|
29
|
+
#
|
30
|
+
def fetch_data(source, opts)
|
31
|
+
DataFetcher.fetch(source, **opts)
|
32
|
+
end
|
33
|
+
|
21
34
|
# @param xml [String]
|
22
35
|
# @return [RelatonIetf::IetfBibliographicItem]
|
23
36
|
def from_xml(xml)
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module RelatonIetf
|
2
|
+
class RfcEntry
|
3
|
+
#
|
4
|
+
# Initialize parser
|
5
|
+
#
|
6
|
+
# @param [Nokogiri::XML::Element] doc document
|
7
|
+
#
|
8
|
+
def initialize(doc)
|
9
|
+
@doc = doc
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# Initialize parser & parse document
|
14
|
+
#
|
15
|
+
# @param [Nokogiri::XML::Element] doc document
|
16
|
+
#
|
17
|
+
# @return [RelatonIetf::IetfBibliographicItem] bib item
|
18
|
+
#
|
19
|
+
def self.parse(doc)
|
20
|
+
new(doc).parse
|
21
|
+
end
|
22
|
+
|
23
|
+
#
|
24
|
+
# Parse document
|
25
|
+
#
|
26
|
+
# @return [RelatonIetf::IetfBibliographicItem] bib item
|
27
|
+
#
|
28
|
+
def parse # rubocop:disable Metrics/MethodLength
|
29
|
+
IetfBibliographicItem.new(
|
30
|
+
type: "standard",
|
31
|
+
language: ["en"],
|
32
|
+
script: ["Latn"],
|
33
|
+
fetched: Date.today.to_s,
|
34
|
+
docid: parse_docid,
|
35
|
+
docnumber: code,
|
36
|
+
title: parse_title,
|
37
|
+
link: parse_link,
|
38
|
+
date: parse_date,
|
39
|
+
contributor: parse_contributor,
|
40
|
+
keyword: parse_keyword,
|
41
|
+
abstract: parse_abstract,
|
42
|
+
relation: parse_relation,
|
43
|
+
status: parse_status,
|
44
|
+
editorialgroup: parse_editorialgroup,
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
#
|
49
|
+
# Parse document identifiers
|
50
|
+
#
|
51
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
|
52
|
+
#
|
53
|
+
def parse_docid
|
54
|
+
ids = [RelatonBib::DocumentIdentifier.new(id: pub_id, type: "IETF")]
|
55
|
+
doi = @doc.at("./xmlns:doi").text
|
56
|
+
ids << RelatonBib::DocumentIdentifier.new(id: doi, type: "DOI")
|
57
|
+
ids
|
58
|
+
end
|
59
|
+
|
60
|
+
#
|
61
|
+
# Parse document title
|
62
|
+
#
|
63
|
+
# @return [Array<RelatonBib::TypedTitleString>] document title
|
64
|
+
#
|
65
|
+
def parse_title
|
66
|
+
content = @doc.at("./xmlns:title").text
|
67
|
+
[RelatonBib::TypedTitleString.new(content: content, type: "main")]
|
68
|
+
end
|
69
|
+
|
70
|
+
#
|
71
|
+
# Create PubID
|
72
|
+
#
|
73
|
+
# @return [String] PubID
|
74
|
+
#
|
75
|
+
def pub_id
|
76
|
+
"IETF #{code.sub(/^(RFC)(\d+)/, '\1 \2')}"
|
77
|
+
end
|
78
|
+
|
79
|
+
#
|
80
|
+
# Parse document code
|
81
|
+
#
|
82
|
+
# @return [String] document code
|
83
|
+
#
|
84
|
+
def code
|
85
|
+
@doc.at("./xmlns:doc-id").text
|
86
|
+
end
|
87
|
+
|
88
|
+
#
|
89
|
+
# Create link
|
90
|
+
#
|
91
|
+
# @return [Array<RelatonBib::TypedUri>]
|
92
|
+
#
|
93
|
+
def parse_link
|
94
|
+
num = code[-4..-1].sub(/^0+/, "")
|
95
|
+
url = "https://www.rfc-editor.org/info/rfc#{num}"
|
96
|
+
[RelatonBib::TypedUri.new(content: url, type: "src")]
|
97
|
+
end
|
98
|
+
|
99
|
+
#
|
100
|
+
# Parse document date
|
101
|
+
#
|
102
|
+
# @return [Array<RelatonBib::BibliographicDate>] document date
|
103
|
+
#
|
104
|
+
def parse_date
|
105
|
+
@doc.xpath("./xmlns:date").map do |date|
|
106
|
+
month = date.at("./xmlns:month").text
|
107
|
+
year = date.at("./xmlns:year").text
|
108
|
+
on = "#{year}-#{Date::MONTHNAMES.index(month).to_s.rjust(2, '0')}"
|
109
|
+
RelatonBib::BibliographicDate.new(on: on, type: "published")
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
#
|
114
|
+
# Parse document contributors
|
115
|
+
#
|
116
|
+
# @return [Array<RelatonBib::ContributionInfo>] document contributors
|
117
|
+
#
|
118
|
+
def parse_contributor
|
119
|
+
@doc.xpath("./xmlns:author").map do |contributor|
|
120
|
+
n = contributor.at("./xmlns:name").text
|
121
|
+
name = RelatonBib::LocalizedString.new( n, "en", "Latn")
|
122
|
+
fname = RelatonBib::FullName.new(completename: name)
|
123
|
+
person = RelatonBib::Person.new(name: fname)
|
124
|
+
RelatonBib::ContributionInfo.new(entity: person, role: [{ type: "author" }])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
#
|
129
|
+
# Parse document keywords
|
130
|
+
#
|
131
|
+
# @return [Array<String>] document keywords
|
132
|
+
#
|
133
|
+
def parse_keyword
|
134
|
+
@doc.xpath("./xmlns:keywords/xmlns:kw").map &:text
|
135
|
+
end
|
136
|
+
|
137
|
+
#
|
138
|
+
# Parse document abstract
|
139
|
+
#
|
140
|
+
# @return [Array<RelatonBib::FormattedString>] document abstract
|
141
|
+
#
|
142
|
+
def parse_abstract
|
143
|
+
@doc.xpath("./xmlns:abstract").map do |c|
|
144
|
+
RelatonBib::FormattedString.new(content: c.text, language: "en",
|
145
|
+
script: "Latn", format: "text/html")
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
#
|
150
|
+
# Parse document relations
|
151
|
+
#
|
152
|
+
# @return [Array<RelatonBib::DocumentRelation>] document relations
|
153
|
+
#
|
154
|
+
def parse_relation
|
155
|
+
types = { "updates" => "updates", "obsoleted-by" => "obsoletedBy"}
|
156
|
+
@doc.xpath("./xmlns:updates/xmlns:doc-id|./xmlns:obsoleted-by/xmlns:doc-id").map do |r|
|
157
|
+
fref = RelatonBib::FormattedRef.new(content: r.text)
|
158
|
+
bib = IetfBibliographicItem.new(formattedref: fref)
|
159
|
+
RelatonBib::DocumentRelation.new(type: types[r.parent.name], bibitem: bib)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
#
|
164
|
+
# Parse document status
|
165
|
+
#
|
166
|
+
# @return [RelatonBib::DocumentStatus] document status
|
167
|
+
#
|
168
|
+
def parse_status
|
169
|
+
stage = @doc.at("./xmlns:current-status").text
|
170
|
+
RelatonBib::DocumentStatus.new(stage: stage)
|
171
|
+
end
|
172
|
+
|
173
|
+
#
|
174
|
+
# Parse document editorial group
|
175
|
+
#
|
176
|
+
# @return [RelatonBib::EditorialGroup] document editorial group
|
177
|
+
#
|
178
|
+
def parse_editorialgroup
|
179
|
+
tc = @doc.xpath("./xmlns:wg_acronym").map do |wg|
|
180
|
+
wg = RelatonBib::WorkGroup.new(name: wg.text)
|
181
|
+
RelatonBib::TechnicalCommittee.new(wg)
|
182
|
+
end
|
183
|
+
RelatonBib::EditorialGroup.new(tc)
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module RelatonIetf
|
2
|
+
class RfcIndexEntry
|
3
|
+
#
|
4
|
+
# Document parser initialization
|
5
|
+
#
|
6
|
+
# @param [String] doc_id document id
|
7
|
+
# @param [Array<String>] is_also also document ids
|
8
|
+
#
|
9
|
+
def initialize(name, doc_id, is_also)
|
10
|
+
@name = name
|
11
|
+
@shortnum = doc_id[-4..-1].sub(/^0+/, "")
|
12
|
+
@doc_id = doc_id
|
13
|
+
@is_also = is_also
|
14
|
+
end
|
15
|
+
|
16
|
+
#
|
17
|
+
# Document id
|
18
|
+
#
|
19
|
+
# @return [String] document id
|
20
|
+
#
|
21
|
+
def docnumber
|
22
|
+
@doc_id
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# Initialize document parser and run it
|
27
|
+
#
|
28
|
+
# @param [Nokogiri::XML::Element] doc document
|
29
|
+
#
|
30
|
+
# @return [RelatonIetf::RfcIndexEntry, nil]
|
31
|
+
#
|
32
|
+
def self.parse(doc)
|
33
|
+
doc_id = doc.at("./xmlns:doc-id")
|
34
|
+
is_also = doc.xpath("./xmlns:is-also/xmlns:doc-id").map &:text
|
35
|
+
return unless doc_id && is_also.any?
|
36
|
+
|
37
|
+
name = doc.name.split("-").first
|
38
|
+
new(name, doc_id.text, is_also)
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Render document as XML
|
43
|
+
#
|
44
|
+
# @return [String] XML
|
45
|
+
#
|
46
|
+
def to_xml # rubocop:disable Metrics/MethodLength
|
47
|
+
Nokogiri::XML::Builder.new do |xml|
|
48
|
+
anchor = "#{@name.upcase}#{@shortnum}"
|
49
|
+
url = "https://www.rfc-editor.org/info/#{@name}#{@shortnum}"
|
50
|
+
xml.referencegroup("xmlns:xi" => "http://www.w3.org/2001/XInclude",
|
51
|
+
anchor: anchor, target: url) do
|
52
|
+
@is_also.each do |did|
|
53
|
+
num = did[-4..-1]
|
54
|
+
xml["xi"].send("include", href: "https://www.rfc-editor.org/refs/bibxml/reference.RFC.#{num}.xml")
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end.doc.root.to_xml
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -5,365 +5,373 @@ require "relaton_bib"
|
|
5
5
|
require "relaton_ietf/ietf_bibliographic_item"
|
6
6
|
|
7
7
|
module RelatonIetf
|
8
|
-
# rubocop:disable Metrics/ModuleLength
|
9
|
-
|
10
8
|
# Scrapper module
|
11
9
|
module Scrapper
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
class << self
|
16
|
-
# @param text [String]
|
17
|
-
# @param is_relation [TrueClass, FalseClass]
|
18
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
19
|
-
def scrape_page(text, is_relation: false)
|
20
|
-
# Remove initial "IETF " string if specified
|
21
|
-
ref = text.gsub(/^IETF /, "")
|
22
|
-
/^(?:RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
23
|
-
ref.sub! /(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0") if num
|
24
|
-
rfc_item ref, is_relation
|
25
|
-
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
26
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
27
|
-
Net::ProtocolError, SocketError
|
28
|
-
raise RelatonBib::RequestError, "No document found for #{ref} reference"
|
29
|
-
end
|
30
|
-
|
31
|
-
# @param reference [Nokogiri::XML::Element, nil]
|
32
|
-
# @param is_relation [TrueClass, FalseClass]
|
33
|
-
# @param url [String, NilClass]
|
34
|
-
# @param ver [String, NilClass] Internet Draft version
|
35
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
36
|
-
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
37
|
-
return unless reference
|
38
|
-
|
39
|
-
ietf_item(
|
40
|
-
is_relation: is_relation,
|
41
|
-
id: reference[:anchor],
|
42
|
-
type: "standard",
|
43
|
-
docid: docids(reference, ver),
|
44
|
-
status: status(reference),
|
45
|
-
language: [language(reference)],
|
46
|
-
link: link(reference, url, ver),
|
47
|
-
title: titles(reference),
|
48
|
-
formattedref: formattedref(reference),
|
49
|
-
abstract: abstracts(reference),
|
50
|
-
contributor: contributors(reference),
|
51
|
-
relation: relations(reference),
|
52
|
-
date: dates(reference),
|
53
|
-
series: series(reference),
|
54
|
-
place: ["Fremont, CA"],
|
55
|
-
keyword: reference.xpath("front/keyword").map(&:text),
|
56
|
-
doctype: doctype(reference[:anchor]),
|
57
|
-
)
|
58
|
-
end
|
59
|
-
|
60
|
-
private
|
61
|
-
|
62
|
-
# @param anchor [String]
|
63
|
-
# @return [String]
|
64
|
-
def doctype(anchor)
|
65
|
-
anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
66
|
-
end
|
67
|
-
|
68
|
-
# @param reference [Nokogiri::XML::Element]
|
69
|
-
# @param url [String]
|
70
|
-
# @param ver [String, NilClass] Internet Draft version
|
71
|
-
# @return [Array<Hash>]
|
72
|
-
def link(reference, url, ver)
|
73
|
-
l = []
|
74
|
-
l << { type: "xml", content: url } if url
|
75
|
-
l << { type: "src", content: reference[:target] } if reference[:target]
|
76
|
-
if /^I-D/.match? reference[:anchor]
|
77
|
-
reference.xpath("format").each do |f|
|
78
|
-
c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
79
|
-
l << { type: f[:type], content: c }
|
80
|
-
end
|
81
|
-
end
|
82
|
-
l
|
83
|
-
end
|
84
|
-
|
85
|
-
# @param attrs [Hash]
|
86
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
87
|
-
def ietf_item(**attrs)
|
88
|
-
attrs[:fetched] = Date.today.to_s unless attrs.delete(:is_relation)
|
89
|
-
attrs[:script] = ["Latn"]
|
90
|
-
RelatonIetf::IetfBibliographicItem.new **attrs
|
91
|
-
end
|
92
|
-
|
93
|
-
# @param ref [String]
|
94
|
-
# @param is_relation [Boolen, nil]
|
95
|
-
# @return [RelatonIetf::IetfBibliographicItem]
|
96
|
-
def rfc_item(ref, is_relation)
|
97
|
-
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
98
|
-
if /^I-D/.match? ref
|
99
|
-
ref.sub! /-\d{2}/, "" if ver
|
100
|
-
ref.sub! /(?<=I-D\.)draft-/, ""
|
101
|
-
end
|
102
|
-
|
103
|
-
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
104
|
-
doc = Nokogiri::XML get_page(uri)
|
105
|
-
r = doc.at("/referencegroup", "/reference")
|
106
|
-
fetch_rfc r, is_relation: is_relation, url: uri, ver: ver
|
107
|
-
end
|
108
|
-
|
109
|
-
# @param reference [Nokogiri::XML::Element]
|
110
|
-
# @return [Hash]
|
111
|
-
def relations(reference)
|
112
|
-
reference.xpath("reference").map do |ref|
|
113
|
-
{ type: "includes", bibitem: fetch_rfc(ref, is_relation: true) }
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
# @param uri [String]
|
118
|
-
# @return [String] HTTP response body
|
119
|
-
def get_page(uri)
|
120
|
-
res = Net::HTTP.get_response(URI(uri))
|
121
|
-
return unless res.code == "200"
|
122
|
-
|
123
|
-
# raise RelatonBib::RequestError, "No document found at #{uri}"
|
124
|
-
# end
|
125
|
-
|
126
|
-
res.body
|
127
|
-
end
|
128
|
-
|
129
|
-
# @param reference [Nokogiri::XML::Element]
|
130
|
-
# @return [String]
|
131
|
-
def language(reference)
|
132
|
-
reference[:lang] || "en"
|
133
|
-
end
|
10
|
+
extend RelatonBib::BibXMLParser
|
11
|
+
extend Scrapper
|
134
12
|
|
135
|
-
|
136
|
-
# @return [Array<Hash>]
|
137
|
-
def titles(reference)
|
138
|
-
reference.xpath("./front/title").map do |title|
|
139
|
-
{ content: title.text, language: language(reference), script: "Latn" }
|
140
|
-
end
|
141
|
-
end
|
13
|
+
FLAVOR = "IETF"
|
142
14
|
|
143
|
-
|
144
|
-
# @return [RelatonBib::FormattedRef, nil]
|
145
|
-
def formattedref(reference)
|
146
|
-
return if reference.at "./fornt/title"
|
147
|
-
|
148
|
-
cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
149
|
-
if cont
|
150
|
-
RelatonBib::FormattedRef.new(
|
151
|
-
content: cont, language: language(reference), script: "Latn",
|
152
|
-
)
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# @param reference [Nokogiri::XML::Element]
|
157
|
-
# @return [Array<RelatonBib::FormattedString>]
|
158
|
-
def abstracts(ref)
|
159
|
-
ref.xpath("./front/abstract").map do |a|
|
160
|
-
RelatonBib::FormattedString.new(
|
161
|
-
content: a.text.gsub(/\\n\\t{2,4}/, " ").strip,
|
162
|
-
language: language(ref), script: "Latn"
|
163
|
-
)
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
# @param reference [Nokogiri::XML::Element]
|
168
|
-
# @return [Array<Hash>]
|
169
|
-
def contributors(reference)
|
170
|
-
persons(reference) + organizations(reference)
|
171
|
-
end
|
172
|
-
|
173
|
-
# @param reference [Nokogiri::XML::Element]
|
174
|
-
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
175
|
-
def persons(reference)
|
176
|
-
reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
177
|
-
.map do |author|
|
178
|
-
entity = RelatonBib::Person.new(
|
179
|
-
name: full_name(author, reference),
|
180
|
-
affiliation: [affiliation(author)],
|
181
|
-
contact: contacts(author.at("./address")),
|
182
|
-
)
|
183
|
-
{ entity: entity, role: [contributor_role(author)] }
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
|
-
# @param reference [Nokogiri::XML::Element]
|
188
|
-
# @return [Array<Hash{Symbol=>RelatonBib::Organization,
|
189
|
-
# Symbol=>Array<String>}>]
|
190
|
-
def organizations(reference)
|
191
|
-
publisher = { entity: new_org, role: [type: "publisher"] }
|
192
|
-
orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
193
|
-
next mem unless si[:stream]
|
194
|
-
|
195
|
-
mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
196
|
-
end
|
197
|
-
orgs + reference.xpath(
|
198
|
-
"front/author[not(@surname)][not(@fullname)]/organization",
|
199
|
-
).map do |org|
|
200
|
-
{ entity: new_org(org.text, nil), role: [type: "author"] }
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
# @param author [Nokogiri::XML::Element]
|
205
|
-
# @param ref [Nokogiri::XML::Element]
|
206
|
-
# @return [RelatonBib::FullName]
|
207
|
-
def full_name(author, ref)
|
208
|
-
lang = language ref
|
209
|
-
RelatonBib::FullName.new(
|
210
|
-
completename: localized_string(author[:fullname], lang),
|
211
|
-
initial: [localized_string(author[:initials], lang)].compact,
|
212
|
-
surname: localized_string(author[:surname], lang),
|
213
|
-
)
|
214
|
-
end
|
215
|
-
|
216
|
-
# @param content [String]
|
217
|
-
# @param lang [String]
|
218
|
-
# @return [RelatonBib::LocalizedString]
|
219
|
-
def localized_string(content, lang)
|
220
|
-
return unless content
|
221
|
-
|
222
|
-
RelatonBib::LocalizedString.new(content, lang)
|
223
|
-
end
|
224
|
-
|
225
|
-
# @param postal [Nokogiri::XML::Element]
|
226
|
-
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
227
|
-
def contacts(addr)
|
228
|
-
contacts = []
|
229
|
-
return contacts unless addr
|
230
|
-
|
231
|
-
postal = addr.at("./postal")
|
232
|
-
contacts << address(postal) if postal
|
233
|
-
add_contact(contacts, "phone", addr.at("./phone"))
|
234
|
-
add_contact(contacts, "email", addr.at("./email"))
|
235
|
-
add_contact(contacts, "uri", addr.at("./uri"))
|
236
|
-
contacts
|
237
|
-
end
|
238
|
-
|
239
|
-
# @param postal [Nokogiri::XML::Element]
|
240
|
-
# @rerurn [RelatonBib::Address]
|
241
|
-
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
242
|
-
RelatonBib::Address.new(
|
243
|
-
street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
244
|
-
city: postal.at("./city")&.text,
|
245
|
-
postcode: postal.at("./code")&.text,
|
246
|
-
country: postal.at("./country")&.text,
|
247
|
-
state: postal.at("./region")&.text,
|
248
|
-
)
|
249
|
-
end
|
250
|
-
|
251
|
-
# @param type [String] allowed "phone", "email" or "uri"
|
252
|
-
# @param value [String]
|
253
|
-
def add_contact(contacts, type, value)
|
254
|
-
return unless value
|
255
|
-
|
256
|
-
contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
257
|
-
end
|
258
|
-
|
259
|
-
# @param author [Nokogiri::XML::Element]
|
260
|
-
# @return [RelatonBib::Affiliation]
|
261
|
-
def affiliation(author)
|
262
|
-
organization = author.at("./organization")
|
263
|
-
org = if organization.nil? || organization&.text&.empty?
|
264
|
-
new_org
|
265
|
-
else
|
266
|
-
new_org organization.text, organization[:abbrev]
|
267
|
-
end
|
268
|
-
RelatonBib::Affiliation.new organization: org
|
269
|
-
end
|
15
|
+
GH_URL = "https://raw.githubusercontent.com/relaton/relaton-data-ietf/master/data/reference."
|
270
16
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
17
|
+
# @param text [String]
|
18
|
+
# @param is_relation [TrueClass, FalseClass]
|
19
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
20
|
+
def scrape_page(text, is_relation: false)
|
21
|
+
# Remove initial "IETF " string if specified
|
22
|
+
ref = text.gsub(/^IETF /, "")
|
23
|
+
/^(?:RFC|BCP|FYI|STD)\s(?<num>\d+)/ =~ ref
|
24
|
+
ref.sub!(/(?<=^(?:RFC|BCP|FYI|STD)\s)(\d+)/, num.rjust(4, "0")) if num
|
25
|
+
rfc_item ref, is_relation
|
26
|
+
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
27
|
+
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
28
|
+
Net::ProtocolError, SocketError
|
29
|
+
raise RelatonBib::RequestError, "No document found for #{ref} reference"
|
30
|
+
end
|
277
31
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
32
|
+
# @param reference [Nokogiri::XML::Element, nil]
|
33
|
+
# @param is_relation [Boolean] don't add fetched date for relation
|
34
|
+
# @param url [String, NilClass]
|
35
|
+
# @param ver [String, NilClass] Internet Draft version
|
36
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
37
|
+
# def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
38
|
+
# return unless reference
|
39
|
+
|
40
|
+
# hash = {
|
41
|
+
# id: reference[:anchor],
|
42
|
+
# type: "standard",
|
43
|
+
# docid: docids(reference, ver),
|
44
|
+
# status: status(reference),
|
45
|
+
# language: [language(reference)],
|
46
|
+
# script: ["Latn"],
|
47
|
+
# link: link(reference, url, ver),
|
48
|
+
# title: titles(reference),
|
49
|
+
# formattedref: formattedref(reference),
|
50
|
+
# abstract: abstracts(reference),
|
51
|
+
# contributor: contributors(reference),
|
52
|
+
# relation: relations(reference),
|
53
|
+
# date: dates(reference),
|
54
|
+
# series: series(reference),
|
55
|
+
# keyword: reference.xpath("front/keyword").map(&:text),
|
56
|
+
# doctype: doctype(reference[:anchor]),
|
57
|
+
# }
|
58
|
+
# hash[:fetched] = Date.today.to_s unless is_relation
|
59
|
+
# bib_item(**hash)
|
60
|
+
# end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# @param anchor [String]
|
65
|
+
# @return [String]
|
66
|
+
# def doctype(anchor)
|
67
|
+
# anchor&.include?("I-D") ? "internet-draft" : "rfc"
|
68
|
+
# end
|
69
|
+
|
70
|
+
# @param reference [Nokogiri::XML::Element]
|
71
|
+
# @param url [String]
|
72
|
+
# @param ver [String, NilClass] Internet Draft version
|
73
|
+
# @return [Array<Hash>]
|
74
|
+
# def link(reference, url, ver)
|
75
|
+
# l = []
|
76
|
+
# l << { type: "xml", content: url } if url
|
77
|
+
# l << { type: "src", content: reference[:target] } if reference[:target]
|
78
|
+
# if /^I-D/.match? reference[:anchor]
|
79
|
+
# reference.xpath("format").each do |f|
|
80
|
+
# c = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
|
81
|
+
# l << { type: f[:type], content: c }
|
82
|
+
# end
|
83
|
+
# end
|
84
|
+
# l
|
85
|
+
# end
|
86
|
+
|
87
|
+
# @param attrs [Hash]
|
88
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
89
|
+
def bib_item(**attrs)
|
90
|
+
unless attrs.delete(:is_relation)
|
91
|
+
attrs[:fetched] = Date.today.to_s
|
92
|
+
attrs[:place] = ["Fremont, CA"]
|
282
93
|
end
|
94
|
+
RelatonIetf::IetfBibliographicItem.new(**attrs)
|
95
|
+
end
|
283
96
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
97
|
+
# @param ref [String]
|
98
|
+
# @param is_relation [Boolean, nil]
|
99
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
100
|
+
def rfc_item(ref, is_relation)
|
101
|
+
/(?<=-)(?<ver>\d{2})$/ =~ ref
|
102
|
+
if /^I-D/.match? ref
|
103
|
+
ref.sub!(/-\d{2}/, "") if ver
|
104
|
+
ref.sub!(/(?<=I-D\.)draft-/, "")
|
289
105
|
end
|
290
106
|
|
291
|
-
#
|
292
|
-
#
|
293
|
-
#
|
294
|
-
#
|
295
|
-
|
296
|
-
|
297
|
-
def dates(reference)
|
298
|
-
return unless (date = reference.at "./front/date")
|
299
|
-
|
300
|
-
d = [date[:year], month(date[:month]),
|
301
|
-
(date[:day] || 1)].compact.join "-"
|
302
|
-
date = Time.parse(d).strftime "%Y-%m-%d"
|
303
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
304
|
-
end
|
107
|
+
uri = "#{GH_URL}#{ref.sub(/\s|\u00a0/, '.')}.xml"
|
108
|
+
# doc = Nokogiri::XML get_page(uri)
|
109
|
+
# r = doc.at("/referencegroup", "/reference")
|
110
|
+
# fetch_rfc r, is_relation: is_relation, url: uri, ver: ver
|
111
|
+
parse get_page(uri), url: uri, is_relation: is_relation, ver: ver
|
112
|
+
end
|
305
113
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
if (id = reference[:anchor])
|
323
|
-
ret << RelatonBib::DocumentIdentifier.new(type: "rfc-anchor", id: id)
|
324
|
-
end
|
325
|
-
ret + reference.xpath("./seriesInfo").map do |si|
|
326
|
-
next unless ["DOI", "Internet-Draft"].include? si[:name]
|
327
|
-
|
328
|
-
id = si[:value]
|
329
|
-
id.sub! /(?<=-)\d{2}$/, ver if ver && si[:name] == "Internet-Draft"
|
330
|
-
RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
331
|
-
end.compact
|
332
|
-
end
|
114
|
+
# @param reference [Nokogiri::XML::Element]
|
115
|
+
# @return [Array<Hash>]
|
116
|
+
# def relations(reference)
|
117
|
+
# reference.xpath("reference").map do |ref|
|
118
|
+
# { type: "includes", bibitem: fetch_rfc(ref, is_relation: true) }
|
119
|
+
# end
|
120
|
+
# end
|
121
|
+
|
122
|
+
# @param uri [String]
|
123
|
+
# @return [String] HTTP response body
|
124
|
+
def get_page(uri)
|
125
|
+
res = Net::HTTP.get_response(URI(uri))
|
126
|
+
return unless res.code == "200"
|
127
|
+
|
128
|
+
res.body
|
129
|
+
end
|
333
130
|
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
#
|
340
|
-
def series(reference)
|
341
|
-
reference.xpath("./seriesInfo").map do |si|
|
342
|
-
next if si[:name] == "DOI" || si[:stream] || si[:status]
|
343
|
-
|
344
|
-
RelatonBib::Series.new(
|
345
|
-
title: RelatonBib::TypedTitleString.new(
|
346
|
-
content: si[:name], language: language(reference), script: "Latn",
|
347
|
-
),
|
348
|
-
number: si[:value],
|
349
|
-
type: "main",
|
350
|
-
)
|
351
|
-
end.compact
|
352
|
-
end
|
131
|
+
# @param wgr [RelatonBib::WorkGroup]
|
132
|
+
# @return [RelatonIetf::Committee]
|
133
|
+
def committee(wgr)
|
134
|
+
Committee.new wgr
|
135
|
+
end
|
353
136
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
137
|
+
# @param reference [Nokogiri::XML::Element]
|
138
|
+
# @return [String]
|
139
|
+
# def language(reference)
|
140
|
+
# reference[:lang] || "en"
|
141
|
+
# end
|
142
|
+
|
143
|
+
# @param reference [Nokogiri::XML::Element]
|
144
|
+
# @return [Array<Hash>]
|
145
|
+
# def titles(reference)
|
146
|
+
# reference.xpath("./front/title").map do |title|
|
147
|
+
# { content: title.text, language: language(reference), script: "Latn" }
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
|
151
|
+
# @param reference [Nokogiri::XML::Element]
|
152
|
+
# @return [RelatonBib::FormattedRef, nil]
|
153
|
+
# def formattedref(reference)
|
154
|
+
# return if reference.at "./front/title"
|
155
|
+
|
156
|
+
# cont = (reference[:anchor] || reference[:docName] || reference[:number])
|
157
|
+
# if cont
|
158
|
+
# RelatonBib::FormattedRef.new(
|
159
|
+
# content: cont, language: language(reference), script: "Latn",
|
160
|
+
# )
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
|
164
|
+
# @param ref [Nokogiri::XML::Element]
|
165
|
+
# @return [Array<RelatonBib::FormattedString>]
|
166
|
+
# def abstracts(ref)
|
167
|
+
# ref.xpath("./front/abstract").map do |a|
|
168
|
+
# RelatonBib::FormattedString.new(
|
169
|
+
# content: a.text.gsub(/\\n\\t{2,4}/, " ").strip,
|
170
|
+
# language: language(ref), script: "Latn"
|
171
|
+
# )
|
172
|
+
# end
|
173
|
+
# end
|
174
|
+
|
175
|
+
# @param reference [Nokogiri::XML::Element]
|
176
|
+
# @return [Array<Hash>]
|
177
|
+
def contributors(reference)
|
178
|
+
[{ entity: new_org("Internet Engineering Task Force", "IETF"), role: [type: "publisher"] }] + super
|
179
|
+
# persons(reference) + organizations(reference)
|
366
180
|
end
|
181
|
+
|
182
|
+
# @param reference [Nokogiri::XML::Element]
|
183
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>]
|
184
|
+
# def persons(reference)
|
185
|
+
# reference.xpath("./front/author[@surname]|./front/author[@fullname]")
|
186
|
+
# .map do |author|
|
187
|
+
# entity = RelatonBib::Person.new(
|
188
|
+
# name: full_name(author, reference),
|
189
|
+
# affiliation: affiliation(author),
|
190
|
+
# contact: contacts(author.at("./address")),
|
191
|
+
# )
|
192
|
+
# { entity: entity, role: [contributor_role(author)] }
|
193
|
+
# end
|
194
|
+
# end
|
195
|
+
|
196
|
+
# @param reference [Nokogiri::XML::Element]
|
197
|
+
# @return [Array<Hash{Symbol=>RelatonBib::Organization,
|
198
|
+
# Symbol=>Array<String>}>]
|
199
|
+
# def organizations(reference)
|
200
|
+
# publisher = { entity: new_org, role: [type: "publisher"] }
|
201
|
+
# orgs = reference.xpath("./seriesinfo").reduce([publisher]) do |mem, si|
|
202
|
+
# next mem unless si[:stream]
|
203
|
+
|
204
|
+
# mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
|
205
|
+
# end
|
206
|
+
# orgs + reference.xpath(
|
207
|
+
# "front/author[not(@surname)][not(@fullname)]/organization",
|
208
|
+
# ).map do |org|
|
209
|
+
# { entity: new_org(org.text, nil), role: [type: "author"] }
|
210
|
+
# end
|
211
|
+
# end
|
212
|
+
|
213
|
+
# @param author [Nokogiri::XML::Element]
|
214
|
+
# @param ref [Nokogiri::XML::Element]
|
215
|
+
# @return [RelatonBib::FullName]
|
216
|
+
# def full_name(author, ref)
|
217
|
+
# lang = language ref
|
218
|
+
# RelatonBib::FullName.new(
|
219
|
+
# completename: localized_string(author[:fullname], lang),
|
220
|
+
# initial: [localized_string(author[:initials], lang)].compact,
|
221
|
+
# surname: localized_string(author[:surname], lang),
|
222
|
+
# )
|
223
|
+
# end
|
224
|
+
|
225
|
+
# @param content [String]
|
226
|
+
# @param lang [String]
|
227
|
+
# @return [RelatonBib::LocalizedString]
|
228
|
+
# def localized_string(content, lang)
|
229
|
+
# return unless content
|
230
|
+
|
231
|
+
# RelatonBib::LocalizedString.new(content, lang)
|
232
|
+
# end
|
233
|
+
|
234
|
+
# @param addr [Nokogiri::XML::Element]
|
235
|
+
# @return [Array<RelatonBib::Address, RelatonBib::Phone>]
|
236
|
+
# def contacts(addr)
|
237
|
+
# contacts = []
|
238
|
+
# return contacts unless addr
|
239
|
+
|
240
|
+
# postal = addr.at("./postal")
|
241
|
+
# contacts << address(postal) if postal
|
242
|
+
# add_contact(contacts, "phone", addr.at("./phone"))
|
243
|
+
# add_contact(contacts, "email", addr.at("./email"))
|
244
|
+
# add_contact(contacts, "uri", addr.at("./uri"))
|
245
|
+
# contacts
|
246
|
+
# end
|
247
|
+
|
248
|
+
# @param postal [Nokogiri::XML::Element]
|
249
|
+
# @return [RelatonBib::Address]
|
250
|
+
# def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
|
251
|
+
# RelatonBib::Address.new(
|
252
|
+
# street: [(postal.at("./postalLine") || postal.at("./street"))&.text],
|
253
|
+
# city: postal.at("./city")&.text,
|
254
|
+
# postcode: postal.at("./code")&.text,
|
255
|
+
# country: postal.at("./country")&.text,
|
256
|
+
# state: postal.at("./region")&.text,
|
257
|
+
# )
|
258
|
+
# end
|
259
|
+
|
260
|
+
# @param type [String] allowed "phone", "email" or "uri"
|
261
|
+
# @param value [String]
|
262
|
+
# def add_contact(contacts, type, value)
|
263
|
+
# return unless value
|
264
|
+
|
265
|
+
# contacts << RelatonBib::Contact.new(type: type, value: value.text)
|
266
|
+
# end
|
267
|
+
|
268
|
+
# @param author [Nokogiri::XML::Element]
|
269
|
+
# @return [RelatonBib::Affiliation]
|
270
|
+
# def affiliation(author)
|
271
|
+
# organization = author.at("./organization")
|
272
|
+
# org = if organization.nil? || organization&.text&.empty?
|
273
|
+
# new_org
|
274
|
+
# else
|
275
|
+
# new_org organization.text, organization[:abbrev]
|
276
|
+
# end
|
277
|
+
# RelatonBib::Affiliation.new organization: org
|
278
|
+
# end
|
279
|
+
|
280
|
+
# @param name [String]
|
281
|
+
# @param abbr [String]
|
282
|
+
# @return [RelatonBib::Organization]
|
283
|
+
# def new_org(name = "Internet Engineering Task Force", abbr = "IETF")
|
284
|
+
# RelatonBib::Organization.new name: name, abbreviation: abbr
|
285
|
+
# end
|
286
|
+
|
287
|
+
# @param author [Nokogiri::XML::Document]
|
288
|
+
# @return [Hash]
|
289
|
+
# def contributor_role(author)
|
290
|
+
# { type: author[:role] || "author" }
|
291
|
+
# end
|
292
|
+
|
293
|
+
# def month(mon)
|
294
|
+
# return 1 if !mon || mon.empty?
|
295
|
+
# return mon if /^\d+$/.match? mon
|
296
|
+
|
297
|
+
# Date::MONTHNAMES.index(mon)
|
298
|
+
# end
|
299
|
+
|
300
|
+
#
|
301
|
+
# Extract date from reference.
|
302
|
+
#
|
303
|
+
# @param reference [Nokogiri::XML::Element]
|
304
|
+
# @return [Array<RelatonBib::BibliographicDate>] published date.
|
305
|
+
#
|
306
|
+
# def dates(reference)
|
307
|
+
# return unless (date = reference.at "./front/date")
|
308
|
+
|
309
|
+
# d = [date[:year], month(date[:month]),
|
310
|
+
# (date[:day] || 1)].compact.join "-"
|
311
|
+
# date = Time.parse(d).strftime "%Y-%m-%d"
|
312
|
+
# [RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
313
|
+
# end
|
314
|
+
|
315
|
+
#
|
316
|
+
# Extract document identifiers from reference
|
317
|
+
#
|
318
|
+
# @param reference [Nokogiri::XML::Element]
|
319
|
+
# @param ver [String, NilClass] Internet Draft version
|
320
|
+
#
|
321
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
322
|
+
#
|
323
|
+
# def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
|
324
|
+
# id = (reference[:anchor] || reference[:docName] || reference[:number])
|
325
|
+
# ret = []
|
326
|
+
# if id
|
327
|
+
# ret << RelatonBib::DocumentIdentifier.new(
|
328
|
+
# type: "IETF", id: id.sub(/^(RFC)/, "\\1 "),
|
329
|
+
# )
|
330
|
+
# end
|
331
|
+
# if (id = reference[:anchor])
|
332
|
+
# ret << RelatonBib::DocumentIdentifier.new(type: "rfc-anchor", id: id)
|
333
|
+
# end
|
334
|
+
# names = ["DOI", "Internet-Draft"]
|
335
|
+
# ret + reference.xpath("./seriesInfo").map do |si|
|
336
|
+
# next unless names.include? si[:name]
|
337
|
+
|
338
|
+
# id = si[:value]
|
339
|
+
# id.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
|
340
|
+
# RelatonBib::DocumentIdentifier.new(id: id, type: si[:name])
|
341
|
+
# end.compact
|
342
|
+
# end
|
343
|
+
|
344
|
+
#
|
345
|
+
# Extract series from reference
|
346
|
+
# @param reference [Nokogiri::XML::Element]
|
347
|
+
#
|
348
|
+
# @return [Array<RelatonBib::Series>]
|
349
|
+
#
|
350
|
+
# def series(reference)
|
351
|
+
# reference.xpath("./seriesInfo").map do |si|
|
352
|
+
# next if si[:name] == "DOI" || si[:stream] || si[:status]
|
353
|
+
|
354
|
+
# RelatonBib::Series.new(
|
355
|
+
# title: RelatonBib::TypedTitleString.new(
|
356
|
+
# content: si[:name], language: language(reference), script: "Latn",
|
357
|
+
# ),
|
358
|
+
# number: si[:value],
|
359
|
+
# type: "main",
|
360
|
+
# )
|
361
|
+
# end.compact
|
362
|
+
# end
|
363
|
+
|
364
|
+
#
|
365
|
+
# extract status
|
366
|
+
# @param reference [Nokogiri::XML::Element]
|
367
|
+
#
|
368
|
+
# @return [RelatonBib::DocumentStatus]
|
369
|
+
#
|
370
|
+
# def status(reference)
|
371
|
+
# st = reference.at("./seriesinfo[@status]")
|
372
|
+
# return unless st
|
373
|
+
|
374
|
+
# RelatonBib::DocumentStatus.new(stage: st[:status])
|
375
|
+
# end
|
367
376
|
end
|
368
|
-
# rubocop:enable Metrics/ModuleLength
|
369
377
|
end
|
data/lib/relaton_ietf/version.rb
CHANGED
data/lib/relaton_ietf.rb
CHANGED
@@ -5,6 +5,8 @@ require "relaton_ietf/ietf_bibliography"
|
|
5
5
|
require "relaton_ietf/ietf_bibliographic_item"
|
6
6
|
require "relaton_ietf/xml_parser"
|
7
7
|
require "relaton_ietf/hash_converter"
|
8
|
+
require "relaton_ietf/committee"
|
9
|
+
require "relaton_ietf/data_fetcher"
|
8
10
|
|
9
11
|
require "relaton/provider_ietf"
|
10
12
|
|
data/relaton_ietf.gemspec
CHANGED
@@ -38,5 +38,6 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.add_development_dependency "vcr"
|
39
39
|
spec.add_development_dependency "webmock"
|
40
40
|
|
41
|
-
spec.add_dependency "relaton-bib", "
|
41
|
+
spec.add_dependency "relaton-bib", ">= 1.9.8"
|
42
|
+
spec.add_dependency "zlib", "~> 1.1.0"
|
42
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ietf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -124,18 +124,32 @@ dependencies:
|
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: relaton-bib
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.9.8
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.9.8
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: zlib
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
128
142
|
requirements:
|
129
143
|
- - "~>"
|
130
144
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.
|
145
|
+
version: 1.1.0
|
132
146
|
type: :runtime
|
133
147
|
prerelease: false
|
134
148
|
version_requirements: !ruby/object:Gem::Requirement
|
135
149
|
requirements:
|
136
150
|
- - "~>"
|
137
151
|
- !ruby/object:Gem::Version
|
138
|
-
version: 1.
|
152
|
+
version: 1.1.0
|
139
153
|
description: "RelatonIetf: retrieve IETF Standards for bibliographic use \nusing the
|
140
154
|
BibliographicItem model.\n\nFormerly known as rfcbib.\n"
|
141
155
|
email:
|
@@ -163,10 +177,14 @@ files:
|
|
163
177
|
- grammars/reqt.rng
|
164
178
|
- lib/relaton/provider_ietf.rb
|
165
179
|
- lib/relaton_ietf.rb
|
180
|
+
- lib/relaton_ietf/committee.rb
|
181
|
+
- lib/relaton_ietf/data_fetcher.rb
|
166
182
|
- lib/relaton_ietf/hash_converter.rb
|
167
183
|
- lib/relaton_ietf/ietf_bibliographic_item.rb
|
168
184
|
- lib/relaton_ietf/ietf_bibliography.rb
|
169
185
|
- lib/relaton_ietf/processor.rb
|
186
|
+
- lib/relaton_ietf/rfc_entry.rb
|
187
|
+
- lib/relaton_ietf/rfc_index_entry.rb
|
170
188
|
- lib/relaton_ietf/scrapper.rb
|
171
189
|
- lib/relaton_ietf/version.rb
|
172
190
|
- lib/relaton_ietf/xml_parser.rb
|