relaton-ietf 1.9.2 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e16b219a2bc4343e1f47d35629131477d8dd88199fc012b252d2404277b6a2b2
4
- data.tar.gz: ec0c784f65452bd35f9d9a43176654e3a1da583fdd028e001cb594707a0252c7
3
+ metadata.gz: 05a6c063cec7b11a826ebe390cc8ebc70974504a70098886a9ac815036428314
4
+ data.tar.gz: e99a83cfdc93c51e58af1c50baba6a4b518a1895da6a7949cb4fa2e500b56f0b
5
5
  SHA512:
6
- metadata.gz: b3f427e275c57c2de8d6c6fb4d27fb330c0674facb4b02e2eab172228aa758f27343485dba4306efa68b5fd72c799cce6e3de33d79d5183de524f8cd4ec4ea69
7
- data.tar.gz: 926b1b544cbad65a57561f0566f644795c0d31b2173aadcbe054d4e305e8028796fe01a31a1ca3deae5aac0e1db7f6f1fa057c8113ad6f827c2e8d1b7a04db53
6
+ metadata.gz: 10ec58d5177d45a71079daee30517c3ea040cf5b231ff5fb0688c10818ba7d77b0cd5e61cec478739851a1a460d4b37cec8dcb1d4ce3b53ae2a84bca3300348f
7
+ data.tar.gz: 4fda1bcdab90ddc307f11d9684016b625d2f7c7221aeb25b9f7fcc1b1679f65b2829833fd982df913e8a7f0dfc910522bf2905dd7555c8da30e80cfa64ae18fb
data/.rubocop.yml CHANGED
@@ -2,6 +2,8 @@
2
2
  # https://github.com/riboseinc/oss-guides
3
3
  # All project-specific additions and overrides should be specified in this file.
4
4
 
5
+ require: rubocop-rails
6
+
5
7
  inherit_from:
6
8
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
9
  AllCops:
data/README.adoc CHANGED
@@ -133,6 +133,41 @@ RelatonIetf::IetfBibliographicItem.from_hash hash
133
133
  ...
134
134
  ----
135
135
 
136
+ === Fetch data
137
+
138
+ There are IETF datasets that can be converted into RelatonXML/BibXML/BibYAML formats:
139
+
140
+ - `ietf-rfcsubseries` - https://www.rfc-editor.org/rfc-index.xml (`<bcp-entry>`, `<fyi-entry>`, `<std-entry>`)
141
+ - `ietf-internet-drafts` - https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz
142
+ - `ietf-rfc-entries` - https://www.rfc-editor.org/rfc-index.xml (`<rfc-entry>`)
143
+
144
+ The method `RelatonIetf::DataFetcher.fetch(source, output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
145
+
146
+ Arguments:
147
+
148
+ - `source` - dataset name (`ietf-rfcsubseries`, `ietf-internet-drafts`, or `ietf-rfc-entries`)
149
+ - `output` - folder to save documents (default './data').
150
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
151
+
152
+ For the `ietf-rfcsubseries` dataset, only a special XML format is supported:
153
+
154
+ [source,xml]
155
+ ----
156
+ <referencegroup anchor="BCP14" target="https://www.rfc-editor.org/info/bcp14">
157
+ <xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.2119.xml" />
158
+ <xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.8174.xml" />
159
+ </referencegroup>
160
+ ----
161
+
162
+ [source,ruby]
163
+ ----
164
+ RelatonIetf::DataFetcher.fetch "ietf-internet-drafts"
165
+ Started at: 2021-12-17 10:23:20 +0100
166
+ Stopped at: 2021-12-17 10:29:19 +0100
167
+ Done in: 360 sec.
168
+ => nil
169
+ ----
170
+
136
171
  == Contributing
137
172
 
138
173
  Bug reports and pull requests are welcome on GitHub at https://github.com/metanorma/relaton-ietf.
@@ -0,0 +1,130 @@
1
+ require "rubygems"
2
+ require "rubygems/package"
3
+ require "zlib"
4
+ require "relaton_ietf/rfc_index_entry"
5
+ require "relaton_ietf/rfc_entry"
6
+
7
+ module RelatonIetf
8
+ class DataFetcher
9
+ #
10
+ # Data fetcher initializer
11
+ #
12
+ # @param [String] source source name
13
+ # @param [String] output directory to save files
14
+ # @param [String] format format of output files (xml, yaml, bibxml);
15
+ # for ietf-rfcsubseries source only: xml
16
+ #
17
+ def initialize(source, output, format)
18
+ @source = source
19
+ @output = output
20
+ @format = source == "ietf-rfcsubseries" ? "rfcxml" : format
21
+ @ext = @format.sub(/^bib|^rfc/, "")
22
+ @files = []
23
+ end
24
+
25
+ #
26
+ # Initialize fetcher and run fetch
27
+ #
28
+ # @param [String] source source name
29
+ # @param [String] output directory to save files, default: "data"
30
+ # @param [String] format format of output files (xml, yaml, bibxml);
31
+ # default: yaml; for ietf-rfcsubseries source only: xml
32
+ #
33
+ def self.fetch(source, output: "data", format: "yaml")
34
+ t1 = Time.now
35
+ puts "Started at: #{t1}"
36
+ FileUtils.mkdir_p output unless Dir.exist? output
37
+ new(source, output, format).fetch
38
+ t2 = Time.now
39
+ puts "Stopped at: #{t2}"
40
+ puts "Done in: #{(t2 - t1).round} sec."
41
+ end
42
+
43
+ #
44
+ # Fetch documents
45
+ #
46
+ def fetch
47
+ case @source
48
+ when "ietf-rfcsubseries" then fetch_ieft_rfcsubseries
49
+ when "ietf-internet-drafts" then fetch_ieft_internet_drafts
50
+ when "ietf-rfc-entries" then fetch_ieft_rfcs
51
+ end
52
+ end
53
+
54
+ #
55
+ # Fetches ietf-rfcsubseries documents
56
+ #
57
+ def fetch_ieft_rfcsubseries
58
+ rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry").each do |doc|
59
+ save_doc RfcIndexEntry.parse(doc)
60
+ end
61
+ end
62
+
63
+ #
64
+ # Fetches ietf-internet-drafts documents
65
+ #
66
+ def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength
67
+ gz = OpenURI.open_uri("https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz")
68
+ z = Zlib::GzipReader.new(gz)
69
+ io = StringIO.new(z.read)
70
+ z.close
71
+ Gem::Package::TarReader.new io do |tar|
72
+ tar.each do |tarfile|
73
+ next if tarfile.directory?
74
+
75
+ save_doc RelatonBib::BibXMLParser.parse(tarfile.read)
76
+ end
77
+ end
78
+ end
79
+
80
+ def fetch_ieft_rfcs
81
+ rfc_index.xpath("xmlns:rfc-entry").each do |doc|
82
+ save_doc RfcEntry.parse(doc)
83
+ end
84
+ end
85
+
86
+ def rfc_index
87
+ uri = URI "https://www.rfc-editor.org/rfc-index.xml"
88
+ Nokogiri::XML(Net::HTTP.get(uri)).at("/xmlns:rfc-index")
89
+ end
90
+
91
+ #
92
+ # Save document to file
93
+ #
94
+ # @param [RelatonIetf::RfcIndexEntry, nil] rfc index entry
95
+ #
96
+ def save_doc(entry) # rubocop:disable Metrics/MethodLength
97
+ return unless entry
98
+
99
+ c = case @format
100
+ when "xml" then entry.to_xml(bibdata: true)
101
+ when "yaml" then entry.to_hash.to_yaml
102
+ when "rfcxml" then entry.to_xml
103
+ else entry.send("to_#{@format}")
104
+ end
105
+ file = file_name entry
106
+ if @files.include? file
107
+ warn "File #{file} already exists. Document: #{entry.docnumber}"
108
+ else
109
+ @files << file
110
+ end
111
+ File.write file, c, encoding: "UTF-8"
112
+ end
113
+
114
+ #
115
+ # Generate file name
116
+ #
117
+ # @param [RelatonIetf::RfcIndexEntry] entry
118
+ #
119
+ # @return [String] file name
120
+ #
121
+ def file_name(entry)
122
+ id = if entry.respond_to? :docidentifier
123
+ entry.docidentifier.detect { |i| i.type == "Internet-Draft" }&.id
124
+ end
125
+ id ||= entry.docnumber
126
+ name = id.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
127
+ File.join @output, "#{name}.#{@ext}"
128
+ end
129
+ end
130
+ end
@@ -3,11 +3,12 @@ require "relaton_ietf/xml_parser"
3
3
 
4
4
  module RelatonIetf
5
5
  class Processor < Relaton::Processor
6
- def initialize
6
+ def initialize # rubocop:disable Lint/MissingSuper
7
7
  @short = :relaton_ietf
8
8
  @prefix = "IETF"
9
9
  @defaultprefix = /^RFC /
10
10
  @idtype = "IETF"
11
+ @datasets = %w[ietf-rfcsubseries ietf-internet-drafts ietf-rfc-entries]
11
12
  end
12
13
 
13
14
  # @param code [String]
@@ -18,6 +19,18 @@ module RelatonIetf
18
19
  ::RelatonIetf::IetfBibliography.get(code, date, opts)
19
20
  end
20
21
 
22
+ #
23
+ # Fetch all the documents from the given dataset (e.g. https://www.rfc-editor.org/rfc-index.xml)
24
+ #
25
+ # @param [String] source source name
26
+ # @param [Hash] opts
27
+ # @option opts [String] :output directory to output documents
28
+ # @option opts [String] :format
29
+ #
30
+ def fetch_data(source, opts)
31
+ DataFetcher.fetch(source, **opts)
32
+ end
33
+
21
34
  # @param xml [String]
22
35
  # @return [RelatonIetf::IetfBibliographicItem]
23
36
  def from_xml(xml)
@@ -0,0 +1,186 @@
1
+ module RelatonIetf
2
+ class RfcEntry
3
+ #
4
+ # Initalize parser
5
+ #
6
+ # @param [Nokogiri::XML::Element] doc document
7
+ #
8
+ def initialize(doc)
9
+ @doc = doc
10
+ end
11
+
12
+ #
13
+ # Initialize parser & parse document
14
+ #
15
+ # @param [Nokogiri::XML::Element] doc document
16
+ #
17
+ # @return [RelatonIetf::IetfBibliographicItem] bib item
18
+ #
19
+ def self.parse(doc)
20
+ new(doc).parse
21
+ end
22
+
23
+ #
24
+ # Parse document
25
+ #
26
+ # @return [RelatonIetf::IetfBibliographicItem] bib item
27
+ #
28
+ def parse # rubocop:disable Metrics/MethodLength
29
+ IetfBibliographicItem.new(
30
+ type: "standard",
31
+ language: ["en"],
32
+ script: ["Latn"],
33
+ fetched: Date.today.to_s,
34
+ docid: parse_docid,
35
+ docnumber: code,
36
+ title: parse_title,
37
+ link: parse_link,
38
+ date: parse_date,
39
+ contributor: parse_contributor,
40
+ keyword: parse_keyword,
41
+ abstract: parse_abstract,
42
+ relation: parse_relation,
43
+ status: parse_status,
44
+ editorialgroup: parse_editorialgroup,
45
+ )
46
+ end
47
+
48
+ #
49
+ # Parse document identifiers
50
+ #
51
+ # @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
52
+ #
53
+ def parse_docid
54
+ ids = [RelatonBib::DocumentIdentifier.new(id: pub_id, type: "IETF")]
55
+ doi = @doc.at("./xmlns:doi").text
56
+ ids << RelatonBib::DocumentIdentifier.new(id: doi, type: "DOI")
57
+ ids
58
+ end
59
+
60
+ #
61
+ # Parse document title
62
+ #
63
+ # @return [Array<RelatonBib::TypedTitleString>] document title
64
+ #
65
+ def parse_title
66
+ content = @doc.at("./xmlns:title").text
67
+ [RelatonBib::TypedTitleString.new(content: content, type: "main")]
68
+ end
69
+
70
+ #
71
+ # Create PubID
72
+ #
73
+ # @return [String] PubID
74
+ #
75
+ def pub_id
76
+ "IETF #{code}"
77
+ end
78
+
79
+ #
80
+ # Parse document code
81
+ #
82
+ # @return [String] document code
83
+ #
84
+ def code
85
+ @doc.at("./xmlns:doc-id").text
86
+ end
87
+
88
+ #
89
+ # Create link
90
+ #
91
+ # @return [Array<RelatonBib::TypedUri>]
92
+ #
93
+ def parse_link
94
+ num = code[-4..-1].sub(/^0+/, "")
95
+ url = "https://www.rfc-editor.org/info/rfc#{num}"
96
+ [RelatonBib::TypedUri.new(content: url, type: "src")]
97
+ end
98
+
99
+ #
100
+ # Parse document date
101
+ #
102
+ # @return [Array<RelatonBib::BibliographicDate>] document date
103
+ #
104
+ def parse_date
105
+ @doc.xpath("./xmlns:date").map do |date|
106
+ month = date.at("./xmlns:month").text
107
+ year = date.at("./xmlns:year").text
108
+ on = "#{year}-#{Date::MONTHNAMES.index(month).to_s.rjust(2, '0')}"
109
+ RelatonBib::BibliographicDate.new(on: on, type: "published")
110
+ end
111
+ end
112
+
113
+ #
114
+ # Parse document contributors
115
+ #
116
+ # @return [Array<RelatonBib::ContributionInfo>] document contributors
117
+ #
118
+ def parse_contributor
119
+ @doc.xpath("./xmlns:author").map do |contributor|
120
+ n = contributor.at("./xmlns:name").text
121
+ name = RelatonBib::LocalizedString.new( n, "en", "Latn")
122
+ fname = RelatonBib::FullName.new(completename: name)
123
+ person = RelatonBib::Person.new(name: fname)
124
+ RelatonBib::ContributionInfo.new(entity: person, role: [{ type: "author" }])
125
+ end
126
+ end
127
+
128
+ #
129
+ # Parse document keywords
130
+ #
131
+ # @return [Array<String>] document keywords
132
+ #
133
+ def parse_keyword
134
+ @doc.xpath("./xmlns:keywords/xmlns:kw").map &:text
135
+ end
136
+
137
+ #
138
+ # Parse document abstract
139
+ #
140
+ # @return [Array<RelatonBib::FormattedString>] document abstract
141
+ #
142
+ def parse_abstract
143
+ @doc.xpath("./xmlns:abstract").map do |c|
144
+ RelatonBib::FormattedString.new(content: c.text, language: "en",
145
+ script: "Latn", format: "text/html")
146
+ end
147
+ end
148
+
149
+ #
150
+ # Parse document relations
151
+ #
152
+ # @return [Array<RelatonBib::DocumentRelation>] document relations
153
+ #
154
+ def parse_relation
155
+ types = { "updates" => "updates", "obsoleted-by" => "obsoletedBy"}
156
+ @doc.xpath("./xmlns:updates/xmlns:doc-id|./xmlns:obsoleted-by/xmlns:doc-id").map do |r|
157
+ fref = RelatonBib::FormattedRef.new(content: r.text)
158
+ bib = IetfBibliographicItem.new(formattedref: fref)
159
+ RelatonBib::DocumentRelation.new(type: types[r.parent.name], bibitem: bib)
160
+ end
161
+ end
162
+
163
+ #
164
+ # Parse document status
165
+ #
166
+ # @return [RelatonBib::DocumentStatus] document status
167
+ #
168
+ def parse_status
169
+ stage = @doc.at("./xmlns:current-status").text
170
+ RelatonBib::DocumentStatus.new(stage: stage)
171
+ end
172
+
173
+ #
174
+ # Parse document editorial group
175
+ #
176
+ # @return [RelatonBib::EditorialGroup] document editorial group
177
+ #
178
+ def parse_editorialgroup
179
+ tc = @doc.xpath("./xmlns:wg_acronym").map do |wg|
180
+ wg = RelatonBib::WorkGroup.new(name: wg.text)
181
+ RelatonBib::TechnicalCommittee.new(wg)
182
+ end
183
+ RelatonBib::EditorialGroup.new(tc)
184
+ end
185
+ end
186
+ end
@@ -0,0 +1,60 @@
1
+ module RelatonIetf
2
+ class RfcIndexEntry
3
+ #
4
+ # Document parser initialization
5
+ #
6
+ # @param [String] doc_id document id
7
+ # @param [Array<String>] is_also also document ids
8
+ #
9
+ def initialize(name, doc_id, is_also)
10
+ @name = name
11
+ @shortnum = doc_id[-4..-1].sub(/^0+/, "")
12
+ @doc_id = doc_id
13
+ @is_also = is_also
14
+ end
15
+
16
+ #
17
+ # Document id
18
+ #
19
+ # @return [String] document id
20
+ #
21
+ def docnumber
22
+ @doc_id
23
+ end
24
+
25
+ #
26
+ # Initialize document parser and run it
27
+ #
28
+ # @param [Nokogiri::XML::Element] doc document
29
+ #
30
+ # @return [RelatonIetf::RfcIndexEntry, nil]
31
+ #
32
+ def self.parse(doc)
33
+ doc_id = doc.at("./xmlns:doc-id")
34
+ is_also = doc.xpath("./xmlns:is-also/xmlns:doc-id").map &:text
35
+ return unless doc_id && is_also.any?
36
+
37
+ name = doc.name.split("-").first
38
+ new(name, doc_id.text, is_also)
39
+ end
40
+
41
+ #
42
+ # Render document as XML
43
+ #
44
+ # @return [String] XML
45
+ #
46
+ def to_xml # rubocop:disable Metrics/MethodLength
47
+ Nokogiri::XML::Builder.new do |xml|
48
+ anchor = "#{@name.upcase}#{@shortnum}"
49
+ url = "https://www.rfc-editor.org/info/#{@name}#{@shortnum}"
50
+ xml.referencegroup("xmlns:xi" => "http://www.w3.org/2001/XInclude",
51
+ anchor: anchor, target: url) do
52
+ @is_also.each do |did|
53
+ num = did[-4..-1]
54
+ xml["xi"].send("include", href: "https://www.rfc-editor.org/refs/bibxml/reference.RFC.#{num}.xml")
55
+ end
56
+ end
57
+ end.doc.root.to_xml
58
+ end
59
+ end
60
+ end
@@ -1,3 +1,3 @@
1
1
  module RelatonIetf
2
- VERSION = "1.9.2".freeze
2
+ VERSION = "1.9.3".freeze
3
3
  end
data/lib/relaton_ietf.rb CHANGED
@@ -6,6 +6,7 @@ require "relaton_ietf/ietf_bibliographic_item"
6
6
  require "relaton_ietf/xml_parser"
7
7
  require "relaton_ietf/hash_converter"
8
8
  require "relaton_ietf/committee"
9
+ require "relaton_ietf/data_fetcher"
9
10
 
10
11
  require "relaton/provider_ietf"
11
12
 
data/relaton_ietf.gemspec CHANGED
@@ -38,5 +38,6 @@ Gem::Specification.new do |spec|
38
38
  spec.add_development_dependency "vcr"
39
39
  spec.add_development_dependency "webmock"
40
40
 
41
- spec.add_dependency "relaton-bib", ">= 1.9.5"
41
+ spec.add_dependency "relaton-bib", ">= 1.9.8"
42
+ spec.add_dependency "zlib", "~> 1.1.0"
42
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-ietf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.2
4
+ version: 1.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-10-26 00:00:00.000000000 Z
11
+ date: 2021-12-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -128,14 +128,28 @@ dependencies:
128
128
  requirements:
129
129
  - - ">="
130
130
  - !ruby/object:Gem::Version
131
- version: 1.9.5
131
+ version: 1.9.8
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - ">="
137
137
  - !ruby/object:Gem::Version
138
- version: 1.9.5
138
+ version: 1.9.8
139
+ - !ruby/object:Gem::Dependency
140
+ name: zlib
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 1.1.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 1.1.0
139
153
  description: "RelatonIetf: retrieve IETF Standards for bibliographic use \nusing the
140
154
  BibliographicItem model.\n\nFormerly known as rfcbib.\n"
141
155
  email:
@@ -164,10 +178,13 @@ files:
164
178
  - lib/relaton/provider_ietf.rb
165
179
  - lib/relaton_ietf.rb
166
180
  - lib/relaton_ietf/committee.rb
181
+ - lib/relaton_ietf/data_fetcher.rb
167
182
  - lib/relaton_ietf/hash_converter.rb
168
183
  - lib/relaton_ietf/ietf_bibliographic_item.rb
169
184
  - lib/relaton_ietf/ietf_bibliography.rb
170
185
  - lib/relaton_ietf/processor.rb
186
+ - lib/relaton_ietf/rfc_entry.rb
187
+ - lib/relaton_ietf/rfc_index_entry.rb
171
188
  - lib/relaton_ietf/scrapper.rb
172
189
  - lib/relaton_ietf/version.rb
173
190
  - lib/relaton_ietf/xml_parser.rb