relaton-ietf 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +1 -1
- data/Gemfile.lock +4 -4
- data/appveyor.yml +1 -0
- data/lib/relaton/provider_ietf.rb +1 -1
- data/lib/relaton_ietf/scrapper.rb +79 -31
- data/lib/relaton_ietf/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 306fed5a14cbe156879bc5a48d8eb068d73cc1253ed5ea2d397c3a31561b1f8d
|
4
|
+
data.tar.gz: 93be8c725f4ae84d6e7c665da73daf42bc58f127e12de6dfb30cdef44655a994
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed500f627c2b017a384c144460a863fa502a8e45c185f7993684a39c0f566e4d6dba9bca537756308e90252062032507c5091d9744acf838db95570f1e76f9ed
|
7
|
+
data.tar.gz: 97c4fc936d9111e13e131a44a2283f972c9018644f28f52771db5a6fa02dfba6580d6964c11972e1366b6ecd29710a596ac14a0601d45739ddfb0712247082e8
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -13,7 +13,7 @@ GEM
|
|
13
13
|
coderay (1.1.2)
|
14
14
|
crack (0.4.3)
|
15
15
|
safe_yaml (~> 1.0.0)
|
16
|
-
debase (0.2.
|
16
|
+
debase (0.2.3)
|
17
17
|
debase-ruby_core_source (>= 0.10.2)
|
18
18
|
debase-ruby_core_source (0.10.5)
|
19
19
|
diff-lcs (1.3)
|
@@ -35,7 +35,7 @@ GEM
|
|
35
35
|
pry (~> 0.10)
|
36
36
|
public_suffix (3.1.1)
|
37
37
|
rake (10.5.0)
|
38
|
-
relaton-bib (0.2.
|
38
|
+
relaton-bib (0.2.4)
|
39
39
|
addressable
|
40
40
|
nokogiri (~> 1.8.4)
|
41
41
|
relaton-iso-bib (0.2.3)
|
@@ -47,7 +47,7 @@ GEM
|
|
47
47
|
rspec-core (~> 3.8.0)
|
48
48
|
rspec-expectations (~> 3.8.0)
|
49
49
|
rspec-mocks (~> 3.8.0)
|
50
|
-
rspec-core (3.8.
|
50
|
+
rspec-core (3.8.2)
|
51
51
|
rspec-support (~> 3.8.0)
|
52
52
|
rspec-expectations (3.8.4)
|
53
53
|
diff-lcs (>= 1.2.0, < 2.0)
|
@@ -60,7 +60,7 @@ GEM
|
|
60
60
|
rake (>= 0.8.1)
|
61
61
|
ruby_deep_clone (0.8.0)
|
62
62
|
safe_yaml (1.0.5)
|
63
|
-
simplecov (0.
|
63
|
+
simplecov (0.17.0)
|
64
64
|
docile (~> 1.1)
|
65
65
|
json (>= 1.8, < 3)
|
66
66
|
simplecov-html (~> 0.10.0)
|
data/appveyor.yml
CHANGED
data/lib/relaton/provider_ietf.rb
CHANGED
data/lib/relaton_ietf/scrapper.rb
CHANGED
@@ -12,62 +12,48 @@ module RelatonIetf
|
|
12
12
|
module Scrapper
|
13
13
|
RFC_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.CODE"
|
14
14
|
ID_URI_PATTERN = "https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
|
15
|
+
BCP_URI_PATTERN = "https://www.rfc-editor.org/info/CODE"
|
15
16
|
|
16
17
|
class << self
|
18
|
+
# rubocop:disable Metrics/MethodLength
|
19
|
+
|
17
20
|
# @param text [String]
|
21
|
+
# @param is_relation [TrueClass, FalseClass]
|
18
22
|
# @return [RelatonIetf::IetfBibliographicItem]
|
19
|
-
def scrape_page(text)
|
23
|
+
def scrape_page(text, is_relation = false)
|
20
24
|
# Remove initial "IETF " string if specified
|
21
|
-
ref = text.
|
22
|
-
gsub(/^IETF /, "").
|
23
|
-
sub(" ", ".") + ".xml"
|
25
|
+
ref = text.gsub(/^IETF /, "")
|
24
26
|
|
25
27
|
case ref
|
26
|
-
when /^RFC/
|
27
|
-
|
28
|
-
|
29
|
-
when /^I-D/
|
30
|
-
uri = ID_URI_PATTERN.dup
|
31
|
-
doctype = "internet-draft"
|
28
|
+
when /^RFC/ then rfc_item RFC_URI_PATTERN.dup, ref, is_relation
|
29
|
+
when /^I-D/ then rfc_item ID_URI_PATTERN.dup, ref, is_relation
|
30
|
+
when /^BCP/ then bcp_item BCP_URI_PATTERN.dup, ref
|
32
31
|
else
|
33
32
|
raise RelatonBib::RequestError, "#{ref}: not recognised for RFC"
|
34
33
|
end
|
35
|
-
|
36
|
-
uri = uri.gsub("CODE", ref)
|
37
|
-
res = Net::HTTP.get_response(URI(uri))
|
38
|
-
if res.code != "200"
|
39
|
-
raise RelatonBib::RequestError, "No document found at #{uri}"
|
40
|
-
end
|
41
|
-
|
42
|
-
doc = Nokogiri::HTML Net::HTTP.get(URI(uri))
|
43
|
-
reference = doc.at("//reference")
|
44
|
-
return unless reference
|
45
|
-
|
46
|
-
bib_item reference, doctype
|
47
34
|
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
48
35
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
49
36
|
Net::ProtocolError, SocketError
|
50
|
-
raise RelatonBib::RequestError, "No document found
|
37
|
+
raise RelatonBib::RequestError, "No document found for #{ref} reference."
|
51
38
|
end
|
52
39
|
|
53
|
-
#
|
54
|
-
|
40
|
+
# @param reference [String]
|
55
41
|
# @return [RelatonIetf::IetfBibliographicItem]
|
56
|
-
def
|
57
|
-
|
58
|
-
|
42
|
+
def fetch_rfc(reference, is_relation = false)
|
43
|
+
return unless reference
|
44
|
+
|
45
|
+
ietf_item(
|
46
|
+
is_relation: is_relation,
|
59
47
|
id: reference[:anchor],
|
60
48
|
docid: docids(reference),
|
61
49
|
status: status(reference),
|
62
50
|
language: [language(reference)],
|
63
|
-
script: ["Latn"],
|
64
51
|
link: [{ type: "src", content: reference[:target] }],
|
65
52
|
titles: titles(reference),
|
66
53
|
abstract: abstracts(reference),
|
67
54
|
contributors: contributors(reference),
|
68
55
|
dates: dates(reference),
|
69
56
|
series: series(reference),
|
70
|
-
doctype: doctype,
|
71
57
|
keywords: reference.xpath("front/keyword").map(&:text),
|
72
58
|
)
|
73
59
|
end
|
@@ -75,6 +61,67 @@ module RelatonIetf
|
|
75
61
|
|
76
62
|
private
|
77
63
|
|
64
|
+
# @param attrs [Hash]
|
65
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
66
|
+
def ietf_item(**attrs)
|
67
|
+
attrs[:fetched] = Date.today.to_s unless attrs.delete(:is_relation)
|
68
|
+
attrs[:script] = ["Latn"]
|
69
|
+
attrs[:doctype] = "standard"
|
70
|
+
RelatonIetf::IetfBibliographicItem.new **attrs
|
71
|
+
end
|
72
|
+
|
73
|
+
# @param uri_template [String]
|
74
|
+
# @param ref [String]
|
75
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
76
|
+
def rfc_item(uri_template, ref, is_relation)
|
77
|
+
uri = uri_template.sub "CODE", ref.sub(/\s|\u00a0/, ".") + ".xml"
|
78
|
+
doc = Nokogiri::XML get_page(uri)
|
79
|
+
fetch_rfc doc.at("//reference"), is_relation
|
80
|
+
end
|
81
|
+
|
82
|
+
# @param uri_template [String]
|
83
|
+
# @param reference [String]
|
84
|
+
# @return [RelatonIetf::IetfBibliographicItem]
|
85
|
+
def bcp_item(uri_template, reference)
|
86
|
+
uri = uri_template.sub "CODE", reference.sub(" ", "").downcase
|
87
|
+
doc = Nokogiri::HTML get_page(uri)
|
88
|
+
ietf_item(
|
89
|
+
id: reference,
|
90
|
+
docid: [RelatonBib::DocumentIdentifier.new(type: "IETF", id: reference)],
|
91
|
+
language: ["en"],
|
92
|
+
link: [{ type: "src", content: uri }],
|
93
|
+
relations: fetch_relations(doc),
|
94
|
+
# titles: titles(reference),
|
95
|
+
# abstract: abstracts(reference),
|
96
|
+
# contributors: contributors(reference),
|
97
|
+
# dates: dates(reference),
|
98
|
+
# series: series(reference),
|
99
|
+
# keywords: reference.xpath("front/keyword").map(&:text),
|
100
|
+
)
|
101
|
+
end
|
102
|
+
|
103
|
+
def fetch_relations(doc)
|
104
|
+
doc.xpath("//table/tr/td/a[contains(., 'RFC')]").map do |r|
|
105
|
+
RelatonBib::DocumentRelation.new(
|
106
|
+
type: "merges",
|
107
|
+
bibitem: scrape_page(r.text, true),
|
108
|
+
)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def get_page(uri)
|
113
|
+
res = Net::HTTP.get_response(URI(uri))
|
114
|
+
if res.code != "200"
|
115
|
+
raise RelatonBib::RequestError, "No document found at #{uri}"
|
116
|
+
end
|
117
|
+
|
118
|
+
res.body
|
119
|
+
end
|
120
|
+
|
121
|
+
def make_uri(uri_template, reference)
|
122
|
+
uri_template.gsub("CODE", reference)
|
123
|
+
end
|
124
|
+
|
78
125
|
# @return [String]
|
79
126
|
def language(reference)
|
80
127
|
reference[:lang] || "en"
|
@@ -86,6 +133,7 @@ module RelatonIetf
|
|
86
133
|
[{ content: title.text, language: language(reference), script: "Latn" }]
|
87
134
|
end
|
88
135
|
|
136
|
+
# @return [Array<RelatonBib::FormattedString>]
|
89
137
|
def abstracts(ref)
|
90
138
|
ref.xpath("./front/abstract").map do |a|
|
91
139
|
RelatonBib::FormattedString.new(
|
@@ -232,7 +280,7 @@ module RelatonIetf
|
|
232
280
|
# @return [Array<RelatonBib::Series>]
|
233
281
|
#
|
234
282
|
def series(reference)
|
235
|
-
reference.xpath("./
|
283
|
+
reference.xpath("./seriesInfo").map do |si|
|
236
284
|
next if si[:name] == "DOI" || si[:stream] || si[:status]
|
237
285
|
|
238
286
|
RelatonBib::Series.new(
|
data/lib/relaton_ietf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ietf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -214,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
214
214
|
version: '0'
|
215
215
|
requirements: []
|
216
216
|
rubyforge_project:
|
217
|
-
rubygems_version: 2.6
|
217
|
+
rubygems_version: 2.7.6
|
218
218
|
signing_key:
|
219
219
|
specification_version: 4
|
220
220
|
summary: 'RelatonIetf: retrieve IETF Standards for bibliographic use using the BibliographicItem
|