relaton-itu 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile.lock +3 -3
- data/lib/relaton_itu/scrapper.rb +25 -22
- data/lib/relaton_itu/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4ca4655d0b046848fd1ddd492e4e0d3e7ae53dd28bf491bc4cc049d2eccb5810
|
4
|
+
data.tar.gz: 40c7fbd808c557a460fbfed3811c4b87d51ed2dfb937a38a77b07c7c323ae554
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08234d67ae2e8ec6e5461e80446e183c3482390a850b33e114fac0328b609409eb137eeb58cbf216e5025f4a4d97ea3fcad6d4e4cca8550400ac92ed916fee38'
|
7
|
+
data.tar.gz: 2c24f8c0b1fe4f4c25b94c128c51c939dc4980b9c5e38d0b48575c8705abb33d99b3b77ca1bc5222903048ab4b8f3ee6dfdd75c6fa0cdd6e25cf110ed7c4be0c
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-itu (0.3.5)
|
4
|
+
relaton-itu (0.3.6)
|
5
5
|
relaton-iso-bib (~> 0.3.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -35,10 +35,10 @@ GEM
|
|
35
35
|
pry (~> 0.10)
|
36
36
|
public_suffix (4.0.1)
|
37
37
|
rake (10.5.0)
|
38
|
-
relaton-bib (0.3.
|
38
|
+
relaton-bib (0.3.11)
|
39
39
|
addressable
|
40
40
|
nokogiri
|
41
|
-
relaton-iso-bib (0.3.
|
41
|
+
relaton-iso-bib (0.3.11)
|
42
42
|
isoics (~> 0.1.6)
|
43
43
|
relaton-bib (~> 0.3.0)
|
44
44
|
ruby_deep_clone (~> 0.8.0)
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -48,18 +48,18 @@ module RelatonItu
|
|
48
48
|
# @return [Hash]
|
49
49
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
50
50
|
def parse_page(hit_data)
|
51
|
-
doc = get_page hit_data[:url]
|
51
|
+
url, doc = get_page hit_data[:url]
|
52
52
|
|
53
53
|
# Fetch edition.
|
54
54
|
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
55
55
|
|
56
56
|
ItuBibliographicItem.new(
|
57
57
|
fetched: Date.today.to_s,
|
58
|
-
docid: fetch_docid(
|
58
|
+
docid: fetch_docid(doc),
|
59
59
|
edition: edition,
|
60
60
|
language: ["en"],
|
61
61
|
script: ["Latn"],
|
62
|
-
title: fetch_titles(
|
62
|
+
title: fetch_titles(doc),
|
63
63
|
doctype: hit_data[:type],
|
64
64
|
docstatus: fetch_status(doc),
|
65
65
|
ics: [], # fetch_ics(doc),
|
@@ -68,7 +68,7 @@ module RelatonItu
|
|
68
68
|
editorialgroup: fetch_workgroup(doc),
|
69
69
|
abstract: fetch_abstract(doc),
|
70
70
|
copyright: fetch_copyright(hit_data[:code], doc),
|
71
|
-
link: fetch_link(doc,
|
71
|
+
link: fetch_link(doc, url),
|
72
72
|
relation: fetch_relations(doc),
|
73
73
|
)
|
74
74
|
end
|
@@ -119,7 +119,7 @@ module RelatonItu
|
|
119
119
|
uri = URI resp["location"]
|
120
120
|
resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
|
121
121
|
end
|
122
|
-
Nokogiri::HTML(resp.body)
|
122
|
+
[uri.to_s, Nokogiri::HTML(resp.body)]
|
123
123
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
124
124
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
125
125
|
OpenSSL::SSL::SSLError
|
@@ -130,12 +130,15 @@ module RelatonItu
|
|
130
130
|
# Fetch docid.
|
131
131
|
# @param doc [Nokogiri::HTML::Document]
|
132
132
|
# @return [Hash]
|
133
|
-
def fetch_docid(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
133
|
+
def fetch_docid(doc)
|
134
|
+
doc.xpath(
|
135
|
+
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
136
|
+
"//td[.='Identical standard:']/following-sibling::td",
|
137
|
+
).map do |code|
|
138
|
+
id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
|
139
|
+
type = id.match(%r{^\w+}).to_s
|
140
|
+
RelatonBib::DocumentIdentifier.new(type: type, id: id)
|
141
|
+
end
|
139
142
|
end
|
140
143
|
|
141
144
|
# Fetch status.
|
@@ -186,16 +189,12 @@ module RelatonItu
|
|
186
189
|
# @return [Array<Hash>]
|
187
190
|
def fetch_relations(doc)
|
188
191
|
doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
|
189
|
-
r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
|
190
|
-
type = case r_type
|
191
|
-
when "in force" then "published"
|
192
|
-
else r_type
|
193
|
-
end
|
192
|
+
# r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
|
194
193
|
ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
|
195
194
|
# url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
|
196
195
|
fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
|
197
196
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(formattedref: fref)
|
198
|
-
{ type:
|
197
|
+
{ type: "complements", bibitem: bibitem }
|
199
198
|
end
|
200
199
|
end
|
201
200
|
# rubocop:enable Metrics/MethodLength
|
@@ -208,10 +207,14 @@ module RelatonItu
|
|
208
207
|
# end
|
209
208
|
|
210
209
|
# Fetch titles.
|
211
|
-
# @param
|
210
|
+
# @param doc [Nokogiri::HTML::Document]
|
212
211
|
# @return [Array<Hash>]
|
213
|
-
def fetch_titles(
|
214
|
-
|
212
|
+
def fetch_titles(doc)
|
213
|
+
# t = hit_data[:title].match(%r{(?<=\(\d{2}\/\d{4}\): ).*}).to_s
|
214
|
+
# t = hit_data[:title] if t.empty?
|
215
|
+
t = doc.at("//td[@class='title']")
|
216
|
+
return [] unless t
|
217
|
+
titles = t.text.split " - "
|
215
218
|
case titles.size
|
216
219
|
when 0
|
217
220
|
intro, main, part = nil, "", nil
|
@@ -298,8 +301,8 @@ module RelatonItu
|
|
298
301
|
# @return [Array<Hash>]
|
299
302
|
def fetch_link(doc, url)
|
300
303
|
links = [{ type: "src", content: url }]
|
301
|
-
obp_elms = doc.at('//
|
302
|
-
links << { type: "obp", content: DOMAIN + obp_elms[:href] } if obp_elms
|
304
|
+
obp_elms = doc.at('//a[@title="Persistent link to download the PDF file"]')
|
305
|
+
links << { type: "obp", content: DOMAIN + obp_elms[:href].strip } if obp_elms
|
303
306
|
links
|
304
307
|
end
|
305
308
|
|
data/lib/relaton_itu/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -216,8 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
216
|
- !ruby/object:Gem::Version
|
217
217
|
version: '0'
|
218
218
|
requirements: []
|
219
|
-
|
220
|
-
rubygems_version: 2.6.12
|
219
|
+
rubygems_version: 3.0.6
|
221
220
|
signing_key:
|
222
221
|
specification_version: 4
|
223
222
|
summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
|