relaton-itu 0.3.5 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile.lock +3 -3
- data/lib/relaton_itu/scrapper.rb +25 -22
- data/lib/relaton_itu/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4ca4655d0b046848fd1ddd492e4e0d3e7ae53dd28bf491bc4cc049d2eccb5810
|
4
|
+
data.tar.gz: 40c7fbd808c557a460fbfed3811c4b87d51ed2dfb937a38a77b07c7c323ae554
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08234d67ae2e8ec6e5461e80446e183c3482390a850b33e114fac0328b609409eb137eeb58cbf216e5025f4a4d97ea3fcad6d4e4cca8550400ac92ed916fee38'
|
7
|
+
data.tar.gz: 2c24f8c0b1fe4f4c25b94c128c51c939dc4980b9c5e38d0b48575c8705abb33d99b3b77ca1bc5222903048ab4b8f3ee6dfdd75c6fa0cdd6e25cf110ed7c4be0c
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-itu (0.3.5)
|
4
|
+
relaton-itu (0.3.6)
|
5
5
|
relaton-iso-bib (~> 0.3.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -35,10 +35,10 @@ GEM
|
|
35
35
|
pry (~> 0.10)
|
36
36
|
public_suffix (4.0.1)
|
37
37
|
rake (10.5.0)
|
38
|
-
relaton-bib (0.3.
|
38
|
+
relaton-bib (0.3.11)
|
39
39
|
addressable
|
40
40
|
nokogiri
|
41
|
-
relaton-iso-bib (0.3.
|
41
|
+
relaton-iso-bib (0.3.11)
|
42
42
|
isoics (~> 0.1.6)
|
43
43
|
relaton-bib (~> 0.3.0)
|
44
44
|
ruby_deep_clone (~> 0.8.0)
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -48,18 +48,18 @@ module RelatonItu
|
|
48
48
|
# @return [Hash]
|
49
49
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
50
50
|
def parse_page(hit_data)
|
51
|
-
doc = get_page hit_data[:url]
|
51
|
+
url, doc = get_page hit_data[:url]
|
52
52
|
|
53
53
|
# Fetch edition.
|
54
54
|
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
55
55
|
|
56
56
|
ItuBibliographicItem.new(
|
57
57
|
fetched: Date.today.to_s,
|
58
|
-
docid: fetch_docid(
|
58
|
+
docid: fetch_docid(doc),
|
59
59
|
edition: edition,
|
60
60
|
language: ["en"],
|
61
61
|
script: ["Latn"],
|
62
|
-
title: fetch_titles(
|
62
|
+
title: fetch_titles(doc),
|
63
63
|
doctype: hit_data[:type],
|
64
64
|
docstatus: fetch_status(doc),
|
65
65
|
ics: [], # fetch_ics(doc),
|
@@ -68,7 +68,7 @@ module RelatonItu
|
|
68
68
|
editorialgroup: fetch_workgroup(doc),
|
69
69
|
abstract: fetch_abstract(doc),
|
70
70
|
copyright: fetch_copyright(hit_data[:code], doc),
|
71
|
-
link: fetch_link(doc,
|
71
|
+
link: fetch_link(doc, url),
|
72
72
|
relation: fetch_relations(doc),
|
73
73
|
)
|
74
74
|
end
|
@@ -119,7 +119,7 @@ module RelatonItu
|
|
119
119
|
uri = URI resp["location"]
|
120
120
|
resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
|
121
121
|
end
|
122
|
-
Nokogiri::HTML(resp.body)
|
122
|
+
[uri.to_s, Nokogiri::HTML(resp.body)]
|
123
123
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
124
124
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
125
125
|
OpenSSL::SSL::SSLError
|
@@ -130,12 +130,15 @@ module RelatonItu
|
|
130
130
|
# Fetch docid.
|
131
131
|
# @param doc [Nokogiri::HTML::Document]
|
132
132
|
# @return [Hash]
|
133
|
-
def fetch_docid(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
133
|
+
def fetch_docid(doc)
|
134
|
+
doc.xpath(
|
135
|
+
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
136
|
+
"//td[.='Identical standard:']/following-sibling::td",
|
137
|
+
).map do |code|
|
138
|
+
id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
|
139
|
+
type = id.match(%r{^\w+}).to_s
|
140
|
+
RelatonBib::DocumentIdentifier.new(type: type, id: id)
|
141
|
+
end
|
139
142
|
end
|
140
143
|
|
141
144
|
# Fetch status.
|
@@ -186,16 +189,12 @@ module RelatonItu
|
|
186
189
|
# @return [Array<Hash>]
|
187
190
|
def fetch_relations(doc)
|
188
191
|
doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
|
189
|
-
r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
|
190
|
-
type = case r_type
|
191
|
-
when "in force" then "published"
|
192
|
-
else r_type
|
193
|
-
end
|
192
|
+
# r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
|
194
193
|
ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
|
195
194
|
# url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
|
196
195
|
fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
|
197
196
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(formattedref: fref)
|
198
|
-
{ type:
|
197
|
+
{ type: "complements", bibitem: bibitem }
|
199
198
|
end
|
200
199
|
end
|
201
200
|
# rubocop:enable Metrics/MethodLength
|
@@ -208,10 +207,14 @@ module RelatonItu
|
|
208
207
|
# end
|
209
208
|
|
210
209
|
# Fetch titles.
|
211
|
-
# @param
|
210
|
+
# @param doc [Nokogiri::HTML::Document]
|
212
211
|
# @return [Array<Hash>]
|
213
|
-
def fetch_titles(
|
214
|
-
|
212
|
+
def fetch_titles(doc)
|
213
|
+
# t = hit_data[:title].match(%r{(?<=\(\d{2}\/\d{4}\): ).*}).to_s
|
214
|
+
# t = hit_data[:title] if t.empty?
|
215
|
+
t = doc.at("//td[@class='title']")
|
216
|
+
return [] unless t
|
217
|
+
titles = t.text.split " - "
|
215
218
|
case titles.size
|
216
219
|
when 0
|
217
220
|
intro, main, part = nil, "", nil
|
@@ -298,8 +301,8 @@ module RelatonItu
|
|
298
301
|
# @return [Array<Hash>]
|
299
302
|
def fetch_link(doc, url)
|
300
303
|
links = [{ type: "src", content: url }]
|
301
|
-
obp_elms = doc.at('//
|
302
|
-
links << { type: "obp", content: DOMAIN + obp_elms[:href] } if obp_elms
|
304
|
+
obp_elms = doc.at('//a[@title="Persistent link to download the PDF file"]')
|
305
|
+
links << { type: "obp", content: DOMAIN + obp_elms[:href].strip } if obp_elms
|
303
306
|
links
|
304
307
|
end
|
305
308
|
|
data/lib/relaton_itu/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -216,8 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
216
|
- !ruby/object:Gem::Version
|
217
217
|
version: '0'
|
218
218
|
requirements: []
|
219
|
-
|
220
|
-
rubygems_version: 2.6.12
|
219
|
+
rubygems_version: 3.0.6
|
221
220
|
signing_key:
|
222
221
|
specification_version: 4
|
223
222
|
summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
|